zenx 0.10.2__py3-none-any.whl → 0.10.4__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
zenx/engine.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import asyncio
2
2
  import signal
3
- from typing import List, Optional
3
+ from typing import List
4
4
  from dotenv import load_dotenv
5
5
  import pebble
6
6
  import uvloop
@@ -26,7 +26,7 @@ class Engine:
26
26
  self.shutdown_event.set()
27
27
 
28
28
 
29
- async def _execute_spider(self, spider_name: str) -> None:
29
+ async def _execute(self, spider_name: str) -> None:
30
30
  loop = asyncio.get_running_loop()
31
31
  loop.add_signal_handler(signal.SIGINT, self._shutdown_handler)
32
32
  loop.add_signal_handler(signal.SIGTERM, self._shutdown_handler)
@@ -47,41 +47,33 @@ class Engine:
47
47
  await pm.start_pipelines()
48
48
 
49
49
  spider = spider_cls(client=client, pm=pm, logger=logger, settings=settings)
50
- crawl_task: Optional[asyncio.Task] = None
51
50
  try:
52
51
  if self.forever:
53
52
  while not self.shutdown_event.is_set():
54
- crawl_task = asyncio.create_task(spider.crawl())
53
+ tasks = [asyncio.create_task(spider.crawl()) for _ in range(settings.CONCURRENCY)]
55
54
  try:
56
- await crawl_task
55
+ results = await asyncio.gather(*tasks, return_exceptions=True)
56
+ for result in results:
57
+ if isinstance(result, Exception):
58
+ raise result
57
59
  except Exception:
58
60
  logger.exception("crawl")
59
61
  await asyncio.sleep(0.01)
60
62
  else:
61
- crawl_task = asyncio.create_task(spider.crawl())
62
- await crawl_task
63
+ await spider.crawl()
63
64
  finally:
64
65
  if self.shutdown_event.is_set():
65
66
  logger.info("shutdown", spider=spider_name)
66
-
67
- if crawl_task and not crawl_task.done():
68
- crawl_task.cancel()
69
- logger.debug("cancelled", task="crawl")
70
- try:
71
- await crawl_task
72
- except asyncio.CancelledError:
73
- pass
74
67
  if spider.background_tasks:
75
68
  logger.debug("waiting", background_tasks=len(spider.background_tasks), belong_to="spider")
76
69
  await asyncio.gather(*spider.background_tasks)
77
-
78
70
  await client.close()
79
71
  await db.close()
80
72
  await pm.close_pipelines()
81
73
 
82
74
 
83
75
  def run_spider(self, spider: str) -> None:
84
- uvloop.run(self._execute_spider(spider))
76
+ uvloop.run(self._execute(spider))
85
77
 
86
78
 
87
79
  def run_spiders(self, spiders: List[str]) -> None:
zenx/settings.py CHANGED
@@ -10,6 +10,7 @@ class Settings(BaseSettings):
10
10
  MAX_SCRAPE_DELAY: int = 10 # 10 seconds
11
11
  DQ_MAX_SIZE: int = 100 # max size of the deque for memory database
12
12
  REDIS_RECORD_EXPIRY_SECONDS: int = 604800 # 7 days (7*24*60*60)
13
+ CONCURRENCY: int = 1
13
14
 
14
15
  DB_TYPE: Literal["memory", "redis"] = "memory"
15
16
  DB_NAME: str | None = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zenx
3
- Version: 0.10.2
3
+ Version: 0.10.4
4
4
  Summary: mini-framework
5
5
  Requires-Python: >=3.12
6
6
  Requires-Dist: curl-cffi>=0.12.0
@@ -1,10 +1,10 @@
1
1
  zenx/cli.py,sha256=pHKhOTdqI6NQQoYK91waRIMpxCXLYtXEryzVaTbmvqc,2810
2
2
  zenx/debug_runner.py,sha256=B2Jd9A4_EHDa-ohLcwnFIxOV73FagTWXX2fl3qgwlpY,809
3
3
  zenx/discovery.py,sha256=YANVGzy2IG1fYruUud-11Y-ynyO6iEp3EjlHnhIQJQI,1014
4
- zenx/engine.py,sha256=rcrVrXdKORLST648XxGmC8sI54i1IVD32HccxGB0bxU,3028
4
+ zenx/engine.py,sha256=G3ryzQ9HjI8D47e4-tlTNMwtFYbhluEMe4jrU_-FhUQ,2850
5
5
  zenx/exceptions.py,sha256=BJXxzwwX2CU6inhppfblx8c8Z6Mhvsk7MAhQ1LAnhBg,37
6
6
  zenx/logger.py,sha256=lr45XGbV769NQcwn8-lAcPfFbR4yBN8LNh7o-i4Aa9M,1652
7
- zenx/settings.py,sha256=anTT_jSQxwIabBHCM7egYFUOnALxp4fF_26_2ZPtTt4,1044
7
+ zenx/settings.py,sha256=Ob4gvov-WVzA4GgpqmIgNcKaX7jWCAyYgpM9DXbbeHU,1069
8
8
  zenx/utils.py,sha256=ouvXUd3-HaxLS3dDKbshEf-CXRf44OcovziEVIRsWNs,725
9
9
  zenx/clients/__init__.py,sha256=CaAAuNa8DPyMdejR0KNSDDg_UzC3WxaTol5_QvwwwG8,132
10
10
  zenx/clients/database.py,sha256=AF-L7iYrWRNzUZKn7taveiihpu--mXXC6eWOrMNlqzQ,4806
@@ -21,8 +21,8 @@ zenx/resources/proto/feed_pb2.py,sha256=ZyICOLnyuXekkvV4bAHZ1nE1-wwzcYYRRrmRJCMr
21
21
  zenx/resources/proto/feed_pb2_grpc.py,sha256=Mim6FfBgIMj0PmTqHk036nVUMJH3A6I3ts6r1j3bQF8,7441
22
22
  zenx/spiders/__init__.py,sha256=rs5LuqdM2MQlUYiTGJrzkYhzN8_SSLTrR7wGjSRrrSo,25
23
23
  zenx/spiders/base.py,sha256=MeZ3wZOPOyOX4V2ufFXtYGCDtXHZO_mNfnXdKMkisuQ,1951
24
- zenx-0.10.2.dist-info/METADATA,sha256=6qMTNCG09Dd7MaVcaMSsnMPP_0NKSYuwo1VOKv2q_LA,1422
25
- zenx-0.10.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
26
- zenx-0.10.2.dist-info/entry_points.txt,sha256=8JXob2f1VtvzGFris-e9Usqywg7oca-cChDlH9moOZU,38
27
- zenx-0.10.2.dist-info/top_level.txt,sha256=JeXwvK86d7sB-2x-avugFnZIZa33zaHWKI8RHWJR6KY,5
28
- zenx-0.10.2.dist-info/RECORD,,
24
+ zenx-0.10.4.dist-info/METADATA,sha256=vcpC3WR_UsidsEfDdmDw0Qvlc7Sh7LYySn5wbxJKIUI,1422
25
+ zenx-0.10.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
26
+ zenx-0.10.4.dist-info/entry_points.txt,sha256=8JXob2f1VtvzGFris-e9Usqywg7oca-cChDlH9moOZU,38
27
+ zenx-0.10.4.dist-info/top_level.txt,sha256=JeXwvK86d7sB-2x-avugFnZIZa33zaHWKI8RHWJR6KY,5
28
+ zenx-0.10.4.dist-info/RECORD,,
File without changes