zenx 0.6.9__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
zenx/pipelines/manager.py CHANGED
@@ -1,3 +1,4 @@
1
+ import asyncio
1
2
  from typing import Dict, List
2
3
  from structlog import BoundLogger
3
4
 
@@ -12,29 +13,32 @@ class PipelineManager:
12
13
 
13
14
  def __init__(self, pipeline_names: List[str], logger: BoundLogger, db: DBClient, settings: Settings) -> None:
14
15
  self.logger = logger
15
- self.pipelines = [Pipeline.get_pipeline(name)(logger, db, settings) for name in pipeline_names]
16
+ self.pipelines = {name:Pipeline.get_pipeline(name)(logger, db, settings) for name in pipeline_names}
16
17
  self.settings = settings
18
+ self._fire_and_forget_pipelines = [p for p in self.pipelines.values() if p.name != "preprocess"]
17
19
 
18
20
 
19
21
  async def start_pipelines(self) -> None:
20
22
  """ connect and monitor """
21
- for pipeline in self.pipelines:
23
+ for pipeline in self.pipelines.values():
22
24
  await pipeline.start()
23
25
 
24
26
 
25
- async def process_item(self, item: Dict, spider: str) -> Dict:
26
- for pipeline in self.pipelines:
27
+ async def process_item(self, item: Dict, spider: str) -> None:
28
+ preprocess_pipeline = self.pipelines.get("preprocess")
29
+ if preprocess_pipeline:
27
30
  try:
28
- item = await pipeline.process_item(item, spider)
31
+ item = await preprocess_pipeline.process_item(item, spider)
29
32
  except DropItem:
30
- self.logger.debug("dropped", id=item.get("_id"))
31
- break
33
+ self.logger.debug("dropped", id=item.get("_id"), pipeline=preprocess_pipeline.name)
34
+ return
32
35
  except Exception:
33
- self.logger.exception("process_item", pipeline=pipeline.name, item=item)
36
+ self.logger.exception("process_item", item=item, pipeline=preprocess_pipeline.name)
34
37
  raise
35
- return item
38
+ for pipeline in self._fire_and_forget_pipelines:
39
+ asyncio.create_task(pipeline.process_item(item, spider))
36
40
 
37
41
 
38
42
  async def close_pipelines(self) -> None:
39
- for pipeline in self.pipelines:
43
+ for pipeline in self.pipelines.values():
40
44
  await pipeline.close()
@@ -18,9 +18,9 @@ class PreprocessPipeline(Pipeline):
18
18
  self.drop_if_scraped_too_late(item)
19
19
  _id = item.get("_id")
20
20
  if _id:
21
- if await self.db.exists(_id, spider):
21
+ inserted = await self.db.insert(_id, spider)
22
+ if not inserted:
22
23
  raise DropItem
23
- await self.db.insert(_id, spider)
24
24
 
25
25
  scraped_time = item['scraped_at'] - item['responded_at']
26
26
  self.logger.info("scraped", id=item.get("_id"), time_ms=scraped_time, pipeline=self.name)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zenx
3
- Version: 0.6.9
3
+ Version: 0.7.1
4
4
  Summary: mini-framework
5
5
  Requires-Python: >=3.12
6
6
  Requires-Dist: curl-cffi>=0.12.0
@@ -12,16 +12,16 @@ zenx/clients/http.py,sha256=fb6COYot6vidNFRBWgoU6CYEfnYWJP0JuVkydvxsHb4,5700
12
12
  zenx/pipelines/__init__.py,sha256=IxkZ0UpEJdYjLdd-PMcC9PzzzArTBNNcpgKc7NiOe5Y,131
13
13
  zenx/pipelines/base.py,sha256=N_388z5DFMeaU6wMwcClZAbQFWKh4kpAF7eUJhpQevs,1863
14
14
  zenx/pipelines/google_rpc.py,sha256=F7p5ml9W1UliZbrDrF9MFNVKlCP5pG1WpO6rdmBgKp8,4707
15
- zenx/pipelines/manager.py,sha256=LRejhnSiXospa4cMSX9Srp0jfscGnHDggXyl1aUNhtg,1300
16
- zenx/pipelines/preprocess.py,sha256=k1Ake8H3nm_S_0ZnruyXAl7cFlBhLn-wpQZHuovIH4o,774
15
+ zenx/pipelines/manager.py,sha256=bP2WIMblbWYHOJPU44l_IF3xLD0eHBYV4CCTFZc2FHI,1662
16
+ zenx/pipelines/preprocess.py,sha256=4XoQDrz9jOI9Xu_v_HxNqu9jV5luSIOk-kvhv8eMQvE,764
17
17
  zenx/pipelines/websocket.py,sha256=74XIV7_YbBngCgqKP3ZiBpDfQte-VB2G4kjNsZyycIc,4456
18
18
  zenx/resources/proto/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
19
  zenx/resources/proto/feed_pb2.py,sha256=ZyICOLnyuXekkvV4bAHZ1nE1-wwzcYYRRrmRJCMrSoo,2810
20
20
  zenx/resources/proto/feed_pb2_grpc.py,sha256=Mim6FfBgIMj0PmTqHk036nVUMJH3A6I3ts6r1j3bQF8,7441
21
21
  zenx/spiders/__init__.py,sha256=rs5LuqdM2MQlUYiTGJrzkYhzN8_SSLTrR7wGjSRrrSo,25
22
22
  zenx/spiders/base.py,sha256=YB-KqsAzfIUTzDMy5_ElgW1mul-I4Ltft6JAJxpy4hg,1672
23
- zenx-0.6.9.dist-info/METADATA,sha256=WqdAikf-ESTs5RbByLL7hQQuvuDeFwdobnAog5CiNFM,1273
24
- zenx-0.6.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
25
- zenx-0.6.9.dist-info/entry_points.txt,sha256=8JXob2f1VtvzGFris-e9Usqywg7oca-cChDlH9moOZU,38
26
- zenx-0.6.9.dist-info/top_level.txt,sha256=JeXwvK86d7sB-2x-avugFnZIZa33zaHWKI8RHWJR6KY,5
27
- zenx-0.6.9.dist-info/RECORD,,
23
+ zenx-0.7.1.dist-info/METADATA,sha256=dL5OCg5Iv03S_Y2Ws-RB84NZ9YWtgIsGdCi8dU7xm6s,1273
24
+ zenx-0.7.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
25
+ zenx-0.7.1.dist-info/entry_points.txt,sha256=8JXob2f1VtvzGFris-e9Usqywg7oca-cChDlH9moOZU,38
26
+ zenx-0.7.1.dist-info/top_level.txt,sha256=JeXwvK86d7sB-2x-avugFnZIZa33zaHWKI8RHWJR6KY,5
27
+ zenx-0.7.1.dist-info/RECORD,,
File without changes