zenx 0.6.9__tar.gz → 0.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {zenx-0.6.9 → zenx-0.7.1}/PKG-INFO +1 -1
  2. {zenx-0.6.9 → zenx-0.7.1}/pyproject.toml +1 -1
  3. zenx-0.7.1/zenx/pipelines/manager.py +44 -0
  4. {zenx-0.6.9 → zenx-0.7.1}/zenx/pipelines/preprocess.py +2 -2
  5. {zenx-0.6.9 → zenx-0.7.1}/zenx.egg-info/PKG-INFO +1 -1
  6. zenx-0.6.9/zenx/pipelines/manager.py +0 -40
  7. {zenx-0.6.9 → zenx-0.7.1}/setup.cfg +0 -0
  8. {zenx-0.6.9 → zenx-0.7.1}/zenx/cli.py +0 -0
  9. {zenx-0.6.9 → zenx-0.7.1}/zenx/clients/__init__.py +0 -0
  10. {zenx-0.6.9 → zenx-0.7.1}/zenx/clients/database.py +0 -0
  11. {zenx-0.6.9 → zenx-0.7.1}/zenx/clients/http.py +0 -0
  12. {zenx-0.6.9 → zenx-0.7.1}/zenx/debug_runner.py +0 -0
  13. {zenx-0.6.9 → zenx-0.7.1}/zenx/discovery.py +0 -0
  14. {zenx-0.6.9 → zenx-0.7.1}/zenx/engine.py +0 -0
  15. {zenx-0.6.9 → zenx-0.7.1}/zenx/exceptions.py +0 -0
  16. {zenx-0.6.9 → zenx-0.7.1}/zenx/logger.py +0 -0
  17. {zenx-0.6.9 → zenx-0.7.1}/zenx/pipelines/__init__.py +0 -0
  18. {zenx-0.6.9 → zenx-0.7.1}/zenx/pipelines/base.py +0 -0
  19. {zenx-0.6.9 → zenx-0.7.1}/zenx/pipelines/google_rpc.py +0 -0
  20. {zenx-0.6.9 → zenx-0.7.1}/zenx/pipelines/websocket.py +0 -0
  21. {zenx-0.6.9 → zenx-0.7.1}/zenx/resources/proto/__init__.py +0 -0
  22. {zenx-0.6.9 → zenx-0.7.1}/zenx/resources/proto/feed_pb2.py +0 -0
  23. {zenx-0.6.9 → zenx-0.7.1}/zenx/resources/proto/feed_pb2_grpc.py +0 -0
  24. {zenx-0.6.9 → zenx-0.7.1}/zenx/settings.py +0 -0
  25. {zenx-0.6.9 → zenx-0.7.1}/zenx/spiders/__init__.py +0 -0
  26. {zenx-0.6.9 → zenx-0.7.1}/zenx/spiders/base.py +0 -0
  27. {zenx-0.6.9 → zenx-0.7.1}/zenx/utils.py +0 -0
  28. {zenx-0.6.9 → zenx-0.7.1}/zenx.egg-info/SOURCES.txt +0 -0
  29. {zenx-0.6.9 → zenx-0.7.1}/zenx.egg-info/dependency_links.txt +0 -0
  30. {zenx-0.6.9 → zenx-0.7.1}/zenx.egg-info/entry_points.txt +0 -0
  31. {zenx-0.6.9 → zenx-0.7.1}/zenx.egg-info/requires.txt +0 -0
  32. {zenx-0.6.9 → zenx-0.7.1}/zenx.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zenx
3
- Version: 0.6.9
3
+ Version: 0.7.1
4
4
  Summary: mini-framework
5
5
  Requires-Python: >=3.12
6
6
  Requires-Dist: curl-cffi>=0.12.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "zenx"
3
- version = "0.6.9"
3
+ version = "0.7.1"
4
4
  description = "mini-framework"
5
5
  requires-python = ">=3.12"
6
6
  dependencies = [
@@ -0,0 +1,44 @@
1
+ import asyncio
2
+ from typing import Dict, List
3
+ from structlog import BoundLogger
4
+
5
+ from zenx.exceptions import DropItem
6
+ from zenx.pipelines.base import Pipeline
7
+ from zenx.clients.database import DBClient
8
+ from zenx.settings import Settings
9
+
10
+
11
+ class PipelineManager:
12
+
13
+
14
+ def __init__(self, pipeline_names: List[str], logger: BoundLogger, db: DBClient, settings: Settings) -> None:
15
+ self.logger = logger
16
+ self.pipelines = {name:Pipeline.get_pipeline(name)(logger, db, settings) for name in pipeline_names}
17
+ self.settings = settings
18
+ self._fire_and_forget_pipelines = [p for p in self.pipelines.values() if p.name != "preprocess"]
19
+
20
+
21
+ async def start_pipelines(self) -> None:
22
+ """ connect and monitor """
23
+ for pipeline in self.pipelines.values():
24
+ await pipeline.start()
25
+
26
+
27
+ async def process_item(self, item: Dict, spider: str) -> None:
28
+ preprocess_pipeline = self.pipelines.get("preprocess")
29
+ if preprocess_pipeline:
30
+ try:
31
+ item = await preprocess_pipeline.process_item(item, spider)
32
+ except DropItem:
33
+ self.logger.debug("dropped", id=item.get("_id"), pipeline=preprocess_pipeline.name)
34
+ return
35
+ except Exception:
36
+ self.logger.exception("process_item", item=item, pipeline=preprocess_pipeline.name)
37
+ raise
38
+ for pipeline in self._fire_and_forget_pipelines:
39
+ asyncio.create_task(pipeline.process_item(item, spider))
40
+
41
+
42
+ async def close_pipelines(self) -> None:
43
+ for pipeline in self.pipelines.values():
44
+ await pipeline.close()
@@ -18,9 +18,9 @@ class PreprocessPipeline(Pipeline):
18
18
  self.drop_if_scraped_too_late(item)
19
19
  _id = item.get("_id")
20
20
  if _id:
21
- if await self.db.exists(_id, spider):
21
+ inserted = await self.db.insert(_id, spider)
22
+ if not inserted:
22
23
  raise DropItem
23
- await self.db.insert(_id, spider)
24
24
 
25
25
  scraped_time = item['scraped_at'] - item['responded_at']
26
26
  self.logger.info("scraped", id=item.get("_id"), time_ms=scraped_time, pipeline=self.name)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zenx
3
- Version: 0.6.9
3
+ Version: 0.7.1
4
4
  Summary: mini-framework
5
5
  Requires-Python: >=3.12
6
6
  Requires-Dist: curl-cffi>=0.12.0
@@ -1,40 +0,0 @@
1
- from typing import Dict, List
2
- from structlog import BoundLogger
3
-
4
- from zenx.exceptions import DropItem
5
- from zenx.pipelines.base import Pipeline
6
- from zenx.clients.database import DBClient
7
- from zenx.settings import Settings
8
-
9
-
10
- class PipelineManager:
11
-
12
-
13
- def __init__(self, pipeline_names: List[str], logger: BoundLogger, db: DBClient, settings: Settings) -> None:
14
- self.logger = logger
15
- self.pipelines = [Pipeline.get_pipeline(name)(logger, db, settings) for name in pipeline_names]
16
- self.settings = settings
17
-
18
-
19
- async def start_pipelines(self) -> None:
20
- """ connect and monitor """
21
- for pipeline in self.pipelines:
22
- await pipeline.start()
23
-
24
-
25
- async def process_item(self, item: Dict, spider: str) -> Dict:
26
- for pipeline in self.pipelines:
27
- try:
28
- item = await pipeline.process_item(item, spider)
29
- except DropItem:
30
- self.logger.debug("dropped", id=item.get("_id"))
31
- break
32
- except Exception:
33
- self.logger.exception("process_item", pipeline=pipeline.name, item=item)
34
- raise
35
- return item
36
-
37
-
38
- async def close_pipelines(self) -> None:
39
- for pipeline in self.pipelines:
40
- await pipeline.close()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes