zenx 0.6.9__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {zenx-0.6.9 → zenx-0.7.1}/PKG-INFO +1 -1
- {zenx-0.6.9 → zenx-0.7.1}/pyproject.toml +1 -1
- zenx-0.7.1/zenx/pipelines/manager.py +44 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/pipelines/preprocess.py +2 -2
- {zenx-0.6.9 → zenx-0.7.1}/zenx.egg-info/PKG-INFO +1 -1
- zenx-0.6.9/zenx/pipelines/manager.py +0 -40
- {zenx-0.6.9 → zenx-0.7.1}/setup.cfg +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/cli.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/clients/__init__.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/clients/database.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/clients/http.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/debug_runner.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/discovery.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/engine.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/exceptions.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/logger.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/pipelines/__init__.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/pipelines/base.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/pipelines/google_rpc.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/pipelines/websocket.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/resources/proto/__init__.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/resources/proto/feed_pb2.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/resources/proto/feed_pb2_grpc.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/settings.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/spiders/__init__.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/spiders/base.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx/utils.py +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx.egg-info/SOURCES.txt +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx.egg-info/dependency_links.txt +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx.egg-info/entry_points.txt +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx.egg-info/requires.txt +0 -0
- {zenx-0.6.9 → zenx-0.7.1}/zenx.egg-info/top_level.txt +0 -0
@@ -0,0 +1,44 @@
|
|
1
|
+
import asyncio
|
2
|
+
from typing import Dict, List
|
3
|
+
from structlog import BoundLogger
|
4
|
+
|
5
|
+
from zenx.exceptions import DropItem
|
6
|
+
from zenx.pipelines.base import Pipeline
|
7
|
+
from zenx.clients.database import DBClient
|
8
|
+
from zenx.settings import Settings
|
9
|
+
|
10
|
+
|
11
|
+
class PipelineManager:
    """Create, start, feed and close the pipelines selected by name.

    The pipeline stored under the key ``"preprocess"`` (if present) is awaited
    inline for every item. Every other pipeline is scheduled as a background
    task, so ``process_item`` returns without waiting for those pipelines.
    """

    def __init__(self, pipeline_names: List[str], logger: BoundLogger, db: DBClient, settings: Settings) -> None:
        """Instantiate one pipeline per entry in *pipeline_names*.

        Args:
            pipeline_names: Names resolved through ``Pipeline.get_pipeline``.
            logger: Logger passed to every pipeline and used by the manager.
            db: Database client passed to every pipeline.
            settings: Settings object passed to every pipeline.
        """
        self.logger = logger
        self.pipelines = {name: Pipeline.get_pipeline(name)(logger, db, settings) for name in pipeline_names}
        self.settings = settings
        self._fire_and_forget_pipelines = [p for p in self.pipelines.values() if p.name != "preprocess"]
        # Strong references to in-flight background tasks. The event loop only
        # keeps weak references, so an unreferenced task may be garbage
        # collected before it finishes.
        self._background_tasks = set()

    async def start_pipelines(self) -> None:
        """Await ``start()`` on every pipeline, one after another."""
        for pipeline in self.pipelines.values():
            await pipeline.start()

    async def process_item(self, item: Dict, spider: str) -> None:
        """Run *item* through preprocessing, then hand it to the other pipelines.

        Args:
            item: The scraped item.
            spider: Name of the spider that produced the item.

        Raises:
            Exception: Any error other than ``DropItem`` raised by the
                preprocess pipeline is logged and re-raised.
        """
        preprocess_pipeline = self.pipelines.get("preprocess")
        if preprocess_pipeline:
            try:
                item = await preprocess_pipeline.process_item(item, spider)
            except DropItem:
                self.logger.debug("dropped", id=item.get("_id"), pipeline=preprocess_pipeline.name)
                return
            except Exception:
                self.logger.exception("process_item", item=item, pipeline=preprocess_pipeline.name)
                raise
        for pipeline in self._fire_and_forget_pipelines:
            self._spawn(pipeline, item, spider)

    def _spawn(self, pipeline, item: Dict, spider: str) -> None:
        """Schedule ``pipeline.process_item`` as a tracked background task."""
        task = asyncio.create_task(pipeline.process_item(item, spider))
        self._background_tasks.add(task)
        # Bind the pipeline name and item now so the callback reports the
        # values that belong to this particular task.
        task.add_done_callback(
            lambda done, name=pipeline.name, payload=item: self._on_task_done(done, name, payload)
        )

    def _on_task_done(self, task, pipeline_name: str, item: Dict) -> None:
        """Forget a finished background task and log its failure, if any."""
        self._background_tasks.discard(task)
        if task.cancelled():
            return
        exc = task.exception()
        if exc is None:
            return
        if isinstance(exc, DropItem):
            self.logger.debug("dropped", id=item.get("_id"), pipeline=pipeline_name)
        else:
            # Not inside an ``except`` block here, so pass the exception
            # explicitly instead of relying on ``logger.exception``.
            self.logger.error("process_item", item=item, pipeline=pipeline_name, exc_info=exc)

    async def close_pipelines(self) -> None:
        """Wait for outstanding background tasks, then close every pipeline."""
        pending = list(self._background_tasks)
        if pending:
            # Failures are reported by ``_on_task_done``; collecting them here
            # keeps one failing task from aborting shutdown.
            await asyncio.gather(*pending, return_exceptions=True)
        for pipeline in self.pipelines.values():
            await pipeline.close()
|
@@ -18,9 +18,9 @@ class PreprocessPipeline(Pipeline):
|
|
18
18
|
self.drop_if_scraped_too_late(item)
|
19
19
|
_id = item.get("_id")
|
20
20
|
if _id:
|
21
|
-
|
21
|
+
inserted = await self.db.insert(_id, spider)
|
22
|
+
if not inserted:
|
22
23
|
raise DropItem
|
23
|
-
await self.db.insert(_id, spider)
|
24
24
|
|
25
25
|
scraped_time = item['scraped_at'] - item['responded_at']
|
26
26
|
self.logger.info("scraped", id=item.get("_id"), time_ms=scraped_time, pipeline=self.name)
|
@@ -1,40 +0,0 @@
|
|
1
|
-
from typing import Dict, List
|
2
|
-
from structlog import BoundLogger
|
3
|
-
|
4
|
-
from zenx.exceptions import DropItem
|
5
|
-
from zenx.pipelines.base import Pipeline
|
6
|
-
from zenx.clients.database import DBClient
|
7
|
-
from zenx.settings import Settings
|
8
|
-
|
9
|
-
|
10
|
-
class PipelineManager:
|
11
|
-
|
12
|
-
|
13
|
-
def __init__(self, pipeline_names: List[str], logger: BoundLogger, db: DBClient, settings: Settings) -> None:
|
14
|
-
self.logger = logger
|
15
|
-
self.pipelines = [Pipeline.get_pipeline(name)(logger, db, settings) for name in pipeline_names]
|
16
|
-
self.settings = settings
|
17
|
-
|
18
|
-
|
19
|
-
async def start_pipelines(self) -> None:
|
20
|
-
""" connect and monitor """
|
21
|
-
for pipeline in self.pipelines:
|
22
|
-
await pipeline.start()
|
23
|
-
|
24
|
-
|
25
|
-
async def process_item(self, item: Dict, spider: str) -> Dict:
|
26
|
-
for pipeline in self.pipelines:
|
27
|
-
try:
|
28
|
-
item = await pipeline.process_item(item, spider)
|
29
|
-
except DropItem:
|
30
|
-
self.logger.debug("dropped", id=item.get("_id"))
|
31
|
-
break
|
32
|
-
except Exception:
|
33
|
-
self.logger.exception("process_item", pipeline=pipeline.name, item=item)
|
34
|
-
raise
|
35
|
-
return item
|
36
|
-
|
37
|
-
|
38
|
-
async def close_pipelines(self) -> None:
|
39
|
-
for pipeline in self.pipelines:
|
40
|
-
await pipeline.close()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|