warren 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runtime_scripts/__init__.py +0 -0
- runtime_scripts/lib/__init__.py +0 -0
- runtime_scripts/lib/cli.py +30 -0
- runtime_scripts/lib/logging_setup.py +30 -0
- runtime_scripts/lib/pipeline.py +178 -0
- runtime_scripts/lib/runner.py +86 -0
- runtime_scripts/purge_queues.py +214 -0
- runtime_scripts/start_job_publication_worker.py +165 -0
- runtime_scripts/start_job_status_worker.py +112 -0
- runtime_scripts/start_retry_worker.py +113 -0
- runtime_scripts/start_worker.py +255 -0
- warren/__init__.py +0 -0
- warren/common.py +224 -0
- warren/constants.py +13 -0
- warren/docs/cache.md +257 -0
- warren/docs/document_store.md +477 -0
- warren/docs/rabbitmq.md +104 -0
- warren/docs/results_store.md +216 -0
- warren/docs/retry_design.md +520 -0
- warren/docs/workers.md +70 -0
- warren/exceptions.py +30 -0
- warren/jobs/__init__.py +0 -0
- warren/jobs/publishing/__init__.py +18 -0
- warren/jobs/publishing/job_documents_publisher.py +259 -0
- warren/jobs/publishing/job_publication_worker.py +140 -0
- warren/jobs/publishing/job_publication_worker_runner.py +226 -0
- warren/jobs/status/__init__.py +12 -0
- warren/jobs/status/job_status_worker.py +280 -0
- warren/jobs/status/job_status_worker_runner.py +211 -0
- warren/processors/__init__.py +10 -0
- warren/processors/base.py +56 -0
- warren/pubsub/__init__.py +0 -0
- warren/pubsub/base.py +85 -0
- warren/pubsub/common.py +90 -0
- warren/pubsub/rabbitmq/__init__.py +40 -0
- warren/pubsub/rabbitmq/aio_pika/__init__.py +33 -0
- warren/pubsub/rabbitmq/aio_pika/connection.py +52 -0
- warren/pubsub/rabbitmq/aio_pika/consumer.py +444 -0
- warren/pubsub/rabbitmq/aio_pika/publisher.py +159 -0
- warren/pubsub/rabbitmq/aio_pika/purge.py +101 -0
- warren/pubsub/rabbitmq/aio_pika/topology.py +73 -0
- warren/pubsub/rabbitmq/config.py +85 -0
- warren/retry_management/__init__.py +12 -0
- warren/retry_management/retry_worker.py +335 -0
- warren/retry_management/retry_worker_runner.py +227 -0
- warren/runtime/README.md +133 -0
- warren/runtime/USAGE.md +349 -0
- warren/runtime/__init__.py +54 -0
- warren/runtime/config.py +72 -0
- warren/runtime/infrastructure.py +90 -0
- warren/runtime/runner.py +317 -0
- warren/runtime/spec.py +124 -0
- warren/storage/__init__.py +6 -0
- warren/storage/cache/__init__.py +0 -0
- warren/storage/cache/interface.py +181 -0
- warren/storage/cache/redis.py +303 -0
- warren/storage/cached_document_store.py +188 -0
- warren/storage/document_store/__init__.py +18 -0
- warren/storage/document_store/interface.py +137 -0
- warren/storage/document_store/mongodb.py +372 -0
- warren/storage/documents/__init__.py +0 -0
- warren/storage/documents/factories.py +44 -0
- warren/storage/documents/fetcher.py +120 -0
- warren/storage/documents/interface.py +94 -0
- warren/storage/documents/location.py +71 -0
- warren/storage/documents/resolve_gcs.py +50 -0
- warren/storage/documents/resolvers.py +60 -0
- warren/storage/documents/sources.py +86 -0
- warren/storage/exceptions.py +37 -0
- warren/storage/job_results/__init__.py +8 -0
- warren/storage/job_results/interface.py +162 -0
- warren/storage/job_results/mongodb.py +355 -0
- warren/storage/jobs/__init__.py +12 -0
- warren/storage/jobs/interface.py +139 -0
- warren/storage/jobs/mongodb.py +187 -0
- warren/storage/mongo_errors.py +70 -0
- warren/storage/publishing_tracker/__init__.py +8 -0
- warren/storage/publishing_tracker/interface.py +61 -0
- warren/storage/publishing_tracker/mongodb.py +122 -0
- warren/storage/results/__init__.py +18 -0
- warren/storage/results/binary.py +214 -0
- warren/storage/results/default.py +325 -0
- warren/storage/results/factories.py +121 -0
- warren/storage/results/interface.py +111 -0
- warren/storage/retry.py +63 -0
- warren/storage/utils.py +6 -0
- warren/workers/__init__.py +0 -0
- warren/workers/messages.py +303 -0
- warren/workers/runners.py +156 -0
- warren/workers/workers.py +252 -0
- warren-0.1.1.dist-info/METADATA +126 -0
- warren-0.1.1.dist-info/RECORD +95 -0
- warren-0.1.1.dist-info/WHEEL +4 -0
- warren-0.1.1.dist-info/entry_points.txt +6 -0
- warren-0.1.1.dist-info/licenses/LICENSE +202 -0
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Common CLI argument helpers for launcher scripts.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def add_common_args(parser: argparse.ArgumentParser) -> None:
    """Register the options every launcher script understands.

    The parser is modified in place; nothing is returned. Three options
    are added, in this order: ``--config-file``, ``--worker-name`` and
    ``--debug``.

    :param parser: the argument parser to extend.
    """
    config_file_help = "Path to RuntimeConfig YAML. Default: ./pipeline/config.yaml"
    worker_name_help = "Unique worker name (default: auto-generated)."
    debug_help = "Enable DEBUG logging (default: INFO)."

    # The parser-level default stays None; the help text only names the
    # fallback path.
    parser.add_argument("--config-file", type=Path, default=None, help=config_file_help)
    parser.add_argument("--worker-name", default=None, help=worker_name_help)
    parser.add_argument("--debug", action="store_true", default=False, help=debug_help)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Logging setup for runtime script entry points.
|
|
3
|
+
|
|
4
|
+
Workaround for basics.logging auto-configuring the root logger at
|
|
5
|
+
DEBUG on import. Call ``configure_logging()`` early in ``main()``
|
|
6
|
+
before any substantive log output.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def resolve_log_level(*, debug: bool) -> int:
    """Translate the ``--debug`` flag into a ``logging`` level constant.

    :param debug: whether debug logging is enabled.
    :return: ``logging.DEBUG`` when ``debug`` is truthy, otherwise
        ``logging.INFO``.
    """
    if debug:
        return logging.DEBUG
    return logging.INFO
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def configure_logging(*, debug: bool) -> None:
    """Set up logging for a CLI entry point.

    Calls ``basics.logging.setup_logging`` with the level derived from
    ``debug`` and then sets the same level on the root logger.

    :param debug: whether debug logging is enabled.
    """
    # Imported lazily, inside the function, exactly as the launcher
    # scripts expect.
    from basics.logging import setup_logging

    level = resolve_log_level(debug=debug)
    setup_logging(log_level=level)

    # Set the root logger explicitly as well, after setup_logging has run.
    root_logger = logging.getLogger()
    root_logger.setLevel(level)
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pipeline spec and config loading utilities.
|
|
3
|
+
|
|
4
|
+
Shared by all launcher scripts for resolving pipeline specs from
|
|
5
|
+
directories, files, or dotted module paths.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import importlib
|
|
9
|
+
import importlib.util
|
|
10
|
+
import logging
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from basics.logging import get_logger
|
|
14
|
+
|
|
15
|
+
from warren.runtime.config import RuntimeConfig
|
|
16
|
+
from warren.runtime.spec import PipelineSpec
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
module_logger: logging.Logger = get_logger(__name__)
|
|
20
|
+
|
|
21
|
+
DEFAULT_PIPELINE_DIR: str = "./pipeline"
|
|
22
|
+
DEFAULT_SPEC_MODULE: str = "pipeline_spec"
|
|
23
|
+
DEFAULT_SPEC_VAR: str = "PIPELINE"
|
|
24
|
+
DEFAULT_CONFIG_FILE: str = "config.yaml"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def split_var_suffix(spec_str: str) -> tuple[str, str]:
    """Split ``path_or_module:VAR`` into ``(path_or_module, var_name)``.

    The split happens at the *last* colon. A colon whose right-hand side
    contains a path separator is not a variable separator (it is, for
    example, the drive-letter colon of ``C:\\work\\pipeline``), so in that
    case the whole string is kept as the location.

    :param spec_str: location string, optionally suffixed with ``:VAR``.
    :return: tuple of (location, variable name). The variable name is
        ``DEFAULT_SPEC_VAR`` when no ``:VAR`` suffix is present.
    """
    location, separator, var_name = spec_str.rpartition(":")

    if not separator:
        # No colon at all: the whole string is the location.
        return spec_str, DEFAULT_SPEC_VAR

    if "/" in var_name or "\\" in var_name:
        # The text after the colon is part of a path, not an attribute
        # name; splitting here would cut a Windows drive letter off.
        return spec_str, DEFAULT_SPEC_VAR

    return location, var_name
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def import_file(file_path: Path) -> object:
    """Execute a ``.py`` file as a module and hand back the module object.

    The module is named after the file stem. Any error raised while the
    file's own code runs (import, syntax or runtime errors) is not
    caught here and reaches the caller unchanged.

    :param file_path: path of the Python source file to load.
    :return: the executed module.
    :raises ImportError: if no module spec (or no loader) can be created
        for ``file_path``.
    """
    module_spec = importlib.util.spec_from_file_location(file_path.stem, file_path)
    loader = None if module_spec is None else module_spec.loader

    if loader is None:
        raise ImportError(f"Cannot create module spec from {file_path}")

    module = importlib.util.module_from_spec(module_spec)
    loader.exec_module(module)
    return module
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def resolve_pipeline_file(directory: Path) -> Path:
    """Locate the default spec file directly inside ``directory``.

    Only the directory itself is inspected; subdirectories are not
    searched.

    :param directory: directory expected to hold the spec file.
    :return: path to ``<DEFAULT_SPEC_MODULE>.py`` inside ``directory``.
    :raises FileNotFoundError: if that file does not exist there.
    """
    spec_path = directory / f"{DEFAULT_SPEC_MODULE}.py"
    if not spec_path.is_file():
        raise FileNotFoundError(
            f"No {DEFAULT_SPEC_MODULE}.py in directory: {directory}"
        )
    return spec_path
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def load_pipeline(
    spec_str: str,
    logger: logging.Logger | None = None,
) -> tuple[PipelineSpec, Path | None]:
    """Resolve a pipeline spec from the given location string.

    Resolution order:

    1. Split off ``:VAR`` suffix (default: ``PIPELINE``).
    2. If the location exists as a directory -> look for
       ``pipeline_spec.py`` inside, import it.
    3. If it exists as a file -> import it directly.
    4. If it is spelled like a filesystem path (leading ``.`` or a path
       separator) but does not exist -> fail with ``FileNotFoundError``.
    5. Otherwise -> try ``importlib.import_module(location)``.
    6. Read ``VAR`` from the loaded module.

    :param spec_str: spec location, ``[<path>|<module>][:<var>]``.
    :param logger: optional logger override; the module logger is used
        when omitted.
    :return: tuple of (PipelineSpec, resolved_dir) where
        ``resolved_dir`` is the directory the spec was found in
        (None for dotted-module imports).
    :raises FileNotFoundError: if a path-like location does not exist,
        or a directory contains no pipeline_spec.py.
    :raises ImportError: if dotted module cannot be imported.
    :raises AttributeError: if the variable is not found in the module.
    :raises TypeError: if the variable is not a PipelineSpec.
    """
    log = logger or module_logger
    location, var_name = split_var_suffix(spec_str)

    resolved_dir: Path | None = None
    path = Path(location)

    if path.is_dir():
        spec_file = resolve_pipeline_file(path)
        log.info(
            f"Loading pipeline spec from directory: {path} "
            f"(file: {spec_file.name}, var: {var_name})"
        )
        mod = import_file(spec_file)
        resolved_dir = path

    elif path.is_file():
        log.info(f"Loading pipeline spec from file: {path} (var: {var_name})")
        mod = import_file(path)
        resolved_dir = path.parent

    else:
        # A dotted module name never starts with "." (that would be a
        # relative import, which import_module rejects without a package)
        # and never contains a path separator. Such a location is a
        # missing path, so report it as one instead of letting
        # import_module fail with an unrelated-looking error.
        if location.startswith(".") or "/" in location or "\\" in location:
            msg = f"Pipeline spec path does not exist: {location}"
            raise FileNotFoundError(msg)

        log.info(f"Loading pipeline spec from module: {location} (var: {var_name})")
        mod = importlib.import_module(location)

    module_name = getattr(mod, "__name__", location)

    if not hasattr(mod, var_name):
        msg = f"Module {module_name} has no attribute '{var_name}'"
        raise AttributeError(msg)

    pipeline = getattr(mod, var_name)

    if not isinstance(pipeline, PipelineSpec):
        msg = (
            f"{var_name} in {module_name} is "
            f"{type(pipeline).__name__}, expected PipelineSpec"
        )
        raise TypeError(msg)

    return pipeline, resolved_dir
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def resolve_config_path(
|
|
140
|
+
config_file: Path | None,
|
|
141
|
+
pipeline_dir: Path | None,
|
|
142
|
+
) -> Path:
|
|
143
|
+
"""Determine the config file path.
|
|
144
|
+
|
|
145
|
+
:param config_file: explicit config path (takes precedence).
|
|
146
|
+
:param pipeline_dir: pipeline directory to look for config.yaml in.
|
|
147
|
+
:raises FileNotFoundError: if no config.yaml found in pipeline_dir.
|
|
148
|
+
:raises ValueError: if neither config_file nor pipeline_dir provided.
|
|
149
|
+
"""
|
|
150
|
+
if config_file is not None:
|
|
151
|
+
return config_file
|
|
152
|
+
|
|
153
|
+
if pipeline_dir is not None:
|
|
154
|
+
candidate = pipeline_dir / DEFAULT_CONFIG_FILE
|
|
155
|
+
if candidate.is_file():
|
|
156
|
+
return candidate
|
|
157
|
+
msg = (
|
|
158
|
+
f"No {DEFAULT_CONFIG_FILE} in pipeline directory {pipeline_dir} "
|
|
159
|
+
f"and no --config-file specified"
|
|
160
|
+
)
|
|
161
|
+
raise FileNotFoundError(msg)
|
|
162
|
+
|
|
163
|
+
msg = (
|
|
164
|
+
"--config-file is required when pipeline spec is loaded "
|
|
165
|
+
"from a Python module (no directory to infer config.yaml from)"
|
|
166
|
+
)
|
|
167
|
+
raise ValueError(msg)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def load_config(config_file: Path) -> RuntimeConfig:
    """Build a ``RuntimeConfig`` from a YAML file on disk.

    :param config_file: path of the YAML file to read.
    :return: the result of ``RuntimeConfig.from_yaml(config_file)``.
    :raises FileNotFoundError: if the file does not exist.
    """
    if config_file.is_file():
        return RuntimeConfig.from_yaml(config_file)

    raise FileNotFoundError(f"Config file not found: {config_file}")
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Shared worker lifecycle for all launcher scripts.
|
|
3
|
+
|
|
4
|
+
Provides the single ``run()`` function that all launchers call.
|
|
5
|
+
The only thing that differs between launchers is the
|
|
6
|
+
``runner_factory_func`` that creates the runner instance.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import uuid
|
|
11
|
+
from collections.abc import Callable
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from basics.logging import get_logger
|
|
15
|
+
|
|
16
|
+
from runtime_scripts.lib.pipeline import (
|
|
17
|
+
load_config,
|
|
18
|
+
)
|
|
19
|
+
from warren.exceptions import WarrenError
|
|
20
|
+
from warren.runtime.config import RuntimeConfig
|
|
21
|
+
from warren.workers.runners import WorkerRunnerBase
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
module_logger: logging.Logger = get_logger(__name__)
|
|
25
|
+
|
|
26
|
+
RunnerFactoryFunc = Callable[[RuntimeConfig, str], WorkerRunnerBase]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
async def run(
    *,
    runner_factory_func: RunnerFactoryFunc,
    config_file: Path,
    worker_name: str | None = None,
    worker_name_prefix: str = "worker",
    debug: bool = False,
    logger: logging.Logger | None = None,
) -> None:
    """Load config, build a runner through the factory, and drive it.

    Shared entry point for the launcher scripts. The phases are: load
    the config, create the runner, ``setup()``, ``run()``, and finally
    ``teardown()``. A failure in any of the first four phases is
    re-raised as ``WarrenError`` with the original exception chained.

    :param runner_factory_func: callable that takes
        ``(config: RuntimeConfig, worker_name: str)`` and returns a
        ``WorkerRunnerBase`` instance. Use ``partial()`` to bind
        additional arguments (e.g. ``worker_spec``).
    :param config_file: path to RuntimeConfig YAML.
    :param worker_name: unique worker instance name. Defaults to
        ``<worker_name_prefix>-<uuid8>``.
    :param worker_name_prefix: prefix for auto-generated worker names.
    :param debug: enable DEBUG logging. Accepted for interface parity;
        not read inside this function.
    :param logger: optional logger override.
    :raises WarrenError: if loading config, creating the runner, setup
        or run fails.
    """
    active_log = logger or module_logger

    # Phase 1: configuration.
    try:
        runtime_config = load_config(config_file)
    except Exception as exc:
        raise WarrenError(f"Unable to load config from: {config_file}") from exc

    active_log.info(f"Loaded config from: {config_file}")

    # Phase 2: worker identity. An empty or missing name gets a
    # generated one built from the prefix and 8 hex chars of a UUID4.
    name = worker_name
    if not name:
        name = f"{worker_name_prefix}-{uuid.uuid4().hex[:8]}"

    # Phase 3: runner construction.
    try:
        worker_runner = runner_factory_func(runtime_config, name)
    except Exception as exc:
        raise WarrenError(f"Unable to create runner for: {name}") from exc

    # Phase 4: lifecycle. teardown() runs whether setup/run succeed or
    # fail, including when setup itself raised.
    try:
        try:
            await worker_runner.setup()
        except Exception as exc:
            raise WarrenError(f"Worker setup failed for: {name}") from exc

        try:
            await worker_runner.run()
        except Exception as exc:
            raise WarrenError(f"Worker run failed for: {name}") from exc
    finally:
        await worker_runner.teardown()
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
r"""
|
|
2
|
+
Purge RabbitMQ queues and exchange.
|
|
3
|
+
|
|
4
|
+
Thin CLI wrapper around the framework ``purge_queues()`` utility.
|
|
5
|
+
Reads queue names from a pipeline spec or accepts them as CLI args.
|
|
6
|
+
|
|
7
|
+
Usage::
|
|
8
|
+
|
|
9
|
+
# From pipeline spec
|
|
10
|
+
python -m runtime_scripts.purge_queues \
|
|
11
|
+
--config-file ./pipeline/config.yaml \
|
|
12
|
+
--pipeline-spec ./pipeline
|
|
13
|
+
|
|
14
|
+
# Explicit queue names
|
|
15
|
+
python -m runtime_scripts.purge_queues \
|
|
16
|
+
--config-file ./pipeline/config.yaml \
|
|
17
|
+
--queues jobs.parser jobs.chunker jobs.embedder
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import asyncio
|
|
22
|
+
import logging
|
|
23
|
+
import sys
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
from basics.logging import get_logger
|
|
27
|
+
from basics.logging_utils import summarize_exception_chain
|
|
28
|
+
|
|
29
|
+
from runtime_scripts.lib.logging_setup import (
|
|
30
|
+
configure_logging,
|
|
31
|
+
resolve_log_level,
|
|
32
|
+
)
|
|
33
|
+
from runtime_scripts.lib.pipeline import (
|
|
34
|
+
DEFAULT_PIPELINE_DIR,
|
|
35
|
+
load_config,
|
|
36
|
+
load_pipeline,
|
|
37
|
+
)
|
|
38
|
+
from warren.exceptions import WarrenError
|
|
39
|
+
from warren.pubsub.rabbitmq.aio_pika.connection import (
|
|
40
|
+
RMQConnectionManager,
|
|
41
|
+
)
|
|
42
|
+
from warren.pubsub.rabbitmq.aio_pika.purge import (
|
|
43
|
+
purge_queues,
|
|
44
|
+
)
|
|
45
|
+
from warren.pubsub.rabbitmq.config import (
|
|
46
|
+
RMQConnectionConfig,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
module_logger: logging.Logger = get_logger(__name__)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _parse_args() -> argparse.Namespace:
    """Parse the purge script's command line.

    Defines ``--config-file``, ``--pipeline-spec``, ``--queues``,
    ``--exchange`` and ``--debug``.

    :return: the parsed arguments namespace.
    """
    pipeline_spec_help = (
        "Pipeline spec location (to read queue names from worker types). "
        f"Format: [<path>|<module>][:<var>]. Default: {DEFAULT_PIPELINE_DIR}"
    )

    cli = argparse.ArgumentParser(description="Purge RabbitMQ queues and exchange")
    cli.add_argument(
        "--config-file",
        type=Path,
        default=None,
        help="Path to RuntimeConfig YAML. Default: ./pipeline/config.yaml",
    )
    cli.add_argument("--pipeline-spec", type=str, default=None, help=pipeline_spec_help)
    cli.add_argument(
        "--queues",
        nargs="+",
        default=None,
        help="Explicit queue names to purge (overrides --pipeline-spec).",
    )
    cli.add_argument(
        "--exchange",
        type=str,
        default=None,
        help="Exchange name to delete. Read from config if not specified.",
    )
    cli.add_argument(
        "--debug",
        action="store_true",
        default=False,
        help="Enable DEBUG logging (default: INFO).",
    )
    return cli.parse_args()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def describe_config(
|
|
94
|
+
config_file: Path | None,
|
|
95
|
+
pipeline_spec: str | None,
|
|
96
|
+
queues: list[str] | None,
|
|
97
|
+
exchange: str | None,
|
|
98
|
+
debug: bool,
|
|
99
|
+
logger: logging.Logger,
|
|
100
|
+
) -> None:
|
|
101
|
+
"""Log input configuration."""
|
|
102
|
+
logger.info("Configuration:")
|
|
103
|
+
logger.info(f" config_file: {config_file}")
|
|
104
|
+
logger.info(f" pipeline_spec: {pipeline_spec}")
|
|
105
|
+
logger.info(f" queues: {queues}")
|
|
106
|
+
logger.info(f" exchange: {exchange}")
|
|
107
|
+
logger.info(f" debug: {debug}")
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
async def run_purge(
    *,
    config_file: Path | None = None,
    pipeline_spec: str | None = None,
    queues: list[str] | None = None,
    exchange: str | None = None,
    debug: bool = False,
    logger: logging.Logger | None = None,
) -> None:
    """Purge RabbitMQ queues and pass an exchange name on for deletion.

    Queue names come from ``queues`` when given; otherwise they are
    built as ``<exchange name>.<worker type>`` for every entry of the
    loaded pipeline spec's ``workers``.

    :param config_file: path to RuntimeConfig YAML. Falls back to
        ``./pipeline/config.yaml``.
    :param pipeline_spec: pipeline spec location (for deriving queue
        names from worker types).
    :param queues: explicit queue names (overrides pipeline_spec).
    :param exchange: exchange name to delete. Read from config if
        not specified.
    :param debug: enable DEBUG logging. Only echoed in the
        configuration summary here.
    :param logger: optional logger override.
    :raises WarrenError: if loading the config or spec, creating the
        connection manager, connecting, or purging fails.
    """
    active_log = logger or module_logger

    describe_config(
        config_file=config_file,
        pipeline_spec=pipeline_spec,
        queues=queues,
        exchange=exchange,
        debug=debug,
        logger=active_log,
    )

    config_path = config_file or Path("./pipeline/config.yaml")
    try:
        runtime_config = load_config(config_path)
    except Exception as exc:
        raise WarrenError(f"Unable to load config from: {config_path}") from exc

    if queues is None:
        # No explicit queues: derive them from the pipeline spec.
        spec_location = pipeline_spec or DEFAULT_PIPELINE_DIR
        try:
            pipeline, _ = load_pipeline(spec_location, active_log)
        except Exception as exc:
            raise WarrenError(
                f"Unable to load pipeline spec from: {spec_location}"
            ) from exc

        queue_prefix = runtime_config.rabbitmq.exchange.name
        targets = [f"{queue_prefix}.{worker_type}" for worker_type in pipeline.workers]
    else:
        targets = queues

    exchange_to_delete = exchange or runtime_config.rabbitmq.exchange.name

    active_log.info(f"Queues to purge: {targets}")
    active_log.info(f"Exchange to delete: {exchange_to_delete}")

    connection_settings = runtime_config.rabbitmq.connection
    try:
        manager = RMQConnectionManager(
            RMQConnectionConfig(
                host=connection_settings.host,
                port=connection_settings.port,
                login=connection_settings.login,
                password=connection_settings.password,
            ),
        )
    except Exception as exc:
        raise WarrenError(
            f"Unable to create RMQ manager for: {connection_settings.host}:{connection_settings.port}"
        ) from exc

    # A failed setup() is reported without calling teardown(); only the
    # purge itself is paired with teardown().
    try:
        await manager.setup()
    except Exception as exc:
        raise WarrenError(
            f"Unable to connect to RabbitMQ at: {connection_settings.host}:{connection_settings.port}"
        ) from exc

    try:
        await purge_queues(
            connection_manager=manager,
            queue_names=targets,
            exchange_name=exchange_to_delete,
        )
    except Exception as exc:
        raise WarrenError(
            f"Failed to purge queues: {targets}, exchange: {exchange_to_delete}"
        ) from exc
    finally:
        await manager.teardown()
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def main() -> None:
    """CLI entry point: parse arguments, configure logging, run the purge.

    Rebinds the module-level ``module_logger`` to a logger created with
    the requested level. Any exception from the purge is logged as a
    one-line summary and the process exits with status 1.
    """
    global module_logger

    cli_args = _parse_args()
    configure_logging(debug=cli_args.debug)
    level = resolve_log_level(debug=cli_args.debug)
    module_logger = get_logger(__name__, log_level=level)

    # Every parsed option maps one-to-one onto a run_purge keyword.
    options = vars(cli_args)
    try:
        asyncio.run(run_purge(**options, logger=module_logger))
    except Exception as exc:
        module_logger.error(f"Purge failed: {summarize_exception_chain(exc)}")
        sys.exit(1)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
if __name__ == "__main__":
|
|
214
|
+
main()
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
r"""
|
|
2
|
+
Job publication worker launcher.
|
|
3
|
+
|
|
4
|
+
Starts a ``JobPublicationWorkerRunner`` that consumes job messages and
|
|
5
|
+
publishes their documents into the processing pipeline.
|
|
6
|
+
|
|
7
|
+
Requires a ``--publisher-factory`` pointing to an async factory function
|
|
8
|
+
matching ``DocumentsPublisherFactoryFunc``. The runner calls this factory
|
|
9
|
+
in ``setup()`` with the shared RMQ publisher, infrastructure, config,
|
|
10
|
+
and worker name.
|
|
11
|
+
|
|
12
|
+
Usage::
|
|
13
|
+
|
|
14
|
+
python -m runtime_scripts.start_job_publication_worker \
|
|
15
|
+
--config-file ./pipeline/config.yaml \
|
|
16
|
+
--publisher-factory my_pipeline.publishers.factory:create_multi_type_publisher
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import asyncio
|
|
21
|
+
import importlib
|
|
22
|
+
import logging
|
|
23
|
+
import sys
|
|
24
|
+
from functools import partial
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
|
|
27
|
+
from basics.logging import get_logger
|
|
28
|
+
from basics.logging_utils import summarize_exception_chain
|
|
29
|
+
|
|
30
|
+
from runtime_scripts.lib.cli import (
|
|
31
|
+
add_common_args,
|
|
32
|
+
)
|
|
33
|
+
from runtime_scripts.lib.logging_setup import (
|
|
34
|
+
configure_logging,
|
|
35
|
+
resolve_log_level,
|
|
36
|
+
)
|
|
37
|
+
from runtime_scripts.lib.runner import run
|
|
38
|
+
from warren.exceptions import WarrenError
|
|
39
|
+
from warren.jobs.publishing.job_publication_worker_runner import (
|
|
40
|
+
DocumentsPublisherFactoryFunc,
|
|
41
|
+
JobPublicationWorkerRunner,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
module_logger: logging.Logger = get_logger(__name__)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _parse_args() -> argparse.Namespace:
    """Parse the job publication launcher's command line.

    Defines the required ``--publisher-factory`` option and then the
    options added by ``add_common_args``.

    :return: the parsed arguments namespace.
    """
    factory_help = (
        "Dotted path to an async factory function matching "
        "DocumentsPublisherFactoryFunc, e.g. "
        "my.module:create_multi_type_publisher. "
        "The factory receives (publisher, infra, config, "
        "worker_name) and returns a JobDocumentsPublisher."
    )

    cli = argparse.ArgumentParser(description="Start a job publication worker")
    cli.add_argument(
        "--publisher-factory",
        type=str,
        required=True,
        help=factory_help,
    )
    add_common_args(cli)
    return cli.parse_args()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def describe_config(
|
|
69
|
+
publisher_factory: str,
|
|
70
|
+
config_file: str | None,
|
|
71
|
+
worker_name: str | None,
|
|
72
|
+
debug: bool,
|
|
73
|
+
logger: logging.Logger,
|
|
74
|
+
) -> None:
|
|
75
|
+
"""Log input configuration before any resolution or work."""
|
|
76
|
+
logger.info("Configuration:")
|
|
77
|
+
logger.info(f" publisher_factory: {publisher_factory}")
|
|
78
|
+
logger.info(f" config_file: {config_file}")
|
|
79
|
+
logger.info(f" worker_name: {worker_name}")
|
|
80
|
+
logger.info(f" debug: {debug}")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _load_publisher_factory(factory_path: str) -> DocumentsPublisherFactoryFunc:
    """Resolve ``module.path:func_name`` to the named module attribute.

    The string is split at its last colon; the left part is imported as
    a module and the right part is read from it.

    :param factory_path: factory location in ``module.path:func_name``
        form.
    :return: the attribute found under ``func_name``.
    :raises ValueError: if ``factory_path`` contains no colon.
    :raises AttributeError: if the module lacks ``func_name``.
    """
    module_path, separator, func_name = factory_path.rpartition(":")
    if not separator:
        raise ValueError(
            f"Publisher factory must be module.path:func_name, got: {factory_path}"
        )

    module = importlib.import_module(module_path)
    if hasattr(module, func_name):
        return getattr(module, func_name)

    raise AttributeError(f"Module {module_path} has no attribute '{func_name}'")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
async def start_job_publication_worker(
    *,
    publisher_factory: str,
    config_file: str | None = None,
    worker_name: str | None = None,
    debug: bool = False,
    logger: logging.Logger | None = None,
) -> None:
    """Resolve the publisher factory and run a job publication worker.

    :param publisher_factory: dotted path to the publisher factory
        function (``module:func``) matching
        ``DocumentsPublisherFactoryFunc``.
    :param config_file: path to RuntimeConfig YAML. Falls back to
        ``./pipeline/config.yaml`` when empty or missing.
    :param worker_name: unique worker instance name.
    :param debug: enable DEBUG logging.
    :param logger: optional logger override.
    :raises WarrenError: if the publisher factory cannot be loaded.
    """
    active_log = logger or module_logger

    describe_config(
        publisher_factory=publisher_factory,
        config_file=config_file,
        worker_name=worker_name,
        debug=debug,
        logger=active_log,
    )

    try:
        documents_publisher_factory = _load_publisher_factory(publisher_factory)
    except Exception as exc:
        raise WarrenError(
            f"Unable to load publisher factory: {publisher_factory}"
        ) from exc

    if config_file:
        config_path = Path(config_file)
    else:
        config_path = Path("./pipeline/config.yaml")

    # Bind the publisher factory now; run() supplies the remaining
    # (config, worker_name) arguments when it creates the runner.
    build_runner = partial(
        JobPublicationWorkerRunner,
        documents_publisher_factory=documents_publisher_factory,
    )

    await run(
        runner_factory_func=build_runner,
        config_file=config_path,
        worker_name=worker_name,
        worker_name_prefix="publication-worker",
        debug=debug,
        logger=active_log,
    )
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def main() -> None:
    """CLI entry point: parse arguments, configure logging, start the worker.

    Rebinds the module-level ``module_logger`` to a logger created with
    the requested level. Any exception from the worker is logged as a
    one-line summary and the process exits with status 1.
    """
    global module_logger

    cli_args = _parse_args()
    configure_logging(debug=cli_args.debug)
    level = resolve_log_level(debug=cli_args.debug)
    module_logger = get_logger(__name__, log_level=level)

    # Every parsed option maps one-to-one onto a keyword of the
    # coroutine below.
    options = vars(cli_args)
    try:
        asyncio.run(start_job_publication_worker(**options, logger=module_logger))
    except Exception as exc:
        module_logger.error(
            f"Start job publication worker failed: {summarize_exception_chain(exc)}"
        )
        sys.exit(1)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
if __name__ == "__main__":
|
|
165
|
+
main()
|