warren 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. runtime_scripts/__init__.py +0 -0
  2. runtime_scripts/lib/__init__.py +0 -0
  3. runtime_scripts/lib/cli.py +30 -0
  4. runtime_scripts/lib/logging_setup.py +30 -0
  5. runtime_scripts/lib/pipeline.py +178 -0
  6. runtime_scripts/lib/runner.py +86 -0
  7. runtime_scripts/purge_queues.py +214 -0
  8. runtime_scripts/start_job_publication_worker.py +165 -0
  9. runtime_scripts/start_job_status_worker.py +112 -0
  10. runtime_scripts/start_retry_worker.py +113 -0
  11. runtime_scripts/start_worker.py +255 -0
  12. warren/__init__.py +0 -0
  13. warren/common.py +224 -0
  14. warren/constants.py +13 -0
  15. warren/docs/cache.md +257 -0
  16. warren/docs/document_store.md +477 -0
  17. warren/docs/rabbitmq.md +104 -0
  18. warren/docs/results_store.md +216 -0
  19. warren/docs/retry_design.md +520 -0
  20. warren/docs/workers.md +70 -0
  21. warren/exceptions.py +30 -0
  22. warren/jobs/__init__.py +0 -0
  23. warren/jobs/publishing/__init__.py +18 -0
  24. warren/jobs/publishing/job_documents_publisher.py +259 -0
  25. warren/jobs/publishing/job_publication_worker.py +140 -0
  26. warren/jobs/publishing/job_publication_worker_runner.py +226 -0
  27. warren/jobs/status/__init__.py +12 -0
  28. warren/jobs/status/job_status_worker.py +280 -0
  29. warren/jobs/status/job_status_worker_runner.py +211 -0
  30. warren/processors/__init__.py +10 -0
  31. warren/processors/base.py +56 -0
  32. warren/pubsub/__init__.py +0 -0
  33. warren/pubsub/base.py +85 -0
  34. warren/pubsub/common.py +90 -0
  35. warren/pubsub/rabbitmq/__init__.py +40 -0
  36. warren/pubsub/rabbitmq/aio_pika/__init__.py +33 -0
  37. warren/pubsub/rabbitmq/aio_pika/connection.py +52 -0
  38. warren/pubsub/rabbitmq/aio_pika/consumer.py +444 -0
  39. warren/pubsub/rabbitmq/aio_pika/publisher.py +159 -0
  40. warren/pubsub/rabbitmq/aio_pika/purge.py +101 -0
  41. warren/pubsub/rabbitmq/aio_pika/topology.py +73 -0
  42. warren/pubsub/rabbitmq/config.py +85 -0
  43. warren/retry_management/__init__.py +12 -0
  44. warren/retry_management/retry_worker.py +335 -0
  45. warren/retry_management/retry_worker_runner.py +227 -0
  46. warren/runtime/README.md +133 -0
  47. warren/runtime/USAGE.md +349 -0
  48. warren/runtime/__init__.py +54 -0
  49. warren/runtime/config.py +72 -0
  50. warren/runtime/infrastructure.py +90 -0
  51. warren/runtime/runner.py +317 -0
  52. warren/runtime/spec.py +124 -0
  53. warren/storage/__init__.py +6 -0
  54. warren/storage/cache/__init__.py +0 -0
  55. warren/storage/cache/interface.py +181 -0
  56. warren/storage/cache/redis.py +303 -0
  57. warren/storage/cached_document_store.py +188 -0
  58. warren/storage/document_store/__init__.py +18 -0
  59. warren/storage/document_store/interface.py +137 -0
  60. warren/storage/document_store/mongodb.py +372 -0
  61. warren/storage/documents/__init__.py +0 -0
  62. warren/storage/documents/factories.py +44 -0
  63. warren/storage/documents/fetcher.py +120 -0
  64. warren/storage/documents/interface.py +94 -0
  65. warren/storage/documents/location.py +71 -0
  66. warren/storage/documents/resolve_gcs.py +50 -0
  67. warren/storage/documents/resolvers.py +60 -0
  68. warren/storage/documents/sources.py +86 -0
  69. warren/storage/exceptions.py +37 -0
  70. warren/storage/job_results/__init__.py +8 -0
  71. warren/storage/job_results/interface.py +162 -0
  72. warren/storage/job_results/mongodb.py +355 -0
  73. warren/storage/jobs/__init__.py +12 -0
  74. warren/storage/jobs/interface.py +139 -0
  75. warren/storage/jobs/mongodb.py +187 -0
  76. warren/storage/mongo_errors.py +70 -0
  77. warren/storage/publishing_tracker/__init__.py +8 -0
  78. warren/storage/publishing_tracker/interface.py +61 -0
  79. warren/storage/publishing_tracker/mongodb.py +122 -0
  80. warren/storage/results/__init__.py +18 -0
  81. warren/storage/results/binary.py +214 -0
  82. warren/storage/results/default.py +325 -0
  83. warren/storage/results/factories.py +121 -0
  84. warren/storage/results/interface.py +111 -0
  85. warren/storage/retry.py +63 -0
  86. warren/storage/utils.py +6 -0
  87. warren/workers/__init__.py +0 -0
  88. warren/workers/messages.py +303 -0
  89. warren/workers/runners.py +156 -0
  90. warren/workers/workers.py +252 -0
  91. warren-0.1.1.dist-info/METADATA +126 -0
  92. warren-0.1.1.dist-info/RECORD +95 -0
  93. warren-0.1.1.dist-info/WHEEL +4 -0
  94. warren-0.1.1.dist-info/entry_points.txt +6 -0
  95. warren-0.1.1.dist-info/licenses/LICENSE +202 -0
File without changes
File without changes
@@ -0,0 +1,30 @@
1
+ """
2
+ Common CLI argument helpers for launcher scripts.
3
+ """
4
+
5
+ import argparse
6
+ from pathlib import Path
7
+
8
+
9
def add_common_args(parser: argparse.ArgumentParser) -> None:
    """Register the options that every launcher script accepts.

    Adds ``--config-file``, ``--worker-name`` and ``--debug`` to
    ``parser``, in that order.

    :param parser: argument parser to extend in place.
    """
    # Each entry is (flag, keyword arguments for ``add_argument``).
    shared_options = (
        (
            "--config-file",
            {
                "type": Path,
                "default": None,
                "help": "Path to RuntimeConfig YAML. Default: ./pipeline/config.yaml",
            },
        ),
        (
            "--worker-name",
            {
                "default": None,
                "help": "Unique worker name (default: auto-generated).",
            },
        ),
        (
            "--debug",
            {
                "action": "store_true",
                "default": False,
                "help": "Enable DEBUG logging (default: INFO).",
            },
        ),
    )
    for flag, options in shared_options:
        parser.add_argument(flag, **options)
@@ -0,0 +1,30 @@
1
+ """
2
+ Logging setup for runtime script entry points.
3
+
4
+ Workaround for basics.logging auto-configuring the root logger at
5
+ DEBUG on import. Call ``configure_logging()`` early in ``main()``
6
+ before any substantive log output.
7
+ """
8
+
9
+ import logging
10
+
11
+
12
def resolve_log_level(*, debug: bool) -> int:
    """Map the ``--debug`` flag onto a ``logging`` level.

    :param debug: whether debug logging is enabled.
    :return: ``logging.DEBUG`` when ``debug`` is truthy, otherwise
        ``logging.INFO``.
    """
    if debug:
        return logging.DEBUG
    return logging.INFO
19
+
20
+
21
def configure_logging(*, debug: bool) -> None:
    """Set up logging for a CLI entry point.

    Calls ``basics.logging.setup_logging`` with the level derived from
    ``debug`` and then applies the same level to the root logger.

    :param debug: whether debug logging is enabled.
    """
    # Imported inside the function rather than at module import time.
    from basics.logging import setup_logging

    level = resolve_log_level(debug=debug)
    root_logger = logging.getLogger()

    setup_logging(log_level=level)
    # Applied after setup_logging so the root logger ends up at the requested
    # level (the module docstring describes basics.logging configuring the
    # root logger at DEBUG on import).
    root_logger.setLevel(level)
@@ -0,0 +1,178 @@
1
+ """
2
+ Pipeline spec and config loading utilities.
3
+
4
+ Shared by all launcher scripts for resolving pipeline specs from
5
+ directories, files, or dotted module paths.
6
+ """
7
+
8
+ import importlib
9
+ import importlib.util
10
+ import logging
11
+ from pathlib import Path
12
+
13
+ from basics.logging import get_logger
14
+
15
+ from warren.runtime.config import RuntimeConfig
16
+ from warren.runtime.spec import PipelineSpec
17
+
18
+
19
+ module_logger: logging.Logger = get_logger(__name__)
20
+
21
# Default pipeline location string (a directory path).
DEFAULT_PIPELINE_DIR: str = "./pipeline"
# Stem of the spec file looked up inside a pipeline directory
# (``<DEFAULT_SPEC_MODULE>.py``).
DEFAULT_SPEC_MODULE: str = "pipeline_spec"
# Variable name read from the spec module when no ``:VAR`` suffix is given.
DEFAULT_SPEC_VAR: str = "PIPELINE"
# Config file name looked up inside the pipeline directory when no explicit
# config path is provided.
DEFAULT_CONFIG_FILE: str = "config.yaml"
25
+
26
+
27
def split_var_suffix(spec_str: str) -> tuple[str, str]:
    """Split ``path_or_module:VAR`` into ``(path_or_module, var_name)``.

    Only a trailing ``:NAME`` whose ``NAME`` is a valid Python identifier
    is treated as a variable suffix. A colon that belongs to the location
    itself (for example the drive separator in ``C:/pipeline`` or its
    backslash form) therefore does not split the location. A trailing
    colon with nothing after it selects the default variable name.

    :param spec_str: location string, optionally suffixed with ``:VAR``.
    :return: tuple of (location, variable name). The variable name is
        ``DEFAULT_SPEC_VAR`` when no usable suffix is present.
    """
    location, separator, var_name = spec_str.rpartition(":")

    if not separator:
        # No colon at all: the whole string is the location.
        return spec_str, DEFAULT_SPEC_VAR

    if not var_name:
        # "location:" -> nothing named after the colon, use the default.
        return location, DEFAULT_SPEC_VAR

    if var_name.isidentifier():
        return location, var_name

    # The text after the last colon cannot be a variable name (e.g. it is
    # the remainder of a drive-qualified path), so keep the string intact.
    return spec_str, DEFAULT_SPEC_VAR
36
+
37
+
38
def import_file(file_path: Path) -> object:
    """Execute a ``.py`` file as a module and return the module object.

    The module is named after the file stem. This function does not add
    it to ``sys.modules``.

    Errors raised while executing the file (import, syntax or runtime
    errors of the spec module itself) propagate unchanged, so the calling
    launcher can wrap them with phase context such as
    "Unable to load pipeline spec from: ...".

    :param file_path: path of the Python source file to load.
    :return: the executed module object.
    :raises ImportError: if no module spec (or no loader) can be created
        for ``file_path``.
    """
    module_spec = importlib.util.spec_from_file_location(file_path.stem, file_path)
    loader = None if module_spec is None else module_spec.loader
    if loader is None:
        raise ImportError(f"Cannot create module spec from {file_path}")

    loaded_module = importlib.util.module_from_spec(module_spec)
    loader.exec_module(loaded_module)
    return loaded_module
57
+
58
+
59
def resolve_pipeline_file(directory: Path) -> Path:
    """Locate the spec file directly inside ``directory``.

    Only ``<directory>/<DEFAULT_SPEC_MODULE>.py`` is checked; no
    subdirectories are searched.

    :param directory: directory expected to contain the spec file.
    :return: path to the spec file.
    :raises FileNotFoundError: if that file does not exist in the
        directory.
    """
    spec_path = directory / f"{DEFAULT_SPEC_MODULE}.py"
    if not spec_path.is_file():
        raise FileNotFoundError(
            f"No {DEFAULT_SPEC_MODULE}.py in directory: {directory}"
        )
    return spec_path
71
+
72
+
73
def load_pipeline(
    spec_str: str,
    logger: logging.Logger | None = None,
) -> tuple[PipelineSpec, Path | None]:
    """Resolve a pipeline spec from the given location string.

    Resolution order:

    1. Split off ``:VAR`` suffix (default: ``PIPELINE``).
    2. If the location exists as a directory -> look for
       ``pipeline_spec.py`` inside, import it.
    3. If it exists as a file -> import it directly.
    4. If it does not exist but can only be a filesystem path (starts
       with ``.`` or contains a path separator) -> fail with
       ``FileNotFoundError``.
    5. Otherwise -> try ``importlib.import_module(location)``.
    6. Read ``VAR`` from the loaded module.

    :param spec_str: location string, ``[<path>|<module>][:<var>]``.
    :param logger: optional logger override; the module logger is used
        when omitted.
    :return: tuple of (PipelineSpec, resolved_dir) where
        ``resolved_dir`` is the directory the spec was found in
        (None for dotted-module imports).
    :raises FileNotFoundError: if directory/file does not exist or
        contains no pipeline_spec.py.
    :raises ImportError: if dotted module cannot be imported.
    :raises AttributeError: if the variable is not found in the module.
    :raises TypeError: if the variable is not a PipelineSpec.
    """
    log = logger or module_logger
    location, var_name = split_var_suffix(spec_str)

    resolved_dir: Path | None = None
    path = Path(location)

    if path.is_dir():
        spec_file = resolve_pipeline_file(path)
        log.info(
            f"Loading pipeline spec from directory: {path} "
            f"(file: {spec_file.name}, var: {var_name})"
        )
        mod = import_file(spec_file)
        resolved_dir = path

    elif path.is_file():
        log.info(f"Loading pipeline spec from file: {path} (var: {var_name})")
        mod = import_file(path)
        resolved_dir = path.parent

    elif location.startswith(".") or "/" in location or "\\" in location:
        # An absolute dotted module name never starts with "." and never
        # contains a path separator, so this can only be a missing path.
        # Handing it to import_module would fail with an unrelated error
        # (a relative-import TypeError for names such as "./pipeline")
        # instead of the documented FileNotFoundError.
        msg = f"Pipeline spec path does not exist: {location}"
        raise FileNotFoundError(msg)

    else:
        log.info(f"Loading pipeline spec from module: {location} (var: {var_name})")
        mod = importlib.import_module(location)

    module_name = getattr(mod, "__name__", location)

    if not hasattr(mod, var_name):
        msg = f"Module {module_name} has no attribute '{var_name}'"
        raise AttributeError(msg)

    pipeline = getattr(mod, var_name)

    # Reject anything that is not a PipelineSpec up front, with a message
    # naming the actual type found.
    if not isinstance(pipeline, PipelineSpec):
        msg = (
            f"{var_name} in {module_name} is "
            f"{type(pipeline).__name__}, expected PipelineSpec"
        )
        raise TypeError(msg)

    return pipeline, resolved_dir
137
+
138
+
139
+ def resolve_config_path(
140
+ config_file: Path | None,
141
+ pipeline_dir: Path | None,
142
+ ) -> Path:
143
+ """Determine the config file path.
144
+
145
+ :param config_file: explicit config path (takes precedence).
146
+ :param pipeline_dir: pipeline directory to look for config.yaml in.
147
+ :raises FileNotFoundError: if no config.yaml found in pipeline_dir.
148
+ :raises ValueError: if neither config_file nor pipeline_dir provided.
149
+ """
150
+ if config_file is not None:
151
+ return config_file
152
+
153
+ if pipeline_dir is not None:
154
+ candidate = pipeline_dir / DEFAULT_CONFIG_FILE
155
+ if candidate.is_file():
156
+ return candidate
157
+ msg = (
158
+ f"No {DEFAULT_CONFIG_FILE} in pipeline directory {pipeline_dir} "
159
+ f"and no --config-file specified"
160
+ )
161
+ raise FileNotFoundError(msg)
162
+
163
+ msg = (
164
+ "--config-file is required when pipeline spec is loaded "
165
+ "from a Python module (no directory to infer config.yaml from)"
166
+ )
167
+ raise ValueError(msg)
168
+
169
+
170
def load_config(config_file: Path) -> RuntimeConfig:
    """Build a ``RuntimeConfig`` from a YAML file.

    :param config_file: path of the YAML file, passed to
        ``RuntimeConfig.from_yaml``.
    :return: the loaded configuration.
    :raises FileNotFoundError: if the file does not exist.
    """
    if config_file.is_file():
        return RuntimeConfig.from_yaml(config_file)
    raise FileNotFoundError(f"Config file not found: {config_file}")
@@ -0,0 +1,86 @@
1
+ """
2
+ Shared worker lifecycle for all launcher scripts.
3
+
4
+ Provides the single ``run()`` function that all launchers call.
5
+ The only thing that differs between launchers is the
6
+ ``runner_factory_func`` that creates the runner instance.
7
+ """
8
+
9
+ import logging
10
+ import uuid
11
+ from collections.abc import Callable
12
+ from pathlib import Path
13
+
14
+ from basics.logging import get_logger
15
+
16
+ from runtime_scripts.lib.pipeline import (
17
+ load_config,
18
+ )
19
+ from warren.exceptions import WarrenError
20
+ from warren.runtime.config import RuntimeConfig
21
+ from warren.workers.runners import WorkerRunnerBase
22
+
23
+
24
+ module_logger: logging.Logger = get_logger(__name__)
25
+
26
+ RunnerFactoryFunc = Callable[[RuntimeConfig, str], WorkerRunnerBase]
27
+
28
+
29
async def run(
    *,
    runner_factory_func: RunnerFactoryFunc,
    config_file: Path,
    worker_name: str | None = None,
    worker_name_prefix: str = "worker",
    debug: bool = False,
    logger: logging.Logger | None = None,
) -> None:
    """Load the config, build a runner through the factory, and run it.

    Shared entry point of the launcher scripts. Every phase (config
    load, runner creation, setup, run) re-raises failures as
    ``WarrenError`` with the original exception chained as its cause.
    Once a runner has been created, ``teardown()`` is always awaited,
    including after a failed ``setup()`` or ``run()``.

    :param runner_factory_func: callable taking
        ``(config: RuntimeConfig, worker_name: str)`` and returning a
        ``WorkerRunnerBase`` instance. Use ``partial()`` to bind
        additional arguments (e.g. ``worker_spec``).
    :param config_file: path to RuntimeConfig YAML.
    :param worker_name: unique worker instance name. Defaults to
        ``<worker_name_prefix>-<uuid8>``.
    :param worker_name_prefix: prefix for auto-generated worker names.
    :param debug: enable DEBUG logging. Accepted for launcher symmetry;
        not read inside this function.
    :param logger: optional logger override.
    :raises WarrenError: if any phase fails.
    """
    active_log = logger if logger else module_logger

    # Phase 1: configuration.
    try:
        runtime_config = load_config(config_file)
    except Exception as exc:
        raise WarrenError(f"Unable to load config from: {config_file}") from exc

    active_log.info(f"Loaded config from: {config_file}")

    # Generate a name only when the caller did not supply one.
    if worker_name:
        instance_name = worker_name
    else:
        instance_name = f"{worker_name_prefix}-{uuid.uuid4().hex[:8]}"

    # Phase 2: runner construction.
    try:
        worker_runner = runner_factory_func(runtime_config, instance_name)
    except Exception as exc:
        raise WarrenError(f"Unable to create runner for: {instance_name}") from exc

    # Phases 3 and 4: setup and run, with teardown guaranteed afterwards.
    try:
        try:
            await worker_runner.setup()
        except Exception as exc:
            raise WarrenError(f"Worker setup failed for: {instance_name}") from exc

        try:
            await worker_runner.run()
        except Exception as exc:
            raise WarrenError(f"Worker run failed for: {instance_name}") from exc
    finally:
        await worker_runner.teardown()
@@ -0,0 +1,214 @@
1
+ r"""
2
+ Purge RabbitMQ queues and exchange.
3
+
4
+ Thin CLI wrapper around the framework ``purge_queues()`` utility.
5
+ Reads queue names from a pipeline spec or accepts them as CLI args.
6
+
7
+ Usage::
8
+
9
+ # From pipeline spec
10
+ python -m runtime_scripts.purge_queues \
11
+ --config-file ./pipeline/config.yaml \
12
+ --pipeline-spec ./pipeline
13
+
14
+ # Explicit queue names
15
+ python -m runtime_scripts.purge_queues \
16
+ --config-file ./pipeline/config.yaml \
17
+ --queues jobs.parser jobs.chunker jobs.embedder
18
+ """
19
+
20
+ import argparse
21
+ import asyncio
22
+ import logging
23
+ import sys
24
+ from pathlib import Path
25
+
26
+ from basics.logging import get_logger
27
+ from basics.logging_utils import summarize_exception_chain
28
+
29
+ from runtime_scripts.lib.logging_setup import (
30
+ configure_logging,
31
+ resolve_log_level,
32
+ )
33
+ from runtime_scripts.lib.pipeline import (
34
+ DEFAULT_PIPELINE_DIR,
35
+ load_config,
36
+ load_pipeline,
37
+ )
38
+ from warren.exceptions import WarrenError
39
+ from warren.pubsub.rabbitmq.aio_pika.connection import (
40
+ RMQConnectionManager,
41
+ )
42
+ from warren.pubsub.rabbitmq.aio_pika.purge import (
43
+ purge_queues,
44
+ )
45
+ from warren.pubsub.rabbitmq.config import (
46
+ RMQConnectionConfig,
47
+ )
48
+
49
+
50
+ module_logger: logging.Logger = get_logger(__name__)
51
+
52
+
53
def _parse_args() -> argparse.Namespace:
    """Parse the command line of the queue purge script.

    Options: ``--config-file``, ``--pipeline-spec``, ``--queues``,
    ``--exchange`` and ``--debug``.

    :return: parsed arguments.
    """
    pipeline_spec_help = (
        "Pipeline spec location (to read queue names from worker types). "
        f"Format: [<path>|<module>][:<var>]. Default: {DEFAULT_PIPELINE_DIR}"
    )

    cli = argparse.ArgumentParser(description="Purge RabbitMQ queues and exchange")
    cli.add_argument(
        "--config-file",
        type=Path,
        default=None,
        help="Path to RuntimeConfig YAML. Default: ./pipeline/config.yaml",
    )
    cli.add_argument(
        "--pipeline-spec",
        type=str,
        default=None,
        help=pipeline_spec_help,
    )
    # One or more names; when given they replace the spec-derived queues.
    cli.add_argument(
        "--queues",
        nargs="+",
        default=None,
        help="Explicit queue names to purge (overrides --pipeline-spec).",
    )
    cli.add_argument(
        "--exchange",
        type=str,
        default=None,
        help="Exchange name to delete. Read from config if not specified.",
    )
    cli.add_argument(
        "--debug",
        action="store_true",
        default=False,
        help="Enable DEBUG logging (default: INFO).",
    )
    return cli.parse_args()
91
+
92
+
93
+ def describe_config(
94
+ config_file: Path | None,
95
+ pipeline_spec: str | None,
96
+ queues: list[str] | None,
97
+ exchange: str | None,
98
+ debug: bool,
99
+ logger: logging.Logger,
100
+ ) -> None:
101
+ """Log input configuration."""
102
+ logger.info("Configuration:")
103
+ logger.info(f" config_file: {config_file}")
104
+ logger.info(f" pipeline_spec: {pipeline_spec}")
105
+ logger.info(f" queues: {queues}")
106
+ logger.info(f" exchange: {exchange}")
107
+ logger.info(f" debug: {debug}")
108
+
109
+
110
async def run_purge(
    *,
    config_file: Path | None = None,
    pipeline_spec: str | None = None,
    queues: list[str] | None = None,
    exchange: str | None = None,
    debug: bool = False,
    logger: logging.Logger | None = None,
) -> None:
    """Purge RabbitMQ queues and hand the exchange name to ``purge_queues``.

    Queue names come from ``queues`` when given. Otherwise one name of
    the form ``<exchange name>.<worker>`` is built for every entry of the
    pipeline spec's ``workers``. Each phase re-raises failures as
    ``WarrenError`` with the original exception chained.

    :param config_file: path to RuntimeConfig YAML; falls back to
        ``./pipeline/config.yaml``.
    :param pipeline_spec: pipeline spec location (for deriving queue
        names from worker types); falls back to ``DEFAULT_PIPELINE_DIR``.
    :param queues: explicit queue names (overrides pipeline_spec).
    :param exchange: exchange name to delete. Read from config if
        not specified.
    :param debug: enable DEBUG logging. Only logged here, not otherwise
        used.
    :param logger: optional logger override.
    :raises WarrenError: if any phase fails.
    """
    active_log = logger if logger else module_logger

    describe_config(
        config_file=config_file,
        pipeline_spec=pipeline_spec,
        queues=queues,
        exchange=exchange,
        debug=debug,
        logger=active_log,
    )

    config_path = config_file if config_file else Path("./pipeline/config.yaml")

    try:
        runtime_config = load_config(config_path)
    except Exception as exc:
        raise WarrenError(f"Unable to load config from: {config_path}") from exc

    if queues is None:
        # No explicit queues: derive them from the pipeline spec.
        spec_location = pipeline_spec or DEFAULT_PIPELINE_DIR
        try:
            pipeline, _resolved_dir = load_pipeline(spec_location, active_log)
        except Exception as exc:
            raise WarrenError(
                f"Unable to load pipeline spec from: {spec_location}"
            ) from exc

        queue_prefix = runtime_config.rabbitmq.exchange.name
        target_queues = [
            f"{queue_prefix}.{worker_type}" for worker_type in pipeline.workers
        ]
    else:
        target_queues = queues

    target_exchange = exchange or runtime_config.rabbitmq.exchange.name

    active_log.info(f"Queues to purge: {target_queues}")
    active_log.info(f"Exchange to delete: {target_exchange}")

    broker = runtime_config.rabbitmq.connection
    try:
        connection_config = RMQConnectionConfig(
            host=broker.host,
            port=broker.port,
            login=broker.login,
            password=broker.password,
        )
        manager = RMQConnectionManager(connection_config)
    except Exception as exc:
        raise WarrenError(
            f"Unable to create RMQ manager for: {broker.host}:{broker.port}"
        ) from exc

    # A failed setup is reported without calling teardown.
    try:
        await manager.setup()
    except Exception as exc:
        raise WarrenError(
            f"Unable to connect to RabbitMQ at: {broker.host}:{broker.port}"
        ) from exc

    # Once connected, teardown always runs, whether or not the purge succeeds.
    try:
        await purge_queues(
            connection_manager=manager,
            queue_names=target_queues,
            exchange_name=target_exchange,
        )
    except Exception as exc:
        raise WarrenError(
            f"Failed to purge queues: {target_queues}, exchange: {target_exchange}"
        ) from exc
    finally:
        await manager.teardown()
198
+
199
+
200
def main() -> None:
    """CLI entry point: parse arguments, configure logging, run the purge.

    Any ``Exception`` raised by the purge is logged as a one-line summary
    and the process exits with status 1.
    """
    global module_logger

    args = _parse_args()
    configure_logging(debug=args.debug)

    # Rebind the module logger at the level selected on the command line.
    level = resolve_log_level(debug=args.debug)
    module_logger = get_logger(__name__, log_level=level)

    # The parsed option names match run_purge's keyword parameters.
    cli_options = vars(args)
    try:
        asyncio.run(run_purge(**cli_options, logger=module_logger))
    except Exception as exc:
        module_logger.error(f"Purge failed: {summarize_exception_chain(exc)}")
        sys.exit(1)
211
+
212
+
213
# Run the purge CLI only when this file is executed as a script
# (e.g. ``python -m runtime_scripts.purge_queues``), not when imported.
if __name__ == "__main__":
    main()
@@ -0,0 +1,165 @@
1
+ r"""
2
+ Job publication worker launcher.
3
+
4
+ Starts a ``JobPublicationWorkerRunner`` that consumes job messages and
5
+ publishes their documents into the processing pipeline.
6
+
7
+ Requires a ``--publisher-factory`` pointing to an async factory function
8
+ matching ``DocumentsPublisherFactoryFunc``. The runner calls this factory
9
+ in ``setup()`` with the shared RMQ publisher, infrastructure, config,
10
+ and worker name.
11
+
12
+ Usage::
13
+
14
+ python -m runtime_scripts.start_job_publication_worker \
15
+ --config-file ./pipeline/config.yaml \
16
+ --publisher-factory my_pipeline.publishers.factory:create_multi_type_publisher
17
+ """
18
+
19
+ import argparse
20
+ import asyncio
21
+ import importlib
22
+ import logging
23
+ import sys
24
+ from functools import partial
25
+ from pathlib import Path
26
+
27
+ from basics.logging import get_logger
28
+ from basics.logging_utils import summarize_exception_chain
29
+
30
+ from runtime_scripts.lib.cli import (
31
+ add_common_args,
32
+ )
33
+ from runtime_scripts.lib.logging_setup import (
34
+ configure_logging,
35
+ resolve_log_level,
36
+ )
37
+ from runtime_scripts.lib.runner import run
38
+ from warren.exceptions import WarrenError
39
+ from warren.jobs.publishing.job_publication_worker_runner import (
40
+ DocumentsPublisherFactoryFunc,
41
+ JobPublicationWorkerRunner,
42
+ )
43
+
44
+
45
+ module_logger: logging.Logger = get_logger(__name__)
46
+
47
+
48
def _parse_args() -> argparse.Namespace:
    """Parse the command line of the job publication worker launcher.

    Requires ``--publisher-factory`` and adds the options registered by
    ``add_common_args``.

    :return: parsed arguments.
    """
    factory_help = (
        "Dotted path to an async factory function matching "
        "DocumentsPublisherFactoryFunc, e.g. "
        "my.module:create_multi_type_publisher. "
        "The factory receives (publisher, infra, config, "
        "worker_name) and returns a JobDocumentsPublisher."
    )

    cli = argparse.ArgumentParser(description="Start a job publication worker")
    cli.add_argument(
        "--publisher-factory",
        type=str,
        required=True,
        help=factory_help,
    )
    add_common_args(cli)
    return cli.parse_args()
66
+
67
+
68
+ def describe_config(
69
+ publisher_factory: str,
70
+ config_file: str | None,
71
+ worker_name: str | None,
72
+ debug: bool,
73
+ logger: logging.Logger,
74
+ ) -> None:
75
+ """Log input configuration before any resolution or work."""
76
+ logger.info("Configuration:")
77
+ logger.info(f" publisher_factory: {publisher_factory}")
78
+ logger.info(f" config_file: {config_file}")
79
+ logger.info(f" worker_name: {worker_name}")
80
+ logger.info(f" debug: {debug}")
81
+
82
+
83
+ def _load_publisher_factory(factory_path: str) -> DocumentsPublisherFactoryFunc:
84
+ """Import a publisher factory from ``module.path:func_name``."""
85
+ if ":" not in factory_path:
86
+ msg = f"Publisher factory must be module.path:func_name, got: {factory_path}"
87
+ raise ValueError(msg)
88
+ module_path, func_name = factory_path.rsplit(":", 1)
89
+ mod = importlib.import_module(module_path)
90
+ if not hasattr(mod, func_name):
91
+ msg = f"Module {module_path} has no attribute '{func_name}'"
92
+ raise AttributeError(msg)
93
+ return getattr(mod, func_name)
94
+
95
+
96
async def start_job_publication_worker(
    *,
    publisher_factory: str,
    config_file: str | Path | None = None,
    worker_name: str | None = None,
    debug: bool = False,
    logger: logging.Logger | None = None,
) -> None:
    """Start a job publication worker.

    Loads the publisher factory, binds it to
    ``JobPublicationWorkerRunner`` and hands the resulting runner factory
    to the shared ``run()`` lifecycle.

    :param publisher_factory: dotted path to the publisher factory
        function (``module:func``) matching
        ``DocumentsPublisherFactoryFunc``.
    :param config_file: path to RuntimeConfig YAML, as a string or a
        ``Path`` (the CLI passes a ``Path``). Falls back to
        ``./pipeline/config.yaml`` when omitted or empty.
    :param worker_name: unique worker instance name.
    :param debug: enable DEBUG logging.
    :param logger: optional logger override.
    :raises WarrenError: if the publisher factory cannot be loaded, or
        if the shared lifecycle fails.
    """
    log = logger or module_logger

    describe_config(
        publisher_factory=publisher_factory,
        config_file=config_file,
        worker_name=worker_name,
        debug=debug,
        logger=log,
    )

    try:
        pub_factory_func = _load_publisher_factory(publisher_factory)
    except Exception as e:
        msg = f"Unable to load publisher factory: {publisher_factory}"
        raise WarrenError(msg) from e

    # Path() accepts both str and Path, so either input form works here.
    resolved_config = (
        Path(config_file) if config_file else Path("./pipeline/config.yaml")
    )

    # run() calls the factory as (config, worker_name); the publisher
    # factory is pre-bound as a keyword argument.
    runner_factory = partial(
        JobPublicationWorkerRunner,
        documents_publisher_factory=pub_factory_func,
    )

    await run(
        runner_factory_func=runner_factory,
        config_file=resolved_config,
        worker_name=worker_name,
        worker_name_prefix="publication-worker",
        debug=debug,
        logger=log,
    )
147
+
148
+
149
def main() -> None:
    """CLI entry point: parse arguments, configure logging, start the worker.

    Any ``Exception`` raised while starting or running the worker is
    logged as a one-line summary and the process exits with status 1.
    """
    global module_logger

    args = _parse_args()
    configure_logging(debug=args.debug)

    # Rebind the module logger at the level selected on the command line.
    level = resolve_log_level(debug=args.debug)
    module_logger = get_logger(__name__, log_level=level)

    # The parsed option names match the launcher's keyword parameters.
    cli_options = vars(args)
    try:
        asyncio.run(
            start_job_publication_worker(**cli_options, logger=module_logger)
        )
    except Exception as exc:
        module_logger.error(
            f"Start job publication worker failed: {summarize_exception_chain(exc)}"
        )
        sys.exit(1)
162
+
163
+
164
# Start the launcher only when this file is executed as a script
# (e.g. ``python -m runtime_scripts.start_job_publication_worker``),
# not when imported.
if __name__ == "__main__":
    main()