zoo-runner-common 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
base_runner.py ADDED
@@ -0,0 +1,334 @@
1
+ import logging
2
+ import os
3
+ import sys
4
+ import traceback
5
+ import types
6
+ from abc import ABC, abstractmethod
7
+
8
+ # Shared ZooStub import
9
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".")))
10
+
11
+ try:
12
+ import zoo
13
+ except ImportError:
14
+ from zoostub import ZooStub
15
+
16
+ zoo = ZooStub()
17
+
18
+ from zoo_conf import CWLWorkflow, ZooConf, ZooInputs, ZooOutputs
19
+
20
+ logger = logging.getLogger()
21
+
22
+
23
class BaseRunner(ABC):
    """
    Base class for CWL workflow runners.

    Provides common functionality and defines the interface for specific runners.
    Subclasses must implement ``wrap()`` and ``execute()``.
    """

    def __init__(self, cwl, inputs, conf, outputs, execution_handler=None):
        """
        Initialize the base runner.

        :param cwl: CWL workflow definition (path to file or parsed CWL)
        :param inputs: ZOO inputs dictionary
        :param conf: ZOO configuration dictionary
        :param outputs: ZOO outputs dictionary
        :param execution_handler: Optional ExecutionHandler instance for hooks
        """
        self.cwl = cwl
        self.execution_handler = execution_handler or self._create_default_handler()

        # Create typed wrapper objects from ZOO dictionaries
        self.conf = ZooConf(conf)
        self.inputs = ZooInputs(inputs)
        self.outputs = ZooOutputs(outputs)

        # Parse CWL workflow
        self.workflow = CWLWorkflow(self.cwl, self.conf.workflow_id)

        # Legacy namespace for backward compatibility
        self.zoo_conf = types.SimpleNamespace(conf=conf)

        # Runner-specific state
        self.namespace_name = None
        self.execution = None

    def _create_default_handler(self):
        """Create a no-op handler used when the caller does not provide one."""

        class DefaultHandler:
            def pre_execution_hook(self):
                pass

            def post_execution_hook(self, *args, **kwargs):
                pass

            def get_secrets(self):
                return None

            def get_additional_parameters(self):
                return {}

            def get_pod_env_vars(self):
                return None

            def get_pod_node_selector(self):
                return None

            def handle_outputs(self, *args, **kwargs):
                pass

            def set_job_id(self, job_id):
                pass

            def get_namespace(self):
                """Get namespace for Calrissian execution."""
                return None

            def get_service_account(self):
                """Get service account for Calrissian execution."""
                return None

        return DefaultHandler()

    def _run_hook(self, hook_name, *args):
        """
        Call an optional hook on the execution handler.

        Nothing happens when there is no handler or the handler does not
        define ``hook_name``. Any exception raised by the hook is logged
        (message plus traceback) and re-raised unchanged.
        """
        handler = self.execution_handler
        if not (handler and hasattr(handler, hook_name)):
            return
        try:
            getattr(handler, hook_name)(*args)
        except Exception as e:
            logger.error(f"Error in {hook_name}: {e}")
            logger.error(traceback.format_exc())
            raise

    def _get_additional_parameters(self):
        """
        Return the handler's additional parameters as a dict.

        A missing handler, a handler without ``get_additional_parameters``
        or a handler returning ``None`` all yield an empty dict, so the
        result can always be merged with ``**``.
        """
        getter = getattr(self.execution_handler, "get_additional_parameters", None)
        if getter is None:
            return {}
        return getter() or {}

    def update_status(self, progress: int, message: str = ""):
        """
        Update execution status in ZOO.

        Args:
            progress: Progress percentage (0-100)
            message: Status message to display
        """
        if hasattr(self.conf, 'conf') and "lenv" in self.conf.conf:
            self.conf.conf["lenv"]["message"] = message
            zoo.update_status(self.conf.conf, progress)
        else:
            logger.warning("Cannot update status: conf structure not available")

    def get_namespace_name(self):
        """
        Generate a namespace name for Kubernetes resources.

        The name is generated once and cached on the instance, so repeated
        calls return the same value.

        Returns:
            str: Namespace name in format {workflow_id}-{unique_id}
        """
        if self.namespace_name is None:
            import uuid

            unique_id = str(uuid.uuid4())[:8]
            self.namespace_name = f"{self.get_workflow_id()}-{unique_id}".lower()

        return self.namespace_name

    def log_output(self, output):
        """Log output information."""
        logger.info("[BaseRunner] Output: %s", output)

    def validate_inputs(self):
        """Validate input parameters (the base implementation accepts everything)."""
        logger.info("[BaseRunner] Validating inputs...")
        return True

    def prepare(self):
        """
        Shared pre-execution logic.

        Reports the start of the execution, calls the handler's
        ``pre_execution_hook`` and builds the processing parameters.
        Parameters supplied by the handler override the ones derived from
        the inputs when both define the same key.

        Returns:
            types.SimpleNamespace: ``cwl`` (result of ``wrap()``) and
            ``params`` (merged processing parameters)
        """
        logger.info("execution started")
        self.update_status(progress=2, message="starting execution")

        # Call pre-execution hook
        self._run_hook("pre_execution_hook")

        logger.info("wrap CWL workflow with stage-in/out steps")

        processing_parameters = {
            **self.get_processing_parameters(),
            **self._get_additional_parameters(),
        }

        return types.SimpleNamespace(cwl=self.wrap(), params=processing_parameters)

    def finalize(self, log, output, usage_report, tool_logs):
        """
        Finalization logic after execution.

        Calls the handler's ``post_execution_hook`` and then its
        ``handle_outputs`` hook, passing both the same four arguments.
        An exception raised by either hook is logged and re-raised.
        """
        logger.info("Finalization started")

        # Call post-execution hook
        self._run_hook("post_execution_hook", log, output, usage_report, tool_logs)

        # Call handle_outputs hook
        self._run_hook("handle_outputs", log, output, usage_report, tool_logs)

    def get_workflow_id(self):
        """
        Get the workflow identifier from configuration.

        Returns:
            str: The workflow identifier
        """
        return self.conf.workflow_id

    def get_workflow_inputs(self, mandatory=False):
        """
        Get workflow input parameter names.

        Args:
            mandatory: If True, only return mandatory inputs (no default value)

        Returns:
            list: List of input parameter names
        """
        return self.workflow.get_workflow_inputs(mandatory=mandatory)

    def get_max_cores(self):
        """
        Get the maximum number of cores from CWL ResourceRequirements.

        Returns:
            int: Maximum cores requested, or the ``DEFAULT_MAX_CORES``
            environment value (2 when unset) if the CWL declares no coresMax
        """
        resources = self.workflow.eval_resource()
        max_cores = max(resources["coresMax"]) if resources["coresMax"] else None

        if max_cores is None:
            max_cores = int(os.environ.get("DEFAULT_MAX_CORES", "2"))

        return max_cores

    def get_max_ram(self):
        """
        Get the maximum RAM in megabytes from CWL ResourceRequirements.

        Falls back to the ``DEFAULT_MAX_RAM`` environment value (4096 when
        unset) if the CWL declares no ramMax.

        Returns:
            str: Maximum RAM in MB with unit (e.g., "4096Mi")
        """
        resources = self.workflow.eval_resource()
        max_ram = max(resources["ramMax"]) if resources["ramMax"] else None

        if max_ram is None:
            max_ram = int(os.environ.get("DEFAULT_MAX_RAM", "4096"))

        # Return as string with Mi unit
        return f"{max_ram}Mi"

    @staticmethod
    def _default_volume_size(unit):
        """
        Return the ``DEFAULT_VOLUME_SIZE`` environment value expressed in ``unit``.

        The value (default "10Gi") is returned untouched when it already
        mentions the requested unit. Otherwise a "<digits>Gi" or "<digits>Mi"
        value is converted (1 Gi = 1024 Mi, rounding up when going to Gi).
        Values or units that cannot be interpreted fall back to "10<unit>".
        """
        default = os.environ.get("DEFAULT_VOLUME_SIZE", "10Gi")
        if unit in default:
            return default

        for suffix, mi_per_unit in (("Gi", 1024), ("Mi", 1)):
            amount = default[: -len(suffix)].strip()
            if default.endswith(suffix) and amount.isdigit():
                size_mi = int(amount) * mi_per_unit
                if unit == "Mi":
                    return f"{size_mi}Mi"
                if unit == "Gi":
                    # Integer ceiling division so the volume is never undersized
                    return f"{-(-size_mi // 1024)}Gi"

        return f"10{unit}"

    def get_volume_size(self, unit="Mi"):
        """
        Get the volume size for temporary and output directories.

        Calculates based on tmpdir and outdir requirements from CWL. When
        the CWL declares none, the ``DEFAULT_VOLUME_SIZE`` environment value
        is used, converted to the requested unit.

        Args:
            unit: Unit for volume size ('Gi' for Gigabytes or 'Mi' for Megabytes)

        Returns:
            str: Volume size with unit (e.g., "10Gi" or "10240Mi")
        """
        resources = self.workflow.eval_resource()

        def largest(max_key, min_key):
            # Use Max if available, otherwise fall back to Min
            values = resources[max_key] or resources[min_key]
            return max(values) if values else 0

        # Total in MB
        volume_size_mb = largest("tmpdirMax", "tmpdirMin") + largest(
            "outdirMax", "outdirMin"
        )

        if volume_size_mb == 0:
            return self._default_volume_size(unit)

        # Convert based on requested unit
        if unit == "Gi":
            # Convert MB to Gi (1 Gi = 1024 Mi); the +1 always leaves headroom
            volume_size = int(volume_size_mb / 1024) + 1
        else:  # Mi
            volume_size = volume_size_mb

        return f"{volume_size}{unit}"

    def assert_parameters(self, mandatory=True):
        """
        Validate that required workflow inputs are provided.

        Any exception raised while checking is logged and reported as a
        failed validation rather than propagated.

        Args:
            mandatory: If True, check only mandatory inputs

        Returns:
            bool: True if all required inputs are present, False otherwise
        """
        try:
            required_inputs = self.get_workflow_inputs(mandatory=mandatory)

            for required_input in required_inputs:
                if required_input not in self.inputs.inputs:
                    error_msg = f"Missing required input: {required_input}"
                    logger.error(error_msg)
                    return False

            logger.info("All required parameters are present")
            return True
        except Exception as e:
            logger.error(f"Error checking parameters: {e}")
            return False

    def get_processing_parameters(self):
        """
        Get processing parameters from inputs.

        Returns:
            dict: Processing parameters suitable for CWL execution
        """
        return self.inputs.get_processing_parameters(workflow=self.workflow.get_workflow())

    @abstractmethod
    def wrap(self):
        """
        Wrap the CWL workflow with stage-in/stage-out steps.
        Must be implemented by subclasses.
        """
        raise NotImplementedError("Subclasses must implement wrap()")

    @abstractmethod
    def execute(self):
        """
        Execute the CWL workflow.
        Must be implemented by subclasses.
        """
        raise NotImplementedError("Subclasses must implement execute()")
zoo_conf.py ADDED
@@ -0,0 +1,317 @@
1
+ import inspect
2
+ import os
3
+
4
+ import attr
5
+ import cwl_utils
6
+ from cwl_utils.parser import load_document_by_yaml
7
+
8
+
9
+ # useful class for hints in CWL
10
@attr.s
class ResourceRequirement:
    """Plain holder for the fields of a CWL ``ResourceRequirement`` given as a hint dict.

    Every field defaults to ``None`` so that a partially specified hint can
    still be represented.
    """

    coresMin = attr.ib(default=None)
    coresMax = attr.ib(default=None)
    ramMin = attr.ib(default=None)
    ramMax = attr.ib(default=None)
    tmpdirMin = attr.ib(default=None)
    tmpdirMax = attr.ib(default=None)
    outdirMin = attr.ib(default=None)
    outdirMax = attr.ib(default=None)

    @classmethod
    def from_dict(cls, env):
        """Build an instance from ``env``, ignoring keys the constructor does not accept."""
        accepted = {}
        for name, value in env.items():
            # Keys such as "class" are not constructor parameters and are dropped
            if name in inspect.signature(cls).parameters:
                accepted[name] = value
        return cls(**accepted)
26
+
27
+
28
class CWLWorkflow:
    """Parsed CWL document with helpers to inspect one workflow's inputs and resources."""

    # Resource fields collected by eval_resource(), named as in CWL ResourceRequirement
    _RESOURCE_TYPES = (
        "coresMin",
        "coresMax",
        "ramMin",
        "ramMax",
        "tmpdirMin",
        "tmpdirMax",
        "outdirMin",
        "outdirMax",
    )

    def __init__(self, cwl, workflow_id):
        """
        Parse ``cwl`` and remember which workflow to work on.

        Args:
            cwl: Raw CWL document handed to ``cwl_loader.load_cwl_from_yaml``
            workflow_id: Identifier (the part after ``#``) of the workflow of interest
        """
        self.raw_cwl = cwl
        self.workflow_id = workflow_id

        # Load the entire CWL document and convert to v1.2
        # Use load_cwl_from_yaml instead of load_document_by_yaml for proper version conversion
        from cwl_loader import load_cwl_from_yaml

        parsed_cwl = load_cwl_from_yaml(cwl, uri="io://", cwl_version='v1.2', sort=True)

        # Ensure self.cwl is always a list containing all CWL elements
        if not isinstance(parsed_cwl, list):
            parsed_cwl = [parsed_cwl]

        self.cwl = parsed_cwl

    def _local_ids(self):
        """Return the fragment (text after the last ``#``) of every element id, in order."""
        return [elem.id.split("#")[-1] for elem in self.cwl]

    def get_version(self):
        """Return the ``s:softwareVersion`` entry of the raw CWL, or "" when absent."""
        return self.raw_cwl.get("s:softwareVersion", "")

    def get_label(self):
        """Return the ``label`` of the selected workflow."""
        return self.get_workflow().label

    def get_doc(self):
        """Return the ``doc`` of the selected workflow."""
        return self.get_workflow().doc

    def get_workflow(self) -> "cwl_utils.parser.cwl_v1_0.Workflow":
        """
        Return the parsed element whose id fragment equals ``workflow_id``.

        Raises:
            ValueError: if no element carries that id
        """
        # NOTE(review): the document is loaded with cwl_version='v1.2', so the
        # returned object is presumably a v1.2 Workflow — confirm and adjust
        # the annotation.
        return self.cwl[self._local_ids().index(self.workflow_id)]

    def get_object_by_id(self, id):
        """
        Return the parsed element referenced by ``id``.

        Accepts a bare fragment ("tool"), a local reference ("#tool") or a
        reference carrying the loader's base URI ("io://#tool", "io://tool").

        Raises:
            ValueError: if no element carries that id
        """
        # Element ids are compared by their fragment, so normalise the
        # reference the same way before searching.
        search_id = id.split("#")[-1].replace("io://", "")
        return self.cwl[self._local_ids().index(search_id)]

    def get_workflow_inputs(self, mandatory=False):
        """
        Return the names of the selected workflow's inputs.

        Args:
            mandatory: If True, skip inputs that have a default value or
                whose type is a union containing "null" (optional inputs)

        Returns:
            list: Input names (last path segment of each input id)
        """
        inputs = []
        for inp in self.get_workflow().inputs:
            if mandatory:
                # Use type_ instead of type (cwl-utils API change)
                inp_type = getattr(inp, 'type_', getattr(inp, 'type', None))
                # Any ["null", X] union is optional, whatever X is
                is_optional = isinstance(inp_type, list) and "null" in inp_type
                if inp.default is not None or is_optional:
                    continue
            inputs.append(inp.id.split("/")[-1])
        return inputs

    @staticmethod
    def has_scatter_requirement(workflow):
        """Return True when ``workflow`` declares a ScatterFeatureRequirement."""
        scatter_classes = (
            cwl_utils.parser.cwl_v1_0.ScatterFeatureRequirement,
            cwl_utils.parser.cwl_v1_1.ScatterFeatureRequirement,
            cwl_utils.parser.cwl_v1_2.ScatterFeatureRequirement,
        )
        # requirements may be None when the workflow declares none
        return any(
            isinstance(requirement, scatter_classes)
            for requirement in (workflow.requirements or [])
        )

    @staticmethod
    def get_resource_requirement(elem):
        """Gets the ResourceRequirement out of a CommandLineTool or Workflow

        Requirements are searched first, then hints. A source is only used
        when it holds exactly one ResourceRequirement.

        Args:
            elem (CommandLineTool or Workflow): CommandLineTool or Workflow

        Returns:
            cwl_utils.parser.cwl_v1_2.ResourceRequirement or ResourceRequirement,
            or None when nothing usable is declared
        """
        requirement_classes = (
            cwl_utils.parser.cwl_v1_0.ResourceRequirement,
            cwl_utils.parser.cwl_v1_1.ResourceRequirement,
            cwl_utils.parser.cwl_v1_2.ResourceRequirement,
        )

        # look for requirements
        if elem.requirements is not None:
            found = [
                requirement
                for requirement in elem.requirements
                if isinstance(requirement, requirement_classes)
            ]
            if len(found) == 1:
                return found[0]

        # look for hints
        if elem.hints is not None:
            found = []
            for hint in elem.hints:
                # Handle both dict and object types
                if isinstance(hint, dict):
                    if hint.get("class") == "ResourceRequirement":
                        found.append(ResourceRequirement.from_dict(hint))
                elif hasattr(hint, 'class_'):
                    if hint.class_ == "ResourceRequirement":
                        found.append(hint)
            if len(found) == 1:
                return found[0]

        return None

    def eval_resource(self):
        """
        Collect every declared resource value of the document's workflows.

        For each Workflow element, the workflow's own ResourceRequirement
        and the one of each step's process are gathered. Values of scattered
        steps are multiplied by the ``SCATTER_MULTIPLIER`` environment value
        (2 when unset).

        Returns:
            dict: Maps each resource field name to the list of values found
        """
        resources = {resource_type: [] for resource_type in self._RESOURCE_TYPES}
        workflow_classes = (
            cwl_utils.parser.cwl_v1_0.Workflow,
            cwl_utils.parser.cwl_v1_1.Workflow,
            cwl_utils.parser.cwl_v1_2.Workflow,
        )

        def collect(requirement, multiplier=1):
            # Unset (None / 0 / empty) fields are skipped
            for resource_type in self._RESOURCE_TYPES:
                value = getattr(requirement, resource_type)
                if value:
                    resources[resource_type].append(value * multiplier)

        for elem in self.cwl:
            if not isinstance(elem, workflow_classes):
                continue

            if resource_requirement := self.get_resource_requirement(elem):
                collect(resource_requirement)

            for step in elem.steps:
                # run is either a reference to another element of the document
                # or, for an inline process, the process object itself
                run = step.run
                process = self.get_object_by_id(run) if isinstance(run, str) else run
                if resource_requirement := self.get_resource_requirement(process):
                    multiplier = (
                        int(os.getenv("SCATTER_MULTIPLIER", 2)) if step.scatter else 1
                    )
                    collect(resource_requirement, multiplier)

        return resources
203
+
204
+
205
class ZooConf:
    """Thin wrapper around the ZOO configuration dictionary."""

    def __init__(self, conf):
        """Keep ``conf`` and expose ``conf["lenv"]["Identifier"]`` as ``workflow_id``."""
        self.conf = conf
        lenv = self.conf["lenv"]
        self.workflow_id = lenv["Identifier"]
209
+
210
+
211
class ZooInputs:
    """Wrapper around the ZOO inputs dictionary that converts values for CWL execution."""

    # dataType values that are converted to native Python types
    _TYPED = ("integer", "float", "boolean", "double")
    # Textual spellings of a false boolean (compared case-insensitively)
    _FALSE_STRINGS = ("", "0", "f", "false", "n", "no", "off")

    def __init__(self, inputs):
        """
        Keep ``inputs`` and normalise multi-valued entries in place.

        Args:
            inputs: ZOO inputs dictionary; entries are modified in place
        """
        # this conversion is necessary
        # because zoo converts array of length 1 to a string
        for spec in inputs.values():
            if (
                "maxOccurs" in spec
                and int(spec["maxOccurs"]) > 1
                and not isinstance(spec["value"], list)
            ):
                spec["value"] = [spec["value"]]

        self.inputs = inputs

    def get_input_value(self, key):
        """
        Return the ``value`` entry of input ``key``.

        Returns ``None`` when the lookup fails with a ``TypeError`` (for
        example when the entry is not subscriptable by string).

        Raises:
            KeyError: if ``key`` or its ``value`` entry is missing
        """
        try:
            return self.inputs[key]["value"]
        except TypeError:
            return None

    @classmethod
    def _to_bool(cls, raw):
        """Convert ``raw`` to bool, treating strings such as "false" or "0" as False."""
        if isinstance(raw, str):
            # bool("false") is True, so textual values need explicit parsing
            return raw.strip().lower() not in cls._FALSE_STRINGS
        return bool(raw)

    @classmethod
    def _convert(cls, data_type, raw):
        """Convert ``raw`` according to the ZOO ``data_type``; other types pass through."""
        if data_type in ("double", "float"):
            return float(raw)
        if data_type == "integer":
            return int(raw)
        if data_type == "boolean":
            return cls._to_bool(raw)
        return raw

    def get_processing_parameters(self, workflow=None):
        """Returns a dict mapping each input key to its value prepared for CWL execution

        Args:
            workflow: Optional CWL workflow object (currently unused, for future compatibility)

        Returns:
            dict: One entry per input. Depending on the input description this is
            a ``{"format", "value"}`` dict, a typed scalar or list, ``None`` for
            the literal "NULL", an ``ogc-bbox`` dict, or the raw value.
        """
        import json

        res = {}

        for key, value in self.inputs.items():
            data_type = value.get("dataType")
            has_data_type = "dataType" in value

            if "format" in value and not (has_data_type and data_type in self._TYPED):
                # Complex input described by a format
                res[key] = {
                    "format": value["format"],
                    "value": value["value"],
                }
            elif has_data_type:
                if isinstance(data_type, list):
                    # Array input: the first entry gives the item type
                    item_type = data_type[0]
                    if item_type in self._TYPED:
                        res[key] = [
                            self._convert(item_type, item) for item in value["value"]
                        ]
                    else:
                        res[key] = value["value"]
                elif value["value"] == "NULL":
                    res[key] = None
                else:
                    res[key] = self._convert(data_type, value["value"])
            elif "cache_file" in value:
                if "isArray" in value and value["isArray"] == "true":
                    res[key] = [
                        {
                            "format": value["mimeType"][i] if "mimeType" in value else "text/plain",
                            "value": item,
                        }
                        for i, item in enumerate(value["value"])
                    ]
                else:
                    res[key] = {
                        "format": value.get("mimeType", "text/plain"),
                        "value": value["value"],
                    }
            elif "lowerCorner" in value and "upperCorner" in value:
                res[key] = {
                    "format": "ogc-bbox",
                    "bbox": json.loads(value["value"]),
                    "crs": value["crs"].replace("http://www.opengis.net/def/crs/OGC/1.3/", ""),
                }
            else:
                res[key] = value["value"]
        return res
297
+
298
+
299
class ZooOutputs:
    """Wrapper around the ZOO outputs dictionary."""

    def __init__(self, outputs):
        """
        Keep ``outputs`` and choose the key that ``set_output`` writes to.

        The first key of ``outputs`` is used. When ``outputs`` is empty, an
        empty "stac" entry is created in it and used instead.
        """
        self.outputs = outputs
        # deduce the output key
        output_keys = list(self.outputs.keys())
        if output_keys:
            self.output_key = output_keys[0]
        else:
            self.output_key = "stac"
            if "stac" not in self.outputs:
                self.outputs["stac"] = {}

    def get_output_parameters(self):
        """Returns a dict mapping each output key to its value (None when not set yet)"""
        # .get(): the "stac" entry created by __init__ has no "value" until set_output runs
        return {key: value.get("value") for key, value in self.outputs.items()}

    def set_output(self, value):
        """set the output result value"""
        self.outputs[self.output_key]["value"] = value
@@ -0,0 +1,217 @@
1
+ Metadata-Version: 2.4
2
+ Name: zoo-runner-common
3
+ Version: 0.1.1
4
+ Summary: Shared utilities for ZOO-Project CWL runners
5
+ Author-email: Aryan Khare <kharearyan78@gmail.com>, Gérald Fenoy <gerald.fenoy@geolabs.fr>
6
+ License: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/ZOO-Project/zoo-runner-common
8
+ Project-URL: Documentation, https://zoo-project.github.io/zoo-runner-common/
9
+ Project-URL: Repository, https://github.com/ZOO-Project/zoo-runner-common
10
+ Project-URL: Issues, https://github.com/ZOO-Project/zoo-runner-common/issues
11
+ Project-URL: Changelog, https://github.com/ZOO-Project/zoo-runner-common/blob/main/CHANGELOG.md
12
+ Keywords: zoo-project,cwl,runner,workflow,ogc,api,processes
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: Apache Software License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ Requires-Dist: loguru>=0.7.0
24
+
25
+ # zoo-runner-common
26
+
27
+ A shared utility library for ZOO-Project CWL runners – centralizing reusable components across runners like **Calrissian**, **Argo Workflows**, and **WES**.
28
+
29
+ ---
30
+
31
+ ## Overview
32
+
33
+ The `zoo-runner-common` repository provides core shared components used across multiple ZOO CWL runners. It **eliminates code duplication** by hosting:
34
+
35
+ - **Common base class** (`BaseRunner`) with 8+ shared methods
36
+ - **Zoo-specific configuration handlers** (`ZooConf`, `ZooInputs`, `ZooOutputs`)
37
+ - **CWL workflow parsing** and resource evaluation (`CWLWorkflow`)
38
+ - **Execution handler interface** (`ExecutionHandler`) for hooks
39
+ - **Service stubs** (`ZooStub`) to communicate with ZOO kernel
40
+
41
+ ### Key Benefits
42
+
43
+ - ✅ **~437 lines of code duplication eliminated** across 3 runners
44
+ - ✅ **Single source of truth** for common functionality
45
+ - ✅ **Easier maintenance** - fix once, benefit everywhere
46
+ - ✅ **Consistent behavior** across all runners
47
+
48
+ ---
49
+
50
+ ## Directory Structure
51
+
52
+ ```
53
+ zoo-runner-common/
54
+ ├── base_runner.py # Abstract BaseRunner with common methods
55
+ ├── zoo_conf.py # ZooConf, ZooInputs, ZooOutputs, CWLWorkflow
56
+ ├── handlers.py # ExecutionHandler abstract base class
57
+ ├── zoostub.py # ZooStub for ZOO kernel communication
58
+ └── __init__.py # Package initialization
59
+ ```
60
+
61
+ ---
62
+
63
+ ## Installation
64
+
65
+ ### As a dependency (recommended)
66
+
67
+ Add to your runner's `pyproject.toml`:
68
+
69
+ ```toml
70
+ [project]
71
+ dependencies = [
72
+ "zoo-runner-common @ git+https://github.com/ZOO-Project/zoo-runner-common.git@main",
73
+ ]
74
+ ```
75
+
76
+ ### Local development
77
+
78
+ ```bash
79
+ export PYTHONPATH="$PYTHONPATH:/path/to/zoo-runner-common"
80
+ ```
81
+
82
+ Or extend `sys.path` at runtime:
83
+
84
+ ```python
85
+ import sys
86
+ sys.path.insert(0, os.path.abspath('../zoo-runner-common'))
87
+ from base_runner import BaseRunner
88
+ from zoo_conf import ZooConf, ZooInputs, ZooOutputs, CWLWorkflow
89
+ from handlers import ExecutionHandler
90
+ ```
91
+
92
+ ---
93
+
94
+ ## Components
95
+
96
+ ### BaseRunner (base_runner.py)
97
+
98
+ Abstract base class providing common functionality for all CWL runners:
99
+
100
+ **Methods provided:**
101
+ - `get_workflow_id()` - Get workflow identifier
102
+ - `get_workflow_inputs(mandatory=False)` - Get workflow input parameters
103
+ - `get_max_cores()` - Get maximum CPU cores from CWL
104
+ - `get_max_ram()` - Get maximum RAM from CWL
105
+ - `get_volume_size(unit='Mi')` - Calculate volume size (supports Mi/Gi)
106
+ - `assert_parameters(mandatory=True)` - Validate required inputs
107
+ - `get_processing_parameters()` - Get execution parameters
108
+ - `get_namespace_name()` - Generate unique namespace name
109
+ - `update_status(progress, message)` - Update execution status
110
+ - `prepare()` - Pre-execution preparation with hooks
111
+ - `finalize(log, output, usage_report, tool_logs)` - Post-execution finalization
112
+
113
+ **Abstract methods (to implement):**
114
+ - `wrap()` - Wrap CWL with stage-in/stage-out
115
+ - `execute()` - Execute the workflow
116
+
117
+ ### Zoo Configuration Classes (zoo_conf.py)
118
+
119
+ | Class | Description |
120
+ |-------|-------------|
121
+ | `ZooConf` | Wraps ZOO configuration dictionary |
122
+ | `ZooInputs` | Handles input parameter conversion and validation |
123
+ | `ZooOutputs` | Manages output parameters |
124
+ | `CWLWorkflow` | Parses CWL, evaluates resources, handles scatter |
125
+ | `ResourceRequirement` | CWL resource hints dataclass |
126
+
127
+ **Key Features:**
128
+ - Advanced type conversion (int, float, bool, arrays)
129
+ - File handling with format metadata
130
+ - OGC bounding box support
131
+ - NULL value handling
132
+ - Array inputs with `isArray`
133
+
134
+ ### ExecutionHandler (handlers.py)
135
+
136
+ Abstract base class for execution customization:
137
+
138
+ ```python
139
+ class ExecutionHandler(ABC):
140
+ @abstractmethod
141
+ def pre_execution_hook(self): pass
142
+
143
+ @abstractmethod
144
+ def post_execution_hook(self, log, output, usage_report, tool_logs): pass
145
+
146
+ @abstractmethod
147
+ def get_secrets(self): pass
148
+
149
+ @abstractmethod
150
+ def get_pod_env_vars(self): pass
151
+
152
+ @abstractmethod
153
+ def get_pod_node_selector(self): pass
154
+
155
+ @abstractmethod
156
+ def handle_outputs(self, log, output, usage_report, tool_logs): pass
157
+
158
+ @abstractmethod
159
+ def get_additional_parameters(self): pass
160
+ ```
161
+
162
+ ---
163
+
164
+ ## Usage Example
165
+
166
+ ```python
167
+ from base_runner import BaseRunner
168
+ from handlers import ExecutionHandler
169
+
170
+ class MyCustomRunner(BaseRunner):
171
+ def wrap(self):
172
+ # Implement CWL wrapping logic
173
+ return wrapped_cwl
174
+
175
+ def execute(self):
176
+ # Prepare execution
177
+ cwljob = self.prepare()
178
+
179
+ # Execute workflow (custom logic)
180
+ result = my_executor.run(cwljob.cwl, cwljob.params)
181
+
182
+ # Finalize
183
+ self.finalize(log, output, usage_report, tool_logs)
184
+ return result
185
+ ```
186
+
187
+ ---
188
+
189
+ ## Runners Using zoo-runner-common
190
+
191
+ | Runner | Backend | Repository |
192
+ |--------|---------|------------|
193
+ | **zoo-calrissian-runner** | Calrissian/Kubernetes | [EOEPCA/zoo-calrissian-runner](https://github.com/EOEPCA/zoo-calrissian-runner) |
194
+ | **zoo-argowf-runner** | Argo Workflows | [ZOO-Project/zoo-argowf-runner](https://github.com/ZOO-Project/zoo-argowf-runner) |
195
+ | **zoo-wes-runner** | WES/TOIL | [ZOO-Project/zoo-wes-runner](https://github.com/ZOO-Project/zoo-wes-runner) |
196
+
197
+ ---
198
+
199
+ ## Module Reference
200
+
201
+ | Module | Description |
202
+ | ---- | -------- |
203
+ | BaseRunner | Abstract runner blueprint all runners must extend |
204
+ | ZooConf | Parses conf.json, manages job ID, state |
205
+ | ZooInputs | Parses inputs.json, formats CWL-style parameters |
206
+ | ZooOutputs | Handles writing and setting output results |
207
+ | CWLWorkflow | Loads, parses, and analyzes CWL workflows |
208
+ | ResourceRequirement | Parses and evaluates CWL resource hints/requirements |
209
+ | wrapper_utils | Provides helper to build wrapped CWL pipeline |
210
+ | ZooStub | Interacts with ZOO's lenv for progress updates |
211
+
212
+ ---
213
+
214
+ ## Used By
215
+ - zoo-wes-runner
216
+ - zoo-argowf-runner
217
+ - zoo-calrissian-runner
@@ -0,0 +1,7 @@
1
+ base_runner.py,sha256=DF7BE_UHgILhxY-zNxCAjOkYqR5mzAmy6I9dZgo0Qqs,10674
2
+ zoo_conf.py,sha256=WD7-wRCogxDvIhRN2m8ILCwHPm3JdlsqNnFj6F1sH3o,11655
3
+ zoostub.py,sha256=6c0VClH9MjkOxpCaA1z8Kf_cIP3k4k0nBWkGxhN2mAY,926
4
+ zoo_runner_common-0.1.1.dist-info/METADATA,sha256=QyS2m7_aWtm8gQf6vBmtXmwiDFduRq-qnjZ67uvAvaE,7001
5
+ zoo_runner_common-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ zoo_runner_common-0.1.1.dist-info/top_level.txt,sha256=TV8APKVcUq2F4UCQo5eW5b_0K3-_uukb7VOl3GF8MzU,29
7
+ zoo_runner_common-0.1.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,3 @@
1
+ base_runner
2
+ zoo_conf
3
+ zoostub
zoostub.py ADDED
@@ -0,0 +1,40 @@
1
+ from loguru import logger
2
+
3
+
4
class ZooStub:
    """Stand-in for the ``zoo`` module, used when the real ZOO kernel bindings cannot be imported.

    Status updates are printed and log calls are forwarded to loguru.
    """

    def __init__(self):
        # Service status codes.
        # NOTE(review): values follow the ZOO-Project kernel constants as far
        # as known (PAUSED = 2, DISMISSED = 5) — confirm against the kernel's
        # service.h. The previous code assigned SERVICE_PAUSED twice (2, then
        # 5), leaving it at 5 and SERVICE_DISMISSED undefined.
        self.SERVICE_ACCEPTED = 0
        self.SERVICE_STARTED = 1
        self.SERVICE_PAUSED = 2
        self.SERVICE_SUCCEEDED = 3
        self.SERVICE_FAILED = 4
        self.SERVICE_DISMISSED = 5
        self.SERVICE_DEPLOYED = 6
        self.SERVICE_UNDEPLOYED = 7

    def update_status(self, conf, progress):
        """Print the progress value; ``conf`` is accepted for API compatibility and ignored."""
        print(f"Status {progress}")

    def _(self, message):
        """Print ``message`` (``_`` is presumably the kernel's translation hook — TODO confirm)."""
        print(f"invoked _ with {message}")

    def trace(self, message):
        """Log ``message`` at TRACE level."""
        logger.trace(message)

    def debug(self, message):
        """Log ``message`` at DEBUG level."""
        logger.debug(message)

    def info(self, message):
        """Log ``message`` at INFO level."""
        logger.info(message)

    def success(self, message):
        """Log ``message`` at SUCCESS level."""
        logger.success(message)

    def warning(self, message):
        """Log ``message`` at WARNING level."""
        logger.warning(message)

    def error(self, message):
        """Log ``message`` at ERROR level."""
        logger.error(message)

    def critical(self, message):
        """Log ``message`` at CRITICAL level."""
        logger.critical(message)