zoo-runner-common 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- base_runner.py +334 -0
- zoo_conf.py +317 -0
- zoo_runner_common-0.1.1.dist-info/METADATA +217 -0
- zoo_runner_common-0.1.1.dist-info/RECORD +7 -0
- zoo_runner_common-0.1.1.dist-info/WHEEL +5 -0
- zoo_runner_common-0.1.1.dist-info/top_level.txt +3 -0
- zoostub.py +40 -0
base_runner.py
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
import traceback
|
|
5
|
+
import types
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
|
|
8
|
+
# Shared ZooStub import
|
|
9
|
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".")))
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
import zoo
|
|
13
|
+
except ImportError:
|
|
14
|
+
from zoostub import ZooStub
|
|
15
|
+
|
|
16
|
+
zoo = ZooStub()
|
|
17
|
+
|
|
18
|
+
from zoo_conf import CWLWorkflow, ZooConf, ZooInputs, ZooOutputs
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class BaseRunner(ABC):
    """
    Base class for CWL workflow runners.

    Holds the behaviour shared by the concrete runners: wrapping the ZOO
    dictionaries, reporting progress, sizing resources from the CWL document
    and driving the execution-handler hooks. Subclasses must implement
    ``wrap`` and ``execute``.
    """

    def __init__(self, cwl, inputs, conf, outputs, execution_handler=None):
        """
        Initialize the base runner.

        :param cwl: CWL workflow definition, handed to ``CWLWorkflow`` for parsing
        :param inputs: ZOO inputs dictionary
        :param conf: ZOO configuration dictionary (must provide ``lenv.Identifier``)
        :param outputs: ZOO outputs dictionary
        :param execution_handler: Optional handler object providing the hooks;
            a no-op handler is created when omitted (or falsy)
        """
        self.cwl = cwl
        self.execution_handler = execution_handler or self._create_default_handler()

        # Create typed wrapper objects from ZOO dictionaries
        self.conf = ZooConf(conf)
        self.inputs = ZooInputs(inputs)
        self.outputs = ZooOutputs(outputs)

        # Parse CWL workflow
        self.workflow = CWLWorkflow(self.cwl, self.conf.workflow_id)

        # Legacy namespace for backward compatibility
        self.zoo_conf = types.SimpleNamespace(conf=conf)

        # Runner-specific state
        self.namespace_name = None
        self.execution = None

    def _create_default_handler(self):
        """Create a default handler whose hooks all do nothing."""

        class DefaultHandler:
            def pre_execution_hook(self):
                pass

            def post_execution_hook(self, *args, **kwargs):
                pass

            def get_secrets(self):
                return None

            def get_additional_parameters(self):
                return {}

            def get_pod_env_vars(self):
                return None

            def get_pod_node_selector(self):
                return None

            def handle_outputs(self, *args, **kwargs):
                pass

            def set_job_id(self, job_id):
                pass

            def get_namespace(self):
                """Get namespace for Calrissian execution."""
                return None

            def get_service_account(self):
                """Get service account for Calrissian execution."""
                return None

        return DefaultHandler()

    def update_status(self, progress: int, message: str = ""):
        """
        Update execution status in ZOO.

        Stores ``message`` in ``conf["lenv"]["message"]`` and forwards the
        configuration to ``zoo.update_status``. Only a warning is logged when
        the configuration has no ``lenv`` section.

        Args:
            progress: Progress percentage (0-100)
            message: Status message to display
        """
        if hasattr(self.conf, 'conf') and "lenv" in self.conf.conf:
            self.conf.conf["lenv"]["message"] = message
            zoo.update_status(self.conf.conf, progress)
        else:
            logger.warning("Cannot update status: conf structure not available")

    def get_namespace_name(self):
        """
        Generate a namespace name for Kubernetes resources.

        The name is computed once and cached on ``self.namespace_name``.

        Returns:
            str: Lower-cased name in format {workflow_id}-{unique_id}, where
            unique_id is the first 8 characters of a random UUID
        """
        if self.namespace_name is None:
            import uuid

            unique_id = str(uuid.uuid4())[:8]
            self.namespace_name = f"{self.get_workflow_id()}-{unique_id}".lower()

        return self.namespace_name

    def log_output(self, output):
        """Log output information."""
        logger.info("[BaseRunner] Output: %s", output)

    def validate_inputs(self):
        """Log that validation runs and report success (no checks are performed here)."""
        logger.info("[BaseRunner] Validating inputs...")
        return True

    def _invoke_handler_hook(self, hook_name, *args):
        """
        Call ``hook_name`` on the execution handler when it is available.

        Nothing happens when there is no handler or it lacks the hook. Any
        exception raised by the hook is logged with its traceback and re-raised.
        """
        handler = self.execution_handler
        if not handler or not hasattr(handler, hook_name):
            return

        try:
            getattr(handler, hook_name)(*args)
        except Exception as e:
            logger.error(f"Error in {hook_name}: {e}")
            logger.error(traceback.format_exc())
            raise

    def prepare(self):
        """
        Shared pre-execution logic.

        Reports the start of the execution, calls the handler's
        ``pre_execution_hook`` and assembles the processing parameters
        (input parameters overridden by the handler's additional parameters).

        Returns:
            types.SimpleNamespace: ``cwl`` holds the result of ``wrap()`` and
            ``params`` the merged processing parameters
        """
        logger.info("execution started")
        self.update_status(progress=2, message="starting execution")

        # Call pre-execution hook
        self._invoke_handler_hook("pre_execution_hook")

        logger.info("wrap CWL workflow with stage-in/out steps")

        processing_parameters = {
            **self.get_processing_parameters(),
            **(
                self.execution_handler.get_additional_parameters()
                if self.execution_handler
                else {}
            ),
        }

        return types.SimpleNamespace(cwl=self.wrap(), params=processing_parameters)

    def finalize(self, log, output, usage_report, tool_logs):
        """
        Finalization logic after execution.

        Calls the handler's ``post_execution_hook`` and then ``handle_outputs``,
        both with ``(log, output, usage_report, tool_logs)``. A failure in the
        first hook prevents the second from running.
        """
        logger.info("Finalization started")

        # Call post-execution hook
        self._invoke_handler_hook(
            "post_execution_hook", log, output, usage_report, tool_logs
        )

        # Call handle_outputs hook
        self._invoke_handler_hook(
            "handle_outputs", log, output, usage_report, tool_logs
        )

    def get_workflow_id(self):
        """
        Get the workflow identifier from configuration.

        Returns:
            str: The workflow identifier
        """
        return self.conf.workflow_id

    def get_workflow_inputs(self, mandatory=False):
        """
        Get workflow input parameter names.

        Args:
            mandatory: If True, only return mandatory inputs

        Returns:
            list: List of input parameter names
        """
        return self.workflow.get_workflow_inputs(mandatory=mandatory)

    def get_max_cores(self):
        """
        Get the maximum number of cores from CWL ResourceRequirements.

        Returns:
            The largest ``coresMax`` value found, or the integer value of the
            ``DEFAULT_MAX_CORES`` environment variable (default 2) when the
            CWL document declares none
        """
        resources = self.workflow.eval_resource()
        max_cores = max(resources["coresMax"]) if resources["coresMax"] else None

        if max_cores is None:
            max_cores = int(os.environ.get("DEFAULT_MAX_CORES", "2"))

        return max_cores

    def get_max_ram(self):
        """
        Get the maximum RAM from CWL ResourceRequirements.

        Falls back to the ``DEFAULT_MAX_RAM`` environment variable
        (default 4096) when the CWL document declares no ``ramMax``.

        Returns:
            str: Maximum RAM with the ``Mi`` unit appended (e.g., "4096Mi")
        """
        resources = self.workflow.eval_resource()
        max_ram = max(resources["ramMax"]) if resources["ramMax"] else None

        if max_ram is None:
            max_ram = int(os.environ.get("DEFAULT_MAX_RAM", "4096"))

        # Return as string with Mi unit
        return f"{max_ram}Mi"

    @staticmethod
    def _largest_request(resources, max_key, min_key):
        """Return the largest ``max_key`` value, else the largest ``min_key`` value, else 0."""
        if resources[max_key]:
            return max(resources[max_key])
        if resources[min_key]:
            return max(resources[min_key])
        return 0

    @staticmethod
    def _convert_default_volume(default, unit):
        """
        Convert a ``<integer>[Mi|Gi]`` default into ``unit``.

        Returns the converted string, or None when ``default`` cannot be
        parsed or ``unit`` is neither "Mi" nor "Gi".
        """
        text = default.strip()
        factor = 1
        # NOTE(review): a value without suffix is treated as Mi, the unit this
        # class uses for the CWL-derived sizes - confirm against deployments.
        for suffix, multiplier in (("Gi", 1024), ("Mi", 1)):
            if text.endswith(suffix):
                text = text[: -len(suffix)].strip()
                factor = multiplier
                break

        if not (text.isascii() and text.isdigit()):
            return None

        size_mi = int(text) * factor
        if unit == "Gi":
            # Ceiling division so the converted size is never smaller.
            return f"{-(-size_mi // 1024)}Gi"
        if unit == "Mi":
            return f"{size_mi}Mi"
        return None

    def get_volume_size(self, unit="Mi"):
        """
        Get the volume size for temporary and output directories.

        Sums the largest tmpdir and outdir requests found in the CWL document
        (``*Max`` values are preferred, ``*Min`` values are the fallback).
        When the document requests nothing, the ``DEFAULT_VOLUME_SIZE``
        environment variable (default "10Gi") is used and converted to
        ``unit`` when it is expressed in the other unit.

        Args:
            unit: Unit for volume size ('Gi' or 'Mi')

        Returns:
            str: Volume size with unit (e.g., "10Gi" or "10240Mi")
        """
        resources = self.workflow.eval_resource()

        # Largest tmpdir and outdir requests, treated as Mi like the rest of this method
        tmpdir_max = self._largest_request(resources, "tmpdirMax", "tmpdirMin")
        outdir_max = self._largest_request(resources, "outdirMax", "outdirMin")

        volume_size_mb = tmpdir_max + outdir_max

        if volume_size_mb == 0:
            # Default from environment
            default = os.environ.get("DEFAULT_VOLUME_SIZE", "10Gi")
            if unit in default:
                return default

            # The default is expressed in another unit: convert it instead of
            # silently replacing it with a fixed "10<unit>".
            converted = self._convert_default_volume(default, unit)
            if converted is not None:
                return converted

            # Unparseable default or unknown unit: keep the legacy fallback.
            return f"10{unit}"

        # Convert based on requested unit
        if unit == "Gi":
            # 1 Gi = 1024 Mi; the "+ 1" always adds headroom (1024 Mi -> 2 Gi)
            volume_size = int(volume_size_mb / 1024) + 1
        else:  # Mi
            volume_size = volume_size_mb

        return f"{volume_size}{unit}"

    def assert_parameters(self, mandatory=True):
        """
        Validate that required workflow inputs are provided.

        Args:
            mandatory: If True, check only mandatory inputs

        Returns:
            bool: True if all required inputs are present, False when one is
            missing or when the check itself raises (the error is logged)
        """
        try:
            required_inputs = self.get_workflow_inputs(mandatory=mandatory)

            for required_input in required_inputs:
                if required_input not in self.inputs.inputs:
                    error_msg = f"Missing required input: {required_input}"
                    logger.error(error_msg)
                    return False

            logger.info("All required parameters are present")
            return True
        except Exception as e:
            logger.error(f"Error checking parameters: {e}")
            return False

    def get_processing_parameters(self):
        """
        Get processing parameters from inputs.

        Returns:
            dict: Processing parameters suitable for CWL execution
        """
        return self.inputs.get_processing_parameters(workflow=self.workflow.get_workflow())

    @abstractmethod
    def wrap(self):
        """
        Wrap the CWL workflow with stage-in/stage-out steps.
        Must be implemented by subclasses.
        """
        raise NotImplementedError("Subclasses must implement wrap()")

    @abstractmethod
    def execute(self):
        """
        Execute the CWL workflow.
        Must be implemented by subclasses.
        """
        raise NotImplementedError("Subclasses must implement execute()")
|
zoo_conf.py
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
import attr
|
|
5
|
+
import cwl_utils
|
|
6
|
+
from cwl_utils.parser import load_document_by_yaml
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# useful class for hints in CWL
|
|
10
|
+
@attr.s
class ResourceRequirement:
    """Plain holder for the CWL ``ResourceRequirement`` fields found in hints.

    Every field defaults to ``None``, meaning the hint did not set it.
    """

    # CPU core bounds
    coresMin = attr.ib(default=None)
    coresMax = attr.ib(default=None)
    # RAM bounds
    ramMin = attr.ib(default=None)
    ramMax = attr.ib(default=None)
    # Temporary directory space bounds
    tmpdirMin = attr.ib(default=None)
    tmpdirMax = attr.ib(default=None)
    # Output directory space bounds
    outdirMin = attr.ib(default=None)
    outdirMax = attr.ib(default=None)

    @classmethod
    def from_dict(cls, env):
        """Build an instance from ``env``, ignoring keys that are not constructor parameters."""
        accepted = inspect.signature(cls).parameters
        kwargs = {}
        for name, setting in env.items():
            if name in accepted:
                kwargs[name] = setting
        return cls(**kwargs)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class CWLWorkflow:
    """Parsed CWL document plus helpers to inspect one workflow inside it."""

    # ResourceRequirement attributes gathered by ``eval_resource``.
    _RESOURCE_KEYS = (
        "coresMin",
        "coresMax",
        "ramMin",
        "ramMax",
        "tmpdirMin",
        "tmpdirMax",
        "outdirMin",
        "outdirMax",
    )

    def __init__(self, cwl, workflow_id):
        """
        Parse ``cwl`` and remember which workflow is the entry point.

        Args:
            cwl: Raw CWL document (kept as ``raw_cwl``; ``get_version`` calls
                ``.get`` on it)
            workflow_id: Identifier of the workflow to operate on
        """
        self.raw_cwl = cwl
        self.workflow_id = workflow_id

        # Load the entire CWL document and convert to v1.2
        # Use load_cwl_from_yaml instead of load_document_by_yaml for proper version conversion
        from cwl_loader import load_cwl_from_yaml

        parsed_cwl = load_cwl_from_yaml(cwl, uri="io://", cwl_version='v1.2', sort=True)

        # Ensure self.cwl is always a list containing all CWL elements
        if not isinstance(parsed_cwl, list):
            parsed_cwl = [parsed_cwl]

        self.cwl = parsed_cwl

    def get_version(self):
        """Return the ``s:softwareVersion`` of the raw document, or "" when absent."""
        return self.raw_cwl.get("s:softwareVersion", "")

    def get_label(self):
        """Return the ``label`` of the selected workflow."""
        return self.get_workflow().label

    def get_doc(self):
        """Return the ``doc`` of the selected workflow."""
        return self.get_workflow().doc

    def get_workflow(self) -> "cwl_utils.parser.cwl_v1_2.Workflow":
        """
        Return the element whose id fragment equals ``workflow_id``.

        The annotation is a string so it is not evaluated at import time; it
        names the v1.2 class because ``__init__`` loads documents as v1.2.

        Raises:
            ValueError: If no element carries that id
        """
        ids = [elem.id.split("#")[-1] for elem in self.cwl]

        return self.cwl[ids.index(self.workflow_id)]

    def get_object_by_id(self, id):
        """
        Return the CWL element identified by ``id``.

        Elements are indexed by the fragment after the last ``#`` of their id,
        so the requested id is reduced the same way: ``"clt"``, ``"#clt"`` and
        ``"io://#clt"`` all resolve to the same element. A leftover ``io://``
        prefix is removed as before.

        Raises:
            ValueError: If no element matches
        """
        ids = [elem.id.split("#")[-1] for elem in self.cwl]
        # Same normalisation as the index above, then drop the 'io://' prefix if present
        search_id = id.split("#")[-1].replace("io://", "")
        return self.cwl[ids.index(search_id)]

    def get_workflow_inputs(self, mandatory=False):
        """
        Return the input names of the selected workflow.

        Args:
            mandatory: If True, skip inputs that have a default value or whose
                type is a list containing ``"null"`` (optional in CWL)

        Returns:
            list: Input names (last ``/``-separated segment of each input id)
        """
        inputs = []
        for inp in self.get_workflow().inputs:
            if mandatory:
                # Use type_ instead of type (cwl-utils API change)
                inp_type = getattr(inp, 'type_', getattr(inp, 'type', None))
                is_optional = isinstance(inp_type, list) and "null" in inp_type
                if inp.default is not None or is_optional:
                    continue
            inputs.append(inp.id.split("/")[-1])
        return inputs

    @staticmethod
    def has_scatter_requirement(workflow):
        """Return True when ``workflow.requirements`` holds a ScatterFeatureRequirement.

        A workflow without requirements (``None``) has none, so False is returned.
        """
        return any(
            isinstance(
                requirement,
                (
                    cwl_utils.parser.cwl_v1_0.ScatterFeatureRequirement,
                    cwl_utils.parser.cwl_v1_1.ScatterFeatureRequirement,
                    cwl_utils.parser.cwl_v1_2.ScatterFeatureRequirement,
                ),
            )
            for requirement in (workflow.requirements or ())
        )

    @staticmethod
    def get_resource_requirement(elem):
        """Gets the ResourceRequirement out of a CommandLineTool or Workflow

        ``requirements`` are inspected first, then ``hints``. A result is only
        returned when exactly one ResourceRequirement is found at that level.

        Args:
            elem (CommandLineTool or Workflow): CommandLineTool or Workflow

        Returns:
            The cwl_utils ResourceRequirement, a hint object whose ``class_``
            is "ResourceRequirement", a local ``ResourceRequirement`` built
            from a dict hint, or None when nothing unique is found
        """
        resource_requirement = []

        # look for requirements
        if elem.requirements is not None:
            resource_requirement = [
                requirement
                for requirement in elem.requirements
                if isinstance(
                    requirement,
                    (
                        cwl_utils.parser.cwl_v1_0.ResourceRequirement,
                        cwl_utils.parser.cwl_v1_1.ResourceRequirement,
                        cwl_utils.parser.cwl_v1_2.ResourceRequirement,
                    ),
                )
            ]

        if len(resource_requirement) == 1:
            return resource_requirement[0]

        # look for hints
        if elem.hints is not None:
            resource_requirement = []
            for hint in elem.hints:
                # Handle both dict and object types
                if isinstance(hint, dict):
                    if hint.get("class") == "ResourceRequirement":
                        resource_requirement.append(ResourceRequirement.from_dict(hint))
                elif hasattr(hint, 'class_'):
                    if hint.class_ == "ResourceRequirement":
                        resource_requirement.append(hint)

        if len(resource_requirement) == 1:
            return resource_requirement[0]

        return None

    def _collect_resources(self, resources, requirement, multiplier=None):
        """Append every truthy resource value of ``requirement`` to ``resources``.

        When ``multiplier`` is given, each value is multiplied by it first.
        """
        for resource_type in self._RESOURCE_KEYS:
            amount = getattr(requirement, resource_type)
            if not amount:
                continue
            resources[resource_type].append(
                amount if multiplier is None else amount * multiplier
            )

    def eval_resource(self):
        """
        Gather the resource requests declared across all workflows.

        For every Workflow element, the workflow's own ResourceRequirement is
        collected, followed by that of each step's ``run`` target. Values of
        scattered steps are multiplied by the ``SCATTER_MULTIPLIER``
        environment variable (default 2).

        Returns:
            dict: One list of collected values per resource key
        """
        resources = {resource_type: [] for resource_type in self._RESOURCE_KEYS}

        for elem in self.cwl:
            if not isinstance(
                elem,
                (
                    cwl_utils.parser.cwl_v1_0.Workflow,
                    cwl_utils.parser.cwl_v1_1.Workflow,
                    cwl_utils.parser.cwl_v1_2.Workflow,
                ),
            ):
                continue

            if resource_requirement := self.get_resource_requirement(elem):
                self._collect_resources(resources, resource_requirement)

            for step in elem.steps:
                # step.run is sliced as in the original code; get_object_by_id
                # reduces whatever remains to the id fragment.
                if resource_requirement := self.get_resource_requirement(
                    self.get_object_by_id(step.run[1:])
                ):
                    multiplier = (
                        int(os.getenv("SCATTER_MULTIPLIER", 2))
                        if step.scatter
                        else 1
                    )
                    self._collect_resources(
                        resources, resource_requirement, multiplier
                    )
        return resources
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class ZooConf:
    """Thin wrapper around the ZOO configuration dictionary."""

    def __init__(self, conf):
        """Keep ``conf`` and expose ``conf["lenv"]["Identifier"]`` as ``workflow_id``.

        Raises:
            KeyError: If ``lenv`` or its ``Identifier`` entry is missing
        """
        self.conf = conf
        lenv = self.conf["lenv"]
        self.workflow_id = lenv["Identifier"]
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
class ZooInputs:
    """Wrapper around the ZOO inputs dictionary with CWL-oriented conversions."""

    # String spellings (compared case-insensitively, surrounding whitespace
    # ignored) that denote a false boolean.
    _FALSE_STRINGS = frozenset({"", "false", "0", "no", "off"})

    def __init__(self, inputs):
        """
        Keep ``inputs`` and normalise multi-valued entries in place.

        Args:
            inputs: ZOO inputs dictionary (mutated: see the comment below)
        """
        # this conversion is necessary
        # because zoo converts array of length 1 to a string
        for inp in inputs:
            if (
                "maxOccurs" in inputs[inp].keys()
                and int(inputs[inp]["maxOccurs"]) > 1
                and not isinstance(inputs[inp]["value"], list)
            ):
                inputs[inp]["value"] = [inputs[inp]["value"]]

        self.inputs = inputs

    def get_input_value(self, key):
        """
        Return the ``value`` entry of input ``key``.

        Raises:
            KeyError: If the input or its ``value`` entry does not exist

        Returns:
            The stored value, or None when the lookup raises ``TypeError``
            (deliberate best-effort kept from the original implementation)
        """
        try:
            return self.inputs[key]["value"]
        except TypeError:
            return None

    @classmethod
    def _to_bool(cls, raw):
        """Convert ``raw`` to bool, reading strings such as "false" textually.

        ``bool("false")`` is True, so string values need explicit parsing.
        Non-string values keep their usual truthiness.
        """
        if isinstance(raw, str):
            return raw.strip().lower() not in cls._FALSE_STRINGS
        return bool(raw)

    def get_processing_parameters(self, workflow=None):
        """Returns a dict mapping each input key to its CWL-ready value

        Conversion rules, in order:

        * entries with a ``format`` (and no numeric/boolean ``dataType``)
          become ``{"format": ..., "value": ...}``;
        * entries with a ``dataType`` are cast to float/int/bool, element by
          element when the ``dataType`` is a list; the string "NULL" becomes
          None for scalar entries; other types are passed through;
        * entries with a ``cache_file`` become ``{"format": mimeType, "value": ...}``
          (one dict per element when ``isArray`` is "true");
        * entries with ``lowerCorner`` and ``upperCorner`` become an
          ``ogc-bbox`` dict;
        * anything else is passed through unchanged.

        Args:
            workflow: Optional CWL workflow object (currently unused, for future compatibility)
        """
        import json

        res = {}
        allowed_types = ["integer", "float", "boolean", "double"]
        converters = {
            "double": float,
            "float": float,
            "integer": int,
            "boolean": self._to_bool,
        }

        for key, value in self.inputs.items():
            if "format" in value and not (
                "dataType" in value and value["dataType"] in allowed_types
            ):
                res[key] = {
                    "format": value["format"],
                    "value": value["value"],
                }
            elif "dataType" in value:
                data_type = value["dataType"]
                if isinstance(data_type, list):
                    if data_type[0] in allowed_types:
                        convert = converters[data_type[0]]
                        res[key] = [convert(item) for item in value["value"]]
                    else:
                        # Non-numeric element types are passed through as-is.
                        res[key] = value["value"]
                elif value["value"] == "NULL":
                    res[key] = None
                elif data_type in allowed_types:
                    res[key] = converters[data_type](value["value"])
                else:
                    res[key] = value["value"]
            elif "cache_file" in value:
                if "isArray" in value and value["isArray"] == "true":
                    res[key] = [
                        {
                            "format": value["mimeType"][i] if "mimeType" in value else "text/plain",
                            "value": item,
                        }
                        for i, item in enumerate(value["value"])
                    ]
                else:
                    res[key] = {
                        "format": value.get("mimeType", "text/plain"),
                        "value": value["value"],
                    }
            elif "lowerCorner" in value and "upperCorner" in value:
                res[key] = {
                    "format": "ogc-bbox",
                    "bbox": json.loads(value["value"]),
                    "crs": value["crs"].replace("http://www.opengis.net/def/crs/OGC/1.3/", ""),
                }
            else:
                res[key] = value["value"]
        return res
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
class ZooOutputs:
    """Wrapper around the ZOO outputs dictionary."""

    def __init__(self, outputs):
        """
        Keep ``outputs`` and choose the key that receives the result.

        The first key of ``outputs`` is used; when ``outputs`` is empty the
        key "stac" is used and an empty entry is created for it.
        """
        self.outputs = outputs
        # deduce the output key
        output_keys = list(self.outputs.keys())
        if len(output_keys) > 0:
            self.output_key = output_keys[0]
        else:
            self.output_key = "stac"
            if "stac" not in self.outputs.keys():
                self.outputs["stac"] = {}

    def get_output_parameters(self):
        """Returns a dict mapping each output key to its current value

        Outputs that have no ``value`` yet (such as the empty "stac" entry
        created by ``__init__``) map to None instead of raising ``KeyError``.
        """
        return {key: value.get("value") for key, value in self.outputs.items()}

    def set_output(self, value):
        """set the output result value"""
        self.outputs[self.output_key]["value"] = value
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: zoo-runner-common
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Shared utilities for ZOO-Project CWL runners
|
|
5
|
+
Author-email: Aryan Khare <kharearyan78@gmail.com>, Gérald Fenoy <gerald.fenoy@geolabs.fr>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/ZOO-Project/zoo-runner-common
|
|
8
|
+
Project-URL: Documentation, https://zoo-project.github.io/zoo-runner-common/
|
|
9
|
+
Project-URL: Repository, https://github.com/ZOO-Project/zoo-runner-common
|
|
10
|
+
Project-URL: Issues, https://github.com/ZOO-Project/zoo-runner-common/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/ZOO-Project/zoo-runner-common/blob/main/CHANGELOG.md
|
|
12
|
+
Keywords: zoo-project,cwl,runner,workflow,ogc,api,processes
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
Requires-Dist: loguru>=0.7.0
|
|
24
|
+
|
|
25
|
+
# zoo-runner-common
|
|
26
|
+
|
|
27
|
+
A shared utility library for ZOO-Project CWL runners – centralizing reusable components across runners like **Calrissian**, **Argo Workflows**, and **WES**.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Overview
|
|
32
|
+
|
|
33
|
+
The `zoo-runner-common` repository provides core shared components used across multiple ZOO CWL runners. It **eliminates code duplication** by hosting:
|
|
34
|
+
|
|
35
|
+
- **Common base class** (`BaseRunner`) with 8+ shared methods
|
|
36
|
+
- **Zoo-specific configuration handlers** (`ZooConf`, `ZooInputs`, `ZooOutputs`)
|
|
37
|
+
- **CWL workflow parsing** and resource evaluation (`CWLWorkflow`)
|
|
38
|
+
- **Execution handler interface** (`ExecutionHandler`) for hooks
|
|
39
|
+
- **Service stubs** (`ZooStub`) to communicate with ZOO kernel
|
|
40
|
+
|
|
41
|
+
### Key Benefits
|
|
42
|
+
|
|
43
|
+
- ✅ **~437 lines of code duplication eliminated** across 3 runners
|
|
44
|
+
- ✅ **Single source of truth** for common functionality
|
|
45
|
+
- ✅ **Easier maintenance** - fix once, benefit everywhere
|
|
46
|
+
- ✅ **Consistent behavior** across all runners
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Directory Structure
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
zoo-runner-common/
|
|
54
|
+
├── base_runner.py # Abstract BaseRunner with common methods
|
|
55
|
+
├── zoo_conf.py # ZooConf, ZooInputs, ZooOutputs, CWLWorkflow
|
|
56
|
+
├── handlers.py # ExecutionHandler abstract base class
|
|
57
|
+
├── zoostub.py # ZooStub for ZOO kernel communication
|
|
58
|
+
└── __init__.py # Package initialization
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
|
|
65
|
+
### As a dependency (recommended)
|
|
66
|
+
|
|
67
|
+
Add to your runner's `pyproject.toml`:
|
|
68
|
+
|
|
69
|
+
```toml
|
|
70
|
+
[project]
|
|
71
|
+
dependencies = [
|
|
72
|
+
"zoo-runner-common @ git+https://github.com/ZOO-Project/zoo-runner-common.git@main",
|
|
73
|
+
]
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Local development
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
export PYTHONPATH="$PYTHONPATH:/path/to/zoo-runner-common"
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Or add the checkout to `sys.path` at runtime:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
import os
import sys
|
|
86
|
+
sys.path.insert(0, os.path.abspath('../zoo-runner-common'))
|
|
87
|
+
from base_runner import BaseRunner
|
|
88
|
+
from zoo_conf import ZooConf, ZooInputs, ZooOutputs, CWLWorkflow
|
|
89
|
+
from handlers import ExecutionHandler
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Components
|
|
95
|
+
|
|
96
|
+
### BaseRunner (base_runner.py)
|
|
97
|
+
|
|
98
|
+
Abstract base class providing common functionality for all CWL runners:
|
|
99
|
+
|
|
100
|
+
**Methods provided:**
|
|
101
|
+
- `get_workflow_id()` - Get workflow identifier
|
|
102
|
+
- `get_workflow_inputs(mandatory=False)` - Get workflow input parameters
|
|
103
|
+
- `get_max_cores()` - Get maximum CPU cores from CWL
|
|
104
|
+
- `get_max_ram()` - Get maximum RAM from CWL
|
|
105
|
+
- `get_volume_size(unit='Mi')` - Calculate volume size (supports Mi/Gi)
|
|
106
|
+
- `assert_parameters(mandatory=True)` - Validate required inputs
|
|
107
|
+
- `get_processing_parameters()` - Get execution parameters
|
|
108
|
+
- `get_namespace_name()` - Generate unique namespace name
|
|
109
|
+
- `update_status(progress, message)` - Update execution status
|
|
110
|
+
- `prepare()` - Pre-execution preparation with hooks
|
|
111
|
+
- `finalize(log, output, usage_report, tool_logs)` - Post-execution finalization
|
|
112
|
+
|
|
113
|
+
**Abstract methods (to implement):**
|
|
114
|
+
- `wrap()` - Wrap CWL with stage-in/stage-out
|
|
115
|
+
- `execute()` - Execute the workflow
|
|
116
|
+
|
|
117
|
+
### Zoo Configuration Classes (zoo_conf.py)
|
|
118
|
+
|
|
119
|
+
| Class | Description |
|
|
120
|
+
|-------|-------------|
|
|
121
|
+
| `ZooConf` | Wraps ZOO configuration dictionary |
|
|
122
|
+
| `ZooInputs` | Handles input parameter conversion and validation |
|
|
123
|
+
| `ZooOutputs` | Manages output parameters |
|
|
124
|
+
| `CWLWorkflow` | Parses CWL, evaluates resources, handles scatter |
|
|
125
|
+
| `ResourceRequirement` | CWL resource hints dataclass |
|
|
126
|
+
|
|
127
|
+
**Key Features:**
|
|
128
|
+
- Advanced type conversion (int, float, bool, arrays)
|
|
129
|
+
- File handling with format metadata
|
|
130
|
+
- OGC bounding box support
|
|
131
|
+
- NULL value handling
|
|
132
|
+
- Array inputs with `isArray`
|
|
133
|
+
|
|
134
|
+
### ExecutionHandler (handlers.py)
|
|
135
|
+
|
|
136
|
+
Abstract base class for execution customization:
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
class ExecutionHandler(ABC):
|
|
140
|
+
@abstractmethod
|
|
141
|
+
def pre_execution_hook(self): pass
|
|
142
|
+
|
|
143
|
+
@abstractmethod
|
|
144
|
+
def post_execution_hook(self, log, output, usage_report, tool_logs): pass
|
|
145
|
+
|
|
146
|
+
@abstractmethod
|
|
147
|
+
def get_secrets(self): pass
|
|
148
|
+
|
|
149
|
+
@abstractmethod
|
|
150
|
+
def get_pod_env_vars(self): pass
|
|
151
|
+
|
|
152
|
+
@abstractmethod
|
|
153
|
+
def get_pod_node_selector(self): pass
|
|
154
|
+
|
|
155
|
+
@abstractmethod
|
|
156
|
+
def handle_outputs(self, log, output, usage_report, tool_logs): pass
|
|
157
|
+
|
|
158
|
+
@abstractmethod
|
|
159
|
+
def get_additional_parameters(self): pass
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Usage Example
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
from base_runner import BaseRunner
|
|
168
|
+
from handlers import ExecutionHandler
|
|
169
|
+
|
|
170
|
+
class MyCustomRunner(BaseRunner):
|
|
171
|
+
def wrap(self):
|
|
172
|
+
# Implement CWL wrapping logic
|
|
173
|
+
return wrapped_cwl
|
|
174
|
+
|
|
175
|
+
def execute(self):
|
|
176
|
+
# Prepare execution
|
|
177
|
+
cwljob = self.prepare()
|
|
178
|
+
|
|
179
|
+
# Execute workflow (custom logic)
|
|
180
|
+
result = my_executor.run(cwljob.cwl, cwljob.params)
|
|
181
|
+
|
|
182
|
+
# Finalize
|
|
183
|
+
self.finalize(log, output, usage_report, tool_logs)
|
|
184
|
+
return result
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
## Runners Using zoo-runner-common
|
|
190
|
+
|
|
191
|
+
| Runner | Backend | Repository |
|
|
192
|
+
|--------|---------|------------|
|
|
193
|
+
| **zoo-calrissian-runner** | Calrissian/Kubernetes | [EOEPCA/zoo-calrissian-runner](https://github.com/EOEPCA/zoo-calrissian-runner) |
|
|
194
|
+
| **zoo-argowf-runner** | Argo Workflows | [ZOO-Project/zoo-argowf-runner](https://github.com/ZOO-Project/zoo-argowf-runner) |
|
|
195
|
+
| **zoo-wes-runner** | WES/TOIL | [ZOO-Project/zoo-wes-runner](https://github.com/ZOO-Project/zoo-wes-runner) |
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Module Reference
|
|
200
|
+
|
|
201
|
+
| Module | Description |
|
|
202
|
+
| ---- | -------- |
|
|
203
|
+
| BaseRunner | Abstract runner blueprint all runners must extend |
|
|
204
|
+
| ZooConf | Parses conf.json, manages job ID, state |
|
|
205
|
+
| ZooInputs | Parses inputs.json, formats CWL-style parameters |
|
|
206
|
+
| ZooOutputs | Handles writing and setting output results |
|
|
207
|
+
| CWLWorkflow | Loads, parses, and analyzes CWL workflows |
|
|
208
|
+
| ResourceRequirement | Parses and evaluates CWL resource hints/requirements |
|
|
209
|
+
| wrapper_utils | Provides helper to build wrapped CWL pipeline |
|
|
210
|
+
| ZooStub | Interacts with ZOO's lenv for progress updates |
|
|
211
|
+
|
|
212
|
+
---
|
|
213
|
+
|
|
214
|
+
## Used By
|
|
215
|
+
- zoo-wes-runner
|
|
216
|
+
- zoo-argowf-runner
|
|
217
|
+
- zoo-calrissian-runner
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
base_runner.py,sha256=DF7BE_UHgILhxY-zNxCAjOkYqR5mzAmy6I9dZgo0Qqs,10674
|
|
2
|
+
zoo_conf.py,sha256=WD7-wRCogxDvIhRN2m8ILCwHPm3JdlsqNnFj6F1sH3o,11655
|
|
3
|
+
zoostub.py,sha256=6c0VClH9MjkOxpCaA1z8Kf_cIP3k4k0nBWkGxhN2mAY,926
|
|
4
|
+
zoo_runner_common-0.1.1.dist-info/METADATA,sha256=QyS2m7_aWtm8gQf6vBmtXmwiDFduRq-qnjZ67uvAvaE,7001
|
|
5
|
+
zoo_runner_common-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
6
|
+
zoo_runner_common-0.1.1.dist-info/top_level.txt,sha256=TV8APKVcUq2F4UCQo5eW5b_0K3-_uukb7VOl3GF8MzU,29
|
|
7
|
+
zoo_runner_common-0.1.1.dist-info/RECORD,,
|
zoostub.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from loguru import logger
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class ZooStub:
    """Stand-in for the ``zoo`` module used when the real one cannot be imported.

    Exposes the service status codes as attributes, prints status updates
    and forwards log calls to the loguru ``logger``.
    """

    def __init__(self):
        """Define the service status codes, one distinct value per status."""
        self.SERVICE_ACCEPTED = 0
        self.SERVICE_STARTED = 1
        self.SERVICE_PAUSED = 2
        self.SERVICE_SUCCEEDED = 3
        self.SERVICE_FAILED = 4
        # Code 5 used to be a second assignment to SERVICE_PAUSED, which
        # overwrote the value 2 above and left this status without a name.
        self.SERVICE_DISMISSED = 5
        self.SERVICE_DEPLOYED = 6
        self.SERVICE_UNDEPLOYED = 7

    def update_status(self, conf, progress):
        """Print the progress value; ``conf`` is accepted but not used."""
        print(f"Status {progress}")

    def _(self, message):
        """Print ``message``; nothing is returned."""
        print(f"invoked _ with {message}")

    def trace(self, message):
        """Log ``message`` at TRACE level."""
        logger.trace(message)

    def debug(self, message):
        """Log ``message`` at DEBUG level."""
        logger.debug(message)

    def info(self, message):
        """Log ``message`` at INFO level."""
        logger.info(message)

    def success(self, message):
        """Log ``message`` at SUCCESS level."""
        logger.success(message)

    def warning(self, message):
        """Log ``message`` at WARNING level."""
        logger.warning(message)

    def error(self, message):
        """Log ``message`` at ERROR level."""
        logger.error(message)

    def critical(self, message):
        """Log ``message`` at CRITICAL level."""
        logger.critical(message)
|