toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/server/wes/tasks.py
CHANGED
@@ -20,7 +20,7 @@ import subprocess
 import sys
 import tempfile
 import zipfile
-from typing import Any,
+from typing import Any, Optional, Union
 from urllib.parse import urldefrag
 
 from celery.exceptions import SoftTimeLimitExceeded  # type: ignore
@@ -29,13 +29,15 @@ import toil.server.wes.amazon_wes_utils as amazon_wes_utils
 from toil.common import Toil
 from toil.jobStores.utils import generate_locator
 from toil.server.celery_app import celery
-from toil.server.utils import (
-
-
-
-
-
-
+from toil.server.utils import (
+    WorkflowStateMachine,
+    connect_to_workflow_state_store,
+    download_file_from_internet,
+    download_file_from_s3,
+    get_file_class,
+    get_iso_time,
+    link_file,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -45,6 +47,7 @@ logger = logging.getLogger(__name__)
 # our patience for CANCELING WES workflows to time out to CANCELED.
 WAIT_FOR_DEATH_TIMEOUT = 20
 
+
 class ToilWorkflowRunner:
     """
     A class to represent a workflow runner to run the requested workflow.
@@ -53,7 +56,14 @@ class ToilWorkflowRunner:
     that command, and collecting the outputs of the resulting workflow run.
     """
 
-    def __init__(
+    def __init__(
+        self,
+        base_scratch_dir: str,
+        state_store_url: str,
+        workflow_id: str,
+        request: dict[str, Any],
+        engine_options: list[str],
+    ):
         """
         Make a new ToilWorkflowRunner to actually run a workflow leader based
         on a WES request.
@@ -85,8 +95,11 @@ class ToilWorkflowRunner:
         self.out_dir = os.path.join(self.scratch_dir, "outputs")
 
         # Compose the right kind of job store to use it the user doesn't specify one.
-        default_type = os.getenv(
-        self.default_job_store = generate_locator(
+        default_type = os.getenv("TOIL_WES_JOB_STORE_TYPE", "file")
+        self.default_job_store = generate_locator(
+            default_type,
+            local_suggestion=os.path.join(self.scratch_dir, "toil_job_store"),
+        )
 
         self.job_store = self.default_job_store
 
@@ -112,7 +125,9 @@ class ToilWorkflowRunner:
             logger.info("Linking workflow from filesystem.")
             link_file(src=src_url[7:], dest=dest)
         elif src_url.startswith(("http://", "https://")):
-            logger.info(
+            logger.info(
+                f"Downloading workflow_url from the Internet. src: {src_url} dst: {dest}"
+            )
             download_file_from_internet(src=src_url, dest=dest, content_type="text/")
         elif src_url.startswith("s3://"):
             logger.info("Downloading workflow_url from Amazon S3.")
@@ -127,7 +142,9 @@ class ToilWorkflowRunner:
 
         return dest
 
-    def sort_options(
+    def sort_options(
+        self, workflow_engine_parameters: Optional[dict[str, Optional[str]]] = None
+    ) -> list[str]:
         """
         Sort the command line arguments in the order that can be recognized by
         the workflow execution engine.
@@ -188,7 +205,7 @@ class ToilWorkflowRunner:
 
         return options
 
-    def initialize_run(self) ->
+    def initialize_run(self) -> list[str]:
         """
         Write workflow and input files and construct a list of shell commands
         to be executed. Return that list of shell commands that should be
@@ -204,56 +221,69 @@ class ToilWorkflowRunner:
         # Obtain main workflow file to a path (no-scheme file URL).
         workflow_url = self.write_workflow(src_url=self.request["workflow_url"])
 
-        if os.path.basename(workflow_url) == "workflow.zip" and zipfile.is_zipfile(
+        if os.path.basename(workflow_url) == "workflow.zip" and zipfile.is_zipfile(
+            workflow_url
+        ):
             # We've been sent a zip file. We should interpret this as an Amazon Genomics CLI-style zip file.
             # Extract everything next to the zip and find and open relvant files.
             logger.info("Extracting and parsing Amazon-style workflow bundle...")
-            zip_info = amazon_wes_utils.parse_workflow_zip_file(
+            zip_info = amazon_wes_utils.parse_workflow_zip_file(
+                workflow_url, self.wf_type
+            )
 
             # Now parse Amazon's internal format into our own.
 
             # Find the real workflow source for the entrypoint file
-            if
+            if "workflowSource" in zip_info["files"]:
                 # They sent a file, which has been opened, so grab its path
-                workflow_url = zip_info[
+                workflow_url = zip_info["files"]["workflowSource"].name
                 logger.info("Workflow source file: '%s'", workflow_url)
-            elif
+            elif "workflowUrl" in zip_info["data"]:
                 # They are pointing to another URL.
                 # TODO: What does Amazon expect this to mean? Are we supposed to recurse?
                 # For now just forward it.
-                workflow_url = zip_info[
+                workflow_url = zip_info["data"]["workflowUrl"]
                 logger.info("Workflow reference URL: '%s'", workflow_url)
             else:
                 # The parser is supposed to throw so we can't get here
-                raise RuntimeError(
-
-
+                raise RuntimeError(
+                    "Parser could not find workflow source or URL in zip"
+                )
+
+            if (
+                "workflowInputFiles" in zip_info["files"]
+                and len(zip_info["files"]["workflowInputFiles"]) > 0
+            ):
                 # The bundle contains a list of input files.
                 # We interpret these as JSON, and layer them on top of each
                 # other, and then apply workflow_params from the request.
-                logger.info(
-
+                logger.info(
+                    "Workflow came with %d bundled inputs files; coalescing final parameters",
+                    len(zip_info["files"]["workflowInputFiles"]),
+                )
 
                 coalesced_parameters = {}
-                for binary_file in zip_info[
+                for binary_file in zip_info["files"]["workflowInputFiles"]:
                     try:
                         # Read each input file as a JSON
                         loaded_parameters = json.load(binary_file)
                     except json.JSONDecodeError as e:
-                        raise RuntimeError(
+                        raise RuntimeError(
+                            f"Could not parse inputs JSON {os.path.basename(binary_file.name)}: {e}"
+                        )
                     # And merge them together in order
                     coalesced_parameters.update(loaded_parameters)
                 # Then apply and replace the parameters that came with the request
                 coalesced_parameters.update(workflow_params)
                 workflow_params = coalesced_parameters
 
-            if
+            if "workflowOptions" in zip_info["files"]:
                 # The bundle contains an options JSON. We interpret these as
                 # defaults for workflow_engine_parameters.
                 logger.info(f"Workflow came with bundled options JSON")
                 try:
                     # Read as a JSON
-                    loaded_options = json.load(zip_info[
+                    loaded_options = json.load(zip_info["files"]["workflowOptions"])
                 except json.JSONDecodeError as e:
                     raise RuntimeError(f"Could not parse options JSON: {e}")
                 # Apply and replace the engine parameters that came with the
@@ -275,13 +305,9 @@ class ToilWorkflowRunner:
 
         # construct and return the command to run
         if self.wf_type == "cwl":
-            command_args =
-                ["toil-cwl-runner"] + options + [workflow_url, input_json]
-            )
+            command_args = ["toil-cwl-runner"] + options + [workflow_url, input_json]
        elif self.wf_type == "wdl":
-            command_args =
-                ["toil-wdl-runner"] + options + [workflow_url, input_json]
-            )
+            command_args = ["toil-wdl-runner"] + options + [workflow_url, input_json]
         elif self.wf_type == "py":
             command_args = ["python", workflow_url] + options
         else:
@@ -290,7 +316,9 @@ class ToilWorkflowRunner:
 
         return command_args
 
-    def call_cmd(
+    def call_cmd(
+        self, cmd: Union[list[str], str], cwd: str
+    ) -> "subprocess.Popen[bytes]":
         """
         Calls a command with Popen. Writes stdout, stderr, and the command to
         separate files.
@@ -300,7 +328,9 @@ class ToilWorkflowRunner:
 
         with open(stdout_f, "w") as stdout, open(stderr_f, "w") as stderr:
             logger.info(f"Calling: '{' '.join(cmd)}'")
-            process = subprocess.Popen(
+            process = subprocess.Popen(
+                cmd, stdout=stdout, stderr=stderr, close_fds=True, cwd=cwd
+            )
 
         return process
 
@@ -348,7 +378,7 @@ class ToilWorkflowRunner:
         self.store.set("end_time", get_iso_time())
         self.store.set("exit_code", str(exit_code))
 
-        logger.info(
+        logger.info("Toil child finished with code %s", exit_code)
 
         if exit_code == 0:
             self.state_machine.send_complete()
@@ -387,7 +417,14 @@ class ToilWorkflowRunner:
 
         self.write_scratch_file("outputs.json", json.dumps(output_obj))
 
-
+
+def run_wes_task(
+    base_scratch_dir: str,
+    state_store_url: str,
+    workflow_id: str,
+    request: dict[str, Any],
+    engine_options: list[str],
+) -> str:
     """
     Run a requested workflow.
 
@@ -402,8 +439,13 @@ def run_wes_task(base_scratch_dir: str, state_store_url: str, workflow_id: str,
 
     logger.info("Starting WES workflow")
 
-    runner = ToilWorkflowRunner(
-
+    runner = ToilWorkflowRunner(
+        base_scratch_dir,
+        state_store_url,
+        workflow_id,
+        request=request,
+        engine_options=engine_options,
+    )
 
     try:
         runner.run()
@@ -416,24 +458,27 @@ def run_wes_task(base_scratch_dir: str, state_store_url: str, workflow_id: str,
             runner.write_output_files()
     except (KeyboardInterrupt, SystemExit, SoftTimeLimitExceeded):
         # We canceled the workflow run
-        logger.info(
+        logger.info("Canceling the workflow")
         runner.state_machine.send_canceled()
     except Exception:
         # The workflow run broke. We still count as the executor here.
-        logger.exception(
+        logger.exception("Running Toil produced an exception.")
         runner.state_machine.send_executor_error()
         raise
 
     return runner.get_state()
 
+
 # Wrap the task function as a Celery task
 run_wes = celery.task(name="run_wes")(run_wes_task)
 
+
 def cancel_run(task_id: str) -> None:
     """
     Send a SIGTERM signal to the process that is running task_id.
     """
-    celery.control.terminate(task_id, signal=
+    celery.control.terminate(task_id, signal="SIGUSR1")
+
 
 class TaskRunner:
     """
@@ -443,13 +488,13 @@ class TaskRunner:
     """
 
     @staticmethod
-    def run(
+    def run(
+        args: tuple[str, str, str, dict[str, Any], list[str]], task_id: str
+    ) -> None:
         """
         Run the given task args with the given ID on Celery.
         """
-        run_wes.apply_async(args=args,
-                            task_id=task_id,
-                            ignore_result=True)
+        run_wes.apply_async(args=args, task_id=task_id, ignore_result=True)
 
     @staticmethod
     def cancel(task_id: str) -> None:
@@ -467,8 +512,10 @@ class TaskRunner:
         # Nothing to do for Celery
         return True
 
+
 # If Celery can't be set up, we can just use this fake version instead.
 
+
 class MultiprocessingTaskRunner(TaskRunner):
     """
     Version of TaskRunner that just runs tasks with Multiprocessing.
@@ -478,11 +525,13 @@ class MultiprocessingTaskRunner(TaskRunner):
     ToilWorkflowRunner) don't poll for it.
     """
 
-    _id_to_process:
-    _id_to_log:
+    _id_to_process: dict[str, multiprocessing.Process] = {}
+    _id_to_log: dict[str, str] = {}
 
     @staticmethod
-    def set_up_and_run_task(
+    def set_up_and_run_task(
+        output_path: str, args: tuple[str, str, str, dict[str, Any], list[str]]
+    ) -> None:
         """
         Set up logging for the process into the given file and then call
         run_wes_task with the given arguments.
@@ -499,8 +548,8 @@ class MultiprocessingTaskRunner(TaskRunner):
         # that we were told about, so the server can come get the log if we
         # unexpectedly die.
 
-        output_file = open(output_path,
-        output_file.write(
+        output_file = open(output_path, "w")
+        output_file.write("Initializing task log\n")
         output_file.flush()
 
         # Take over logging.
@@ -523,22 +572,24 @@ class MultiprocessingTaskRunner(TaskRunner):
         signal.signal(signal.SIGTERM, handle_sigterm)
 
         try:
-            logger.info(
+            logger.info("Running task")
             output_file.flush()
             run_wes_task(*args)
         except Exception:
-            logger.exception(
+            logger.exception("Exception in task!")
             raise
         else:
             # If the task does not crash, clean up the log
             os.unlink(output_path)
         finally:
-            logger.debug(
+            logger.debug("Finishing task log")
             output_file.flush()
             output_file.close()
 
     @classmethod
-    def run(
+    def run(
+        cls, args: tuple[str, str, str, dict[str, Any], list[str]], task_id: str
+    ) -> None:
         """
         Run the given task args with the given ID.
         """
@@ -549,9 +600,13 @@ class MultiprocessingTaskRunner(TaskRunner):
         # Store the log filename before the process, like is_ok() expects.
         cls._id_to_log[task_id] = path
 
-        logger.info(
+        logger.info(
+            "Starting task %s in a process that should log to %s", task_id, path
+        )
 
-        cls._id_to_process[task_id] = multiprocessing.Process(
+        cls._id_to_process[task_id] = multiprocessing.Process(
+            target=cls.set_up_and_run_task, args=(path, args)
+        )
         cls._id_to_process[task_id].start()
 
     @classmethod
@@ -581,9 +636,14 @@ class MultiprocessingTaskRunner(TaskRunner):
         # being canceled by cancel(), then it is OK.
         ACCEPTABLE_EXIT_CODES = [0, -signal.SIGTERM]
 
-        if
+        if (
+            process.exitcode is not None
+            and process.exitcode not in ACCEPTABLE_EXIT_CODES
+        ):
             # Something went wring in the task and it couldn't handle it.
-            logger.error(
+            logger.error(
+                "Process for running %s failed with code %s", task_id, process.exitcode
+            )
             try:
                 for line in open(cls._id_to_log[task_id]):
                     # Dump the task log
@@ -596,5 +656,3 @@ class MultiprocessingTaskRunner(TaskRunner):
             return False
 
         return True
-
-
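
The two sketches below are editorial additions, not part of either wheel; they restate API shapes visible in the diff above, and every path, ID, and request value in them is a hypothetical placeholder.

First, the default job store composition that 8.0.0 adds to ToilWorkflowRunner.__init__: the store type comes from the TOIL_WES_JOB_STORE_TYPE environment variable (falling back to "file"), and generate_locator receives a local suggestion under the workflow's scratch directory.

import os

from toil.jobStores.utils import generate_locator

scratch_dir = "/tmp/wes-scratch/example-workflow"  # hypothetical scratch directory
default_type = os.getenv("TOIL_WES_JOB_STORE_TYPE", "file")
# With the variable unset, this yields a file job store locator under scratch_dir.
default_job_store = generate_locator(
    default_type,
    local_suggestion=os.path.join(scratch_dir, "toil_job_store"),
)

Second, a dispatch sketch built around the new tuple[str, str, str, dict[str, Any], list[str]] annotation shared by TaskRunner.run and MultiprocessingTaskRunner.run; a real WES request dict carries more keys than the one shown here.

from typing import Any

from toil.server.wes.tasks import MultiprocessingTaskRunner

workflow_id = "example-workflow"  # hypothetical workflow/task ID
args: tuple[str, str, str, dict[str, Any], list[str]] = (
    "/tmp/wes-scratch",  # base_scratch_dir (hypothetical)
    "file:/tmp/wes-scratch/state",  # state_store_url (hypothetical)
    workflow_id,
    {"workflow_url": "https://example.org/workflow.cwl"},  # partial request body
    ["--logLevel=INFO"],  # engine_options forwarded to the Toil runner
)
# Spawns run_wes_task(*args) in a child process tracked under task_id.
MultiprocessingTaskRunner.run(args, task_id=workflow_id)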