toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +39 -13
- toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/lsf.py +7 -7
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +87 -16
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +44 -8
- toil/common.py +544 -753
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +595 -574
- toil/cwl/utils.py +55 -10
- toil/exceptions.py +1 -1
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +88 -14
- toil/fileStores/cachingFileStore.py +610 -549
- toil/fileStores/nonCachingFileStore.py +46 -22
- toil/job.py +182 -101
- toil/jobStores/abstractJobStore.py +161 -95
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +6 -6
- toil/jobStores/fileJobStore.py +116 -18
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +87 -56
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +26 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +49 -2
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +9 -2
- toil/lib/threading.py +101 -38
- toil/options/common.py +736 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +37 -0
- toil/provisioners/abstractProvisioner.py +9 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +41 -9
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +24 -8
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +438 -223
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +2 -3
- toil/test/jobStores/jobStoreTest.py +34 -21
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +12 -17
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
- toil/test/options/options.py +37 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +23 -11
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +23 -3
- toil/test/wdl/wdltoil_test.py +223 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +445 -305
- toil/utils/toilStatus.py +2 -5
- toil/version.py +10 -10
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +1257 -492
- toil/worker.py +55 -46
- toil-6.1.0.dist-info/METADATA +124 -0
- toil-6.1.0.dist-info/RECORD +241 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/METADATA +0 -118
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/test/wdl/wdltoil_test.py
CHANGED
@@ -3,23 +3,44 @@ import os
 import shutil
 import subprocess
 import unittest
-import
-import
-
+from uuid import uuid4
+from typing import Optional
+
+from unittest.mock import patch
+from typing import Any, Dict, List, Set
 
 import pytest
 
-from toil.
+from toil.provisioners import cluster_factory
+from toil.test import (ToilTest,
+                       needs_docker_cuda,
+                       needs_google_storage,
+                       needs_singularity_or_docker,
+                       slow, integrative)
+from toil.test.provisioners.clusterTest import AbstractClusterTest
 from toil.version import exactPython
-
-
+from toil.wdl.wdltoil import WDLSectionJob, WDLWorkflowGraph
+
+
+class BaseWDLTest(ToilTest):
+    """Base test class for WDL tests."""
 
+    def setUp(self) -> None:
+        """Runs anew before each test to create farm fresh temp dirs."""
+        self.output_dir = os.path.join('/tmp/', 'toil-wdl-test-' + str(uuid4()))
+        os.makedirs(self.output_dir)
 
-
+    def tearDown(self) -> None:
+        if os.path.exists(self.output_dir):
+            shutil.rmtree(self.output_dir)
+
+
+class WDLConformanceTests(BaseWDLTest):
     """
-
+    WDL conformance tests for Toil.
     """
     wdl_dir = "wdl-conformance-tests"
+
     @classmethod
     def setUpClass(cls) -> None:
 
@@ -47,7 +68,7 @@ class ToilConformanceTests(toil.test.wdl.toilwdlTest.BaseToilWdlTest):
         p = subprocess.run(self.base_command + ["-v", "1.0", "-n", tests_to_run], capture_output=True)
 
         if p.returncode != 0:
-            print(p.stdout)
+            print(p.stdout.decode('utf-8', errors='replace'))
 
         p.check_returncode()
 
@@ -58,7 +79,7 @@ class ToilConformanceTests(toil.test.wdl.toilwdlTest.BaseToilWdlTest):
         p = subprocess.run(self.base_command + ["-v", "1.1", "-n", tests_to_run], capture_output=True)
 
         if p.returncode != 0:
-            print(p.stdout)
+            print(p.stdout.decode('utf-8', errors='replace'))
 
         p.check_returncode()
 
@@ -69,26 +90,25 @@ class ToilConformanceTests(toil.test.wdl.toilwdlTest.BaseToilWdlTest):
         shutil.rmtree("wdl-conformance-tests")
 
 
-class
-    """
-    Version of the old Toil WDL tests that tests the new MiniWDL-based implementation.
-    """
+class WDLTests(BaseWDLTest):
+    """Tests for Toil's MiniWDL-based implementation."""
 
     @classmethod
     def setUpClass(cls) -> None:
         """Runs once for all tests."""
        cls.base_command = [exactPython, '-m', 'toil.wdl.wdltoil']
 
-    # We inherit a testMD5sum but it is going to need Singularity
-    #
+    # We inherit a testMD5sum but it is going to need Singularity or Docker
+    # now. And also needs to have a WDL 1.0+ WDL file. So we replace it.
     @needs_singularity_or_docker
-    def
+    def test_MD5sum(self):
         """Test if Toil produces the same outputs as known good outputs for WDL's
         GATK tutorial #1."""
         wdl = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.1.0.wdl')
         json_file = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.json')
 
-        result_json = subprocess.check_output(
+        result_json = subprocess.check_output(
+            self.base_command + [wdl, json_file, '-o', self.output_dir, '--logDebug', '--retryCount=0'])
         result = json.loads(result_json)
 
         assert 'ga4ghMd5.value' in result
@@ -96,25 +116,15 @@ class WdlToilTest(toil.test.wdl.toilwdlTest.ToilWdlTest):
         assert os.path.exists(result['ga4ghMd5.value'])
         assert os.path.basename(result['ga4ghMd5.value']) == 'md5sum.txt'
 
-    def test_empty_file_path(self):
-        """Test if empty File type inputs are protected against"""
-        wdl = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.1.0.wdl')
-        json_file = os.path.abspath('src/toil/test/wdl/md5sum/empty_file.json')
-
-        p = subprocess.Popen(self.base_command + [wdl, json_file, '-o', self.output_dir, '--logDebug'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        stdout, stderr = p.communicate()
-        retval = p.wait()
-
-        assert retval != 0
-        assert b'Could not find' in stderr
-
     @needs_singularity_or_docker
-    def test_miniwdl_self_test(self):
+    def test_miniwdl_self_test(self, extra_args: Optional[List[str]] = None) -> None:
         """Test if the MiniWDL self test runs and produces the expected output."""
         wdl_file = os.path.abspath('src/toil/test/wdl/miniwdl_self_test/self_test.wdl')
         json_file = os.path.abspath('src/toil/test/wdl/miniwdl_self_test/inputs.json')
 
-        result_json = subprocess.check_output(
+        result_json = subprocess.check_output(
+            self.base_command + [wdl_file, json_file, '--logDebug', '-o', self.output_dir, '--outputDialect',
+                                 'miniwdl'] + (extra_args or []))
         result = json.loads(result_json)
 
         # Expect MiniWDL-style output with a designated "dir"
@@ -138,10 +148,17 @@ class WdlToilTest(toil.test.wdl.toilwdlTest.ToilWdlTest):
         assert 'hello_caller.messages' in outputs
         assert outputs['hello_caller.messages'] == ["Hello, Alyssa P. Hacker!", "Hello, Ben Bitdiddle!"]
 
+    @needs_singularity_or_docker
+    def test_miniwdl_self_test_by_reference(self) -> None:
+        """
+        Test if the MiniWDL self test works when passing input files by URL reference.
+        """
+        self.test_miniwdl_self_test(extra_args=["--referenceInputs=True"])
+
     @slow
     @needs_docker_cuda
     def test_giraffe_deepvariant(self):
-        """Test if Giraffe and
+        """Test if Giraffe and GPU DeepVariant run. This could take 25 minutes."""
         # TODO: enable test if nvidia-container-runtime and Singularity are installed but Docker isn't.
 
         json_dir = self._createTempDir()
@@ -164,7 +181,8 @@ class WdlToilTest(toil.test.wdl.toilwdlTest.ToilWdlTest):
             "GiraffeDeepVariant.runDeepVariantCallVariants.in_dv_gpu_container": "google/deepvariant:1.3.0-gpu"
         })
 
-        result_json = subprocess.check_output(
+        result_json = subprocess.check_output(
+            self.base_command + [wdl_file, json_file, '-o', self.output_dir, '--outputDialect', 'miniwdl'])
         result = json.loads(result_json)
 
         # Expect MiniWDL-style output with a designated "dir"
@@ -184,7 +202,7 @@ class WdlToilTest(toil.test.wdl.toilwdlTest.ToilWdlTest):
     @slow
     @needs_singularity_or_docker
     def test_giraffe(self):
-        """Test if Giraffe runs. This could take 12 minutes. Also we scale it down."""
+        """Test if Giraffe runs. This could take 12 minutes. Also we scale it down but it still demands lots of memory."""
         # TODO: enable test if nvidia-container-runtime and Singularity are installed but Docker isn't.
 
         json_dir = self._createTempDir()
@@ -192,7 +210,9 @@ class WdlToilTest(toil.test.wdl.toilwdlTest.ToilWdlTest):
         wdl_file = f"{base_uri}/workflows/giraffe.wdl"
         json_file = f"{base_uri}/params/giraffe.json"
 
-        result_json = subprocess.check_output(
+        result_json = subprocess.check_output(
+            self.base_command + [wdl_file, json_file, '-o', self.output_dir, '--outputDialect', 'miniwdl', '--scale',
+                                 '0.1'])
         result = json.loads(result_json)
 
         # Expect MiniWDL-style output with a designated "dir"
@@ -224,17 +244,175 @@ class WdlToilTest(toil.test.wdl.toilwdlTest.ToilWdlTest):
         assert os.path.exists(result['ga4ghMd5.value'])
         assert os.path.basename(result['ga4ghMd5.value']) == 'md5sum.txt'
 
-    def
-        """
-
-
+    def test_coalesce(self):
+        """
+        Test if WDLSectionJob can coalesce WDL decls.
+
+        White box test; will need to be changed or removed if the WDL interpreter changes.
+        """
+
+        # Set up data structures for our fake workflow graph to pull from.
+        # This has all decl-type nodes
+        all_decls: Set[str] = set()
+        # And this has all transitive dependencies for all nodes.
+        all_deps: Dict[str, Set[str]] = {}
+
+        def mock_is_decl(self: Any, node_id: str) -> bool:
+            """
+            Replacement function to determine if a node is a decl or not.
+            """
+            return node_id in all_decls
+
+        def mock_get_transitive_dependencies(self: Any, node_id: str) -> Set[str]:
+            """
+            Replacement function to get all the transitive dependencies of a node.
+            """
+            return all_deps[node_id]
+
+        # These are the only two methods coalesce_nodes calls, so we can
+        # replace them to ensure our graph is used without actually needing to
+        # make any WDL objects for it.
+        #
+        # If that changes, the test will need to change! Maybe then it will be
+        # worth extracting a base type for this interface.
+        with patch.object(WDLWorkflowGraph, 'is_decl', mock_is_decl):
+            with patch.object(WDLWorkflowGraph, 'get_transitive_dependencies', mock_get_transitive_dependencies):
+                with self.subTest(msg="Two unrelated decls can coalesce"):
+                    # Set up two unrelated decls
+                    all_decls = {"decl1", "decl2"}
+                    all_deps = {
+                        "decl1": set(),
+                        "decl2": set()
+                    }
+
+                    result = WDLSectionJob.coalesce_nodes(["decl1", "decl2"], WDLWorkflowGraph([]))
+
+                    # Make sure they coalesced
+                    assert len(result) == 1
+                    assert "decl1" in result[0]
+                    assert "decl2" in result[0]
+
+                with self.subTest(msg="A decl will not coalesce with a non-decl"):
+                    all_decls = {"decl"}
+                    all_deps = {
+                        "decl": set(),
+                        "nondecl": set()
+                    }
+
+                    result = WDLSectionJob.coalesce_nodes(["decl", "nondecl"], WDLWorkflowGraph([]))
+
+                    assert len(result) == 2
+                    assert len(result[0]) == 1
+                    assert len(result[1]) == 1
+
+                with self.subTest(msg="Two adjacent decls with a common dependency can coalesce"):
+                    all_decls = {"decl1", "decl2"}
+                    all_deps = {
+                        "decl1": {"base"},
+                        "decl2": {"base"},
+                        "base": set()
+                    }
+
+                    result = WDLSectionJob.coalesce_nodes(["base", "decl1", "decl2"], WDLWorkflowGraph([]))
+
+                    assert len(result) == 2
+                    assert "base" in result[0]
+                    assert "decl1" in result[1]
+                    assert "decl2" in result[1]
+
+                with self.subTest(msg="Two adjacent decls with different dependencies will not coalesce"):
+                    all_decls = {"decl1", "decl2"}
+                    all_deps = {
+                        "decl1": {"base"},
+                        "decl2": set(),
+                        "base": set()
+                    }
+
+                    result = WDLSectionJob.coalesce_nodes(["base", "decl1", "decl2"], WDLWorkflowGraph([]))
+
+                    assert len(result) == 3
+                    assert "base" in result[0]
+
+                with self.subTest(msg="Two adjacent decls with different successors will coalesce"):
+                    all_decls = {"decl1", "decl2"}
+                    all_deps = {
+                        "decl1": set(),
+                        "decl2": set(),
+                        "successor": {"decl2"}
+                    }
+
+                    result = WDLSectionJob.coalesce_nodes(["decl1", "decl2", "successor"], WDLWorkflowGraph([]))
+
+                    assert len(result) == 2
+                    assert "decl1" in result[0]
+                    assert "decl2" in result[0]
+                    assert "successor" in result[1]
+
+
+@integrative
+@slow
+@pytest.mark.timeout(600)
+class WDLKubernetesClusterTest(AbstractClusterTest):
+    """
+    Ensure WDL works on the Kubernetes batchsystem.
+    """
 
-
-
-
+    def __init__(self, name):
+        super().__init__(name)
+        self.clusterName = 'wdl-integration-test-' + str(uuid4())
+        # t2.medium is the minimum t2 instance that permits Kubernetes
+        self.leaderNodeType = "t2.medium"
+        self.instanceTypes = ["t2.medium"]
+        self.clusterType = "kubernetes"
+
+    def setUp(self) -> None:
+        super().setUp()
+        self.jobStore = f'aws:{self.awsRegion()}:wdl-test-{uuid4()}'
+
+    def launchCluster(self) -> None:
+        self.createClusterUtil(args=['--leaderStorage', str(self.requestedLeaderStorage),
+                                     '--nodeTypes', ",".join(self.instanceTypes),
+                                     '-w', ",".join(self.numWorkers),
+                                     '--nodeStorage', str(self.requestedLeaderStorage)])
+
+    def test_wdl_kubernetes_cluster(self):
+        """
+        Test that a wdl workflow works on a kubernetes cluster. Launches a cluster with 1 worker. This runs a wdl
+        workflow that performs an image pull on the worker.
+        :return:
+        """
+        self.numWorkers = "1"
+        self.requestedLeaderStorage = 30
+        # create the cluster
+        self.launchCluster()
+        # get leader
+        self.cluster = cluster_factory(
+            provisioner="aws", zone=self.zone, clusterName=self.clusterName
+        )
+        self.leader = self.cluster.getLeader()
+
+        url = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
+        commit = "09b9659cd01473e836738a2e0dd205df0adb49c5"
+        wdl_dir = "wdl_conformance_tests"
+
+        # get the wdl-conformance-tests repo to get WDL tasks to run
+        self.sshUtil([
+            "bash",
+            "-c",
+            f"git clone {url} {wdl_dir} && cd {wdl_dir} && git checkout {commit}"
+        ])
+
+        # run on kubernetes batchsystem
+        toil_options = ['--batchSystem=kubernetes',
+                        f"--jobstore={self.jobStore}"]
+
+        # run WDL workflow that will run singularity
+        test_options = [f"tests/md5sum/md5sum.wdl", f"tests/md5sum/md5sum.json"]
+        self.sshUtil([
+            "bash",
+            "-c",
+            f"cd {wdl_dir} && toil-wdl-runner {' '.join(test_options)} {' '.join(toil_options)}"])
 
-        assert retval != 0
-        assert b'Could not find' in stderr
 
 if __name__ == "__main__":
     unittest.main()  # run all tests
toil/toilState.py
CHANGED
@@ -183,6 +183,7 @@ class ToilState:
         if job_id in self.__job_database:
             # Update the one true copy in place
             old_truth = self.__job_database[job_id]
+            old_truth.check_new_version(new_truth)
             old_truth.__dict__.update(new_truth.__dict__)
         else:
             # Just keep the new one
@@ -293,7 +294,8 @@
             successor.predecessorsFinished.add(jobDesc.jobStoreID)
 
             # If the successor has no predecessors to finish
-
+            if len(successor.predecessorsFinished) > successor.predecessorNumber:
+                raise RuntimeError("There are more finished predecessors than possible.")
             if len(successor.predecessorsFinished) == successor.predecessorNumber:
 
                 # It is ready to be run, so remove it from the set of waiting jobs
@@ -322,7 +324,8 @@
 
             # We put the successor job in the set of waiting successor
             # jobs with multiple predecessors
-
+            if successorJobStoreID in self.jobsToBeScheduledWithMultiplePredecessors:
+                raise RuntimeError("Failed to schedule the successor job. The successor job is already scheduled.")
             self.jobsToBeScheduledWithMultiplePredecessors.add(successorJobStoreID)
 
             # Process successor
@@ -337,10 +340,8 @@
             # We've already seen the successor
 
             # Add the job as a predecessor
-
-
-                not in self.successor_to_predecessors[successorJobStoreID]
-            )
+            if jobDesc.jobStoreID in self.successor_to_predecessors[successorJobStoreID]:
+                raise RuntimeError("Failed to add the job as a predecessor. The job is already added as a predecessor.")
             self.successor_to_predecessors[successorJobStoreID].add(
                 str(jobDesc.jobStoreID)
             )
toil/utils/toilClean.py
CHANGED
@@ -22,7 +22,7 @@ logger = logging.getLogger(__name__)
 
 
 def main() -> None:
-    parser = parser_with_common_options(jobstore_option=True)
+    parser = parser_with_common_options(jobstore_option=True, prog="toil clean")
 
     options = parser.parse_args()
     set_logging_from_options(options)
toil/utils/toilConfig.py
ADDED
@@ -0,0 +1,36 @@
+# Copyright (C) 2015-2021 Regents of the University of California
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Create a config file with all default Toil options."""
+import logging
+import os
+
+from configargparse import ArgParser
+
+from toil.common import generate_config
+from toil.statsAndLogging import add_logging_options, set_logging_from_options
+
+logger = logging.getLogger(__name__)
+
+
+def main() -> None:
+    parser = ArgParser()
+
+    parser.add_argument("output", default="config.yaml", help="Filepath to write the config file too. Default=%("
+                                                               "default)s")
+    add_logging_options(parser)
+    options = parser.parse_args()
+    set_logging_from_options(options)
+    logger.debug("Attempting to write a default config file to %s.", os.path.abspath(options.output))
+    generate_config(os.path.abspath(options.output))
+    logger.info("Successfully wrote a default config file to %s.", os.path.abspath(options.output))
toil/utils/toilDebugFile.py
CHANGED
@@ -15,17 +15,19 @@
 import argparse
 import logging
 import os.path
+import sys
 from typing import Optional
 
 from toil.common import Config, Toil, parser_with_common_options
-from toil.jobStores.
+from toil.jobStores.fileJobStore import FileJobStore
 from toil.lib.resources import glob
+from toil.lib.conversions import strtobool
 from toil.statsAndLogging import set_logging_from_options
 
 logger = logging.getLogger(__name__)
 
 
-def fetchJobStoreFiles(jobStore:
+def fetchJobStoreFiles(jobStore: FileJobStore, options: argparse.Namespace) -> None:
     """
     Takes a list of file names as glob patterns, searches for these within a
     given directory, and attempts to take all of the files found and copy them
@@ -37,6 +39,10 @@ def fetchJobStoreFiles(jobStore: AbstractJobStore, options: argparse.Namespace)
     :param options.localFilePath: Local directory to copy files into.
     :param options.jobStore: The path to the jobStore directory.
     """
+
+    # TODO: Implement the necessary methods in the job store class and stop
+    # globbing around inside it. Does this even work?
+
     for jobStoreFile in options.fetch:
         jobStoreHits = glob(directoryname=options.jobStore,
                             glob_pattern=jobStoreFile)
@@ -48,40 +54,42 @@ def fetchJobStoreFiles(jobStore: AbstractJobStore, options: argparse.Namespace)
                                 symlink=options.useSymlinks)
 
 
-def printContentsOfJobStore(
+def printContentsOfJobStore(job_store: FileJobStore, job_id: Optional[str] = None) -> None:
     """
-    Fetch a list of all files contained in the
-
-
-
-
-    :param
-    :param
-
-
+    Fetch a list of all files contained in the job store if nameOfJob is not
+    declared, otherwise it only prints out the names of files for that specific
+    job for which it can find a match. Also creates a log file of these file
+    names in the current directory.
+
+    :param job_store: Job store to ask for files from.
+    :param job_id: Default is None, which prints out all files in the jobStore.
+        If specified, it will print all jobStore files that have been written
+        to the jobStore by that job.
     """
 
-
-
-
+    # TODO: Implement the necessary methods for job stores other than
+    # FileJobStore.
+
+    if job_id:
+        logFile = job_id.replace("/", "_") + "_fileset.txt"
     else:
-        glob_pattern = "*"
         logFile = "jobstore_files.txt"
-        nameOfJob = ""
 
-    list_of_files =
+    list_of_files = job_store.list_all_file_names(for_job=job_id)
     if os.path.exists(logFile):
         os.remove(logFile)
     for gfile in sorted(list_of_files):
-        if
-            logger.debug(f"{
-
-
-
+        if job_id:
+            logger.debug(f"{job_id} File: {os.path.basename(gfile)}")
+        else:
+            logger.debug(f"File: {os.path.basename(gfile)}")
+        with open(logFile, "a+") as f:
+            f.write(os.path.basename(gfile))
+            f.write("\n")
 
 
 def main() -> None:
-    parser = parser_with_common_options(jobstore_option=True)
+    parser = parser_with_common_options(jobstore_option=True, prog="toil debug-file")
     parser.add_argument("--localFilePath",
                         nargs=1,
                         help="Location to which to copy job store files.")
@@ -90,11 +98,11 @@ def main() -> None:
                         help="List of job-store files to be copied locally."
                              "Use either explicit names (i.e. 'data.txt'), or "
                             "specify glob patterns (i.e. '*.txt')")
-    parser.add_argument("--listFilesInJobStore",
+    parser.add_argument("--listFilesInJobStore", type=strtobool,
                         help="Prints a list of the current files in the jobStore.")
-    parser.add_argument("--fetchEntireJobStore",
+    parser.add_argument("--fetchEntireJobStore", type=strtobool,
                         help="Copy all job store files into a local directory.")
-    parser.add_argument("--useSymlinks",
+    parser.add_argument("--useSymlinks", type=strtobool,
                         help="Creates symlink 'shortcuts' of files in the localFilePath"
                              " instead of hardlinking or copying, where possible. If this is"
                              " not possible, it will copy the files (shutil.copyfile()).")
@@ -109,18 +117,37 @@ def main() -> None:
 
     if options.fetch:
         # Copy only the listed files locally
-
-
+
+        if isinstance(jobStore, FileJobStore):
+            logger.debug("Fetching local files: %s", options.fetch)
+            fetchJobStoreFiles(jobStore=jobStore, options=options)
+        else:
+            # The user asked for something we can't do yet.
+            # Tell them no but don't stack trace.
+            logger.critical("Can only fetch by name or glob from file-based job stores")
+            sys.exit(1)
 
     elif options.fetchEntireJobStore:
         # Copy all jobStore files locally
-
-
-
+
+        if isinstance(jobStore, FileJobStore):
+            logger.debug("Fetching all local files.")
+            options.fetch = "*"
+            fetchJobStoreFiles(jobStore=jobStore, options=options)
+        else:
+            logger.critical("Can only fetch by name or glob from file-based job stores")
+            sys.exit(1)
 
     if options.listFilesInJobStore:
         # Log filenames and create a file containing these names in cwd
-
+
+        if isinstance(jobStore, FileJobStore):
+            printContentsOfJobStore(job_store=jobStore)
+        else:
+            logger.critical("Can only list files from file-based job stores")
+            sys.exit(1)
+
+        # TODO: We can't actually do *anything* for non-file job stores.
 
 
 if __name__ == "__main__":