toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +124 -86
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +39 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +651 -155
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +784 -397
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1137 -534
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +1031 -349
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +772 -412
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +204 -58
- toil/lib/aws/utils.py +290 -213
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/dockstore.py +379 -0
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -105
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/history.py +1271 -0
- toil/lib/history_submission.py +681 -0
- toil/lib/humanize.py +6 -2
- toil/lib/io.py +121 -12
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +83 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +125 -87
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/lib/trs.py +390 -0
- toil/lib/web.py +38 -0
- toil/options/common.py +850 -402
- toil/options/cwl.py +185 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +283 -180
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +84 -55
- toil/server/utils.py +56 -31
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +183 -65
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +265 -49
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +375 -72
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_history.py +212 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/lib/test_trs.py +161 -0
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +6 -6
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3528 -1053
- toil/worker.py +370 -149
- toil-8.1.0b1.dist-info/METADATA +178 -0
- toil-8.1.0b1.dist-info/RECORD +259 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
|
@@ -1,17 +1,28 @@
|
|
|
1
|
+
import errno
|
|
1
2
|
import textwrap
|
|
2
3
|
from queue import Queue
|
|
3
4
|
|
|
5
|
+
import logging
|
|
4
6
|
import pytest
|
|
7
|
+
import sys
|
|
5
8
|
|
|
6
9
|
import toil.batchSystems.slurm
|
|
7
|
-
from toil.batchSystems.abstractBatchSystem import
|
|
10
|
+
from toil.batchSystems.abstractBatchSystem import (
|
|
11
|
+
EXIT_STATUS_UNAVAILABLE_VALUE,
|
|
12
|
+
BatchJobExitReason,
|
|
13
|
+
BatchSystemSupport,
|
|
14
|
+
)
|
|
8
15
|
from toil.common import Config
|
|
9
16
|
from toil.lib.misc import CalledProcessErrorStderr
|
|
10
17
|
from toil.test import ToilTest
|
|
11
18
|
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
12
22
|
# TODO: Come up with a better way to mock the commands then monkey-patching the
|
|
13
23
|
# command-calling functions.
|
|
14
24
|
|
|
25
|
+
|
|
15
26
|
def call_sacct(args, **_) -> str:
|
|
16
27
|
"""
|
|
17
28
|
The arguments passed to `call_command` when executing `sacct` are:
|
|
@@ -25,6 +36,9 @@ def call_sacct(args, **_) -> str:
|
|
|
25
36
|
1236|FAILED|0:2
|
|
26
37
|
1236.extern|COMPLETED|0:0
|
|
27
38
|
"""
|
|
39
|
+
if sum(len(a) for a in args) > 1000:
|
|
40
|
+
# Simulate if the argument list is too long
|
|
41
|
+
raise OSError(errno.E2BIG, "Argument list is too long")
|
|
28
42
|
# Fake output per fake job-id.
|
|
29
43
|
sacct_info = {
|
|
30
44
|
609663: "609663|FAILED|0:2\n609663.extern|COMPLETED|0:0\n",
|
|
@@ -37,7 +51,7 @@ def call_sacct(args, **_) -> str:
|
|
|
37
51
|
789868: "789868|PENDING|0:0\n",
|
|
38
52
|
789869: "789869|COMPLETED|0:0\n789869.batch|COMPLETED|0:0\n789869.extern|COMPLETED|0:0\n",
|
|
39
53
|
}
|
|
40
|
-
job_ids = [int(job_id) for job_id in args[3].split(
|
|
54
|
+
job_ids = [int(job_id) for job_id in args[3].split(",")]
|
|
41
55
|
stdout = ""
|
|
42
56
|
# Glue the fake outputs for the request job-ids together in a single string
|
|
43
57
|
for job_id in job_ids:
|
|
@@ -53,7 +67,8 @@ def call_scontrol(args, **_) -> str:
|
|
|
53
67
|
job_id = int(args[3]) if len(args) > 3 else None
|
|
54
68
|
# Fake output per fake job-id.
|
|
55
69
|
scontrol_info = {
|
|
56
|
-
787204: textwrap.dedent(
|
|
70
|
+
787204: textwrap.dedent(
|
|
71
|
+
"""\
|
|
57
72
|
JobId=787204 JobName=toil_job_6_CWLJob
|
|
58
73
|
UserId=rapthor-mloose(54386) GroupId=rapthor-mloose(54038) MCS_label=N/A
|
|
59
74
|
Priority=11067 Nice=0 Account=rapthor QOS=normal
|
|
@@ -81,8 +96,10 @@ def call_scontrol(args, **_) -> str:
|
|
|
81
96
|
StdOut=/home/rapthor-mloose/code/toil/cwl-v1.2/tmp/toil_19512746-a9f4-4b99-b9ff-48ca5c1b661c.6.787204.out.log
|
|
82
97
|
Power=
|
|
83
98
|
NtasksPerTRES:0
|
|
84
|
-
"""
|
|
85
|
-
|
|
99
|
+
"""
|
|
100
|
+
),
|
|
101
|
+
789724: textwrap.dedent(
|
|
102
|
+
"""\
|
|
86
103
|
JobId=789724 JobName=run_prefactor-cwltool.sh
|
|
87
104
|
UserId=rapthor-mloose(54386) GroupId=rapthor-mloose(54038) MCS_label=N/A
|
|
88
105
|
Priority=7905 Nice=0 Account=rapthor QOS=normal
|
|
@@ -110,8 +127,10 @@ def call_scontrol(args, **_) -> str:
|
|
|
110
127
|
StdOut=/project/rapthor/Share/prefactor/L721962/slurm-789724.out
|
|
111
128
|
Power=
|
|
112
129
|
NtasksPerTRES:0
|
|
113
|
-
"""
|
|
114
|
-
|
|
130
|
+
"""
|
|
131
|
+
),
|
|
132
|
+
789728: textwrap.dedent(
|
|
133
|
+
"""\
|
|
115
134
|
JobId=789728 JobName=sleep.sh
|
|
116
135
|
UserId=rapthor-mloose(54386) GroupId=rapthor-mloose(54038) MCS_label=N/A
|
|
117
136
|
Priority=8005 Nice=0 Account=rapthor QOS=normal
|
|
@@ -138,38 +157,63 @@ def call_scontrol(args, **_) -> str:
|
|
|
138
157
|
StdOut=/home/rapthor-mloose/tmp/slurm-789728.out
|
|
139
158
|
Power=
|
|
140
159
|
NtasksPerTRES:0
|
|
141
|
-
"""
|
|
160
|
+
"""
|
|
161
|
+
),
|
|
142
162
|
}
|
|
143
163
|
if job_id is not None:
|
|
144
164
|
try:
|
|
145
165
|
stdout = scontrol_info[job_id]
|
|
146
166
|
except KeyError:
|
|
147
|
-
raise CalledProcessErrorStderr(
|
|
167
|
+
raise CalledProcessErrorStderr(
|
|
168
|
+
1, "slurm_load_jobs error: Invalid job id specified"
|
|
169
|
+
)
|
|
148
170
|
else:
|
|
149
171
|
# Glue the fake outputs for the request job-ids together in a single string
|
|
150
172
|
stdout = ""
|
|
151
173
|
for value in scontrol_info.values():
|
|
152
|
-
stdout += value +
|
|
174
|
+
stdout += value + "\n"
|
|
153
175
|
return stdout
|
|
154
176
|
|
|
177
|
+
|
|
155
178
|
def call_sacct_raises(*_):
|
|
156
179
|
"""
|
|
157
180
|
Fake that the `sacct` command fails by raising a `CalledProcessErrorStderr`
|
|
158
181
|
"""
|
|
159
|
-
raise CalledProcessErrorStderr(
|
|
160
|
-
|
|
182
|
+
raise CalledProcessErrorStderr(
|
|
183
|
+
1, "sacct: error: Problem talking to the database: " "Connection timed out"
|
|
184
|
+
)
|
|
161
185
|
|
|
186
|
+
def call_sinfo(*_) -> str:
|
|
187
|
+
"""
|
|
188
|
+
Simulate asking for partition info from Slurm
|
|
189
|
+
"""
|
|
190
|
+
stdout = textwrap.dedent(
|
|
191
|
+
"""\
|
|
192
|
+
PARTITION GRES TIMELIMIT PRIO_TIER CPUS MEMORY
|
|
193
|
+
short* (null) 1:00:00 500 256+ 1996800+
|
|
194
|
+
medium (null) 12:00:00 500 256+ 1996800+
|
|
195
|
+
long (null) 14-00:00:00 500 256+ 1996800+
|
|
196
|
+
gpu gpu:A100:8 7-00:00:00 5000 256 996800
|
|
197
|
+
gpu gpu:A5500:8 7-00:00:00 5000 256 1996800
|
|
198
|
+
high_priority gpu:A5500:8 7-00:00:00 65000 256 1996800
|
|
199
|
+
high_priority (null) 7-00:00:00 65000 256+ 1996800+
|
|
200
|
+
simple_nodelist gpu:A100:8 1:00 65000 256 996800
|
|
201
|
+
simple_nodelist gpu:A5500:8 1:00 65000 256 1996800
|
|
202
|
+
simple_nodelist (null) 1:00 65000 256+ 1996800+
|
|
203
|
+
"""
|
|
204
|
+
)
|
|
205
|
+
return stdout
|
|
162
206
|
|
|
163
|
-
class FakeBatchSystem:
|
|
207
|
+
class FakeBatchSystem(BatchSystemSupport):
|
|
164
208
|
"""
|
|
165
209
|
Class that implements a minimal Batch System, needed to create a Worker (see below).
|
|
166
210
|
"""
|
|
167
211
|
|
|
168
212
|
def __init__(self):
|
|
169
|
-
|
|
213
|
+
super().__init__(self.__fake_config(), float("inf"), sys.maxsize, sys.maxsize)
|
|
170
214
|
|
|
171
215
|
def getWaitDuration(self):
|
|
172
|
-
return 10
|
|
216
|
+
return 10
|
|
173
217
|
|
|
174
218
|
def __fake_config(self):
|
|
175
219
|
"""
|
|
@@ -181,10 +225,15 @@ class FakeBatchSystem:
|
|
|
181
225
|
"""
|
|
182
226
|
config = Config()
|
|
183
227
|
from uuid import uuid4
|
|
228
|
+
|
|
184
229
|
config.workflowID = str(uuid4())
|
|
185
|
-
config.cleanWorkDir =
|
|
230
|
+
config.cleanWorkDir = "always"
|
|
231
|
+
toil.batchSystems.slurm.SlurmBatchSystem.setOptions(lambda o: setattr(config, o, None))
|
|
186
232
|
return config
|
|
187
233
|
|
|
234
|
+
# Make the mock class not have abstract methods anymore, even though we don't
|
|
235
|
+
# implement them. See <https://stackoverflow.com/a/17345619>.
|
|
236
|
+
FakeBatchSystem.__abstractmethods__ = set()
|
|
188
237
|
|
|
189
238
|
class SlurmTest(ToilTest):
|
|
190
239
|
"""
|
|
@@ -198,7 +247,8 @@ class SlurmTest(ToilTest):
|
|
|
198
247
|
updatedJobsQueue=Queue(),
|
|
199
248
|
killQueue=Queue(),
|
|
200
249
|
killedJobsQueue=Queue(),
|
|
201
|
-
boss=FakeBatchSystem()
|
|
250
|
+
boss=FakeBatchSystem(),
|
|
251
|
+
)
|
|
202
252
|
|
|
203
253
|
####
|
|
204
254
|
#### tests for _getJobDetailsFromSacct()
|
|
@@ -218,15 +268,25 @@ class SlurmTest(ToilTest):
|
|
|
218
268
|
|
|
219
269
|
def test_getJobDetailsFromSacct_many_all_exist(self):
|
|
220
270
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
221
|
-
expected_result = {
|
|
222
|
-
|
|
271
|
+
expected_result = {
|
|
272
|
+
754725: ("TIMEOUT", 0),
|
|
273
|
+
789456: ("FAILED", 1),
|
|
274
|
+
789724: ("RUNNING", 0),
|
|
275
|
+
789868: ("PENDING", 0),
|
|
276
|
+
789869: ("COMPLETED", 0),
|
|
277
|
+
}
|
|
223
278
|
result = self.worker._getJobDetailsFromSacct(list(expected_result))
|
|
224
279
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
225
280
|
|
|
226
281
|
def test_getJobDetailsFromSacct_many_some_exist(self):
|
|
227
282
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
228
|
-
expected_result = {
|
|
229
|
-
|
|
283
|
+
expected_result = {
|
|
284
|
+
609663: ("FAILED", 130),
|
|
285
|
+
767925: ("FAILED", 2),
|
|
286
|
+
1234: (None, None),
|
|
287
|
+
1235: (None, None),
|
|
288
|
+
765096: ("FAILED", 137),
|
|
289
|
+
}
|
|
230
290
|
result = self.worker._getJobDetailsFromSacct(list(expected_result))
|
|
231
291
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
232
292
|
|
|
@@ -236,6 +296,13 @@ class SlurmTest(ToilTest):
|
|
|
236
296
|
result = self.worker._getJobDetailsFromSacct(list(expected_result))
|
|
237
297
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
238
298
|
|
|
299
|
+
def test_getJobDetailsFromSacct_argument_list_too_big(self):
|
|
300
|
+
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
301
|
+
expected_result = {i: (None, None) for i in range(2000)}
|
|
302
|
+
result = self.worker._getJobDetailsFromSacct(list(expected_result))
|
|
303
|
+
assert result == expected_result, f"{result} != {expected_result}"
|
|
304
|
+
|
|
305
|
+
|
|
239
306
|
####
|
|
240
307
|
#### tests for _getJobDetailsFromScontrol()
|
|
241
308
|
####
|
|
@@ -262,13 +329,21 @@ class SlurmTest(ToilTest):
|
|
|
262
329
|
|
|
263
330
|
def test_getJobDetailsFromScontrol_many_all_exist(self):
|
|
264
331
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
|
|
265
|
-
expected_result = {
|
|
332
|
+
expected_result = {
|
|
333
|
+
787204: ("COMPLETED", 0),
|
|
334
|
+
789724: ("RUNNING", 0),
|
|
335
|
+
789728: ("PENDING", 0),
|
|
336
|
+
}
|
|
266
337
|
result = self.worker._getJobDetailsFromScontrol(list(expected_result))
|
|
267
338
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
268
339
|
|
|
269
340
|
def test_getJobDetailsFromScontrol_many_some_exist(self):
|
|
270
341
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
|
|
271
|
-
expected_result = {
|
|
342
|
+
expected_result = {
|
|
343
|
+
787204: ("COMPLETED", 0),
|
|
344
|
+
789724: ("RUNNING", 0),
|
|
345
|
+
1234: (None, None),
|
|
346
|
+
}
|
|
272
347
|
result = self.worker._getJobDetailsFromScontrol(list(expected_result))
|
|
273
348
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
274
349
|
|
|
@@ -284,14 +359,14 @@ class SlurmTest(ToilTest):
|
|
|
284
359
|
|
|
285
360
|
def test_getJobExitCode_job_exists(self):
|
|
286
361
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
287
|
-
job_id =
|
|
362
|
+
job_id = "785023" # FAILED
|
|
288
363
|
expected_result = (127, BatchJobExitReason.FAILED)
|
|
289
364
|
result = self.worker.getJobExitCode(job_id)
|
|
290
365
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
291
366
|
|
|
292
367
|
def test_getJobExitCode_job_not_exists(self):
|
|
293
368
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
294
|
-
job_id =
|
|
369
|
+
job_id = "1234" # Non-existent
|
|
295
370
|
expected_result = None
|
|
296
371
|
result = self.worker.getJobExitCode(job_id)
|
|
297
372
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
@@ -301,10 +376,12 @@ class SlurmTest(ToilTest):
|
|
|
301
376
|
This test forces the use of `scontrol` to get job information, by letting `sacct`
|
|
302
377
|
raise an exception.
|
|
303
378
|
"""
|
|
304
|
-
self.monkeypatch.setattr(
|
|
379
|
+
self.monkeypatch.setattr(
|
|
380
|
+
self.worker, "_getJobDetailsFromSacct", call_sacct_raises
|
|
381
|
+
)
|
|
305
382
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
|
|
306
|
-
job_id =
|
|
307
|
-
expected_result = (0,
|
|
383
|
+
job_id = "787204" # COMPLETED
|
|
384
|
+
expected_result = (0, BatchJobExitReason.FINISHED)
|
|
308
385
|
result = self.worker.getJobExitCode(job_id)
|
|
309
386
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
310
387
|
|
|
@@ -313,9 +390,11 @@ class SlurmTest(ToilTest):
|
|
|
313
390
|
This test forces the use of `scontrol` to get job information, by letting `sacct`
|
|
314
391
|
raise an exception. Next, `scontrol` should also raise because it doesn't know the job.
|
|
315
392
|
"""
|
|
316
|
-
self.monkeypatch.setattr(
|
|
393
|
+
self.monkeypatch.setattr(
|
|
394
|
+
self.worker, "_getJobDetailsFromSacct", call_sacct_raises
|
|
395
|
+
)
|
|
317
396
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
|
|
318
|
-
job_id =
|
|
397
|
+
job_id = "1234" # Non-existent
|
|
319
398
|
try:
|
|
320
399
|
_ = self.worker.getJobExitCode(job_id)
|
|
321
400
|
except CalledProcessErrorStderr:
|
|
@@ -329,50 +408,54 @@ class SlurmTest(ToilTest):
|
|
|
329
408
|
|
|
330
409
|
def test_coalesce_job_exit_codes_one_exists(self):
|
|
331
410
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
332
|
-
job_ids = [
|
|
333
|
-
expected_result = [(127,
|
|
411
|
+
job_ids = ["785023"] # FAILED
|
|
412
|
+
expected_result = [(127, BatchJobExitReason.FAILED)]
|
|
334
413
|
result = self.worker.coalesce_job_exit_codes(job_ids)
|
|
335
414
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
336
415
|
|
|
337
416
|
def test_coalesce_job_exit_codes_one_not_exists(self):
|
|
338
417
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
339
|
-
job_ids = [
|
|
418
|
+
job_ids = ["1234"] # Non-existent
|
|
340
419
|
expected_result = [None]
|
|
341
420
|
result = self.worker.coalesce_job_exit_codes(job_ids)
|
|
342
421
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
343
422
|
|
|
344
423
|
def test_coalesce_job_exit_codes_many_all_exist(self):
|
|
345
424
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
346
|
-
job_ids = [
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
425
|
+
job_ids = [
|
|
426
|
+
"754725", # TIMEOUT,
|
|
427
|
+
"789456", # FAILED,
|
|
428
|
+
"789724", # RUNNING,
|
|
429
|
+
"789868", # PENDING,
|
|
430
|
+
"789869",
|
|
431
|
+
] # COMPLETED
|
|
351
432
|
# RUNNING and PENDING jobs should return None
|
|
352
433
|
expected_result = [
|
|
353
434
|
(EXIT_STATUS_UNAVAILABLE_VALUE, BatchJobExitReason.KILLED),
|
|
354
435
|
(1, BatchJobExitReason.FAILED),
|
|
355
436
|
None,
|
|
356
437
|
None,
|
|
357
|
-
(0, BatchJobExitReason.FINISHED)
|
|
438
|
+
(0, BatchJobExitReason.FINISHED),
|
|
358
439
|
]
|
|
359
440
|
result = self.worker.coalesce_job_exit_codes(job_ids)
|
|
360
441
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
361
442
|
|
|
362
443
|
def test_coalesce_job_exit_codes_some_exists(self):
|
|
363
444
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
364
|
-
job_ids = [
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
445
|
+
job_ids = [
|
|
446
|
+
"609663", # FAILED (SIGINT)
|
|
447
|
+
"767925", # FAILED,
|
|
448
|
+
"789724", # RUNNING,
|
|
449
|
+
"999999", # Non-existent,
|
|
450
|
+
"789869",
|
|
451
|
+
] # COMPLETED
|
|
369
452
|
# RUNNING job should return None
|
|
370
453
|
expected_result = [
|
|
371
454
|
(130, BatchJobExitReason.FAILED),
|
|
372
455
|
(2, BatchJobExitReason.FAILED),
|
|
373
456
|
None,
|
|
374
457
|
None,
|
|
375
|
-
(0, BatchJobExitReason.FINISHED)
|
|
458
|
+
(0, BatchJobExitReason.FINISHED),
|
|
376
459
|
]
|
|
377
460
|
result = self.worker.coalesce_job_exit_codes(job_ids)
|
|
378
461
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
@@ -382,9 +465,11 @@ class SlurmTest(ToilTest):
|
|
|
382
465
|
This test forces the use of `scontrol` to get job information, by letting `sacct`
|
|
383
466
|
raise an exception.
|
|
384
467
|
"""
|
|
385
|
-
self.monkeypatch.setattr(
|
|
468
|
+
self.monkeypatch.setattr(
|
|
469
|
+
self.worker, "_getJobDetailsFromSacct", call_sacct_raises
|
|
470
|
+
)
|
|
386
471
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
|
|
387
|
-
job_ids = [
|
|
472
|
+
job_ids = ["787204"] # COMPLETED
|
|
388
473
|
expected_result = [(0, BatchJobExitReason.FINISHED)]
|
|
389
474
|
result = self.worker.coalesce_job_exit_codes(job_ids)
|
|
390
475
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
@@ -394,12 +479,143 @@ class SlurmTest(ToilTest):
|
|
|
394
479
|
This test forces the use of `scontrol` to get job information, by letting `sacct`
|
|
395
480
|
raise an exception. Next, `scontrol` should also raise because it doesn't know the job.
|
|
396
481
|
"""
|
|
397
|
-
self.monkeypatch.setattr(
|
|
482
|
+
self.monkeypatch.setattr(
|
|
483
|
+
self.worker, "_getJobDetailsFromSacct", call_sacct_raises
|
|
484
|
+
)
|
|
398
485
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
|
|
399
|
-
job_ids = [
|
|
486
|
+
job_ids = ["1234"] # Non-existent
|
|
400
487
|
try:
|
|
401
488
|
_ = self.worker.coalesce_job_exit_codes(job_ids)
|
|
402
489
|
except CalledProcessErrorStderr:
|
|
403
490
|
pass
|
|
404
491
|
else:
|
|
405
492
|
assert False, "Exception CalledProcessErrorStderr not raised"
|
|
493
|
+
|
|
494
|
+
###
|
|
495
|
+
### Tests for partition selection
|
|
496
|
+
##
|
|
497
|
+
|
|
498
|
+
def test_PartitionSet_get_partition(self):
|
|
499
|
+
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sinfo)
|
|
500
|
+
ps = toil.batchSystems.slurm.SlurmBatchSystem.PartitionSet()
|
|
501
|
+
|
|
502
|
+
# At zero. short will win because simple_nodelist has higher priority.
|
|
503
|
+
self.assertEqual(ps.get_partition(0), "short")
|
|
504
|
+
# Easily within the partition
|
|
505
|
+
self.assertEqual(ps.get_partition(10 * 60), "short")
|
|
506
|
+
# Exactly on the boundary
|
|
507
|
+
self.assertEqual(ps.get_partition(60 * 60), "short")
|
|
508
|
+
# Well within the next partition
|
|
509
|
+
self.assertEqual(ps.get_partition(2 * 60 * 60), "medium")
|
|
510
|
+
# Can only fit in long
|
|
511
|
+
self.assertEqual(ps.get_partition(8 * 24 * 60 * 60), "long")
|
|
512
|
+
# Could fit in gpu or long
|
|
513
|
+
self.assertEqual(ps.get_partition(6 * 24 * 60 * 60), "long")
|
|
514
|
+
# Can't fit in anything
|
|
515
|
+
with self.assertRaises(Exception):
|
|
516
|
+
ps.get_partition(365 * 24 * 60 * 60)
|
|
517
|
+
|
|
518
|
+
def test_PartitionSet_default_gpu_partition(self):
|
|
519
|
+
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sinfo)
|
|
520
|
+
ps = toil.batchSystems.slurm.SlurmBatchSystem.PartitionSet()
|
|
521
|
+
|
|
522
|
+
# Make sure we picked the useful-length GPU partition and not the super
|
|
523
|
+
# short one.
|
|
524
|
+
self.assertEqual(ps.default_gpu_partition.partition_name, "gpu")
|
|
525
|
+
|
|
526
|
+
def test_prepareSbatch_partition(self):
|
|
527
|
+
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sinfo)
|
|
528
|
+
ps = toil.batchSystems.slurm.SlurmBatchSystem.PartitionSet()
|
|
529
|
+
self.worker.boss.partitions = ps
|
|
530
|
+
# This is in seconds
|
|
531
|
+
self.worker.boss.config.slurm_time = 30
|
|
532
|
+
|
|
533
|
+
# Without a partition override in the environment, we should get the
|
|
534
|
+
# "short" partition for this job
|
|
535
|
+
command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
|
|
536
|
+
assert "--partition=short" in command
|
|
537
|
+
|
|
538
|
+
# With a partition override, we should not. But the override will be rewritten.
|
|
539
|
+
self.worker.boss.config.slurm_args = "--something --partition foo --somethingElse"
|
|
540
|
+
command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
|
|
541
|
+
assert "--partition=short" not in command
|
|
542
|
+
assert "--partition=foo" in command
|
|
543
|
+
|
|
544
|
+
# All ways of setting partition should work, including =
|
|
545
|
+
self.worker.boss.config.slurm_args = "--something --partition=foo --somethingElse"
|
|
546
|
+
command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
|
|
547
|
+
assert "--partition=short" not in command
|
|
548
|
+
assert "--partition=foo" in command
|
|
549
|
+
|
|
550
|
+
# And short options
|
|
551
|
+
self.worker.boss.config.slurm_args = "--something -p foo --somethingElse"
|
|
552
|
+
command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
|
|
553
|
+
assert "--partition=short" not in command
|
|
554
|
+
assert "--partition=foo" in command
|
|
555
|
+
|
|
556
|
+
# Partition settings from the config should override automatic selection
|
|
557
|
+
self.worker.boss.config.slurm_partition = "foobar"
|
|
558
|
+
self.worker.boss.config.slurm_args = "--something --somethingElse"
|
|
559
|
+
command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
|
|
560
|
+
assert "--partition=foobar" in command
|
|
561
|
+
|
|
562
|
+
# But they should be overridden by the argument overrides
|
|
563
|
+
self.worker.boss.config.slurm_args = "--something --partition=baz --somethingElse"
|
|
564
|
+
command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
|
|
565
|
+
assert "--partition=baz" in command
|
|
566
|
+
|
|
567
|
+
def test_prepareSbatch_time(self):
|
|
568
|
+
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sinfo)
|
|
569
|
+
ps = toil.batchSystems.slurm.SlurmBatchSystem.PartitionSet()
|
|
570
|
+
self.worker.boss.partitions = ps
|
|
571
|
+
# This is in seconds
|
|
572
|
+
self.worker.boss.config.slurm_time = 30
|
|
573
|
+
|
|
574
|
+
# Without a time override in the environment, we should use the normal
|
|
575
|
+
# time and the "short" partition
|
|
576
|
+
command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
|
|
577
|
+
logger.debug("Command: %s", command)
|
|
578
|
+
assert "--time=0:30" in command
|
|
579
|
+
assert "--partition=short" in command
|
|
580
|
+
|
|
581
|
+
# With a time override, we should use it, slightly translated, and it
|
|
582
|
+
# should change the selected partition.
|
|
583
|
+
self.worker.boss.config.slurm_args = "--something --time 10:00:00 --somethingElse"
|
|
584
|
+
command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
|
|
585
|
+
logger.debug("Command: %s", command)
|
|
586
|
+
assert "--partition=medium" in command
|
|
587
|
+
assert "--time=0:36000" in command
|
|
588
|
+
|
|
589
|
+
# All ways of setting time should work, including =
|
|
590
|
+
self.worker.boss.config.slurm_args = "--something --time=10:00:00 --somethingElse"
|
|
591
|
+
command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
|
|
592
|
+
logger.debug("Command: %s", command)
|
|
593
|
+
assert "--partition=medium" in command
|
|
594
|
+
assert "--time=0:36000" in command
|
|
595
|
+
|
|
596
|
+
# And short options
|
|
597
|
+
self.worker.boss.config.slurm_args = "--something -t 10:00:00 --somethingElse"
|
|
598
|
+
command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
|
|
599
|
+
logger.debug("Command: %s", command)
|
|
600
|
+
assert "--partition=medium" in command
|
|
601
|
+
assert "--time=0:36000" in command
|
|
602
|
+
|
|
603
|
+
def test_prepareSbatch_export(self):
|
|
604
|
+
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sinfo)
|
|
605
|
+
ps = toil.batchSystems.slurm.SlurmBatchSystem.PartitionSet()
|
|
606
|
+
self.worker.boss.partitions = ps
|
|
607
|
+
|
|
608
|
+
# Without any overrides, we need --export=ALL
|
|
609
|
+
command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
|
|
610
|
+
assert "--export=ALL" in command
|
|
611
|
+
|
|
612
|
+
# With overrides, we don't get --export=ALL
|
|
613
|
+
self.worker.boss.config.slurm_args = "--export=foo"
|
|
614
|
+
command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
|
|
615
|
+
assert "--export=ALL" not in command
|
|
616
|
+
|
|
617
|
+
# With --export-file, we don't get --export=ALL as documented.
|
|
618
|
+
self.worker.boss.config.slurm_args = "--export-file=./thefile.txt"
|
|
619
|
+
command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
|
|
620
|
+
assert "--export=ALL" not in command
|
|
621
|
+
|
|
@@ -30,29 +30,27 @@ class CactusIntegrationTest(AbstractClusterTest):
|
|
|
30
30
|
)
|
|
31
31
|
self.leader = self.cluster.getLeader()
|
|
32
32
|
|
|
33
|
-
CACTUS_COMMIT_SHA =
|
|
33
|
+
CACTUS_COMMIT_SHA = (
|
|
34
|
+
os.environ["CACTUS_COMMIT_SHA"]
|
|
35
|
+
or "f5adf4013326322ae58ef1eccb8409b71d761583"
|
|
36
|
+
) # default cactus commit
|
|
34
37
|
|
|
35
38
|
# command to install and run cactus on the cluster
|
|
36
|
-
cactus_command = (
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
# run cactus
|
|
51
|
-
self.sshUtil(
|
|
52
|
-
[
|
|
53
|
-
"bash",
|
|
54
|
-
"-c",
|
|
55
|
-
cactus_command
|
|
56
|
-
]
|
|
39
|
+
cactus_command = (
|
|
40
|
+
"python -m virtualenv --system-site-packages venv && "
|
|
41
|
+
". venv/bin/activate && "
|
|
42
|
+
"git clone https://github.com/ComparativeGenomicsToolkit/cactus.git --recursive && "
|
|
43
|
+
"cd cactus && "
|
|
44
|
+
"git fetch origin && "
|
|
45
|
+
f"git checkout {CACTUS_COMMIT_SHA} && "
|
|
46
|
+
"git submodule update --init --recursive && "
|
|
47
|
+
"pip install --upgrade 'setuptools' pip && "
|
|
48
|
+
"pip install --upgrade . && "
|
|
49
|
+
"pip install --upgrade numpy psutil && "
|
|
50
|
+
"time cactus --batchSystem kubernetes --retryCount=3 "
|
|
51
|
+
f"--consCores 2 --binariesMode singularity --clean always {self.jobStore} "
|
|
52
|
+
"examples/evolverMammals.txt examples/evolverMammals.hal --root mr --defaultDisk 8G --logDebug"
|
|
57
53
|
)
|
|
58
54
|
|
|
55
|
+
# run cactus
|
|
56
|
+
self.sshUtil(["bash", "-c", cactus_command])
|
toil/test/cwl/conftest.py
CHANGED
|
@@ -14,4 +14,43 @@
|
|
|
14
14
|
|
|
15
15
|
# https://pytest.org/latest/example/pythoncollection.html
|
|
16
16
|
|
|
17
|
+
import json
|
|
18
|
+
import logging
|
|
19
|
+
from io import StringIO
|
|
20
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
21
|
+
|
|
22
|
+
from cwltest import utils
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
17
25
|
collect_ignore = ["spec"]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Hook into Pytest for testing CWL conformance with Toil
|
|
29
|
+
# https://pytest.org/en/6.2.x/writing_plugins.html?highlight=conftest#conftest-py-local-per-directory-plugins
|
|
30
|
+
# See cwltool's reference implementation:
|
|
31
|
+
# https://github.com/common-workflow-language/cwltool/blob/05af6c1357c327b3146e9f5da40e7c0aa3e6d976/tests/cwl-conformance/cwltool-conftest.py
|
|
32
|
+
def pytest_cwl_execute_test(
|
|
33
|
+
config: utils.CWLTestConfig,
|
|
34
|
+
processfile: str,
|
|
35
|
+
jobfile: Optional[str]
|
|
36
|
+
) -> Tuple[int, Optional[Dict[str, Any]]]:
|
|
37
|
+
"""Use the CWL reference runner (cwltool) to execute tests."""
|
|
38
|
+
from toil.cwl.cwltoil import main
|
|
39
|
+
|
|
40
|
+
stdout = StringIO()
|
|
41
|
+
argsl: List[str] = [f"--outdir={config.outdir}"]
|
|
42
|
+
if config.runner_quiet:
|
|
43
|
+
argsl.append("--quiet")
|
|
44
|
+
elif config.verbose:
|
|
45
|
+
argsl.append("--debug")
|
|
46
|
+
argsl.extend(config.args)
|
|
47
|
+
argsl.append(processfile)
|
|
48
|
+
if jobfile:
|
|
49
|
+
argsl.append(jobfile)
|
|
50
|
+
try:
|
|
51
|
+
result = main(args=argsl, stdout=stdout)
|
|
52
|
+
except Exception as e:
|
|
53
|
+
logger.error(e)
|
|
54
|
+
return 1, {}
|
|
55
|
+
out = stdout.getvalue()
|
|
56
|
+
return result, json.loads(out) if out else {}
|