toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
|
@@ -4,6 +4,10 @@ from queue import Queue
|
|
|
4
4
|
import pytest
|
|
5
5
|
|
|
6
6
|
import toil.batchSystems.slurm
|
|
7
|
+
from toil.batchSystems.abstractBatchSystem import (
|
|
8
|
+
EXIT_STATUS_UNAVAILABLE_VALUE,
|
|
9
|
+
BatchJobExitReason,
|
|
10
|
+
)
|
|
7
11
|
from toil.common import Config
|
|
8
12
|
from toil.lib.misc import CalledProcessErrorStderr
|
|
9
13
|
from toil.test import ToilTest
|
|
@@ -11,6 +15,7 @@ from toil.test import ToilTest
|
|
|
11
15
|
# TODO: Come up with a better way to mock the commands than monkey-patching the
|
|
12
16
|
# command-calling functions.
|
|
13
17
|
|
|
18
|
+
|
|
14
19
|
def call_sacct(args, **_) -> str:
|
|
15
20
|
"""
|
|
16
21
|
The arguments passed to `call_command` when executing `sacct` are:
|
|
@@ -36,7 +41,7 @@ def call_sacct(args, **_) -> str:
|
|
|
36
41
|
789868: "789868|PENDING|0:0\n",
|
|
37
42
|
789869: "789869|COMPLETED|0:0\n789869.batch|COMPLETED|0:0\n789869.extern|COMPLETED|0:0\n",
|
|
38
43
|
}
|
|
39
|
-
job_ids = [int(job_id) for job_id in args[3].split(
|
|
44
|
+
job_ids = [int(job_id) for job_id in args[3].split(",")]
|
|
40
45
|
stdout = ""
|
|
41
46
|
# Glue the fake outputs for the requested job-ids together in a single string
|
|
42
47
|
for job_id in job_ids:
|
|
@@ -52,7 +57,8 @@ def call_scontrol(args, **_) -> str:
|
|
|
52
57
|
job_id = int(args[3]) if len(args) > 3 else None
|
|
53
58
|
# Fake output per fake job-id.
|
|
54
59
|
scontrol_info = {
|
|
55
|
-
787204: textwrap.dedent(
|
|
60
|
+
787204: textwrap.dedent(
|
|
61
|
+
"""\
|
|
56
62
|
JobId=787204 JobName=toil_job_6_CWLJob
|
|
57
63
|
UserId=rapthor-mloose(54386) GroupId=rapthor-mloose(54038) MCS_label=N/A
|
|
58
64
|
Priority=11067 Nice=0 Account=rapthor QOS=normal
|
|
@@ -80,8 +86,10 @@ def call_scontrol(args, **_) -> str:
|
|
|
80
86
|
StdOut=/home/rapthor-mloose/code/toil/cwl-v1.2/tmp/toil_19512746-a9f4-4b99-b9ff-48ca5c1b661c.6.787204.out.log
|
|
81
87
|
Power=
|
|
82
88
|
NtasksPerTRES:0
|
|
83
|
-
"""
|
|
84
|
-
|
|
89
|
+
"""
|
|
90
|
+
),
|
|
91
|
+
789724: textwrap.dedent(
|
|
92
|
+
"""\
|
|
85
93
|
JobId=789724 JobName=run_prefactor-cwltool.sh
|
|
86
94
|
UserId=rapthor-mloose(54386) GroupId=rapthor-mloose(54038) MCS_label=N/A
|
|
87
95
|
Priority=7905 Nice=0 Account=rapthor QOS=normal
|
|
@@ -109,8 +117,10 @@ def call_scontrol(args, **_) -> str:
|
|
|
109
117
|
StdOut=/project/rapthor/Share/prefactor/L721962/slurm-789724.out
|
|
110
118
|
Power=
|
|
111
119
|
NtasksPerTRES:0
|
|
112
|
-
"""
|
|
113
|
-
|
|
120
|
+
"""
|
|
121
|
+
),
|
|
122
|
+
789728: textwrap.dedent(
|
|
123
|
+
"""\
|
|
114
124
|
JobId=789728 JobName=sleep.sh
|
|
115
125
|
UserId=rapthor-mloose(54386) GroupId=rapthor-mloose(54038) MCS_label=N/A
|
|
116
126
|
Priority=8005 Nice=0 Account=rapthor QOS=normal
|
|
@@ -137,26 +147,31 @@ def call_scontrol(args, **_) -> str:
|
|
|
137
147
|
StdOut=/home/rapthor-mloose/tmp/slurm-789728.out
|
|
138
148
|
Power=
|
|
139
149
|
NtasksPerTRES:0
|
|
140
|
-
"""
|
|
150
|
+
"""
|
|
151
|
+
),
|
|
141
152
|
}
|
|
142
153
|
if job_id is not None:
|
|
143
154
|
try:
|
|
144
155
|
stdout = scontrol_info[job_id]
|
|
145
156
|
except KeyError:
|
|
146
|
-
raise CalledProcessErrorStderr(
|
|
157
|
+
raise CalledProcessErrorStderr(
|
|
158
|
+
1, "slurm_load_jobs error: Invalid job id specified"
|
|
159
|
+
)
|
|
147
160
|
else:
|
|
148
161
|
# Glue the fake outputs for the requested job-ids together in a single string
|
|
149
162
|
stdout = ""
|
|
150
163
|
for value in scontrol_info.values():
|
|
151
|
-
stdout += value +
|
|
164
|
+
stdout += value + "\n"
|
|
152
165
|
return stdout
|
|
153
166
|
|
|
167
|
+
|
|
154
168
|
def call_sacct_raises(*_):
|
|
155
169
|
"""
|
|
156
170
|
Fake that the `sacct` command fails by raising a `CalledProcessErrorStderr`
|
|
157
171
|
"""
|
|
158
|
-
raise CalledProcessErrorStderr(
|
|
159
|
-
|
|
172
|
+
raise CalledProcessErrorStderr(
|
|
173
|
+
1, "sacct: error: Problem talking to the database: " "Connection timed out"
|
|
174
|
+
)
|
|
160
175
|
|
|
161
176
|
|
|
162
177
|
class FakeBatchSystem:
|
|
@@ -168,7 +183,7 @@ class FakeBatchSystem:
|
|
|
168
183
|
self.config = self.__fake_config()
|
|
169
184
|
|
|
170
185
|
def getWaitDuration(self):
|
|
171
|
-
return 10
|
|
186
|
+
return 10
|
|
172
187
|
|
|
173
188
|
def __fake_config(self):
|
|
174
189
|
"""
|
|
@@ -180,8 +195,9 @@ class FakeBatchSystem:
|
|
|
180
195
|
"""
|
|
181
196
|
config = Config()
|
|
182
197
|
from uuid import uuid4
|
|
198
|
+
|
|
183
199
|
config.workflowID = str(uuid4())
|
|
184
|
-
config.cleanWorkDir =
|
|
200
|
+
config.cleanWorkDir = "always"
|
|
185
201
|
return config
|
|
186
202
|
|
|
187
203
|
|
|
@@ -192,12 +208,13 @@ class SlurmTest(ToilTest):
|
|
|
192
208
|
|
|
193
209
|
def setUp(self):
|
|
194
210
|
self.monkeypatch = pytest.MonkeyPatch()
|
|
195
|
-
self.worker = toil.batchSystems.slurm.SlurmBatchSystem.
|
|
211
|
+
self.worker = toil.batchSystems.slurm.SlurmBatchSystem.GridEngineThread(
|
|
196
212
|
newJobsQueue=Queue(),
|
|
197
213
|
updatedJobsQueue=Queue(),
|
|
198
214
|
killQueue=Queue(),
|
|
199
215
|
killedJobsQueue=Queue(),
|
|
200
|
-
boss=FakeBatchSystem()
|
|
216
|
+
boss=FakeBatchSystem(),
|
|
217
|
+
)
|
|
201
218
|
|
|
202
219
|
####
|
|
203
220
|
#### tests for _getJobDetailsFromSacct()
|
|
@@ -217,15 +234,25 @@ class SlurmTest(ToilTest):
|
|
|
217
234
|
|
|
218
235
|
def test_getJobDetailsFromSacct_many_all_exist(self):
|
|
219
236
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
220
|
-
expected_result = {
|
|
221
|
-
|
|
237
|
+
expected_result = {
|
|
238
|
+
754725: ("TIMEOUT", 0),
|
|
239
|
+
789456: ("FAILED", 1),
|
|
240
|
+
789724: ("RUNNING", 0),
|
|
241
|
+
789868: ("PENDING", 0),
|
|
242
|
+
789869: ("COMPLETED", 0),
|
|
243
|
+
}
|
|
222
244
|
result = self.worker._getJobDetailsFromSacct(list(expected_result))
|
|
223
245
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
224
246
|
|
|
225
247
|
def test_getJobDetailsFromSacct_many_some_exist(self):
|
|
226
248
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
227
|
-
expected_result = {
|
|
228
|
-
|
|
249
|
+
expected_result = {
|
|
250
|
+
609663: ("FAILED", 130),
|
|
251
|
+
767925: ("FAILED", 2),
|
|
252
|
+
1234: (None, None),
|
|
253
|
+
1235: (None, None),
|
|
254
|
+
765096: ("FAILED", 137),
|
|
255
|
+
}
|
|
229
256
|
result = self.worker._getJobDetailsFromSacct(list(expected_result))
|
|
230
257
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
231
258
|
|
|
@@ -261,13 +288,21 @@ class SlurmTest(ToilTest):
|
|
|
261
288
|
|
|
262
289
|
def test_getJobDetailsFromScontrol_many_all_exist(self):
|
|
263
290
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
|
|
264
|
-
expected_result = {
|
|
291
|
+
expected_result = {
|
|
292
|
+
787204: ("COMPLETED", 0),
|
|
293
|
+
789724: ("RUNNING", 0),
|
|
294
|
+
789728: ("PENDING", 0),
|
|
295
|
+
}
|
|
265
296
|
result = self.worker._getJobDetailsFromScontrol(list(expected_result))
|
|
266
297
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
267
298
|
|
|
268
299
|
def test_getJobDetailsFromScontrol_many_some_exist(self):
|
|
269
300
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
|
|
270
|
-
expected_result = {
|
|
301
|
+
expected_result = {
|
|
302
|
+
787204: ("COMPLETED", 0),
|
|
303
|
+
789724: ("RUNNING", 0),
|
|
304
|
+
1234: (None, None),
|
|
305
|
+
}
|
|
271
306
|
result = self.worker._getJobDetailsFromScontrol(list(expected_result))
|
|
272
307
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
273
308
|
|
|
@@ -283,14 +318,14 @@ class SlurmTest(ToilTest):
|
|
|
283
318
|
|
|
284
319
|
def test_getJobExitCode_job_exists(self):
|
|
285
320
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
286
|
-
job_id =
|
|
287
|
-
expected_result = 127
|
|
321
|
+
job_id = "785023" # FAILED
|
|
322
|
+
expected_result = (127, BatchJobExitReason.FAILED)
|
|
288
323
|
result = self.worker.getJobExitCode(job_id)
|
|
289
324
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
290
325
|
|
|
291
326
|
def test_getJobExitCode_job_not_exists(self):
|
|
292
327
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
293
|
-
job_id =
|
|
328
|
+
job_id = "1234" # Non-existent
|
|
294
329
|
expected_result = None
|
|
295
330
|
result = self.worker.getJobExitCode(job_id)
|
|
296
331
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
@@ -300,10 +335,12 @@ class SlurmTest(ToilTest):
|
|
|
300
335
|
This test forces the use of `scontrol` to get job information, by letting `sacct`
|
|
301
336
|
raise an exception.
|
|
302
337
|
"""
|
|
303
|
-
self.monkeypatch.setattr(
|
|
338
|
+
self.monkeypatch.setattr(
|
|
339
|
+
self.worker, "_getJobDetailsFromSacct", call_sacct_raises
|
|
340
|
+
)
|
|
304
341
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
|
|
305
|
-
job_id =
|
|
306
|
-
expected_result = 0
|
|
342
|
+
job_id = "787204" # COMPLETED
|
|
343
|
+
expected_result = (0, BatchJobExitReason.FINISHED)
|
|
307
344
|
result = self.worker.getJobExitCode(job_id)
|
|
308
345
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
309
346
|
|
|
@@ -312,9 +349,11 @@ class SlurmTest(ToilTest):
|
|
|
312
349
|
This test forces the use of `scontrol` to get job information, by letting `sacct`
|
|
313
350
|
raise an exception. Next, `scontrol` should also raise because it doesn't know the job.
|
|
314
351
|
"""
|
|
315
|
-
self.monkeypatch.setattr(
|
|
352
|
+
self.monkeypatch.setattr(
|
|
353
|
+
self.worker, "_getJobDetailsFromSacct", call_sacct_raises
|
|
354
|
+
)
|
|
316
355
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
|
|
317
|
-
job_id =
|
|
356
|
+
job_id = "1234" # Non-existent
|
|
318
357
|
try:
|
|
319
358
|
_ = self.worker.getJobExitCode(job_id)
|
|
320
359
|
except CalledProcessErrorStderr:
|
|
@@ -328,37 +367,55 @@ class SlurmTest(ToilTest):
|
|
|
328
367
|
|
|
329
368
|
def test_coalesce_job_exit_codes_one_exists(self):
|
|
330
369
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
331
|
-
job_ids = [
|
|
332
|
-
expected_result = [127]
|
|
370
|
+
job_ids = ["785023"] # FAILED
|
|
371
|
+
expected_result = [(127, BatchJobExitReason.FAILED)]
|
|
333
372
|
result = self.worker.coalesce_job_exit_codes(job_ids)
|
|
334
373
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
335
374
|
|
|
336
375
|
def test_coalesce_job_exit_codes_one_not_exists(self):
|
|
337
376
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
338
|
-
job_ids = [
|
|
377
|
+
job_ids = ["1234"] # Non-existent
|
|
339
378
|
expected_result = [None]
|
|
340
379
|
result = self.worker.coalesce_job_exit_codes(job_ids)
|
|
341
380
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
342
381
|
|
|
343
382
|
def test_coalesce_job_exit_codes_many_all_exist(self):
|
|
344
383
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
345
|
-
job_ids = [
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
384
|
+
job_ids = [
|
|
385
|
+
"754725", # TIMEOUT,
|
|
386
|
+
"789456", # FAILED,
|
|
387
|
+
"789724", # RUNNING,
|
|
388
|
+
"789868", # PENDING,
|
|
389
|
+
"789869",
|
|
390
|
+
] # COMPLETED
|
|
391
|
+
# RUNNING and PENDING jobs should return None
|
|
392
|
+
expected_result = [
|
|
393
|
+
(EXIT_STATUS_UNAVAILABLE_VALUE, BatchJobExitReason.KILLED),
|
|
394
|
+
(1, BatchJobExitReason.FAILED),
|
|
395
|
+
None,
|
|
396
|
+
None,
|
|
397
|
+
(0, BatchJobExitReason.FINISHED),
|
|
398
|
+
]
|
|
351
399
|
result = self.worker.coalesce_job_exit_codes(job_ids)
|
|
352
400
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
353
401
|
|
|
354
402
|
def test_coalesce_job_exit_codes_some_exists(self):
|
|
355
403
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
|
|
356
|
-
job_ids = [
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
404
|
+
job_ids = [
|
|
405
|
+
"609663", # FAILED (SIGINT)
|
|
406
|
+
"767925", # FAILED,
|
|
407
|
+
"789724", # RUNNING,
|
|
408
|
+
"999999", # Non-existent,
|
|
409
|
+
"789869",
|
|
410
|
+
] # COMPLETED
|
|
411
|
+
# RUNNING job should return None
|
|
412
|
+
expected_result = [
|
|
413
|
+
(130, BatchJobExitReason.FAILED),
|
|
414
|
+
(2, BatchJobExitReason.FAILED),
|
|
415
|
+
None,
|
|
416
|
+
None,
|
|
417
|
+
(0, BatchJobExitReason.FINISHED),
|
|
418
|
+
]
|
|
362
419
|
result = self.worker.coalesce_job_exit_codes(job_ids)
|
|
363
420
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
364
421
|
|
|
@@ -367,10 +424,12 @@ class SlurmTest(ToilTest):
|
|
|
367
424
|
This test forces the use of `scontrol` to get job information, by letting `sacct`
|
|
368
425
|
raise an exception.
|
|
369
426
|
"""
|
|
370
|
-
self.monkeypatch.setattr(
|
|
427
|
+
self.monkeypatch.setattr(
|
|
428
|
+
self.worker, "_getJobDetailsFromSacct", call_sacct_raises
|
|
429
|
+
)
|
|
371
430
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
|
|
372
|
-
job_ids = [
|
|
373
|
-
expected_result = [0]
|
|
431
|
+
job_ids = ["787204"] # COMPLETED
|
|
432
|
+
expected_result = [(0, BatchJobExitReason.FINISHED)]
|
|
374
433
|
result = self.worker.coalesce_job_exit_codes(job_ids)
|
|
375
434
|
assert result == expected_result, f"{result} != {expected_result}"
|
|
376
435
|
|
|
@@ -379,9 +438,11 @@ class SlurmTest(ToilTest):
|
|
|
379
438
|
This test forces the use of `scontrol` to get job information, by letting `sacct`
|
|
380
439
|
raise an exception. Next, `scontrol` should also raise because it doesn't know the job.
|
|
381
440
|
"""
|
|
382
|
-
self.monkeypatch.setattr(
|
|
441
|
+
self.monkeypatch.setattr(
|
|
442
|
+
self.worker, "_getJobDetailsFromSacct", call_sacct_raises
|
|
443
|
+
)
|
|
383
444
|
self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
|
|
384
|
-
job_ids = [
|
|
445
|
+
job_ids = ["1234"] # Non-existent
|
|
385
446
|
try:
|
|
386
447
|
_ = self.worker.coalesce_job_exit_codes(job_ids)
|
|
387
448
|
except CalledProcessErrorStderr:
|
|
File without changes
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import uuid
|
|
3
|
+
|
|
4
|
+
from toil.provisioners import cluster_factory
|
|
5
|
+
from toil.test.provisioners.clusterTest import AbstractClusterTest
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CactusIntegrationTest(AbstractClusterTest):
|
|
9
|
+
"""
|
|
10
|
+
Run the Cactus Integration test on a Kubernetes AWS cluster
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
def __init__(self, methodName):
|
|
14
|
+
super().__init__(methodName=methodName)
|
|
15
|
+
self.clusterName = "cactus-test-" + str(uuid.uuid4())
|
|
16
|
+
self.leaderNodeType = "t2.medium"
|
|
17
|
+
self.clusterType = "kubernetes"
|
|
18
|
+
|
|
19
|
+
def setUp(self):
|
|
20
|
+
super().setUp()
|
|
21
|
+
self.jobStore = f"aws:{self.awsRegion()}:cluster-{uuid.uuid4()}"
|
|
22
|
+
|
|
23
|
+
def test_cactus_integration(self):
|
|
24
|
+
# Make a cluster with worker nodes
|
|
25
|
+
self.createClusterUtil(args=["--nodeTypes=t2.xlarge", "-w=1-3"])
|
|
26
|
+
# get the leader so we know the IP address - we don't need to wait since create cluster
|
|
27
|
+
# already ensures the leader is running
|
|
28
|
+
self.cluster = cluster_factory(
|
|
29
|
+
provisioner="aws", zone=self.zone, clusterName=self.clusterName
|
|
30
|
+
)
|
|
31
|
+
self.leader = self.cluster.getLeader()
|
|
32
|
+
|
|
33
|
+
CACTUS_COMMIT_SHA = (
|
|
34
|
+
os.environ["CACTUS_COMMIT_SHA"]
|
|
35
|
+
or "f5adf4013326322ae58ef1eccb8409b71d761583"
|
|
36
|
+
) # default cactus commit
|
|
37
|
+
|
|
38
|
+
# command to install and run cactus on the cluster
|
|
39
|
+
cactus_command = (
|
|
40
|
+
"python -m virtualenv --system-site-packages venv && "
|
|
41
|
+
". venv/bin/activate && "
|
|
42
|
+
"git clone https://github.com/ComparativeGenomicsToolkit/cactus.git --recursive && "
|
|
43
|
+
"cd cactus && "
|
|
44
|
+
"git fetch origin && "
|
|
45
|
+
f"git checkout {CACTUS_COMMIT_SHA} && "
|
|
46
|
+
"git submodule update --init --recursive && "
|
|
47
|
+
"pip install --upgrade 'setuptools' pip && "
|
|
48
|
+
"pip install --upgrade . && "
|
|
49
|
+
"pip install --upgrade numpy psutil && "
|
|
50
|
+
"time cactus --batchSystem kubernetes --retryCount=3 "
|
|
51
|
+
f"--consCores 2 --binariesMode singularity --clean always {self.jobStore} "
|
|
52
|
+
"examples/evolverMammals.txt examples/evolverMammals.hal --root mr --defaultDisk 8G --logDebug"
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# run cactus
|
|
56
|
+
self.sshUtil(["bash", "-c", cactus_command])
|