toil 6.1.0a1__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +1 -232
- toil/batchSystems/abstractBatchSystem.py +41 -17
- toil/batchSystems/abstractGridEngineBatchSystem.py +79 -65
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/contained_executor.py +4 -5
- toil/batchSystems/gridengine.py +1 -1
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +25 -11
- toil/batchSystems/local_support.py +3 -3
- toil/batchSystems/lsf.py +9 -9
- toil/batchSystems/mesos/batchSystem.py +4 -4
- toil/batchSystems/mesos/executor.py +3 -2
- toil/batchSystems/options.py +9 -0
- toil/batchSystems/singleMachine.py +11 -10
- toil/batchSystems/slurm.py +129 -16
- toil/batchSystems/torque.py +1 -1
- toil/bus.py +45 -3
- toil/common.py +56 -31
- toil/cwl/cwltoil.py +442 -371
- toil/deferred.py +1 -1
- toil/exceptions.py +1 -1
- toil/fileStores/abstractFileStore.py +69 -20
- toil/fileStores/cachingFileStore.py +6 -22
- toil/fileStores/nonCachingFileStore.py +6 -15
- toil/job.py +270 -86
- toil/jobStores/abstractJobStore.py +37 -31
- toil/jobStores/aws/jobStore.py +280 -218
- toil/jobStores/aws/utils.py +60 -31
- toil/jobStores/conftest.py +2 -2
- toil/jobStores/fileJobStore.py +3 -3
- toil/jobStores/googleJobStore.py +3 -4
- toil/leader.py +89 -38
- toil/lib/aws/__init__.py +26 -10
- toil/lib/aws/iam.py +2 -2
- toil/lib/aws/session.py +62 -22
- toil/lib/aws/utils.py +73 -37
- toil/lib/conversions.py +24 -1
- toil/lib/ec2.py +118 -69
- toil/lib/expando.py +1 -1
- toil/lib/generatedEC2Lists.py +8 -8
- toil/lib/io.py +42 -4
- toil/lib/misc.py +1 -3
- toil/lib/resources.py +57 -16
- toil/lib/retry.py +12 -5
- toil/lib/threading.py +29 -14
- toil/lib/throttle.py +1 -1
- toil/options/common.py +31 -30
- toil/options/wdl.py +5 -0
- toil/provisioners/__init__.py +9 -3
- toil/provisioners/abstractProvisioner.py +12 -2
- toil/provisioners/aws/__init__.py +20 -15
- toil/provisioners/aws/awsProvisioner.py +406 -329
- toil/provisioners/gceProvisioner.py +2 -2
- toil/provisioners/node.py +13 -5
- toil/server/app.py +1 -1
- toil/statsAndLogging.py +93 -23
- toil/test/__init__.py +27 -12
- toil/test/batchSystems/batchSystemTest.py +40 -33
- toil/test/batchSystems/batch_system_plugin_test.py +79 -0
- toil/test/batchSystems/test_slurm.py +22 -7
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +245 -236
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +11 -14
- toil/test/jobStores/jobStoreTest.py +40 -54
- toil/test/lib/aws/test_iam.py +2 -2
- toil/test/lib/test_ec2.py +1 -1
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +37 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
- toil/test/provisioners/clusterTest.py +99 -16
- toil/test/server/serverTest.py +2 -2
- toil/test/src/autoDeploymentTest.py +1 -1
- toil/test/src/dockerCheckTest.py +2 -1
- toil/test/src/environmentTest.py +125 -0
- toil/test/src/fileStoreTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +18 -8
- toil/test/src/jobTest.py +1 -1
- toil/test/src/realtimeLoggerTest.py +4 -0
- toil/test/src/workerTest.py +52 -19
- toil/test/utils/toilDebugTest.py +62 -4
- toil/test/utils/utilsTest.py +23 -21
- toil/test/wdl/wdltoil_test.py +49 -21
- toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
- toil/toilState.py +68 -9
- toil/utils/toilDebugFile.py +1 -1
- toil/utils/toilDebugJob.py +153 -26
- toil/utils/toilLaunchCluster.py +12 -2
- toil/utils/toilRsyncCluster.py +7 -2
- toil/utils/toilSshCluster.py +7 -3
- toil/utils/toilStats.py +310 -266
- toil/utils/toilStatus.py +98 -52
- toil/version.py +11 -11
- toil/wdl/wdltoil.py +644 -225
- toil/worker.py +125 -83
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
- toil-7.0.0.dist-info/METADATA +158 -0
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/RECORD +103 -96
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/test/utils/utilsTest.py
CHANGED
|
@@ -39,7 +39,7 @@ from toil.test import (ToilTest,
|
|
|
39
39
|
needs_rsync3,
|
|
40
40
|
slow)
|
|
41
41
|
from toil.test.sort.sortTest import makeFileToSort
|
|
42
|
-
from toil.utils.toilStats import
|
|
42
|
+
from toil.utils.toilStats import get_stats, process_data
|
|
43
43
|
from toil.utils.toilStatus import ToilStatus
|
|
44
44
|
from toil.version import python
|
|
45
45
|
|
|
@@ -56,6 +56,7 @@ class UtilsTest(ToilTest):
|
|
|
56
56
|
super().setUp()
|
|
57
57
|
self.tempDir = self._createTempDir()
|
|
58
58
|
self.tempFile = get_temp_file(rootDir=self.tempDir)
|
|
59
|
+
self.outputFile = get_temp_file(rootDir=self.tempDir)
|
|
59
60
|
self.outputFile = 'someSortedStuff.txt'
|
|
60
61
|
self.toilDir = os.path.join(self.tempDir, "jobstore")
|
|
61
62
|
self.assertFalse(os.path.exists(self.toilDir))
|
|
@@ -73,9 +74,9 @@ class UtilsTest(ToilTest):
|
|
|
73
74
|
'-m',
|
|
74
75
|
'toil.test.sort.sort',
|
|
75
76
|
f'file:{self.toilDir}',
|
|
77
|
+
f'--fileToSort={self.tempFile}',
|
|
78
|
+
f'--outputFile={self.outputFile}',
|
|
76
79
|
'--clean=never',
|
|
77
|
-
'--numLines=1',
|
|
78
|
-
'--lineLength=1'
|
|
79
80
|
]
|
|
80
81
|
|
|
81
82
|
self.restart_sort_workflow_cmd = [
|
|
@@ -91,7 +92,7 @@ class UtilsTest(ToilTest):
|
|
|
91
92
|
if os.path.exists(self.toilDir):
|
|
92
93
|
shutil.rmtree(self.toilDir)
|
|
93
94
|
|
|
94
|
-
for f in [
|
|
95
|
+
for f in [self.tempFile, self.outputFile, os.path.join(self.tempDir, "output.txt")]:
|
|
95
96
|
if os.path.exists(f):
|
|
96
97
|
os.remove(f)
|
|
97
98
|
|
|
@@ -298,8 +299,8 @@ class UtilsTest(ToilTest):
|
|
|
298
299
|
config = Config()
|
|
299
300
|
config.setOptions(options)
|
|
300
301
|
jobStore = Toil.resumeJobStore(config.jobStore)
|
|
301
|
-
stats =
|
|
302
|
-
collatedStats =
|
|
302
|
+
stats = get_stats(jobStore)
|
|
303
|
+
collatedStats = process_data(jobStore.config, stats)
|
|
303
304
|
self.assertTrue(len(collatedStats.job_types) == 2, "Some jobs are not represented in the stats.")
|
|
304
305
|
|
|
305
306
|
def check_status(self, status, status_fn, seconds=20):
|
|
@@ -314,14 +315,14 @@ class UtilsTest(ToilTest):
|
|
|
314
315
|
def testGetPIDStatus(self):
|
|
315
316
|
"""Test that ToilStatus.getPIDStatus() behaves as expected."""
|
|
316
317
|
wf = subprocess.Popen(self.sort_workflow_cmd)
|
|
317
|
-
self.check_status('RUNNING', status_fn=ToilStatus.getPIDStatus, seconds=
|
|
318
|
+
self.check_status('RUNNING', status_fn=ToilStatus.getPIDStatus, seconds=60)
|
|
318
319
|
wf.wait()
|
|
319
|
-
self.check_status('COMPLETED', status_fn=ToilStatus.getPIDStatus)
|
|
320
|
+
self.check_status('COMPLETED', status_fn=ToilStatus.getPIDStatus, seconds=60)
|
|
320
321
|
|
|
321
322
|
# TODO: we need to reach into the FileJobStore's files and delete this
|
|
322
323
|
# shared file. We assume we know its internal layout.
|
|
323
324
|
os.remove(os.path.join(self.toilDir, 'files/shared/pid.log'))
|
|
324
|
-
self.check_status('QUEUED', status_fn=ToilStatus.getPIDStatus)
|
|
325
|
+
self.check_status('QUEUED', status_fn=ToilStatus.getPIDStatus, seconds=60)
|
|
325
326
|
|
|
326
327
|
def testGetStatusFailedToilWF(self):
|
|
327
328
|
"""
|
|
@@ -331,9 +332,9 @@ class UtilsTest(ToilTest):
|
|
|
331
332
|
"""
|
|
332
333
|
# --badWorker is set to force failure.
|
|
333
334
|
wf = subprocess.Popen(self.sort_workflow_cmd + ['--badWorker=1'])
|
|
334
|
-
self.check_status('RUNNING', status_fn=ToilStatus.getStatus)
|
|
335
|
+
self.check_status('RUNNING', status_fn=ToilStatus.getStatus, seconds=60)
|
|
335
336
|
wf.wait()
|
|
336
|
-
self.check_status('ERROR', status_fn=ToilStatus.getStatus)
|
|
337
|
+
self.check_status('ERROR', status_fn=ToilStatus.getStatus, seconds=60)
|
|
337
338
|
|
|
338
339
|
@needs_cwl
|
|
339
340
|
@needs_docker
|
|
@@ -341,22 +342,22 @@ class UtilsTest(ToilTest):
|
|
|
341
342
|
"""Test that ToilStatus.getStatus() behaves as expected with a failing CWL workflow."""
|
|
342
343
|
# --badWorker is set to force failure.
|
|
343
344
|
cmd = ['toil-cwl-runner', '--jobStore', self.toilDir, '--clean=never', '--badWorker=1',
|
|
344
|
-
'src/toil/test/cwl/sorttool.cwl', '--reverse', '--input', 'src/toil/test/cwl/whale.txt']
|
|
345
|
+
'src/toil/test/cwl/sorttool.cwl', '--reverse', '--input', 'src/toil/test/cwl/whale.txt', f'--outdir={self.tempDir}']
|
|
345
346
|
wf = subprocess.Popen(cmd)
|
|
346
|
-
self.check_status('RUNNING', status_fn=ToilStatus.getStatus)
|
|
347
|
+
self.check_status('RUNNING', status_fn=ToilStatus.getStatus, seconds=60)
|
|
347
348
|
wf.wait()
|
|
348
|
-
self.check_status('ERROR', status_fn=ToilStatus.getStatus)
|
|
349
|
+
self.check_status('ERROR', status_fn=ToilStatus.getStatus, seconds=60)
|
|
349
350
|
|
|
350
351
|
@needs_cwl
|
|
351
352
|
@needs_docker
|
|
352
353
|
def testGetStatusSuccessfulCWLWF(self):
|
|
353
354
|
"""Test that ToilStatus.getStatus() behaves as expected with a successful CWL workflow."""
|
|
354
355
|
cmd = ['toil-cwl-runner', '--jobStore', self.toilDir, '--clean=never',
|
|
355
|
-
'src/toil/test/cwl/sorttool.cwl', '--reverse', '--input', 'src/toil/test/cwl/whale.txt']
|
|
356
|
+
'src/toil/test/cwl/sorttool.cwl', '--reverse', '--input', 'src/toil/test/cwl/whale.txt', f'--outdir={self.tempDir}']
|
|
356
357
|
wf = subprocess.Popen(cmd)
|
|
357
|
-
self.check_status('RUNNING', status_fn=ToilStatus.getStatus, seconds=
|
|
358
|
+
self.check_status('RUNNING', status_fn=ToilStatus.getStatus, seconds=60)
|
|
358
359
|
wf.wait()
|
|
359
|
-
self.check_status('COMPLETED', status_fn=ToilStatus.getStatus)
|
|
360
|
+
self.check_status('COMPLETED', status_fn=ToilStatus.getStatus, seconds=60)
|
|
360
361
|
|
|
361
362
|
@needs_cwl
|
|
362
363
|
@patch('builtins.print')
|
|
@@ -375,23 +376,24 @@ class UtilsTest(ToilTest):
|
|
|
375
376
|
args, kwargs = mock_print.call_args
|
|
376
377
|
self.assertIn('invalidcommand', args[0])
|
|
377
378
|
|
|
379
|
+
@pytest.mark.timeout(1200)
|
|
378
380
|
def testRestartAttribute(self):
|
|
379
381
|
"""
|
|
380
|
-
Test that the job store is only destroyed when we observe a
|
|
382
|
+
Test that the job store is only destroyed when we observe a successful workflow run.
|
|
381
383
|
The following simulates a failing workflow that attempts to resume without restart().
|
|
382
384
|
In this case, the job store should not be destroyed until restart() is called.
|
|
383
385
|
"""
|
|
384
386
|
# Run a workflow that will always fail
|
|
385
|
-
cmd = self.restart_sort_workflow_cmd + ['--badWorker=1']
|
|
387
|
+
cmd = self.restart_sort_workflow_cmd + ['--badWorker=1', '--logDebug']
|
|
386
388
|
subprocess.run(cmd)
|
|
387
389
|
|
|
388
|
-
restart_cmd = self.restart_sort_workflow_cmd + ['--badWorker=0', '--restart']
|
|
390
|
+
restart_cmd = self.restart_sort_workflow_cmd + ['--badWorker=0', '--logDebug', '--restart']
|
|
389
391
|
subprocess.run(restart_cmd)
|
|
390
392
|
|
|
391
393
|
# Check the job store exists after restart attempt
|
|
392
394
|
self.assertTrue(os.path.exists(self.toilDir))
|
|
393
395
|
|
|
394
|
-
successful_cmd = [python, '-m', 'toil.test.sort.sort', 'file:' + self.toilDir,
|
|
396
|
+
successful_cmd = [python, '-m', 'toil.test.sort.sort', '--logDebug', 'file:' + self.toilDir,
|
|
395
397
|
'--restart']
|
|
396
398
|
subprocess.run(successful_cmd)
|
|
397
399
|
|
toil/test/wdl/wdltoil_test.py
CHANGED
|
@@ -3,27 +3,31 @@ import os
|
|
|
3
3
|
import shutil
|
|
4
4
|
import subprocess
|
|
5
5
|
import unittest
|
|
6
|
-
import
|
|
7
|
-
from typing import
|
|
8
|
-
from unittest.mock import patch
|
|
6
|
+
from uuid import uuid4
|
|
7
|
+
from typing import Optional
|
|
9
8
|
|
|
10
9
|
from unittest.mock import patch
|
|
11
10
|
from typing import Any, Dict, List, Set
|
|
12
11
|
|
|
12
|
+
import pytest
|
|
13
|
+
|
|
14
|
+
from toil.provisioners import cluster_factory
|
|
13
15
|
from toil.test import (ToilTest,
|
|
14
16
|
needs_docker_cuda,
|
|
15
17
|
needs_google_storage,
|
|
16
18
|
needs_singularity_or_docker,
|
|
17
|
-
|
|
19
|
+
needs_wdl,
|
|
20
|
+
slow, integrative)
|
|
18
21
|
from toil.version import exactPython
|
|
19
22
|
from toil.wdl.wdltoil import WDLSectionJob, WDLWorkflowGraph
|
|
20
23
|
|
|
21
|
-
|
|
24
|
+
@needs_wdl
|
|
22
25
|
class BaseWDLTest(ToilTest):
|
|
23
26
|
"""Base test class for WDL tests."""
|
|
27
|
+
|
|
24
28
|
def setUp(self) -> None:
|
|
25
29
|
"""Runs anew before each test to create farm fresh temp dirs."""
|
|
26
|
-
self.output_dir = os.path.join('/tmp/', 'toil-wdl-test-' + str(
|
|
30
|
+
self.output_dir = os.path.join('/tmp/', 'toil-wdl-test-' + str(uuid4()))
|
|
27
31
|
os.makedirs(self.output_dir)
|
|
28
32
|
|
|
29
33
|
def tearDown(self) -> None:
|
|
@@ -36,11 +40,12 @@ class WDLConformanceTests(BaseWDLTest):
|
|
|
36
40
|
WDL conformance tests for Toil.
|
|
37
41
|
"""
|
|
38
42
|
wdl_dir = "wdl-conformance-tests"
|
|
43
|
+
|
|
39
44
|
@classmethod
|
|
40
45
|
def setUpClass(cls) -> None:
|
|
41
46
|
|
|
42
47
|
url = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
|
|
43
|
-
commit = "
|
|
48
|
+
commit = "c87b62b4f460e009fd42edec13669c4db14cf90c"
|
|
44
49
|
|
|
45
50
|
p = subprocess.Popen(
|
|
46
51
|
f"git clone {url} {cls.wdl_dir} && cd {cls.wdl_dir} && git checkout {commit}",
|
|
@@ -59,7 +64,7 @@ class WDLConformanceTests(BaseWDLTest):
|
|
|
59
64
|
# estimated running time: 2 minutes
|
|
60
65
|
@slow
|
|
61
66
|
def test_conformance_tests_v10(self):
|
|
62
|
-
tests_to_run = "0
|
|
67
|
+
tests_to_run = "0-15,17-20,22-71,73-77"
|
|
63
68
|
p = subprocess.run(self.base_command + ["-v", "1.0", "-n", tests_to_run], capture_output=True)
|
|
64
69
|
|
|
65
70
|
if p.returncode != 0:
|
|
@@ -70,7 +75,7 @@ class WDLConformanceTests(BaseWDLTest):
|
|
|
70
75
|
# estimated running time: 2 minutes
|
|
71
76
|
@slow
|
|
72
77
|
def test_conformance_tests_v11(self):
|
|
73
|
-
tests_to_run = "
|
|
78
|
+
tests_to_run = "1-63,65-71,73-75,77"
|
|
74
79
|
p = subprocess.run(self.base_command + ["-v", "1.1", "-n", tests_to_run], capture_output=True)
|
|
75
80
|
|
|
76
81
|
if p.returncode != 0:
|
|
@@ -78,6 +83,16 @@ class WDLConformanceTests(BaseWDLTest):
|
|
|
78
83
|
|
|
79
84
|
p.check_returncode()
|
|
80
85
|
|
|
86
|
+
@slow
|
|
87
|
+
def test_conformance_tests_integration(self):
|
|
88
|
+
ids_to_run = "encode,tut01,tut02,tut03,tut04"
|
|
89
|
+
p = subprocess.run(self.base_command + ["-v", "1.0", "--id", ids_to_run], capture_output=True)
|
|
90
|
+
|
|
91
|
+
if p.returncode != 0:
|
|
92
|
+
print(p.stdout.decode('utf-8', errors='replace'))
|
|
93
|
+
|
|
94
|
+
p.check_returncode()
|
|
95
|
+
|
|
81
96
|
@classmethod
|
|
82
97
|
def tearDownClass(cls) -> None:
|
|
83
98
|
upper_dir = os.path.dirname(os.getcwd())
|
|
@@ -87,6 +102,7 @@ class WDLConformanceTests(BaseWDLTest):
|
|
|
87
102
|
|
|
88
103
|
class WDLTests(BaseWDLTest):
|
|
89
104
|
"""Tests for Toil's MiniWDL-based implementation."""
|
|
105
|
+
|
|
90
106
|
@classmethod
|
|
91
107
|
def setUpClass(cls) -> None:
|
|
92
108
|
"""Runs once for all tests."""
|
|
@@ -101,7 +117,8 @@ class WDLTests(BaseWDLTest):
|
|
|
101
117
|
wdl = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.1.0.wdl')
|
|
102
118
|
json_file = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.json')
|
|
103
119
|
|
|
104
|
-
result_json = subprocess.check_output(
|
|
120
|
+
result_json = subprocess.check_output(
|
|
121
|
+
self.base_command + [wdl, json_file, '-o', self.output_dir, '--logDebug', '--retryCount=0'])
|
|
105
122
|
result = json.loads(result_json)
|
|
106
123
|
|
|
107
124
|
assert 'ga4ghMd5.value' in result
|
|
@@ -109,13 +126,23 @@ class WDLTests(BaseWDLTest):
|
|
|
109
126
|
assert os.path.exists(result['ga4ghMd5.value'])
|
|
110
127
|
assert os.path.basename(result['ga4ghMd5.value']) == 'md5sum.txt'
|
|
111
128
|
|
|
129
|
+
def test_missing_output_directory(self):
|
|
130
|
+
"""
|
|
131
|
+
Test if Toil can run a WDL workflow into a new directory.
|
|
132
|
+
"""
|
|
133
|
+
wdl = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.1.0.wdl')
|
|
134
|
+
json_file = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.json')
|
|
135
|
+
subprocess.check_call(self.base_command + [wdl, json_file, '-o', os.path.join(self.output_dir, "does", "not", "exist"), '--logDebug', '--retryCount=0'])
|
|
136
|
+
|
|
112
137
|
@needs_singularity_or_docker
|
|
113
138
|
def test_miniwdl_self_test(self, extra_args: Optional[List[str]] = None) -> None:
|
|
114
139
|
"""Test if the MiniWDL self test runs and produces the expected output."""
|
|
115
140
|
wdl_file = os.path.abspath('src/toil/test/wdl/miniwdl_self_test/self_test.wdl')
|
|
116
141
|
json_file = os.path.abspath('src/toil/test/wdl/miniwdl_self_test/inputs.json')
|
|
117
142
|
|
|
118
|
-
result_json = subprocess.check_output(
|
|
143
|
+
result_json = subprocess.check_output(
|
|
144
|
+
self.base_command + [wdl_file, json_file, '--logDebug', '-o', self.output_dir, '--outputDialect',
|
|
145
|
+
'miniwdl'] + (extra_args or []))
|
|
119
146
|
result = json.loads(result_json)
|
|
120
147
|
|
|
121
148
|
# Expect MiniWDL-style output with a designated "dir"
|
|
@@ -132,7 +159,7 @@ class WDLTests(BaseWDLTest):
|
|
|
132
159
|
assert isinstance(outputs['hello_caller.message_files'], list)
|
|
133
160
|
assert len(outputs['hello_caller.message_files']) == 2
|
|
134
161
|
for item in outputs['hello_caller.message_files']:
|
|
135
|
-
# All the files should be strings in the "out"
|
|
162
|
+
# All the files should be strings in the "out" directory
|
|
136
163
|
assert isinstance(item, str)
|
|
137
164
|
assert item.startswith(out_dir)
|
|
138
165
|
|
|
@@ -172,7 +199,8 @@ class WDLTests(BaseWDLTest):
|
|
|
172
199
|
"GiraffeDeepVariant.runDeepVariantCallVariants.in_dv_gpu_container": "google/deepvariant:1.3.0-gpu"
|
|
173
200
|
})
|
|
174
201
|
|
|
175
|
-
result_json = subprocess.check_output(
|
|
202
|
+
result_json = subprocess.check_output(
|
|
203
|
+
self.base_command + [wdl_file, json_file, '-o', self.output_dir, '--outputDialect', 'miniwdl'])
|
|
176
204
|
result = json.loads(result_json)
|
|
177
205
|
|
|
178
206
|
# Expect MiniWDL-style output with a designated "dir"
|
|
@@ -200,7 +228,9 @@ class WDLTests(BaseWDLTest):
|
|
|
200
228
|
wdl_file = f"{base_uri}/workflows/giraffe.wdl"
|
|
201
229
|
json_file = f"{base_uri}/params/giraffe.json"
|
|
202
230
|
|
|
203
|
-
result_json = subprocess.check_output(
|
|
231
|
+
result_json = subprocess.check_output(
|
|
232
|
+
self.base_command + [wdl_file, json_file, '-o', self.output_dir, '--outputDialect', 'miniwdl', '--scale',
|
|
233
|
+
'0.1'])
|
|
204
234
|
result = json.loads(result_json)
|
|
205
235
|
|
|
206
236
|
# Expect MiniWDL-style output with a designated "dir"
|
|
@@ -265,7 +295,6 @@ class WDLTests(BaseWDLTest):
|
|
|
265
295
|
# worth extracting a base type for this interface.
|
|
266
296
|
with patch.object(WDLWorkflowGraph, 'is_decl', mock_is_decl):
|
|
267
297
|
with patch.object(WDLWorkflowGraph, 'get_transitive_dependencies', mock_get_transitive_dependencies):
|
|
268
|
-
|
|
269
298
|
with self.subTest(msg="Two unrelated decls can coalesce"):
|
|
270
299
|
# Set up two unrelated decls
|
|
271
300
|
all_decls = {"decl1", "decl2"}
|
|
@@ -275,7 +304,7 @@ class WDLTests(BaseWDLTest):
|
|
|
275
304
|
}
|
|
276
305
|
|
|
277
306
|
result = WDLSectionJob.coalesce_nodes(["decl1", "decl2"], WDLWorkflowGraph([]))
|
|
278
|
-
|
|
307
|
+
|
|
279
308
|
# Make sure they coalesced
|
|
280
309
|
assert len(result) == 1
|
|
281
310
|
assert "decl1" in result[0]
|
|
@@ -289,12 +318,11 @@ class WDLTests(BaseWDLTest):
|
|
|
289
318
|
}
|
|
290
319
|
|
|
291
320
|
result = WDLSectionJob.coalesce_nodes(["decl", "nondecl"], WDLWorkflowGraph([]))
|
|
292
|
-
|
|
321
|
+
|
|
293
322
|
assert len(result) == 2
|
|
294
323
|
assert len(result[0]) == 1
|
|
295
324
|
assert len(result[1]) == 1
|
|
296
325
|
|
|
297
|
-
|
|
298
326
|
with self.subTest(msg="Two adjacent decls with a common dependency can coalesce"):
|
|
299
327
|
all_decls = {"decl1", "decl2"}
|
|
300
328
|
all_deps = {
|
|
@@ -304,7 +332,7 @@ class WDLTests(BaseWDLTest):
|
|
|
304
332
|
}
|
|
305
333
|
|
|
306
334
|
result = WDLSectionJob.coalesce_nodes(["base", "decl1", "decl2"], WDLWorkflowGraph([]))
|
|
307
|
-
|
|
335
|
+
|
|
308
336
|
assert len(result) == 2
|
|
309
337
|
assert "base" in result[0]
|
|
310
338
|
assert "decl1" in result[1]
|
|
@@ -319,7 +347,7 @@ class WDLTests(BaseWDLTest):
|
|
|
319
347
|
}
|
|
320
348
|
|
|
321
349
|
result = WDLSectionJob.coalesce_nodes(["base", "decl1", "decl2"], WDLWorkflowGraph([]))
|
|
322
|
-
|
|
350
|
+
|
|
323
351
|
assert len(result) == 3
|
|
324
352
|
assert "base" in result[0]
|
|
325
353
|
|
|
@@ -332,7 +360,7 @@ class WDLTests(BaseWDLTest):
|
|
|
332
360
|
}
|
|
333
361
|
|
|
334
362
|
result = WDLSectionJob.coalesce_nodes(["decl1", "decl2", "successor"], WDLWorkflowGraph([]))
|
|
335
|
-
|
|
363
|
+
|
|
336
364
|
assert len(result) == 2
|
|
337
365
|
assert "decl1" in result[0]
|
|
338
366
|
assert "decl2" in result[0]
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
|
|
3
|
+
from toil.test.provisioners.clusterTest import AbstractClusterTest
|
|
4
|
+
from uuid import uuid4
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from toil.provisioners import cluster_factory
|
|
9
|
+
from toil.test import (slow, integrative)
|
|
10
|
+
|
|
11
|
+
@integrative
|
|
12
|
+
@slow
|
|
13
|
+
@pytest.mark.timeout(600)
|
|
14
|
+
class WDLKubernetesClusterTest(AbstractClusterTest):
|
|
15
|
+
"""
|
|
16
|
+
Ensure WDL works on the Kubernetes batchsystem.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
def __init__(self, name):
|
|
20
|
+
super().__init__(name)
|
|
21
|
+
self.clusterName = 'wdl-integration-test-' + str(uuid4())
|
|
22
|
+
# t2.medium is the minimum t2 instance that permits Kubernetes
|
|
23
|
+
self.leaderNodeType = "t2.medium"
|
|
24
|
+
self.instanceTypes = ["t2.medium"]
|
|
25
|
+
self.clusterType = "kubernetes"
|
|
26
|
+
|
|
27
|
+
def setUp(self) -> None:
|
|
28
|
+
super().setUp()
|
|
29
|
+
self.jobStore = f'aws:{self.awsRegion()}:wdl-test-{uuid4()}'
|
|
30
|
+
|
|
31
|
+
def launchCluster(self) -> None:
|
|
32
|
+
self.createClusterUtil(args=['--leaderStorage', str(self.requestedLeaderStorage),
|
|
33
|
+
'--nodeTypes', ",".join(self.instanceTypes),
|
|
34
|
+
'-w', ",".join(self.numWorkers),
|
|
35
|
+
'--nodeStorage', str(self.requestedLeaderStorage)])
|
|
36
|
+
|
|
37
|
+
def test_wdl_kubernetes_cluster(self):
|
|
38
|
+
"""
|
|
39
|
+
Test that a wdl workflow works on a kubernetes cluster. Launches a cluster with 1 worker. This runs a wdl
|
|
40
|
+
workflow that performs an image pull on the worker.
|
|
41
|
+
:return:
|
|
42
|
+
"""
|
|
43
|
+
self.numWorkers = "1"
|
|
44
|
+
self.requestedLeaderStorage = 30
|
|
45
|
+
# create the cluster
|
|
46
|
+
self.launchCluster()
|
|
47
|
+
# get leader
|
|
48
|
+
self.cluster = cluster_factory(
|
|
49
|
+
provisioner="aws", zone=self.zone, clusterName=self.clusterName
|
|
50
|
+
)
|
|
51
|
+
self.leader = self.cluster.getLeader()
|
|
52
|
+
|
|
53
|
+
url = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
|
|
54
|
+
commit = "09b9659cd01473e836738a2e0dd205df0adb49c5"
|
|
55
|
+
wdl_dir = "wdl_conformance_tests"
|
|
56
|
+
|
|
57
|
+
# get the wdl-conformance-tests repo to get WDL tasks to run
|
|
58
|
+
self.sshUtil([
|
|
59
|
+
"bash",
|
|
60
|
+
"-c",
|
|
61
|
+
f"git clone {url} {wdl_dir} && cd {wdl_dir} && git checkout {commit}"
|
|
62
|
+
])
|
|
63
|
+
|
|
64
|
+
# run on kubernetes batchsystem
|
|
65
|
+
toil_options = ['--batchSystem=kubernetes',
|
|
66
|
+
f"--jobstore={self.jobStore}"]
|
|
67
|
+
|
|
68
|
+
# run WDL workflow that will run singularity
|
|
69
|
+
test_options = [f"tests/md5sum/md5sum.wdl", f"tests/md5sum/md5sum.json"]
|
|
70
|
+
self.sshUtil([
|
|
71
|
+
"bash",
|
|
72
|
+
"-c",
|
|
73
|
+
f"cd {wdl_dir} && toil-wdl-runner {' '.join(test_options)} {' '.join(toil_options)}"])
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
if __name__ == "__main__":
|
|
77
|
+
unittest.main() # run all tests
|
toil/toilState.py
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import logging
|
|
15
|
+
import time
|
|
15
16
|
from typing import Dict, Optional, Set
|
|
16
17
|
|
|
17
18
|
from toil.bus import JobUpdatedMessage, MessageBus
|
|
@@ -183,12 +184,70 @@ class ToilState:
|
|
|
183
184
|
if job_id in self.__job_database:
|
|
184
185
|
# Update the one true copy in place
|
|
185
186
|
old_truth = self.__job_database[job_id]
|
|
186
|
-
old_truth.
|
|
187
|
+
old_truth.assert_is_not_newer_than(new_truth)
|
|
187
188
|
old_truth.__dict__.update(new_truth.__dict__)
|
|
188
189
|
else:
|
|
189
190
|
# Just keep the new one
|
|
190
191
|
self.__job_database[job_id] = new_truth
|
|
191
192
|
|
|
193
|
+
def reset_job_expecting_change(self, job_id: str, timeout: float) -> bool:
|
|
194
|
+
"""
|
|
195
|
+
Discard any local modifications to a JobDescription.
|
|
196
|
+
|
|
197
|
+
Will make modifications from other hosts visible.
|
|
198
|
+
|
|
199
|
+
Will wait for up to timeout seconds for a modification (or deletion)
|
|
200
|
+
from another host to actually be visible.
|
|
201
|
+
|
|
202
|
+
Always replaces the JobDescription with what is stored in the job
|
|
203
|
+
store, even if no modification ends up being visible.
|
|
204
|
+
|
|
205
|
+
Returns True if an update was detected in time, and False otherwise.
|
|
206
|
+
"""
|
|
207
|
+
|
|
208
|
+
start_time = time.time()
|
|
209
|
+
wait_time = 0.1
|
|
210
|
+
initially_known = job_id in self.__job_database
|
|
211
|
+
new_truth: Optional[JobDescription] = None
|
|
212
|
+
while True:
|
|
213
|
+
try:
|
|
214
|
+
new_truth = self.__job_store.load_job(job_id)
|
|
215
|
+
except NoSuchJobException:
|
|
216
|
+
# The job is gone now.
|
|
217
|
+
if job_id in self.__job_database:
|
|
218
|
+
# So forget about it
|
|
219
|
+
del self.__job_database[job_id]
|
|
220
|
+
# TODO: Other collections may still reference it.
|
|
221
|
+
if initially_known:
|
|
222
|
+
# Job was deleted, that's an update
|
|
223
|
+
return True
|
|
224
|
+
else:
|
|
225
|
+
if job_id in self.__job_database:
|
|
226
|
+
# We have an old version to compare against
|
|
227
|
+
old_truth = self.__job_database[job_id]
|
|
228
|
+
old_truth.assert_is_not_newer_than(new_truth)
|
|
229
|
+
if old_truth.is_updated_by(new_truth):
|
|
230
|
+
# Do the update
|
|
231
|
+
old_truth.__dict__.update(new_truth.__dict__)
|
|
232
|
+
return True
|
|
233
|
+
else:
|
|
234
|
+
# Just keep the new one. That's an update.
|
|
235
|
+
self.__job_database[job_id] = new_truth
|
|
236
|
+
return True
|
|
237
|
+
# We looked but didn't get a good update
|
|
238
|
+
time_elapsed = time.time() - start_time
|
|
239
|
+
if time_elapsed >= timeout:
|
|
240
|
+
# We're out of time to check.
|
|
241
|
+
if new_truth is not None:
|
|
242
|
+
# Commit whatever we managed to load to accomplish a real
|
|
243
|
+
# reset.
|
|
244
|
+
old_truth.__dict__.update(new_truth.__dict__)
|
|
245
|
+
return False
|
|
246
|
+
# Wait a little and poll again
|
|
247
|
+
time.sleep(min(timeout - time_elapsed, wait_time))
|
|
248
|
+
# Using exponential backoff
|
|
249
|
+
wait_time *= 2
|
|
250
|
+
|
|
192
251
|
# The next 3 functions provide tracking of how many successor jobs a given job
|
|
193
252
|
# is waiting on, exposing only legit operations.
|
|
194
253
|
# TODO: turn these into messages?
|
|
@@ -247,10 +306,10 @@ class ToilState:
|
|
|
247
306
|
|
|
248
307
|
:param jobDesc: The description for the root job of the workflow being run.
|
|
249
308
|
"""
|
|
250
|
-
# If the job description has a
|
|
309
|
+
# If the job description has a body, is a checkpoint, has services
|
|
251
310
|
# or is ready to be deleted it is ready to be processed (i.e. it is updated)
|
|
252
311
|
if (
|
|
253
|
-
jobDesc.
|
|
312
|
+
jobDesc.has_body()
|
|
254
313
|
or (
|
|
255
314
|
isinstance(jobDesc, CheckpointJobDescription)
|
|
256
315
|
and jobDesc.checkpoint is not None
|
|
@@ -259,10 +318,10 @@ class ToilState:
|
|
|
259
318
|
or jobDesc.nextSuccessors() is None
|
|
260
319
|
):
|
|
261
320
|
logger.debug(
|
|
262
|
-
"Found job to run: %s, with
|
|
321
|
+
"Found job to run: %s, with body: %s, with checkpoint: %s, with "
|
|
263
322
|
"services: %s, with no next successors: %s",
|
|
264
323
|
jobDesc.jobStoreID,
|
|
265
|
-
jobDesc.
|
|
324
|
+
jobDesc.has_body(),
|
|
266
325
|
isinstance(jobDesc, CheckpointJobDescription)
|
|
267
326
|
and jobDesc.checkpoint is not None,
|
|
268
327
|
len(jobDesc.services) > 0,
|
|
@@ -272,18 +331,18 @@ class ToilState:
|
|
|
272
331
|
self.bus.publish(JobUpdatedMessage(str(jobDesc.jobStoreID), 0))
|
|
273
332
|
|
|
274
333
|
if isinstance(jobDesc, CheckpointJobDescription) and jobDesc.checkpoint is not None:
|
|
275
|
-
jobDesc.
|
|
334
|
+
jobDesc.restore_checkpoint()
|
|
276
335
|
|
|
277
336
|
else: # There exist successors
|
|
278
337
|
logger.debug(
|
|
279
338
|
"Adding job: %s to the state with %s successors",
|
|
280
339
|
jobDesc.jobStoreID,
|
|
281
|
-
len(jobDesc.nextSuccessors()),
|
|
340
|
+
len(jobDesc.nextSuccessors() or set()),
|
|
282
341
|
)
|
|
283
342
|
|
|
284
343
|
# Record the number of successors
|
|
285
344
|
self.successorCounts[str(jobDesc.jobStoreID)] = len(
|
|
286
|
-
jobDesc.nextSuccessors()
|
|
345
|
+
jobDesc.nextSuccessors() or set()
|
|
287
346
|
)
|
|
288
347
|
|
|
289
348
|
def processSuccessorWithMultiplePredecessors(successor: JobDescription) -> None:
|
|
@@ -305,7 +364,7 @@ class ToilState:
|
|
|
305
364
|
self._buildToilState(successor)
|
|
306
365
|
|
|
307
366
|
# For each successor
|
|
308
|
-
for successorJobStoreID in jobDesc.nextSuccessors():
|
|
367
|
+
for successorJobStoreID in jobDesc.nextSuccessors() or set():
|
|
309
368
|
|
|
310
369
|
# If the successor does not yet point back at a
|
|
311
370
|
# predecessor we have not yet considered it
|
toil/utils/toilDebugFile.py
CHANGED
|
@@ -17,11 +17,11 @@ import logging
|
|
|
17
17
|
import os.path
|
|
18
18
|
import sys
|
|
19
19
|
from typing import Optional
|
|
20
|
-
from distutils.util import strtobool
|
|
21
20
|
|
|
22
21
|
from toil.common import Config, Toil, parser_with_common_options
|
|
23
22
|
from toil.jobStores.fileJobStore import FileJobStore
|
|
24
23
|
from toil.lib.resources import glob
|
|
24
|
+
from toil.lib.conversions import strtobool
|
|
25
25
|
from toil.statsAndLogging import set_logging_from_options
|
|
26
26
|
|
|
27
27
|
logger = logging.getLogger(__name__)
|