toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/test/src/busTest.py
CHANGED
|
@@ -17,10 +17,12 @@ import os
|
|
|
17
17
|
from threading import Thread, current_thread
|
|
18
18
|
|
|
19
19
|
from toil.batchSystems.abstractBatchSystem import BatchJobExitReason
|
|
20
|
-
from toil.bus import (
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
20
|
+
from toil.bus import (
|
|
21
|
+
JobCompletedMessage,
|
|
22
|
+
JobIssuedMessage,
|
|
23
|
+
MessageBus,
|
|
24
|
+
replay_message_bus,
|
|
25
|
+
)
|
|
24
26
|
from toil.common import Toil
|
|
25
27
|
from toil.exceptions import FailedJobsException
|
|
26
28
|
from toil.job import Job
|
|
@@ -28,8 +30,9 @@ from toil.test import ToilTest, get_temp_file
|
|
|
28
30
|
|
|
29
31
|
logger = logging.getLogger(__name__)
|
|
30
32
|
|
|
33
|
+
|
|
31
34
|
class MessageBusTest(ToilTest):
|
|
32
|
-
|
|
35
|
+
|
|
33
36
|
def test_enum_ints_in_file(self) -> None:
|
|
34
37
|
"""
|
|
35
38
|
Make sure writing bus messages to files works with enums.
|
|
@@ -44,7 +47,7 @@ class MessageBusTest(ToilTest):
|
|
|
44
47
|
# Make sure stuff goes away in the right order
|
|
45
48
|
del handler_to_keep_alive
|
|
46
49
|
del bus
|
|
47
|
-
|
|
50
|
+
|
|
48
51
|
for line in open(bus_file):
|
|
49
52
|
logger.debug("Bus line: %s", line)
|
|
50
53
|
|
|
@@ -107,17 +110,19 @@ class MessageBusTest(ToilTest):
|
|
|
107
110
|
Test the ability to restart a workflow when the message bus path used
|
|
108
111
|
by the previous attempt is gone.
|
|
109
112
|
"""
|
|
110
|
-
temp_dir = self._createTempDir(purpose=
|
|
113
|
+
temp_dir = self._createTempDir(purpose="tempDir")
|
|
111
114
|
job_store = self._getTestJobStorePath()
|
|
112
115
|
|
|
113
|
-
bus_holder_dir = os.path.join(temp_dir,
|
|
116
|
+
bus_holder_dir = os.path.join(temp_dir, "bus_holder")
|
|
114
117
|
os.mkdir(bus_holder_dir)
|
|
115
118
|
|
|
116
119
|
start_options = Job.Runner.getDefaultOptions(job_store)
|
|
117
|
-
start_options.logLevel =
|
|
120
|
+
start_options.logLevel = "DEBUG"
|
|
118
121
|
start_options.retryCount = 0
|
|
119
122
|
start_options.clean = "never"
|
|
120
|
-
start_options.write_messages = os.path.abspath(
|
|
123
|
+
start_options.write_messages = os.path.abspath(
|
|
124
|
+
os.path.join(bus_holder_dir, "messagebus.txt")
|
|
125
|
+
)
|
|
121
126
|
|
|
122
127
|
root = Job.wrapJobFn(failing_job_fn)
|
|
123
128
|
|
|
@@ -128,17 +133,17 @@ class MessageBusTest(ToilTest):
|
|
|
128
133
|
except FailedJobsException:
|
|
129
134
|
pass
|
|
130
135
|
|
|
131
|
-
logger.info(
|
|
136
|
+
logger.info("First attempt successfully failed, removing message bus log")
|
|
132
137
|
|
|
133
138
|
# Get rid of the bus
|
|
134
139
|
os.unlink(start_options.write_messages)
|
|
135
140
|
os.rmdir(bus_holder_dir)
|
|
136
141
|
|
|
137
|
-
logger.info(
|
|
142
|
+
logger.info("Making second attempt")
|
|
138
143
|
|
|
139
144
|
# Set up options without a specific bus path
|
|
140
145
|
restart_options = Job.Runner.getDefaultOptions(job_store)
|
|
141
|
-
restart_options.logLevel =
|
|
146
|
+
restart_options.logLevel = "DEBUG"
|
|
142
147
|
restart_options.retryCount = 0
|
|
143
148
|
restart_options.clean = "never"
|
|
144
149
|
restart_options.restart = True
|
|
@@ -150,14 +155,11 @@ class MessageBusTest(ToilTest):
|
|
|
150
155
|
except FailedJobsException:
|
|
151
156
|
pass
|
|
152
157
|
|
|
153
|
-
logger.info(
|
|
158
|
+
logger.info("Second attempt successfully failed")
|
|
154
159
|
|
|
155
160
|
|
|
156
161
|
def failing_job_fn(job: Job) -> None:
|
|
157
162
|
"""
|
|
158
163
|
This function is guaranteed to fail.
|
|
159
164
|
"""
|
|
160
|
-
raise RuntimeError(
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
165
|
+
raise RuntimeError("Job attempted to run but failed")
|
toil/test/src/checkpointTest.py
CHANGED
|
@@ -63,8 +63,10 @@ class CheckpointTest(ToilTest):
|
|
|
63
63
|
except FailedJobsException:
|
|
64
64
|
self.fail("Checkpointed workflow restart doesn't clean failures.")
|
|
65
65
|
|
|
66
|
+
|
|
66
67
|
class CheckRetryCount(Job):
|
|
67
68
|
"""Fail N times, succeed on the next try."""
|
|
69
|
+
|
|
68
70
|
def __init__(self, numFailuresBeforeSuccess):
|
|
69
71
|
super().__init__(checkpoint=True)
|
|
70
72
|
self.numFailuresBeforeSuccess = numFailuresBeforeSuccess
|
|
@@ -73,11 +75,11 @@ class CheckRetryCount(Job):
|
|
|
73
75
|
"""Mark a retry in the fileStore, and return the number of retries so far."""
|
|
74
76
|
try:
|
|
75
77
|
with fileStore.jobStore.read_shared_file_stream("checkpointRun") as f:
|
|
76
|
-
timesRun = int(f.read().decode(
|
|
78
|
+
timesRun = int(f.read().decode("utf-8"))
|
|
77
79
|
except NoSuchFileException:
|
|
78
80
|
timesRun = 0
|
|
79
81
|
with fileStore.jobStore.write_shared_file_stream("checkpointRun") as f:
|
|
80
|
-
f.write(str(timesRun + 1).encode(
|
|
82
|
+
f.write(str(timesRun + 1).encode("utf-8"))
|
|
81
83
|
return timesRun
|
|
82
84
|
|
|
83
85
|
def run(self, fileStore):
|
|
@@ -86,10 +88,12 @@ class CheckRetryCount(Job):
|
|
|
86
88
|
if retryCount < self.numFailuresBeforeSuccess:
|
|
87
89
|
self.addChild(AlwaysFail())
|
|
88
90
|
|
|
91
|
+
|
|
89
92
|
class AlwaysFail(Job):
|
|
90
93
|
def run(self, fileStore):
|
|
91
94
|
raise RuntimeError(":(")
|
|
92
95
|
|
|
96
|
+
|
|
93
97
|
class CheckpointFailsFirstTime(Job):
|
|
94
98
|
def __init__(self):
|
|
95
99
|
super().__init__(checkpoint=True)
|
|
@@ -97,8 +101,10 @@ class CheckpointFailsFirstTime(Job):
|
|
|
97
101
|
def run(self, fileStore):
|
|
98
102
|
self.addChild(FailOnce())
|
|
99
103
|
|
|
104
|
+
|
|
100
105
|
class FailOnce(Job):
|
|
101
106
|
"""Fail the first time the workflow is run, but succeed thereafter."""
|
|
107
|
+
|
|
102
108
|
def run(self, fileStore):
|
|
103
109
|
if fileStore.jobStore.config.workflowAttemptNumber < 1:
|
|
104
110
|
raise RuntimeError("first time around")
|
|
@@ -30,28 +30,29 @@ logger = logging.getLogger(__name__)
|
|
|
30
30
|
|
|
31
31
|
class DeferredFunctionTest(ToilTest, metaclass=ABCMeta):
|
|
32
32
|
"""Test the deferred function system."""
|
|
33
|
+
|
|
33
34
|
# This determines what job store type to use.
|
|
34
|
-
jobStoreType =
|
|
35
|
+
jobStoreType = "file"
|
|
35
36
|
|
|
36
37
|
def _getTestJobStore(self):
|
|
37
|
-
if self.jobStoreType ==
|
|
38
|
+
if self.jobStoreType == "file":
|
|
38
39
|
return self._getTestJobStorePath()
|
|
39
|
-
elif self.jobStoreType ==
|
|
40
|
-
return f
|
|
41
|
-
elif self.jobStoreType ==
|
|
42
|
-
projectID = os.getenv(
|
|
43
|
-
return f
|
|
40
|
+
elif self.jobStoreType == "aws":
|
|
41
|
+
return f"aws:{self.awsRegion()}:cache-tests-{uuid4()}"
|
|
42
|
+
elif self.jobStoreType == "google":
|
|
43
|
+
projectID = os.getenv("TOIL_GOOGLE_PROJECTID")
|
|
44
|
+
return f"google:{projectID}:cache-tests-{str(uuid4())}"
|
|
44
45
|
else:
|
|
45
|
-
raise RuntimeError(
|
|
46
|
+
raise RuntimeError("Illegal job store type.")
|
|
46
47
|
|
|
47
48
|
def setUp(self):
|
|
48
49
|
super().setUp()
|
|
49
50
|
testDir = self._createTempDir()
|
|
50
51
|
self.options = Job.Runner.getDefaultOptions(self._getTestJobStore())
|
|
51
|
-
self.options.logLevel =
|
|
52
|
+
self.options.logLevel = "INFO"
|
|
52
53
|
self.options.workDir = testDir
|
|
53
|
-
self.options.clean =
|
|
54
|
-
self.options.logFile = os.path.join(testDir,
|
|
54
|
+
self.options.clean = "always"
|
|
55
|
+
self.options.logFile = os.path.join(testDir, "logFile")
|
|
55
56
|
|
|
56
57
|
# Tests for the various defer possibilities
|
|
57
58
|
def testDeferredFunctionRunsWithMethod(self):
|
|
@@ -84,11 +85,11 @@ class DeferredFunctionTest(ToilTest, metaclass=ABCMeta):
|
|
|
84
85
|
:param function callableFn: The function to use in the test.
|
|
85
86
|
:return: None
|
|
86
87
|
"""
|
|
87
|
-
workdir = self._createTempDir(purpose=
|
|
88
|
+
workdir = self._createTempDir(purpose="nonLocalDir")
|
|
88
89
|
nonLocalFile1 = os.path.join(workdir, str(uuid4()))
|
|
89
90
|
nonLocalFile2 = os.path.join(workdir, str(uuid4()))
|
|
90
|
-
open(nonLocalFile1,
|
|
91
|
-
open(nonLocalFile2,
|
|
91
|
+
open(nonLocalFile1, "w").close()
|
|
92
|
+
open(nonLocalFile2, "w").close()
|
|
92
93
|
assert os.path.exists(nonLocalFile1)
|
|
93
94
|
assert os.path.exists(nonLocalFile2)
|
|
94
95
|
A = Job.wrapJobFn(callableFn, files=(nonLocalFile1, nonLocalFile2))
|
|
@@ -114,15 +115,16 @@ class DeferredFunctionTest(ToilTest, metaclass=ABCMeta):
|
|
|
114
115
|
where a deferred function fails (since the first file doesn't exist on the retry).
|
|
115
116
|
"""
|
|
116
117
|
self.options.retryCount = 1
|
|
117
|
-
workdir = self._createTempDir(purpose=
|
|
118
|
+
workdir = self._createTempDir(purpose="nonLocalDir")
|
|
118
119
|
nonLocalFile1 = os.path.join(workdir, str(uuid4()))
|
|
119
120
|
nonLocalFile2 = os.path.join(workdir, str(uuid4()))
|
|
120
|
-
open(nonLocalFile1,
|
|
121
|
-
open(nonLocalFile2,
|
|
121
|
+
open(nonLocalFile1, "w").close()
|
|
122
|
+
open(nonLocalFile2, "w").close()
|
|
122
123
|
assert os.path.exists(nonLocalFile1)
|
|
123
124
|
assert os.path.exists(nonLocalFile2)
|
|
124
|
-
A = Job.wrapJobFn(
|
|
125
|
-
|
|
125
|
+
A = Job.wrapJobFn(
|
|
126
|
+
_deferredFunctionRunsWithFailuresFn, files=(nonLocalFile1, nonLocalFile2)
|
|
127
|
+
)
|
|
126
128
|
Job.Runner.startToil(A, self.options)
|
|
127
129
|
assert not os.path.exists(nonLocalFile1)
|
|
128
130
|
assert not os.path.exists(nonLocalFile2)
|
|
@@ -145,11 +147,11 @@ class DeferredFunctionTest(ToilTest, metaclass=ABCMeta):
|
|
|
145
147
|
|
|
146
148
|
# There can be no retries
|
|
147
149
|
self.options.retryCount = 0
|
|
148
|
-
workdir = self._createTempDir(purpose=
|
|
150
|
+
workdir = self._createTempDir(purpose="nonLocalDir")
|
|
149
151
|
nonLocalFile1 = os.path.join(workdir, str(uuid4()))
|
|
150
152
|
nonLocalFile2 = os.path.join(workdir, str(uuid4()))
|
|
151
|
-
open(nonLocalFile1,
|
|
152
|
-
open(nonLocalFile2,
|
|
153
|
+
open(nonLocalFile1, "w").close()
|
|
154
|
+
open(nonLocalFile2, "w").close()
|
|
153
155
|
assert os.path.exists(nonLocalFile1)
|
|
154
156
|
assert os.path.exists(nonLocalFile2)
|
|
155
157
|
files = [nonLocalFile1, nonLocalFile2]
|
|
@@ -157,8 +159,12 @@ class DeferredFunctionTest(ToilTest, metaclass=ABCMeta):
|
|
|
157
159
|
# A and B here must run in parallel for this to work
|
|
158
160
|
A = Job.wrapJobFn(_testNewJobsCanHandleOtherJobDeaths_A, files=files, cores=1)
|
|
159
161
|
B = Job.wrapJobFn(_testNewJobsCanHandleOtherJobDeaths_B, files=files, cores=1)
|
|
160
|
-
C = Job.wrapJobFn(
|
|
161
|
-
|
|
162
|
+
C = Job.wrapJobFn(
|
|
163
|
+
_testNewJobsCanHandleOtherJobDeaths_C,
|
|
164
|
+
files=files,
|
|
165
|
+
expectedResult=False,
|
|
166
|
+
cores=1,
|
|
167
|
+
)
|
|
162
168
|
root.addChild(A)
|
|
163
169
|
root.addChild(B)
|
|
164
170
|
B.addChild(C)
|
|
@@ -179,21 +185,22 @@ class DeferredFunctionTest(ToilTest, metaclass=ABCMeta):
|
|
|
179
185
|
|
|
180
186
|
# There can be no retries
|
|
181
187
|
self.options.retryCount = 0
|
|
182
|
-
workdir = self._createTempDir(purpose=
|
|
188
|
+
workdir = self._createTempDir(purpose="nonLocalDir")
|
|
183
189
|
nonLocalFile1 = os.path.join(workdir, str(uuid4()))
|
|
184
190
|
nonLocalFile2 = os.path.join(workdir, str(uuid4()))
|
|
185
191
|
# The first file has to be non zero or meseeks will go into an infinite sleep
|
|
186
|
-
file1 = open(nonLocalFile1,
|
|
187
|
-
file1.write(
|
|
192
|
+
file1 = open(nonLocalFile1, "w")
|
|
193
|
+
file1.write("test")
|
|
188
194
|
file1.close()
|
|
189
|
-
open(nonLocalFile2,
|
|
195
|
+
open(nonLocalFile2, "w").close()
|
|
190
196
|
assert os.path.exists(nonLocalFile1)
|
|
191
197
|
assert os.path.exists(nonLocalFile2)
|
|
192
198
|
# We only use the "A" job here, and we fill in the first file, so all
|
|
193
199
|
# it will do is defer deleting the second file, delete the first file,
|
|
194
200
|
# and die.
|
|
195
|
-
A = Job.wrapJobFn(
|
|
196
|
-
|
|
201
|
+
A = Job.wrapJobFn(
|
|
202
|
+
_testNewJobsCanHandleOtherJobDeaths_A, files=(nonLocalFile1, nonLocalFile2)
|
|
203
|
+
)
|
|
197
204
|
try:
|
|
198
205
|
Job.Runner.startToil(A, self.options)
|
|
199
206
|
except FailedJobsException:
|
|
@@ -201,6 +208,7 @@ class DeferredFunctionTest(ToilTest, metaclass=ABCMeta):
|
|
|
201
208
|
assert not os.path.exists(nonLocalFile1)
|
|
202
209
|
assert not os.path.exists(nonLocalFile2)
|
|
203
210
|
|
|
211
|
+
|
|
204
212
|
def _writeNonLocalFilesMethod(job, files):
|
|
205
213
|
"""
|
|
206
214
|
Write some data to 2 files. Pass them to a registered deferred method.
|
|
@@ -209,11 +217,12 @@ def _writeNonLocalFilesMethod(job, files):
|
|
|
209
217
|
:return: None
|
|
210
218
|
"""
|
|
211
219
|
for nlf in files:
|
|
212
|
-
with open(nlf,
|
|
220
|
+
with open(nlf, "wb") as nonLocalFileHandle:
|
|
213
221
|
nonLocalFileHandle.write(os.urandom(1 * 1024 * 1024))
|
|
214
222
|
job.defer(_deleteMethods._deleteFileMethod, files[0], nlf=files[1])
|
|
215
223
|
return None
|
|
216
224
|
|
|
225
|
+
|
|
217
226
|
def _writeNonLocalFilesClassMethod(job, files):
|
|
218
227
|
"""
|
|
219
228
|
Write some data to 2 files. Pass them to a registered deferred class method.
|
|
@@ -222,11 +231,12 @@ def _writeNonLocalFilesClassMethod(job, files):
|
|
|
222
231
|
:return: None
|
|
223
232
|
"""
|
|
224
233
|
for nlf in files:
|
|
225
|
-
with open(nlf,
|
|
234
|
+
with open(nlf, "wb") as nonLocalFileHandle:
|
|
226
235
|
nonLocalFileHandle.write(os.urandom(1 * 1024 * 1024))
|
|
227
236
|
job.defer(_deleteMethods._deleteFileClassMethod, files[0], nlf=files[1])
|
|
228
237
|
return None
|
|
229
238
|
|
|
239
|
+
|
|
230
240
|
def _writeNonLocalFilesLambda(job, files):
|
|
231
241
|
"""
|
|
232
242
|
Write some data to 2 files. Pass them to a registered deferred Lambda.
|
|
@@ -236,11 +246,12 @@ def _writeNonLocalFilesLambda(job, files):
|
|
|
236
246
|
"""
|
|
237
247
|
lmd = lambda x, nlf: [os.remove(x), os.remove(nlf)]
|
|
238
248
|
for nlf in files:
|
|
239
|
-
with open(nlf,
|
|
249
|
+
with open(nlf, "wb") as nonLocalFileHandle:
|
|
240
250
|
nonLocalFileHandle.write(os.urandom(1 * 1024 * 1024))
|
|
241
251
|
job.defer(lmd, files[0], nlf=files[1])
|
|
242
252
|
return None
|
|
243
253
|
|
|
254
|
+
|
|
244
255
|
def _deferredFunctionRunsWithFailuresFn(job, files):
|
|
245
256
|
"""
|
|
246
257
|
Refer testDeferredFunctionRunsWithFailures
|
|
@@ -255,6 +266,7 @@ def _deferredFunctionRunsWithFailuresFn(job, files):
|
|
|
255
266
|
assert os.path.exists(files[1])
|
|
256
267
|
job.defer(_deleteFile, files[1])
|
|
257
268
|
|
|
269
|
+
|
|
258
270
|
def _deleteFile(nonLocalFile, nlf=None):
|
|
259
271
|
"""
|
|
260
272
|
Delete nonLocalFile and nlf
|
|
@@ -270,6 +282,7 @@ def _deleteFile(nonLocalFile, nlf=None):
|
|
|
270
282
|
os.remove(nlf)
|
|
271
283
|
logger.debug("Successfully removed file: %s", nlf)
|
|
272
284
|
|
|
285
|
+
|
|
273
286
|
def _testNewJobsCanHandleOtherJobDeaths_A(job, files):
|
|
274
287
|
"""
|
|
275
288
|
Defer deletion of files[1], then wait for _testNewJobsCanHandleOtherJobDeaths_B to
|
|
@@ -281,7 +294,7 @@ def _testNewJobsCanHandleOtherJobDeaths_A(job, files):
|
|
|
281
294
|
|
|
282
295
|
# Write the pid to files[1] such that we can be sure that this process has died before
|
|
283
296
|
# we spawn the next job that will do the cleanup.
|
|
284
|
-
with open(files[1],
|
|
297
|
+
with open(files[1], "w") as fileHandle:
|
|
285
298
|
fileHandle.write(str(os.getpid()))
|
|
286
299
|
job.defer(_deleteFile, files[1])
|
|
287
300
|
logger.info("Deferred delete of %s", files[1])
|
|
@@ -290,10 +303,11 @@ def _testNewJobsCanHandleOtherJobDeaths_A(job, files):
|
|
|
290
303
|
os.remove(files[0])
|
|
291
304
|
os.kill(os.getpid(), signal.SIGKILL)
|
|
292
305
|
|
|
306
|
+
|
|
293
307
|
def _testNewJobsCanHandleOtherJobDeaths_B(job, files):
|
|
294
308
|
# Write something to files[0] such that we can be sure that this process has started
|
|
295
309
|
# before _testNewJobsCanHandleOtherJobDeaths_A kills itself.
|
|
296
|
-
with open(files[0],
|
|
310
|
+
with open(files[0], "w") as fileHandle:
|
|
297
311
|
fileHandle.write(str(os.getpid()))
|
|
298
312
|
while os.path.exists(files[0]):
|
|
299
313
|
time.sleep(0.5)
|
toil/test/src/dockerCheckTest.py
CHANGED
|
@@ -15,6 +15,7 @@ import unittest
|
|
|
15
15
|
|
|
16
16
|
from docker.errors import ImageNotFound
|
|
17
17
|
from toil import checkDockerImageExists, parseDockerAppliance
|
|
18
|
+
from toil.lib.retry import retry
|
|
18
19
|
from toil.test import ToilTest, needs_docker
|
|
19
20
|
|
|
20
21
|
|
|
@@ -22,80 +23,87 @@ from toil.test import ToilTest, needs_docker
|
|
|
22
23
|
class DockerCheckTest(ToilTest):
|
|
23
24
|
"""Tests checking whether a docker image exists or not."""
|
|
24
25
|
|
|
25
|
-
@unittest.skip(
|
|
26
|
+
@unittest.skip("Consumes unauthenticated Docker Hub pulls if run")
|
|
26
27
|
def testOfficialUbuntuRepo(self):
|
|
27
28
|
"""Image exists. This should pass."""
|
|
28
|
-
ubuntu_repo =
|
|
29
|
+
ubuntu_repo = "ubuntu:latest"
|
|
29
30
|
assert checkDockerImageExists(ubuntu_repo)
|
|
30
31
|
|
|
31
|
-
@unittest.skip(
|
|
32
|
+
@unittest.skip("Consumes unauthenticated Docker Hub pulls if run")
|
|
32
33
|
def testBroadDockerRepo(self):
|
|
33
34
|
"""Image exists. This should pass."""
|
|
34
|
-
broad_repo =
|
|
35
|
+
broad_repo = "broadinstitute/genomes-in-the-cloud:2.0.0"
|
|
35
36
|
assert checkDockerImageExists(broad_repo)
|
|
36
37
|
|
|
37
|
-
@unittest.skip(
|
|
38
|
+
@unittest.skip("Consumes unauthenticated Docker Hub pulls if run")
|
|
38
39
|
def testBroadDockerRepoBadTag(self):
|
|
39
40
|
"""Bad tag. This should raise."""
|
|
40
|
-
broad_repo =
|
|
41
|
+
broad_repo = "broadinstitute/genomes-in-the-cloud:-----"
|
|
41
42
|
with self.assertRaises(ImageNotFound):
|
|
42
43
|
checkDockerImageExists(broad_repo)
|
|
43
44
|
|
|
44
|
-
@unittest.skip(
|
|
45
|
+
@unittest.skip("Consumes unauthenticated Docker Hub pulls if run")
|
|
45
46
|
def testNonexistentRepo(self):
|
|
46
47
|
"""Bad image. This should raise."""
|
|
47
|
-
nonexistent_repo =
|
|
48
|
+
nonexistent_repo = "------:-----"
|
|
48
49
|
with self.assertRaises(ImageNotFound):
|
|
49
50
|
checkDockerImageExists(nonexistent_repo)
|
|
50
51
|
|
|
51
52
|
def testToilQuayRepo(self):
|
|
52
53
|
"""Image exists. Should pass."""
|
|
53
|
-
toil_repo =
|
|
54
|
+
toil_repo = "quay.io/ucsc_cgl/toil:latest"
|
|
54
55
|
assert checkDockerImageExists(toil_repo)
|
|
55
56
|
|
|
56
57
|
def testBadQuayRepoNTag(self):
|
|
57
58
|
"""Bad repo and tag. This should raise."""
|
|
58
|
-
nonexistent_quay_repo =
|
|
59
|
+
nonexistent_quay_repo = "quay.io/--------:---"
|
|
59
60
|
with self.assertRaises(ImageNotFound):
|
|
60
61
|
checkDockerImageExists(nonexistent_quay_repo)
|
|
61
62
|
|
|
62
63
|
def testBadQuayRepo(self):
|
|
63
64
|
"""Bad repo. This should raise."""
|
|
64
|
-
nonexistent_quay_repo =
|
|
65
|
+
nonexistent_quay_repo = "quay.io/--------:latest"
|
|
65
66
|
with self.assertRaises(ImageNotFound):
|
|
66
67
|
checkDockerImageExists(nonexistent_quay_repo)
|
|
67
68
|
|
|
68
69
|
def testBadQuayTag(self):
|
|
69
70
|
"""Bad tag. This should raise."""
|
|
70
|
-
nonexistent_quay_repo =
|
|
71
|
+
nonexistent_quay_repo = "quay.io/ucsc_cgl/toil:---"
|
|
71
72
|
with self.assertRaises(ImageNotFound):
|
|
72
73
|
checkDockerImageExists(nonexistent_quay_repo)
|
|
73
74
|
|
|
74
75
|
def testGoogleRepo(self):
|
|
75
76
|
"""Image exists. Should pass."""
|
|
76
|
-
google_repo =
|
|
77
|
+
google_repo = "gcr.io/google-containers/busybox:latest"
|
|
77
78
|
assert checkDockerImageExists(google_repo)
|
|
78
79
|
|
|
80
|
+
@retry(
|
|
81
|
+
errors=[TimeoutError]
|
|
82
|
+
) # see: https://github.com/DataBiosphere/toil/issues/4902
|
|
79
83
|
def testBadGoogleRepo(self):
|
|
80
84
|
"""Bad repo and tag. This should raise."""
|
|
81
|
-
nonexistent_google_repo =
|
|
85
|
+
nonexistent_google_repo = "gcr.io/google-containers/--------:---"
|
|
82
86
|
with self.assertRaises(ImageNotFound):
|
|
83
87
|
checkDockerImageExists(nonexistent_google_repo)
|
|
84
88
|
|
|
85
89
|
def testApplianceParser(self):
|
|
86
90
|
"""Test that a specified appliance is parsed correctly."""
|
|
87
|
-
docker_list = [
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
91
|
+
docker_list = [
|
|
92
|
+
"ubuntu:latest",
|
|
93
|
+
"ubuntu",
|
|
94
|
+
"broadinstitute/genomes-in-the-cloud:2.0.0",
|
|
95
|
+
"quay.io/ucsc_cgl/toil:latest",
|
|
96
|
+
"gcr.io/google-containers/busybox:latest",
|
|
97
|
+
]
|
|
92
98
|
parsings = []
|
|
93
99
|
for image in docker_list:
|
|
94
100
|
registryName, imageName, tag = parseDockerAppliance(image)
|
|
95
101
|
parsings.append([registryName, imageName, tag])
|
|
96
|
-
expected_parsings = [
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
102
|
+
expected_parsings = [
|
|
103
|
+
["docker.io", "ubuntu", "latest"],
|
|
104
|
+
["docker.io", "ubuntu", "latest"],
|
|
105
|
+
["docker.io", "broadinstitute/genomes-in-the-cloud", "2.0.0"],
|
|
106
|
+
["quay.io", "ucsc_cgl/toil", "latest"],
|
|
107
|
+
["gcr.io", "google-containers/busybox", "latest"],
|
|
108
|
+
]
|
|
101
109
|
assert parsings == expected_parsings
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# Copyright (C) 2015-2024 Regents of the University of California
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import logging
|
|
15
|
+
import os
|
|
16
|
+
import time
|
|
17
|
+
from argparse import Namespace
|
|
18
|
+
from threading import Thread
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
from toil.common import Toil
|
|
22
|
+
from toil.job import Job
|
|
23
|
+
from toil.jobStores.abstractJobStore import NoSuchFileException
|
|
24
|
+
from toil.test import ToilTest
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class EnvironmentTest(ToilTest):
|
|
30
|
+
"""
|
|
31
|
+
Test to make sure that Toil's environment variable save and restore system
|
|
32
|
+
(environment.pickle) works.
|
|
33
|
+
|
|
34
|
+
The environment should be captured once at the start of the workflow and
|
|
35
|
+
should be sent through based on that, not based on the leader's current
|
|
36
|
+
environment when the job is launched.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
def test_environment(self):
|
|
40
|
+
options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
|
|
41
|
+
options.logLevel = "DEBUG"
|
|
42
|
+
options.retryCount = 0
|
|
43
|
+
|
|
44
|
+
main(options)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def signal_leader(job):
|
|
48
|
+
"""
|
|
49
|
+
Make a file in the file store that the leader can see.
|
|
50
|
+
"""
|
|
51
|
+
with job.fileStore.jobStore.write_shared_file_stream(
|
|
52
|
+
"jobstarted.txt", encoding="utf-8"
|
|
53
|
+
) as stream:
|
|
54
|
+
stream.write("Job has run")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def check_environment(job, try_name: str):
|
|
58
|
+
"""
|
|
59
|
+
Fail if the test environment is wrong.
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
job.fileStore.log_to_leader(f"Try {try_name} checking environment")
|
|
63
|
+
value = os.environ["MAGIC_ENV_VAR_123"]
|
|
64
|
+
job.fileStore.log_to_leader(f"Try {try_name} got: {value}")
|
|
65
|
+
if value != "Value1":
|
|
66
|
+
raise RuntimeError("Environment variable is wrong!")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def wait_a_bit(job):
|
|
70
|
+
"""
|
|
71
|
+
Toil job that waits.
|
|
72
|
+
"""
|
|
73
|
+
time.sleep(10)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def check_environment_repeatedly(job):
|
|
77
|
+
"""
|
|
78
|
+
Toil job that checks the environment, waits, and checks it again, as
|
|
79
|
+
separate invocations.
|
|
80
|
+
"""
|
|
81
|
+
|
|
82
|
+
signal = job.addChildJobFn(signal_leader)
|
|
83
|
+
check1 = signal.addFollowOnJobFn(check_environment, "try1")
|
|
84
|
+
waiter = check1.addFollowOnJobFn(wait_a_bit)
|
|
85
|
+
check2 = waiter.addFollowOnJobFn(check_environment, "try2")
|
|
86
|
+
# Add another one to make sure we don't chain
|
|
87
|
+
check3 = waiter.addFollowOnJobFn(check_environment, "try3")
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def main(options: Optional[Namespace] = None):
|
|
91
|
+
"""
|
|
92
|
+
Run the actual workflow with the given options.
|
|
93
|
+
"""
|
|
94
|
+
if not options:
|
|
95
|
+
# deal with command line arguments
|
|
96
|
+
parser = Job.Runner.getDefaultArgumentParser()
|
|
97
|
+
options = parser.parse_args()
|
|
98
|
+
logging.basicConfig()
|
|
99
|
+
|
|
100
|
+
# Set something that should be seen by Toil jobs
|
|
101
|
+
os.environ["MAGIC_ENV_VAR_123"] = "Value1"
|
|
102
|
+
|
|
103
|
+
with Toil(options) as toil:
|
|
104
|
+
|
|
105
|
+
# Get at the job store so we can use shared files.
|
|
106
|
+
jobStore = toil._jobStore
|
|
107
|
+
|
|
108
|
+
# Once the workflow has started, change the environment
|
|
109
|
+
def change_environment_later():
|
|
110
|
+
"""
|
|
111
|
+
After waiting, modify the environment.
|
|
112
|
+
"""
|
|
113
|
+
while True:
|
|
114
|
+
# Wait for the workflow to say it ran something
|
|
115
|
+
time.sleep(5)
|
|
116
|
+
try:
|
|
117
|
+
with jobStore.read_shared_file_stream(
|
|
118
|
+
"jobstarted.txt", encoding="utf-8"
|
|
119
|
+
) as stream:
|
|
120
|
+
logger.info("Got signal from job: %s", stream.read().strip())
|
|
121
|
+
break
|
|
122
|
+
except NoSuchFileException:
|
|
123
|
+
pass
|
|
124
|
+
# Change the environment variable
|
|
125
|
+
logger.info("Changing environment variable")
|
|
126
|
+
os.environ["MAGIC_ENV_VAR_123"] = "Value2"
|
|
127
|
+
|
|
128
|
+
changer_thread = Thread(target=change_environment_later)
|
|
129
|
+
changer_thread.start()
|
|
130
|
+
|
|
131
|
+
toil.start(Job.wrapJobFn(check_environment_repeatedly))
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
if __name__ == "__main__":
|
|
135
|
+
main()
|