toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
from uuid import uuid4
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from toil.provisioners import cluster_factory
|
|
7
|
+
from toil.test import integrative, slow
|
|
8
|
+
from toil.test.provisioners.clusterTest import AbstractClusterTest
|
|
9
|
+
from toil.test.wdl.wdltoil_test import (
|
|
10
|
+
WDL_CONFORMANCE_TEST_COMMIT,
|
|
11
|
+
WDL_CONFORMANCE_TEST_REPO,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@integrative
@slow
@pytest.mark.timeout(1800)
class WDLKubernetesClusterTest(AbstractClusterTest):
    """
    Ensure WDL works on the Kubernetes batchsystem.

    Launches a real AWS-hosted Kubernetes cluster, so this test is
    integrative and slow.
    """

    def __init__(self, name):
        """Configure a uniquely-named Kubernetes test cluster."""
        super().__init__(name)
        self.clusterName = "wdl-integration-test-" + str(uuid4())
        # t2.medium is the minimum t2 instance that permits Kubernetes
        self.leaderNodeType = "t2.medium"
        self.instanceTypes = ["t2.medium"]
        self.clusterType = "kubernetes"

    def setUp(self) -> None:
        """Point each test run at a fresh AWS job store."""
        super().setUp()
        self.jobStore = f"aws:{self.awsRegion()}:wdl-test-{uuid4()}"

    def launchCluster(self) -> None:
        """Launch the cluster with the configured node types and storage."""
        self.createClusterUtil(
            args=[
                "--leaderStorage",
                str(self.requestedLeaderStorage),
                "--nodeTypes",
                ",".join(self.instanceTypes),
                "-w",
                # numWorkers must be a list of per-node-type worker counts;
                # joining a plain string here would split it into characters.
                ",".join(self.numWorkers),
                "--nodeStorage",
                str(self.requestedLeaderStorage),
            ]
        )

    def test_wdl_kubernetes_cluster(self):
        """
        Test that a wdl workflow works on a kubernetes cluster. Launches a cluster with 1 worker. This runs a wdl
        workflow that performs an image pull on the worker.
        :return:
        """
        # Keep worker counts as a list so ",".join() in launchCluster()
        # yields "1" (and stays correct for multi-digit counts like "10").
        self.numWorkers = ["1"]
        self.requestedLeaderStorage = 30
        # create the cluster
        self.launchCluster()
        # get leader
        self.cluster = cluster_factory(
            provisioner="aws", zone=self.zone, clusterName=self.clusterName
        )
        self.leader = self.cluster.getLeader()

        wdl_dir = "wdl_conformance_tests"

        # get the wdl-conformance-tests repo to get WDL tasks to run
        self.sshUtil(
            [
                "bash",
                "-c",
                f"git clone {WDL_CONFORMANCE_TEST_REPO} {wdl_dir} && cd {wdl_dir} && git checkout {WDL_CONFORMANCE_TEST_COMMIT}",
            ]
        )

        # run on kubernetes batchsystem
        toil_options = ["--batchSystem=kubernetes", f"--jobstore={self.jobStore}"]

        # run WDL workflow that will run singularity
        test_options = ["tests/md5sum/md5sum.wdl", "tests/md5sum/md5sum.json"]
        self.sshUtil(
            [
                "bash",
                "-c",
                f"cd {wdl_dir} && toil-wdl-runner {' '.join(test_options)} {' '.join(toil_options)}",
            ]
        )
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
if __name__ == "__main__":
    # Running this module directly executes its test suite.
    unittest.main()
|
toil/toilState.py
CHANGED
|
@@ -12,12 +12,12 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import logging
|
|
15
|
-
|
|
15
|
+
import time
|
|
16
|
+
from typing import Optional
|
|
16
17
|
|
|
17
18
|
from toil.bus import JobUpdatedMessage, MessageBus
|
|
18
19
|
from toil.job import CheckpointJobDescription, JobDescription
|
|
19
|
-
from toil.jobStores.abstractJobStore import
|
|
20
|
-
NoSuchJobException)
|
|
20
|
+
from toil.jobStores.abstractJobStore import AbstractJobStore, NoSuchJobException
|
|
21
21
|
|
|
22
22
|
logger = logging.getLogger(__name__)
|
|
23
23
|
|
|
@@ -62,47 +62,47 @@ class ToilState:
|
|
|
62
62
|
# This holds the one true copy of every JobDescription in the leader.
|
|
63
63
|
# TODO: Do in-place update instead of assignment when we load so we
|
|
64
64
|
# can't let any non-true copies escape.
|
|
65
|
-
self.__job_database:
|
|
65
|
+
self.__job_database: dict[str, JobDescription] = {}
|
|
66
66
|
|
|
67
67
|
# Maps from successor (child or follow-on) jobStoreID to predecessor jobStoreIDs
|
|
68
|
-
self.successor_to_predecessors:
|
|
68
|
+
self.successor_to_predecessors: dict[str, set[str]] = {}
|
|
69
69
|
|
|
70
70
|
# Hash of jobStoreIDs to counts of numbers of successors issued.
|
|
71
71
|
# There are no entries for jobs without successors in this map.
|
|
72
|
-
self.successorCounts:
|
|
72
|
+
self.successorCounts: dict[str, int] = {}
|
|
73
73
|
|
|
74
74
|
# This is a hash of service jobs, referenced by jobStoreID, to their client's ID
|
|
75
|
-
self.service_to_client:
|
|
75
|
+
self.service_to_client: dict[str, str] = {}
|
|
76
76
|
|
|
77
77
|
# Holds, for each client job ID, the job IDs of its services that are
|
|
78
78
|
# possibly currently issued. Includes every service host that has been
|
|
79
79
|
# given to the service manager by the leader, and hasn't been seen by
|
|
80
80
|
# the leader as stopped yet.
|
|
81
|
-
self.servicesIssued:
|
|
81
|
+
self.servicesIssued: dict[str, set[str]] = {}
|
|
82
82
|
|
|
83
83
|
# Holds the IDs of jobs that are currently issued to the batch system
|
|
84
84
|
# and haven't come back yet.
|
|
85
85
|
# TODO: a bit redundant with leader's issued_jobs_by_batch_system_id
|
|
86
|
-
self.jobs_issued:
|
|
86
|
+
self.jobs_issued: set[str] = set()
|
|
87
87
|
|
|
88
88
|
# The set of totally failed jobs - this needs to be filtered at the
|
|
89
89
|
# end to remove jobs that were removed by checkpoints
|
|
90
|
-
self.totalFailedJobs:
|
|
90
|
+
self.totalFailedJobs: set[str] = set()
|
|
91
91
|
|
|
92
92
|
# Jobs (as jobStoreIDs) with successors that have totally failed
|
|
93
|
-
self.hasFailedSuccessors:
|
|
93
|
+
self.hasFailedSuccessors: set[str] = set()
|
|
94
94
|
|
|
95
95
|
# The set of successors of failed jobs as a set of jobStoreIds
|
|
96
|
-
self.failedSuccessors:
|
|
96
|
+
self.failedSuccessors: set[str] = set()
|
|
97
97
|
|
|
98
98
|
# Set of jobs that have multiple predecessors that have one or more predecessors
|
|
99
99
|
# finished, but not all of them.
|
|
100
|
-
self.jobsToBeScheduledWithMultiplePredecessors:
|
|
100
|
+
self.jobsToBeScheduledWithMultiplePredecessors: set[str] = set()
|
|
101
101
|
|
|
102
102
|
def load_workflow(
|
|
103
103
|
self,
|
|
104
104
|
rootJob: JobDescription,
|
|
105
|
-
jobCache: Optional[
|
|
105
|
+
jobCache: Optional[dict[str, JobDescription]] = None,
|
|
106
106
|
) -> None:
|
|
107
107
|
"""
|
|
108
108
|
Load the workflow rooted at the given job.
|
|
@@ -183,12 +183,70 @@ class ToilState:
|
|
|
183
183
|
if job_id in self.__job_database:
|
|
184
184
|
# Update the one true copy in place
|
|
185
185
|
old_truth = self.__job_database[job_id]
|
|
186
|
-
old_truth.
|
|
186
|
+
old_truth.assert_is_not_newer_than(new_truth)
|
|
187
187
|
old_truth.__dict__.update(new_truth.__dict__)
|
|
188
188
|
else:
|
|
189
189
|
# Just keep the new one
|
|
190
190
|
self.__job_database[job_id] = new_truth
|
|
191
191
|
|
|
192
|
+
def reset_job_expecting_change(self, job_id: str, timeout: float) -> bool:
    """
    Discard any local modifications to a JobDescription.

    Will make modifications from other hosts visible.

    Will wait for up to timeout seconds for a modification (or deletion)
    from another host to actually be visible.

    Always replaces the JobDescription with what is stored in the job
    store, even if no modification ends up being visible.

    :param job_id: ID of the job to reload from the job store.
    :param timeout: Maximum number of seconds to poll for a change.
    :return: True if an update was detected in time, and False otherwise.
    """
    start_time = time.time()
    # Initial poll interval; doubled each retry (exponential backoff).
    wait_time = 0.1
    initially_known = job_id in self.__job_database
    new_truth: Optional[JobDescription] = None
    while True:
        try:
            new_truth = self.__job_store.load_job(job_id)
        except NoSuchJobException:
            # The job is gone now.
            if job_id in self.__job_database:
                # So forget about it
                del self.__job_database[job_id]
                # TODO: Other collections may still reference it.
            if initially_known:
                # Job was deleted, that's an update
                return True
        else:
            if job_id in self.__job_database:
                # We have an old version to compare against
                old_truth = self.__job_database[job_id]
                old_truth.assert_is_not_newer_than(new_truth)
                if old_truth.is_updated_by(new_truth):
                    # Do the update in place so existing references see it
                    old_truth.__dict__.update(new_truth.__dict__)
                    return True
            else:
                # Just keep the new one. That's an update.
                self.__job_database[job_id] = new_truth
                return True
        # We looked but didn't get a good update
        time_elapsed = time.time() - start_time
        if time_elapsed >= timeout:
            # We're out of time to check.
            if new_truth is not None and job_id in self.__job_database:
                # Commit whatever we managed to load to accomplish a real
                # reset. Guarding on membership (instead of touching a
                # conditionally-bound local) avoids a NameError and avoids
                # resurrecting an entry deleted on a later poll.
                self.__job_database[job_id].__dict__.update(new_truth.__dict__)
            return False
        # Wait a little and poll again
        time.sleep(min(timeout - time_elapsed, wait_time))
        # Using exponential backoff
        wait_time *= 2
|
|
249
|
+
|
|
192
250
|
# The next 3 functions provide tracking of how many successor jobs a given job
|
|
193
251
|
# is waiting on, exposing only legit operations.
|
|
194
252
|
# TODO: turn these into messages?
|
|
@@ -222,7 +280,11 @@ class ToilState:
|
|
|
222
280
|
)
|
|
223
281
|
else:
|
|
224
282
|
self.successorCounts[predecessor_id] -= 1
|
|
225
|
-
logger.debug(
|
|
283
|
+
logger.debug(
|
|
284
|
+
"Successors: one fewer for %s, now have %d",
|
|
285
|
+
predecessor_id,
|
|
286
|
+
self.successorCounts[predecessor_id],
|
|
287
|
+
)
|
|
226
288
|
if self.successorCounts[predecessor_id] == 0:
|
|
227
289
|
del self.successorCounts[predecessor_id]
|
|
228
290
|
|
|
@@ -237,7 +299,6 @@ class ToilState:
|
|
|
237
299
|
else:
|
|
238
300
|
return self.successorCounts[predecessor_id]
|
|
239
301
|
|
|
240
|
-
|
|
241
302
|
def _buildToilState(self, jobDesc: JobDescription) -> None:
|
|
242
303
|
"""
|
|
243
304
|
Build the ToilState class from the subtree root JobDescription.
|
|
@@ -247,10 +308,10 @@ class ToilState:
|
|
|
247
308
|
|
|
248
309
|
:param jobDesc: The description for the root job of the workflow being run.
|
|
249
310
|
"""
|
|
250
|
-
# If the job description has a
|
|
311
|
+
# If the job description has a body, is a checkpoint, has services
|
|
251
312
|
# or is ready to be deleted it is ready to be processed (i.e. it is updated)
|
|
252
313
|
if (
|
|
253
|
-
jobDesc.
|
|
314
|
+
jobDesc.has_body()
|
|
254
315
|
or (
|
|
255
316
|
isinstance(jobDesc, CheckpointJobDescription)
|
|
256
317
|
and jobDesc.checkpoint is not None
|
|
@@ -259,10 +320,10 @@ class ToilState:
|
|
|
259
320
|
or jobDesc.nextSuccessors() is None
|
|
260
321
|
):
|
|
261
322
|
logger.debug(
|
|
262
|
-
"Found job to run: %s, with
|
|
323
|
+
"Found job to run: %s, with body: %s, with checkpoint: %s, with "
|
|
263
324
|
"services: %s, with no next successors: %s",
|
|
264
325
|
jobDesc.jobStoreID,
|
|
265
|
-
jobDesc.
|
|
326
|
+
jobDesc.has_body(),
|
|
266
327
|
isinstance(jobDesc, CheckpointJobDescription)
|
|
267
328
|
and jobDesc.checkpoint is not None,
|
|
268
329
|
len(jobDesc.services) > 0,
|
|
@@ -271,22 +332,27 @@ class ToilState:
|
|
|
271
332
|
# Set the job updated because we should be able to make progress on it.
|
|
272
333
|
self.bus.publish(JobUpdatedMessage(str(jobDesc.jobStoreID), 0))
|
|
273
334
|
|
|
274
|
-
if
|
|
275
|
-
jobDesc
|
|
335
|
+
if (
|
|
336
|
+
isinstance(jobDesc, CheckpointJobDescription)
|
|
337
|
+
and jobDesc.checkpoint is not None
|
|
338
|
+
):
|
|
339
|
+
jobDesc.restore_checkpoint()
|
|
276
340
|
|
|
277
341
|
else: # There exist successors
|
|
278
342
|
logger.debug(
|
|
279
343
|
"Adding job: %s to the state with %s successors",
|
|
280
344
|
jobDesc.jobStoreID,
|
|
281
|
-
len(jobDesc.nextSuccessors()),
|
|
345
|
+
len(jobDesc.nextSuccessors() or set()),
|
|
282
346
|
)
|
|
283
347
|
|
|
284
348
|
# Record the number of successors
|
|
285
349
|
self.successorCounts[str(jobDesc.jobStoreID)] = len(
|
|
286
|
-
jobDesc.nextSuccessors()
|
|
350
|
+
jobDesc.nextSuccessors() or set()
|
|
287
351
|
)
|
|
288
352
|
|
|
289
|
-
def processSuccessorWithMultiplePredecessors(
|
|
353
|
+
def processSuccessorWithMultiplePredecessors(
|
|
354
|
+
successor: JobDescription,
|
|
355
|
+
) -> None:
|
|
290
356
|
# If jobDesc is not reported as complete by the successor
|
|
291
357
|
if jobDesc.jobStoreID not in successor.predecessorsFinished:
|
|
292
358
|
|
|
@@ -295,17 +361,21 @@ class ToilState:
|
|
|
295
361
|
|
|
296
362
|
# If the successor has no predecessors to finish
|
|
297
363
|
if len(successor.predecessorsFinished) > successor.predecessorNumber:
|
|
298
|
-
raise RuntimeError(
|
|
364
|
+
raise RuntimeError(
|
|
365
|
+
"There are more finished predecessors than possible."
|
|
366
|
+
)
|
|
299
367
|
if len(successor.predecessorsFinished) == successor.predecessorNumber:
|
|
300
368
|
|
|
301
369
|
# It is ready to be run, so remove it from the set of waiting jobs
|
|
302
|
-
self.jobsToBeScheduledWithMultiplePredecessors.remove(
|
|
370
|
+
self.jobsToBeScheduledWithMultiplePredecessors.remove(
|
|
371
|
+
successorJobStoreID
|
|
372
|
+
)
|
|
303
373
|
|
|
304
374
|
# Recursively consider the successor
|
|
305
375
|
self._buildToilState(successor)
|
|
306
376
|
|
|
307
377
|
# For each successor
|
|
308
|
-
for successorJobStoreID in jobDesc.nextSuccessors():
|
|
378
|
+
for successorJobStoreID in jobDesc.nextSuccessors() or set():
|
|
309
379
|
|
|
310
380
|
# If the successor does not yet point back at a
|
|
311
381
|
# predecessor we have not yet considered it
|
|
@@ -324,9 +394,16 @@ class ToilState:
|
|
|
324
394
|
|
|
325
395
|
# We put the successor job in the set of waiting successor
|
|
326
396
|
# jobs with multiple predecessors
|
|
327
|
-
if
|
|
328
|
-
|
|
329
|
-
|
|
397
|
+
if (
|
|
398
|
+
successorJobStoreID
|
|
399
|
+
in self.jobsToBeScheduledWithMultiplePredecessors
|
|
400
|
+
):
|
|
401
|
+
raise RuntimeError(
|
|
402
|
+
"Failed to schedule the successor job. The successor job is already scheduled."
|
|
403
|
+
)
|
|
404
|
+
self.jobsToBeScheduledWithMultiplePredecessors.add(
|
|
405
|
+
successorJobStoreID
|
|
406
|
+
)
|
|
330
407
|
|
|
331
408
|
# Process successor
|
|
332
409
|
processSuccessorWithMultiplePredecessors(successor)
|
|
@@ -340,14 +417,22 @@ class ToilState:
|
|
|
340
417
|
# We've already seen the successor
|
|
341
418
|
|
|
342
419
|
# Add the job as a predecessor
|
|
343
|
-
if
|
|
344
|
-
|
|
420
|
+
if (
|
|
421
|
+
jobDesc.jobStoreID
|
|
422
|
+
in self.successor_to_predecessors[successorJobStoreID]
|
|
423
|
+
):
|
|
424
|
+
raise RuntimeError(
|
|
425
|
+
"Failed to add the job as a predecessor. The job is already added as a predecessor."
|
|
426
|
+
)
|
|
345
427
|
self.successor_to_predecessors[successorJobStoreID].add(
|
|
346
428
|
str(jobDesc.jobStoreID)
|
|
347
429
|
)
|
|
348
430
|
|
|
349
431
|
# If the successor has multiple predecessors
|
|
350
|
-
if
|
|
432
|
+
if (
|
|
433
|
+
successorJobStoreID
|
|
434
|
+
in self.jobsToBeScheduledWithMultiplePredecessors
|
|
435
|
+
):
|
|
351
436
|
|
|
352
437
|
# Get the successor from cache
|
|
353
438
|
successor = self.get_job(successorJobStoreID)
|
toil/utils/toilConfig.py
CHANGED
|
@@ -26,11 +26,20 @@ logger = logging.getLogger(__name__)
|
|
|
26
26
|
def main() -> None:
    """
    Write a default Toil config file to a user-specified path.

    Parses the output path (and logging options) from the command line,
    then generates the default configuration at that location.
    """
    parser = ArgParser()

    parser.add_argument(
        "output",
        default="config.yaml",
        help="Filepath to write the config file to. Default=%(default)s",
    )
    add_logging_options(parser)
    options = parser.parse_args()
    set_logging_from_options(options)
    # Resolve the path once so the log messages and the writer agree.
    output_path = os.path.abspath(options.output)
    logger.debug("Attempting to write a default config file to %s.", output_path)
    generate_config(output_path)
    logger.info("Successfully wrote a default config file to %s.", output_path)
|
toil/utils/toilDebugFile.py
CHANGED
|
@@ -17,10 +17,10 @@ import logging
|
|
|
17
17
|
import os.path
|
|
18
18
|
import sys
|
|
19
19
|
from typing import Optional
|
|
20
|
-
from distutils.util import strtobool
|
|
21
20
|
|
|
22
21
|
from toil.common import Config, Toil, parser_with_common_options
|
|
23
22
|
from toil.jobStores.fileJobStore import FileJobStore
|
|
23
|
+
from toil.lib.conversions import strtobool
|
|
24
24
|
from toil.lib.resources import glob
|
|
25
25
|
from toil.statsAndLogging import set_logging_from_options
|
|
26
26
|
|
|
@@ -44,17 +44,23 @@ def fetchJobStoreFiles(jobStore: FileJobStore, options: argparse.Namespace) -> N
|
|
|
44
44
|
# globbing around inside it. Does this even work?
|
|
45
45
|
|
|
46
46
|
for jobStoreFile in options.fetch:
|
|
47
|
-
jobStoreHits = glob(directoryname=options.jobStore,
|
|
48
|
-
glob_pattern=jobStoreFile)
|
|
47
|
+
jobStoreHits = glob(directoryname=options.jobStore, glob_pattern=jobStoreFile)
|
|
49
48
|
for jobStoreFileID in jobStoreHits:
|
|
50
|
-
logger.debug(
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
49
|
+
logger.debug(
|
|
50
|
+
f"Copying job store file: {jobStoreFileID} to {options.localFilePath[0]}"
|
|
51
|
+
)
|
|
52
|
+
jobStore.read_file(
|
|
53
|
+
jobStoreFileID,
|
|
54
|
+
os.path.join(
|
|
55
|
+
options.localFilePath[0], os.path.basename(jobStoreFileID)
|
|
56
|
+
),
|
|
57
|
+
symlink=options.useSymlinks,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def printContentsOfJobStore(
|
|
62
|
+
job_store: FileJobStore, job_id: Optional[str] = None
|
|
63
|
+
) -> None:
|
|
58
64
|
"""
|
|
59
65
|
Fetch a list of all files contained in the job store if nameOfJob is not
|
|
60
66
|
declared, otherwise it only prints out the names of files for that specific
|
|
@@ -90,22 +96,33 @@ def printContentsOfJobStore(job_store: FileJobStore, job_id: Optional[str] = Non
|
|
|
90
96
|
|
|
91
97
|
def main() -> None:
|
|
92
98
|
parser = parser_with_common_options(jobstore_option=True, prog="toil debug-file")
|
|
93
|
-
parser.add_argument(
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
parser.add_argument(
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
parser.add_argument(
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
99
|
+
parser.add_argument(
|
|
100
|
+
"--localFilePath", nargs=1, help="Location to which to copy job store files."
|
|
101
|
+
)
|
|
102
|
+
parser.add_argument(
|
|
103
|
+
"--fetch",
|
|
104
|
+
nargs="+",
|
|
105
|
+
help="List of job-store files to be copied locally."
|
|
106
|
+
"Use either explicit names (i.e. 'data.txt'), or "
|
|
107
|
+
"specify glob patterns (i.e. '*.txt')",
|
|
108
|
+
)
|
|
109
|
+
parser.add_argument(
|
|
110
|
+
"--listFilesInJobStore",
|
|
111
|
+
type=strtobool,
|
|
112
|
+
help="Prints a list of the current files in the jobStore.",
|
|
113
|
+
)
|
|
114
|
+
parser.add_argument(
|
|
115
|
+
"--fetchEntireJobStore",
|
|
116
|
+
type=strtobool,
|
|
117
|
+
help="Copy all job store files into a local directory.",
|
|
118
|
+
)
|
|
119
|
+
parser.add_argument(
|
|
120
|
+
"--useSymlinks",
|
|
121
|
+
type=strtobool,
|
|
122
|
+
help="Creates symlink 'shortcuts' of files in the localFilePath"
|
|
123
|
+
" instead of hardlinking or copying, where possible. If this is"
|
|
124
|
+
" not possible, it will copy the files (shutil.copyfile()).",
|
|
125
|
+
)
|
|
109
126
|
|
|
110
127
|
# Load the jobStore
|
|
111
128
|
options = parser.parse_args()
|