toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- toil/__init__.py +124 -86
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +39 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +651 -155
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +784 -397
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1137 -534
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +1031 -349
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +772 -412
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +204 -58
- toil/lib/aws/utils.py +290 -213
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/dockstore.py +379 -0
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -105
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/history.py +1271 -0
- toil/lib/history_submission.py +681 -0
- toil/lib/humanize.py +6 -2
- toil/lib/io.py +121 -12
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +83 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +125 -87
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/lib/trs.py +390 -0
- toil/lib/web.py +38 -0
- toil/options/common.py +850 -402
- toil/options/cwl.py +185 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +283 -180
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +84 -55
- toil/server/utils.py +56 -31
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +183 -65
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +265 -49
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +375 -72
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_history.py +212 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/lib/test_trs.py +161 -0
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +6 -6
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3528 -1053
- toil/worker.py +370 -149
- toil-8.1.0b1.dist-info/METADATA +178 -0
- toil-8.1.0b1.dist-info/RECORD +259 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
import unittest
|
|
2
|
-
|
|
3
|
-
from toil.test.provisioners.clusterTest import AbstractClusterTest
|
|
4
2
|
from uuid import uuid4
|
|
5
3
|
|
|
6
4
|
import pytest
|
|
7
5
|
|
|
8
6
|
from toil.provisioners import cluster_factory
|
|
9
|
-
from toil.test import
|
|
7
|
+
from toil.test import integrative, slow
|
|
8
|
+
from toil.test.provisioners.clusterTest import AbstractClusterTest
|
|
9
|
+
from toil.test.wdl.wdltoil_test import (
|
|
10
|
+
WDL_CONFORMANCE_TEST_COMMIT,
|
|
11
|
+
WDL_CONFORMANCE_TEST_REPO,
|
|
12
|
+
)
|
|
13
|
+
|
|
10
14
|
|
|
11
15
|
@integrative
|
|
12
16
|
@slow
|
|
13
|
-
@pytest.mark.timeout(
|
|
17
|
+
@pytest.mark.timeout(1800)
|
|
14
18
|
class WDLKubernetesClusterTest(AbstractClusterTest):
|
|
15
19
|
"""
|
|
16
20
|
Ensure WDL works on the Kubernetes batchsystem.
|
|
@@ -18,7 +22,7 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
|
|
|
18
22
|
|
|
19
23
|
def __init__(self, name):
|
|
20
24
|
super().__init__(name)
|
|
21
|
-
self.clusterName =
|
|
25
|
+
self.clusterName = "wdl-integration-test-" + str(uuid4())
|
|
22
26
|
# t2.medium is the minimum t2 instance that permits Kubernetes
|
|
23
27
|
self.leaderNodeType = "t2.medium"
|
|
24
28
|
self.instanceTypes = ["t2.medium"]
|
|
@@ -26,13 +30,21 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
|
|
|
26
30
|
|
|
27
31
|
def setUp(self) -> None:
|
|
28
32
|
super().setUp()
|
|
29
|
-
self.jobStore = f
|
|
33
|
+
self.jobStore = f"aws:{self.awsRegion()}:wdl-test-{uuid4()}"
|
|
30
34
|
|
|
31
35
|
def launchCluster(self) -> None:
|
|
32
|
-
self.createClusterUtil(
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
+
self.createClusterUtil(
|
|
37
|
+
args=[
|
|
38
|
+
"--leaderStorage",
|
|
39
|
+
str(self.requestedLeaderStorage),
|
|
40
|
+
"--nodeTypes",
|
|
41
|
+
",".join(self.instanceTypes),
|
|
42
|
+
"-w",
|
|
43
|
+
",".join(self.numWorkers),
|
|
44
|
+
"--nodeStorage",
|
|
45
|
+
str(self.requestedLeaderStorage),
|
|
46
|
+
]
|
|
47
|
+
)
|
|
36
48
|
|
|
37
49
|
def test_wdl_kubernetes_cluster(self):
|
|
38
50
|
"""
|
|
@@ -50,27 +62,29 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
|
|
|
50
62
|
)
|
|
51
63
|
self.leader = self.cluster.getLeader()
|
|
52
64
|
|
|
53
|
-
url = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
|
|
54
|
-
commit = "09b9659cd01473e836738a2e0dd205df0adb49c5"
|
|
55
65
|
wdl_dir = "wdl_conformance_tests"
|
|
56
66
|
|
|
57
67
|
# get the wdl-conformance-tests repo to get WDL tasks to run
|
|
58
|
-
self.sshUtil(
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
68
|
+
self.sshUtil(
|
|
69
|
+
[
|
|
70
|
+
"bash",
|
|
71
|
+
"-c",
|
|
72
|
+
f"git clone {WDL_CONFORMANCE_TEST_REPO} {wdl_dir} && cd {wdl_dir} && git checkout {WDL_CONFORMANCE_TEST_COMMIT}",
|
|
73
|
+
]
|
|
74
|
+
)
|
|
63
75
|
|
|
64
76
|
# run on kubernetes batchsystem
|
|
65
|
-
toil_options = [
|
|
66
|
-
f"--jobstore={self.jobStore}"]
|
|
77
|
+
toil_options = ["--batchSystem=kubernetes", f"--jobstore={self.jobStore}"]
|
|
67
78
|
|
|
68
79
|
# run WDL workflow that will run singularity
|
|
69
80
|
test_options = [f"tests/md5sum/md5sum.wdl", f"tests/md5sum/md5sum.json"]
|
|
70
|
-
self.sshUtil(
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
81
|
+
self.sshUtil(
|
|
82
|
+
[
|
|
83
|
+
"bash",
|
|
84
|
+
"-c",
|
|
85
|
+
f"cd {wdl_dir} && toil-wdl-runner {' '.join(test_options)} {' '.join(toil_options)}",
|
|
86
|
+
]
|
|
87
|
+
)
|
|
74
88
|
|
|
75
89
|
|
|
76
90
|
if __name__ == "__main__":
|
toil/toilState.py
CHANGED
|
@@ -13,12 +13,11 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import logging
|
|
15
15
|
import time
|
|
16
|
-
from typing import
|
|
16
|
+
from typing import Optional
|
|
17
17
|
|
|
18
18
|
from toil.bus import JobUpdatedMessage, MessageBus
|
|
19
19
|
from toil.job import CheckpointJobDescription, JobDescription
|
|
20
|
-
from toil.jobStores.abstractJobStore import
|
|
21
|
-
NoSuchJobException)
|
|
20
|
+
from toil.jobStores.abstractJobStore import AbstractJobStore, NoSuchJobException
|
|
22
21
|
|
|
23
22
|
logger = logging.getLogger(__name__)
|
|
24
23
|
|
|
@@ -63,47 +62,47 @@ class ToilState:
|
|
|
63
62
|
# This holds the one true copy of every JobDescription in the leader.
|
|
64
63
|
# TODO: Do in-place update instead of assignment when we load so we
|
|
65
64
|
# can't let any non-true copies escape.
|
|
66
|
-
self.__job_database:
|
|
65
|
+
self.__job_database: dict[str, JobDescription] = {}
|
|
67
66
|
|
|
68
67
|
# Maps from successor (child or follow-on) jobStoreID to predecessor jobStoreIDs
|
|
69
|
-
self.successor_to_predecessors:
|
|
68
|
+
self.successor_to_predecessors: dict[str, set[str]] = {}
|
|
70
69
|
|
|
71
70
|
# Hash of jobStoreIDs to counts of numbers of successors issued.
|
|
72
71
|
# There are no entries for jobs without successors in this map.
|
|
73
|
-
self.successorCounts:
|
|
72
|
+
self.successorCounts: dict[str, int] = {}
|
|
74
73
|
|
|
75
74
|
# This is a hash of service jobs, referenced by jobStoreID, to their client's ID
|
|
76
|
-
self.service_to_client:
|
|
75
|
+
self.service_to_client: dict[str, str] = {}
|
|
77
76
|
|
|
78
77
|
# Holds, for each client job ID, the job IDs of its services that are
|
|
79
78
|
# possibly currently issued. Includes every service host that has been
|
|
80
79
|
# given to the service manager by the leader, and hasn't been seen by
|
|
81
80
|
# the leader as stopped yet.
|
|
82
|
-
self.servicesIssued:
|
|
81
|
+
self.servicesIssued: dict[str, set[str]] = {}
|
|
83
82
|
|
|
84
83
|
# Holds the IDs of jobs that are currently issued to the batch system
|
|
85
84
|
# and haven't come back yet.
|
|
86
85
|
# TODO: a bit redundant with leader's issued_jobs_by_batch_system_id
|
|
87
|
-
self.jobs_issued:
|
|
86
|
+
self.jobs_issued: set[str] = set()
|
|
88
87
|
|
|
89
88
|
# The set of totally failed jobs - this needs to be filtered at the
|
|
90
89
|
# end to remove jobs that were removed by checkpoints
|
|
91
|
-
self.totalFailedJobs:
|
|
90
|
+
self.totalFailedJobs: set[str] = set()
|
|
92
91
|
|
|
93
92
|
# Jobs (as jobStoreIDs) with successors that have totally failed
|
|
94
|
-
self.hasFailedSuccessors:
|
|
93
|
+
self.hasFailedSuccessors: set[str] = set()
|
|
95
94
|
|
|
96
95
|
# The set of successors of failed jobs as a set of jobStoreIds
|
|
97
|
-
self.failedSuccessors:
|
|
96
|
+
self.failedSuccessors: set[str] = set()
|
|
98
97
|
|
|
99
98
|
# Set of jobs that have multiple predecessors that have one or more predecessors
|
|
100
99
|
# finished, but not all of them.
|
|
101
|
-
self.jobsToBeScheduledWithMultiplePredecessors:
|
|
100
|
+
self.jobsToBeScheduledWithMultiplePredecessors: set[str] = set()
|
|
102
101
|
|
|
103
102
|
def load_workflow(
|
|
104
103
|
self,
|
|
105
104
|
rootJob: JobDescription,
|
|
106
|
-
jobCache: Optional[
|
|
105
|
+
jobCache: Optional[dict[str, JobDescription]] = None,
|
|
107
106
|
) -> None:
|
|
108
107
|
"""
|
|
109
108
|
Load the workflow rooted at the given job.
|
|
@@ -281,7 +280,11 @@ class ToilState:
|
|
|
281
280
|
)
|
|
282
281
|
else:
|
|
283
282
|
self.successorCounts[predecessor_id] -= 1
|
|
284
|
-
logger.debug(
|
|
283
|
+
logger.debug(
|
|
284
|
+
"Successors: one fewer for %s, now have %d",
|
|
285
|
+
predecessor_id,
|
|
286
|
+
self.successorCounts[predecessor_id],
|
|
287
|
+
)
|
|
285
288
|
if self.successorCounts[predecessor_id] == 0:
|
|
286
289
|
del self.successorCounts[predecessor_id]
|
|
287
290
|
|
|
@@ -296,7 +299,6 @@ class ToilState:
|
|
|
296
299
|
else:
|
|
297
300
|
return self.successorCounts[predecessor_id]
|
|
298
301
|
|
|
299
|
-
|
|
300
302
|
def _buildToilState(self, jobDesc: JobDescription) -> None:
|
|
301
303
|
"""
|
|
302
304
|
Build the ToilState class from the subtree root JobDescription.
|
|
@@ -330,7 +332,10 @@ class ToilState:
|
|
|
330
332
|
# Set the job updated because we should be able to make progress on it.
|
|
331
333
|
self.bus.publish(JobUpdatedMessage(str(jobDesc.jobStoreID), 0))
|
|
332
334
|
|
|
333
|
-
if
|
|
335
|
+
if (
|
|
336
|
+
isinstance(jobDesc, CheckpointJobDescription)
|
|
337
|
+
and jobDesc.checkpoint is not None
|
|
338
|
+
):
|
|
334
339
|
jobDesc.restore_checkpoint()
|
|
335
340
|
|
|
336
341
|
else: # There exist successors
|
|
@@ -345,7 +350,9 @@ class ToilState:
|
|
|
345
350
|
jobDesc.nextSuccessors() or set()
|
|
346
351
|
)
|
|
347
352
|
|
|
348
|
-
def processSuccessorWithMultiplePredecessors(
|
|
353
|
+
def processSuccessorWithMultiplePredecessors(
|
|
354
|
+
successor: JobDescription,
|
|
355
|
+
) -> None:
|
|
349
356
|
# If jobDesc is not reported as complete by the successor
|
|
350
357
|
if jobDesc.jobStoreID not in successor.predecessorsFinished:
|
|
351
358
|
|
|
@@ -354,11 +361,15 @@ class ToilState:
|
|
|
354
361
|
|
|
355
362
|
# If the successor has no predecessors to finish
|
|
356
363
|
if len(successor.predecessorsFinished) > successor.predecessorNumber:
|
|
357
|
-
raise RuntimeError(
|
|
364
|
+
raise RuntimeError(
|
|
365
|
+
"There are more finished predecessors than possible."
|
|
366
|
+
)
|
|
358
367
|
if len(successor.predecessorsFinished) == successor.predecessorNumber:
|
|
359
368
|
|
|
360
369
|
# It is ready to be run, so remove it from the set of waiting jobs
|
|
361
|
-
self.jobsToBeScheduledWithMultiplePredecessors.remove(
|
|
370
|
+
self.jobsToBeScheduledWithMultiplePredecessors.remove(
|
|
371
|
+
successorJobStoreID
|
|
372
|
+
)
|
|
362
373
|
|
|
363
374
|
# Recursively consider the successor
|
|
364
375
|
self._buildToilState(successor)
|
|
@@ -383,9 +394,16 @@ class ToilState:
|
|
|
383
394
|
|
|
384
395
|
# We put the successor job in the set of waiting successor
|
|
385
396
|
# jobs with multiple predecessors
|
|
386
|
-
if
|
|
387
|
-
|
|
388
|
-
|
|
397
|
+
if (
|
|
398
|
+
successorJobStoreID
|
|
399
|
+
in self.jobsToBeScheduledWithMultiplePredecessors
|
|
400
|
+
):
|
|
401
|
+
raise RuntimeError(
|
|
402
|
+
"Failed to schedule the successor job. The successor job is already scheduled."
|
|
403
|
+
)
|
|
404
|
+
self.jobsToBeScheduledWithMultiplePredecessors.add(
|
|
405
|
+
successorJobStoreID
|
|
406
|
+
)
|
|
389
407
|
|
|
390
408
|
# Process successor
|
|
391
409
|
processSuccessorWithMultiplePredecessors(successor)
|
|
@@ -399,14 +417,22 @@ class ToilState:
|
|
|
399
417
|
# We've already seen the successor
|
|
400
418
|
|
|
401
419
|
# Add the job as a predecessor
|
|
402
|
-
if
|
|
403
|
-
|
|
420
|
+
if (
|
|
421
|
+
jobDesc.jobStoreID
|
|
422
|
+
in self.successor_to_predecessors[successorJobStoreID]
|
|
423
|
+
):
|
|
424
|
+
raise RuntimeError(
|
|
425
|
+
"Failed to add the job as a predecessor. The job is already added as a predecessor."
|
|
426
|
+
)
|
|
404
427
|
self.successor_to_predecessors[successorJobStoreID].add(
|
|
405
428
|
str(jobDesc.jobStoreID)
|
|
406
429
|
)
|
|
407
430
|
|
|
408
431
|
# If the successor has multiple predecessors
|
|
409
|
-
if
|
|
432
|
+
if (
|
|
433
|
+
successorJobStoreID
|
|
434
|
+
in self.jobsToBeScheduledWithMultiplePredecessors
|
|
435
|
+
):
|
|
410
436
|
|
|
411
437
|
# Get the successor from cache
|
|
412
438
|
successor = self.get_job(successorJobStoreID)
|
toil/utils/toilConfig.py
CHANGED
|
@@ -26,11 +26,20 @@ logger = logging.getLogger(__name__)
|
|
|
26
26
|
def main() -> None:
|
|
27
27
|
parser = ArgParser()
|
|
28
28
|
|
|
29
|
-
parser.add_argument(
|
|
30
|
-
|
|
29
|
+
parser.add_argument(
|
|
30
|
+
"output",
|
|
31
|
+
default="config.yaml",
|
|
32
|
+
help="Filepath to write the config file too. Default=%(" "default)s",
|
|
33
|
+
)
|
|
31
34
|
add_logging_options(parser)
|
|
32
35
|
options = parser.parse_args()
|
|
33
36
|
set_logging_from_options(options)
|
|
34
|
-
logger.debug(
|
|
37
|
+
logger.debug(
|
|
38
|
+
"Attempting to write a default config file to %s.",
|
|
39
|
+
os.path.abspath(options.output),
|
|
40
|
+
)
|
|
35
41
|
generate_config(os.path.abspath(options.output))
|
|
36
|
-
logger.info(
|
|
42
|
+
logger.info(
|
|
43
|
+
"Successfully wrote a default config file to %s.",
|
|
44
|
+
os.path.abspath(options.output),
|
|
45
|
+
)
|
toil/utils/toilDebugFile.py
CHANGED
|
@@ -20,8 +20,8 @@ from typing import Optional
|
|
|
20
20
|
|
|
21
21
|
from toil.common import Config, Toil, parser_with_common_options
|
|
22
22
|
from toil.jobStores.fileJobStore import FileJobStore
|
|
23
|
-
from toil.lib.resources import glob
|
|
24
23
|
from toil.lib.conversions import strtobool
|
|
24
|
+
from toil.lib.resources import glob
|
|
25
25
|
from toil.statsAndLogging import set_logging_from_options
|
|
26
26
|
|
|
27
27
|
logger = logging.getLogger(__name__)
|
|
@@ -44,17 +44,23 @@ def fetchJobStoreFiles(jobStore: FileJobStore, options: argparse.Namespace) -> N
|
|
|
44
44
|
# globbing around inside it. Does this even work?
|
|
45
45
|
|
|
46
46
|
for jobStoreFile in options.fetch:
|
|
47
|
-
jobStoreHits = glob(directoryname=options.jobStore,
|
|
48
|
-
glob_pattern=jobStoreFile)
|
|
47
|
+
jobStoreHits = glob(directoryname=options.jobStore, glob_pattern=jobStoreFile)
|
|
49
48
|
for jobStoreFileID in jobStoreHits:
|
|
50
|
-
logger.debug(
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
49
|
+
logger.debug(
|
|
50
|
+
f"Copying job store file: {jobStoreFileID} to {options.localFilePath[0]}"
|
|
51
|
+
)
|
|
52
|
+
jobStore.read_file(
|
|
53
|
+
jobStoreFileID,
|
|
54
|
+
os.path.join(
|
|
55
|
+
options.localFilePath[0], os.path.basename(jobStoreFileID)
|
|
56
|
+
),
|
|
57
|
+
symlink=options.useSymlinks,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def printContentsOfJobStore(
|
|
62
|
+
job_store: FileJobStore, job_id: Optional[str] = None
|
|
63
|
+
) -> None:
|
|
58
64
|
"""
|
|
59
65
|
Fetch a list of all files contained in the job store if nameOfJob is not
|
|
60
66
|
declared, otherwise it only prints out the names of files for that specific
|
|
@@ -90,22 +96,33 @@ def printContentsOfJobStore(job_store: FileJobStore, job_id: Optional[str] = Non
|
|
|
90
96
|
|
|
91
97
|
def main() -> None:
|
|
92
98
|
parser = parser_with_common_options(jobstore_option=True, prog="toil debug-file")
|
|
93
|
-
parser.add_argument(
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
parser.add_argument(
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
parser.add_argument(
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
99
|
+
parser.add_argument(
|
|
100
|
+
"--localFilePath", nargs=1, help="Location to which to copy job store files."
|
|
101
|
+
)
|
|
102
|
+
parser.add_argument(
|
|
103
|
+
"--fetch",
|
|
104
|
+
nargs="+",
|
|
105
|
+
help="List of job-store files to be copied locally."
|
|
106
|
+
"Use either explicit names (i.e. 'data.txt'), or "
|
|
107
|
+
"specify glob patterns (i.e. '*.txt')",
|
|
108
|
+
)
|
|
109
|
+
parser.add_argument(
|
|
110
|
+
"--listFilesInJobStore",
|
|
111
|
+
type=strtobool,
|
|
112
|
+
help="Prints a list of the current files in the jobStore.",
|
|
113
|
+
)
|
|
114
|
+
parser.add_argument(
|
|
115
|
+
"--fetchEntireJobStore",
|
|
116
|
+
type=strtobool,
|
|
117
|
+
help="Copy all job store files into a local directory.",
|
|
118
|
+
)
|
|
119
|
+
parser.add_argument(
|
|
120
|
+
"--useSymlinks",
|
|
121
|
+
type=strtobool,
|
|
122
|
+
help="Creates symlink 'shortcuts' of files in the localFilePath"
|
|
123
|
+
" instead of hardlinking or copying, where possible. If this is"
|
|
124
|
+
" not possible, it will copy the files (shutil.copyfile()).",
|
|
125
|
+
)
|
|
109
126
|
|
|
110
127
|
# Load the jobStore
|
|
111
128
|
options = parser.parse_args()
|
toil/utils/toilDebugJob.py
CHANGED
|
@@ -17,11 +17,10 @@ import logging
|
|
|
17
17
|
import os
|
|
18
18
|
import pprint
|
|
19
19
|
import sys
|
|
20
|
-
|
|
21
20
|
from pathlib import Path
|
|
22
|
-
from typing import Optional
|
|
21
|
+
from typing import Optional
|
|
23
22
|
|
|
24
|
-
from toil.common import
|
|
23
|
+
from toil.common import Toil, parser_with_common_options
|
|
25
24
|
from toil.job import FilesDownloadedStoppingPointReached
|
|
26
25
|
from toil.jobStores.fileJobStore import FileJobStore
|
|
27
26
|
from toil.statsAndLogging import set_logging_from_options
|
|
@@ -33,23 +32,38 @@ logger = logging.getLogger(__name__)
|
|
|
33
32
|
|
|
34
33
|
|
|
35
34
|
def main() -> None:
|
|
36
|
-
parser = parser_with_common_options(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
parser.add_argument(
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
35
|
+
parser = parser_with_common_options(
|
|
36
|
+
jobstore_option=True, prog="toil debug-job", default_log_level=logging.DEBUG
|
|
37
|
+
)
|
|
38
|
+
parser.add_argument(
|
|
39
|
+
"job",
|
|
40
|
+
type=str,
|
|
41
|
+
help="The job store id or job name of a job within the provided jobstore",
|
|
42
|
+
)
|
|
43
|
+
parser.add_argument(
|
|
44
|
+
"--printJobInfo",
|
|
45
|
+
action="store_true",
|
|
46
|
+
help="Dump debugging info about the job instead of running it",
|
|
47
|
+
)
|
|
48
|
+
parser.add_argument(
|
|
49
|
+
"--retrieveTaskDirectory",
|
|
50
|
+
dest="retrieve_task_directory",
|
|
51
|
+
type=str,
|
|
52
|
+
default=None,
|
|
53
|
+
help="Download CWL or WDL task inputs to the given directory and stop.",
|
|
54
|
+
)
|
|
43
55
|
|
|
44
56
|
options = parser.parse_args()
|
|
45
57
|
set_logging_from_options(options)
|
|
46
58
|
|
|
47
|
-
if options.retrieve_task_directory is not None and os.path.exists(
|
|
59
|
+
if options.retrieve_task_directory is not None and os.path.exists(
|
|
60
|
+
options.retrieve_task_directory
|
|
61
|
+
):
|
|
48
62
|
# The logic to duplicate container mounts depends on stuff not already existing.
|
|
49
63
|
logger.error(
|
|
50
64
|
"The directory %s given for --retrieveTaskDirectory already exists. "
|
|
51
65
|
"Stopping to avoid clobbering existing files.",
|
|
52
|
-
options.retrieve_task_directory
|
|
66
|
+
options.retrieve_task_directory,
|
|
53
67
|
)
|
|
54
68
|
sys.exit(1)
|
|
55
69
|
|
|
@@ -58,6 +72,7 @@ def main() -> None:
|
|
|
58
72
|
config = jobStore.config
|
|
59
73
|
# But override its options
|
|
60
74
|
config.setOptions(options)
|
|
75
|
+
config.cleanWorkDir = "never"
|
|
61
76
|
|
|
62
77
|
# Find the job
|
|
63
78
|
|
|
@@ -82,26 +97,45 @@ def main() -> None:
|
|
|
82
97
|
if len(hits) == 0:
|
|
83
98
|
# No hits
|
|
84
99
|
if suggestion is None:
|
|
85
|
-
logger.critical(
|
|
100
|
+
logger.critical(
|
|
101
|
+
'No job found with ID or name "%s". No jobs are completely failed.',
|
|
102
|
+
options.job,
|
|
103
|
+
)
|
|
86
104
|
else:
|
|
87
|
-
logger.critical(
|
|
105
|
+
logger.critical(
|
|
106
|
+
'No job found with ID or name "%s". How about the failed job %s instead?',
|
|
107
|
+
options.job,
|
|
108
|
+
suggestion,
|
|
109
|
+
)
|
|
88
110
|
sys.exit(1)
|
|
89
111
|
elif len(hits) > 1:
|
|
90
112
|
# Several hits, maybe only one has failed
|
|
91
113
|
completely_failed_hits = [job for job in hits if job.remainingTryCount == 0]
|
|
92
114
|
if len(completely_failed_hits) == 0:
|
|
93
|
-
logger.critical(
|
|
115
|
+
logger.critical(
|
|
116
|
+
'Multiple jobs match "%s" but none are completely failed: %s',
|
|
117
|
+
options.job,
|
|
118
|
+
hits,
|
|
119
|
+
)
|
|
94
120
|
sys.exit(1)
|
|
95
121
|
elif len(completely_failed_hits) > 0:
|
|
96
|
-
logger.critical(
|
|
122
|
+
logger.critical(
|
|
123
|
+
'Multiple jobs matching "%s" are completely failed: %s',
|
|
124
|
+
options.job,
|
|
125
|
+
completely_failed_hits,
|
|
126
|
+
)
|
|
97
127
|
sys.exit(1)
|
|
98
128
|
else:
|
|
99
129
|
# We found one completely failed job, they probably mean that one.
|
|
100
|
-
logger.info(
|
|
130
|
+
logger.info(
|
|
131
|
+
'There are %s jobs matching "%s"; assuming you mean the failed one: %s',
|
|
132
|
+
options.job,
|
|
133
|
+
completely_failed_hits[0],
|
|
134
|
+
)
|
|
101
135
|
job_id = completely_failed_hits[0].jobStoreID
|
|
102
136
|
else:
|
|
103
137
|
# We found one job with this name, so they must mean that one
|
|
104
|
-
logger.info(
|
|
138
|
+
logger.info('Looked up job named "%s": %s', options.job, hits[0])
|
|
105
139
|
job_id = hits[0].jobStoreID
|
|
106
140
|
|
|
107
141
|
if options.printJobInfo:
|
|
@@ -121,19 +155,29 @@ def main() -> None:
|
|
|
121
155
|
local_worker_temp_dir = None
|
|
122
156
|
if options.retrieve_task_directory is not None:
|
|
123
157
|
# Pick a directory in it (which may be removed by the worker) as the worker's temp dir.
|
|
124
|
-
local_worker_temp_dir = os.path.join(
|
|
158
|
+
local_worker_temp_dir = os.path.join(
|
|
159
|
+
options.retrieve_task_directory, "worker"
|
|
160
|
+
)
|
|
125
161
|
# Make sure it exists
|
|
126
162
|
os.makedirs(local_worker_temp_dir, exist_ok=True)
|
|
127
163
|
# And tell the job to just download files
|
|
128
164
|
debug_flags.add("download_only")
|
|
129
165
|
# We might need to reconstruct a container environment.
|
|
130
|
-
host_and_job_paths: Optional[
|
|
166
|
+
host_and_job_paths: Optional[list[tuple[str, str]]] = None
|
|
131
167
|
# Track if the run succeeded without error
|
|
132
168
|
run_succeeded = False
|
|
133
169
|
|
|
134
170
|
logger.info(f"Running the following job locally: {job_id}")
|
|
135
171
|
try:
|
|
136
|
-
workerScript(
|
|
172
|
+
workerScript(
|
|
173
|
+
jobStore,
|
|
174
|
+
config,
|
|
175
|
+
job_id,
|
|
176
|
+
job_id,
|
|
177
|
+
redirect_output_to_log_file=False,
|
|
178
|
+
local_worker_temp_dir=local_worker_temp_dir,
|
|
179
|
+
debug_flags=debug_flags,
|
|
180
|
+
)
|
|
137
181
|
except FilesDownloadedStoppingPointReached as e:
|
|
138
182
|
# We asked for the files to be downloaded and now they are.
|
|
139
183
|
assert options.retrieve_task_directory is not None
|
|
@@ -164,21 +208,37 @@ def main() -> None:
|
|
|
164
208
|
|
|
165
209
|
for host_path, job_path in sorted_mounts:
|
|
166
210
|
if not os.path.exists(host_path):
|
|
167
|
-
logger.error(
|
|
211
|
+
logger.error(
|
|
212
|
+
"Job intended to mount %s as %s but it does not exist!",
|
|
213
|
+
host_path,
|
|
214
|
+
job_path,
|
|
215
|
+
)
|
|
168
216
|
continue
|
|
169
217
|
if not job_path.startswith("/"):
|
|
170
|
-
logger.error(
|
|
218
|
+
logger.error(
|
|
219
|
+
"Job intended to mount %s as %s but destination is a relative path!",
|
|
220
|
+
host_path,
|
|
221
|
+
job_path,
|
|
222
|
+
)
|
|
171
223
|
continue
|
|
172
224
|
# Drop the slash because we are building a chroot-ish mini filesystem.
|
|
173
225
|
job_relative_path = job_path[1:]
|
|
174
226
|
if job_relative_path.startswith("/"):
|
|
175
227
|
# We are having trouble understanding what the job
|
|
176
228
|
# intended to do. Stop working on this mount.
|
|
177
|
-
logger.error(
|
|
229
|
+
logger.error(
|
|
230
|
+
"Job intended to mount %s as %s but destination starts with multiple slashes for some reason!",
|
|
231
|
+
host_path,
|
|
232
|
+
job_path,
|
|
233
|
+
)
|
|
178
234
|
continue
|
|
179
235
|
fake_job_path = os.path.join(fake_job_root, job_relative_path)
|
|
180
236
|
if os.path.exists(fake_job_path):
|
|
181
|
-
logger.error(
|
|
237
|
+
logger.error(
|
|
238
|
+
"Job intended to mount %s as %s but that location is already mounted!",
|
|
239
|
+
host_path,
|
|
240
|
+
job_path,
|
|
241
|
+
)
|
|
182
242
|
continue
|
|
183
243
|
|
|
184
244
|
logger.info("Job mounted %s as %s", host_path, job_path)
|
toil/utils/toilDestroyCluster.py
CHANGED
|
@@ -20,16 +20,21 @@ from toil.statsAndLogging import set_logging_from_options
|
|
|
20
20
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
|
|
23
|
+
|
|
23
24
|
def main() -> None:
|
|
24
|
-
parser = parser_with_common_options(
|
|
25
|
+
parser = parser_with_common_options(
|
|
26
|
+
provisioner_options=True, jobstore_option=False, prog="toil destroy-cluster"
|
|
27
|
+
)
|
|
25
28
|
options = parser.parse_args()
|
|
26
29
|
set_logging_from_options(options)
|
|
27
30
|
|
|
28
|
-
logger.info(
|
|
31
|
+
logger.info("Destroying cluster %s", options.clusterName)
|
|
29
32
|
|
|
30
|
-
cluster = cluster_factory(
|
|
31
|
-
|
|
32
|
-
|
|
33
|
+
cluster = cluster_factory(
|
|
34
|
+
provisioner=options.provisioner,
|
|
35
|
+
clusterName=options.clusterName,
|
|
36
|
+
zone=options.zone,
|
|
37
|
+
)
|
|
33
38
|
cluster.destroyCluster()
|
|
34
39
|
|
|
35
|
-
logger.info(
|
|
40
|
+
logger.info("Cluster %s is now gone.", options.clusterName)
|
toil/utils/toilKill.py
CHANGED
|
@@ -26,8 +26,11 @@ logger = logging.getLogger(__name__)
|
|
|
26
26
|
|
|
27
27
|
def main() -> None:
|
|
28
28
|
parser = parser_with_common_options(prog="toil kill")
|
|
29
|
-
parser.add_argument(
|
|
30
|
-
|
|
29
|
+
parser.add_argument(
|
|
30
|
+
"--force",
|
|
31
|
+
action="store_true",
|
|
32
|
+
help="Send SIGKILL to the leader process if local.",
|
|
33
|
+
)
|
|
31
34
|
options = parser.parse_args()
|
|
32
35
|
set_logging_from_options(options)
|
|
33
36
|
config = Config()
|
|
@@ -65,7 +68,9 @@ def main() -> None:
|
|
|
65
68
|
os.kill(pid_to_kill, signal.SIGKILL if options.force else signal.SIGTERM)
|
|
66
69
|
logger.info("Toil process %i successfully terminated.", pid_to_kill)
|
|
67
70
|
except OSError:
|
|
68
|
-
logger.error(
|
|
71
|
+
logger.error(
|
|
72
|
+
"Could not signal process %i. Is it still running?", pid_to_kill
|
|
73
|
+
)
|
|
69
74
|
sys.exit(1)
|
|
70
75
|
else:
|
|
71
76
|
# Flip the flag inside the job store to signal kill
|