toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/utils/toilStatus.py
CHANGED
|
@@ -15,13 +15,12 @@
|
|
|
15
15
|
import logging
|
|
16
16
|
import os
|
|
17
17
|
import sys
|
|
18
|
-
from typing import Any,
|
|
18
|
+
from typing import Any, Optional
|
|
19
19
|
|
|
20
20
|
from toil.bus import replay_message_bus
|
|
21
|
-
from toil.common import
|
|
21
|
+
from toil.common import Toil, parser_with_common_options
|
|
22
22
|
from toil.job import JobDescription, JobException, ServiceJobDescription
|
|
23
|
-
from toil.jobStores.abstractJobStore import
|
|
24
|
-
NoSuchJobStoreException)
|
|
23
|
+
from toil.jobStores.abstractJobStore import NoSuchFileException, NoSuchJobStoreException
|
|
25
24
|
from toil.statsAndLogging import StatsAndLogging, set_logging_from_options
|
|
26
25
|
|
|
27
26
|
logger = logging.getLogger(__name__)
|
|
@@ -30,33 +29,59 @@ logger = logging.getLogger(__name__)
|
|
|
30
29
|
class ToilStatus:
|
|
31
30
|
"""Tool for reporting on job status."""
|
|
32
31
|
|
|
33
|
-
def __init__(self, jobStoreName: str, specifiedJobs: Optional[
|
|
32
|
+
def __init__(self, jobStoreName: str, specifiedJobs: Optional[list[str]] = None):
|
|
34
33
|
self.jobStoreName = jobStoreName
|
|
35
34
|
self.jobStore = Toil.resumeJobStore(jobStoreName)
|
|
36
35
|
|
|
37
36
|
if specifiedJobs is None:
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
37
|
+
try:
|
|
38
|
+
rootJob = self.fetchRootJob()
|
|
39
|
+
logger.info(
|
|
40
|
+
"Traversing the job graph gathering jobs. This may take a couple of minutes."
|
|
41
|
+
)
|
|
42
|
+
self.jobsToReport = self.traverseJobGraph(rootJob)
|
|
43
|
+
except JobException:
|
|
44
|
+
# Root job isn't set.
|
|
45
|
+
logger.warning("Workflow does not have a root job (yet? anymore?). Cannot look for jobs.")
|
|
46
|
+
self.jobsToReport = []
|
|
47
|
+
|
|
41
48
|
else:
|
|
42
49
|
self.jobsToReport = self.fetchUserJobs(specifiedJobs)
|
|
43
50
|
|
|
44
51
|
self.message_bus_path = self.jobStore.config.write_messages
|
|
52
|
+
|
|
45
53
|
def print_dot_chart(self) -> None:
|
|
46
54
|
"""Print a dot output graph representing the workflow."""
|
|
47
55
|
print("digraph toil_graph {")
|
|
48
56
|
print("# This graph was created from job-store: %s" % self.jobStoreName)
|
|
49
57
|
|
|
50
58
|
# Make job IDs to node names map
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
59
|
+
def id_to_name(job_id: str) -> str:
|
|
60
|
+
"""
|
|
61
|
+
Change a job ID into a GraphViz node name.
|
|
62
|
+
"""
|
|
63
|
+
replacements = [
|
|
64
|
+
("_", "_u_"),
|
|
65
|
+
("/", "_s_"),
|
|
66
|
+
("-", "_d_")
|
|
67
|
+
]
|
|
68
|
+
result = job_id
|
|
69
|
+
for char, replacement in replacements:
|
|
70
|
+
result = result.replace(char, replacement)
|
|
71
|
+
return result
|
|
72
|
+
id_strings = [str(job.jobStoreID) for job in self.jobsToReport]
|
|
73
|
+
jobsToNodeNames = {
|
|
74
|
+
s: id_to_name(s) for s in id_strings
|
|
75
|
+
}
|
|
54
76
|
|
|
55
77
|
# Print the nodes
|
|
56
78
|
for job in set(self.jobsToReport):
|
|
57
79
|
print(
|
|
58
|
-
'{} [label="{} {}"];'.format(
|
|
59
|
-
jobsToNodeNames[str(job.jobStoreID)],
|
|
80
|
+
'{} [label="{} {}" color="{}"];'.format(
|
|
81
|
+
jobsToNodeNames[str(job.jobStoreID)],
|
|
82
|
+
job.jobName,
|
|
83
|
+
job.displayName,
|
|
84
|
+
"black" if job.has_body() else "green"
|
|
60
85
|
)
|
|
61
86
|
)
|
|
62
87
|
|
|
@@ -82,7 +107,11 @@ class ToilStatus:
|
|
|
82
107
|
with job.getLogFileHandle(self.jobStore) as fH:
|
|
83
108
|
# TODO: This looks intended to be machine-readable, but the format is
|
|
84
109
|
# unspecified and no escaping is done. But keep these tags around.
|
|
85
|
-
print(
|
|
110
|
+
print(
|
|
111
|
+
StatsAndLogging.formatLogStream(
|
|
112
|
+
fH, stream_name=f"LOG_FILE_OF_JOB:{job} LOG:"
|
|
113
|
+
)
|
|
114
|
+
)
|
|
86
115
|
else:
|
|
87
116
|
print(f"LOG_FILE_OF_JOB: {job} LOG: Job has no log file")
|
|
88
117
|
|
|
@@ -94,65 +123,110 @@ class ToilStatus:
|
|
|
94
123
|
children += "\t(CHILD_JOB:%s,PRECEDENCE:%i)" % (childJob, level)
|
|
95
124
|
print(children)
|
|
96
125
|
|
|
97
|
-
def printAggregateJobStats(
|
|
98
|
-
|
|
99
|
-
|
|
126
|
+
def printAggregateJobStats(
|
|
127
|
+
self, properties: list[set[str]], childNumber: list[int]
|
|
128
|
+
) -> None:
|
|
129
|
+
"""
|
|
130
|
+
Prints each job's ID, log file, remaining tries, and other properties.
|
|
131
|
+
|
|
132
|
+
:param properties: A set of string flag names for each job in self.jobsToReport.
|
|
133
|
+
:param childNumber: A list of child counts for each job in self.jobsToReport.
|
|
134
|
+
"""
|
|
135
|
+
for job, job_properties, job_child_number in zip(
|
|
136
|
+
self.jobsToReport, properties, childNumber
|
|
137
|
+
):
|
|
100
138
|
|
|
101
139
|
def lf(x: str) -> str:
|
|
102
|
-
return f"{x}:{str(x in
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
140
|
+
return f"{x}:{str(x in job_properties)}"
|
|
141
|
+
|
|
142
|
+
# We use a sort of not-really-machine-readable key:value TSV format here.
|
|
143
|
+
# But we only include important keys to help the humans, and flags
|
|
144
|
+
# don't have a value, just a key.
|
|
145
|
+
parts = [f"JOB:{job}"]
|
|
146
|
+
for flag in [
|
|
147
|
+
"COMPLETELY_FAILED",
|
|
148
|
+
"READY_TO_RUN",
|
|
149
|
+
"IS_ZOMBIE",
|
|
150
|
+
"HAS_SERVICES",
|
|
151
|
+
"IS_SERVICE",
|
|
152
|
+
]:
|
|
153
|
+
if flag in job_properties:
|
|
154
|
+
parts.append(flag)
|
|
155
|
+
if job.logJobStoreFileID:
|
|
156
|
+
parts.append(f"LOG_FILE:{job.logJobStoreFileID}")
|
|
157
|
+
if job.remainingTryCount > 0:
|
|
158
|
+
parts.append(f"TRYS_REMAINING:{job.remainingTryCount}")
|
|
159
|
+
if job_child_number > 0:
|
|
160
|
+
parts.append(f"CHILD_NUMBER:{job_child_number}")
|
|
161
|
+
|
|
162
|
+
print("\t".join(parts))
|
|
163
|
+
|
|
164
|
+
def report_on_jobs(self) -> dict[str, Any]:
|
|
111
165
|
"""
|
|
112
166
|
Gathers information about jobs such as its child jobs and status.
|
|
113
167
|
|
|
114
|
-
:returns jobStats:
|
|
115
|
-
|
|
168
|
+
:returns jobStats: Dict containing some lists of jobs by category, and
|
|
169
|
+
some lists of job properties for each job in self.jobsToReport.
|
|
116
170
|
"""
|
|
171
|
+
# These are lists of the matching jobs
|
|
117
172
|
hasChildren = []
|
|
118
173
|
readyToRun = []
|
|
119
174
|
zombies = []
|
|
120
|
-
hasLogFile:
|
|
175
|
+
hasLogFile: list[JobDescription] = []
|
|
121
176
|
hasServices = []
|
|
122
|
-
services:
|
|
123
|
-
|
|
177
|
+
services: list[ServiceJobDescription] = []
|
|
178
|
+
completely_failed = []
|
|
179
|
+
|
|
180
|
+
# These are stats for jobs in self.jobsToReport
|
|
181
|
+
child_number: list[int] = []
|
|
182
|
+
properties: list[set[str]] = []
|
|
183
|
+
|
|
184
|
+
# TODO: This mix of semantics is confusing and made per-job status be
|
|
185
|
+
# wrong for multiple years because it was not understood. Redesign it!
|
|
124
186
|
|
|
125
187
|
for job in self.jobsToReport:
|
|
188
|
+
job_properties: set[str] = set()
|
|
126
189
|
if job.logJobStoreFileID is not None:
|
|
127
190
|
hasLogFile.append(job)
|
|
128
191
|
|
|
129
|
-
|
|
130
|
-
|
|
192
|
+
job_child_number = len(list(job.allSuccessors()))
|
|
193
|
+
child_number.append(job_child_number)
|
|
194
|
+
if job_child_number > 0: # Total number of successors > 0
|
|
131
195
|
hasChildren.append(job)
|
|
132
|
-
|
|
133
|
-
elif job.
|
|
134
|
-
# Job has no children and a
|
|
196
|
+
job_properties.add("HAS_CHILDREN")
|
|
197
|
+
elif job.has_body():
|
|
198
|
+
# Job has no children and a body to run. Indicates job could be run.
|
|
135
199
|
readyToRun.append(job)
|
|
136
|
-
|
|
200
|
+
job_properties.add("READY_TO_RUN")
|
|
137
201
|
else:
|
|
138
202
|
# Job has no successors and no command, so is a zombie job.
|
|
139
203
|
zombies.append(job)
|
|
140
|
-
|
|
204
|
+
job_properties.add("IS_ZOMBIE")
|
|
141
205
|
if job.services:
|
|
142
206
|
hasServices.append(job)
|
|
143
|
-
|
|
207
|
+
job_properties.add("HAS_SERVICES")
|
|
144
208
|
if isinstance(job, ServiceJobDescription):
|
|
145
209
|
services.append(job)
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
210
|
+
job_properties.add("IS_SERVICE")
|
|
211
|
+
if job.remainingTryCount == 0:
|
|
212
|
+
# Job is out of tries (and thus completely failed)
|
|
213
|
+
job_properties.add("COMPLETELY_FAILED")
|
|
214
|
+
completely_failed.append(job)
|
|
215
|
+
properties.append(job_properties)
|
|
216
|
+
|
|
217
|
+
jobStats = {
|
|
218
|
+
# These are lists of the mathcing jobs
|
|
219
|
+
"hasChildren": hasChildren,
|
|
220
|
+
"readyToRun": readyToRun,
|
|
221
|
+
"zombies": zombies,
|
|
222
|
+
"hasServices": hasServices,
|
|
223
|
+
"services": services,
|
|
224
|
+
"hasLogFile": hasLogFile,
|
|
225
|
+
"completelyFailed": completely_failed,
|
|
226
|
+
# These are stats for jobs in self.jobsToReport
|
|
227
|
+
"properties": properties,
|
|
228
|
+
"childNumber": child_number,
|
|
229
|
+
}
|
|
156
230
|
return jobStats
|
|
157
231
|
|
|
158
232
|
@staticmethod
|
|
@@ -168,21 +242,21 @@ class ToilStatus:
|
|
|
168
242
|
try:
|
|
169
243
|
jobstore = Toil.resumeJobStore(jobStoreName)
|
|
170
244
|
except NoSuchJobStoreException:
|
|
171
|
-
return
|
|
245
|
+
return "QUEUED"
|
|
172
246
|
except NoSuchFileException:
|
|
173
|
-
return
|
|
247
|
+
return "QUEUED"
|
|
174
248
|
|
|
175
249
|
try:
|
|
176
250
|
pid = jobstore.read_leader_pid()
|
|
177
251
|
try:
|
|
178
252
|
os.kill(pid, 0) # Does not kill process when 0 is passed.
|
|
179
253
|
except OSError: # Process not found, must be done.
|
|
180
|
-
return
|
|
254
|
+
return "COMPLETED"
|
|
181
255
|
else:
|
|
182
|
-
return
|
|
256
|
+
return "RUNNING"
|
|
183
257
|
except NoSuchFileException:
|
|
184
258
|
pass
|
|
185
|
-
return
|
|
259
|
+
return "QUEUED"
|
|
186
260
|
|
|
187
261
|
@staticmethod
|
|
188
262
|
def getStatus(jobStoreName: str) -> str:
|
|
@@ -201,38 +275,45 @@ class ToilStatus:
|
|
|
201
275
|
try:
|
|
202
276
|
jobstore = Toil.resumeJobStore(jobStoreName)
|
|
203
277
|
except NoSuchJobStoreException:
|
|
204
|
-
return
|
|
278
|
+
return "QUEUED"
|
|
205
279
|
except NoSuchFileException:
|
|
206
|
-
return
|
|
280
|
+
return "QUEUED"
|
|
207
281
|
|
|
208
282
|
try:
|
|
209
|
-
with jobstore.read_shared_file_stream(
|
|
283
|
+
with jobstore.read_shared_file_stream("succeeded.log") as successful:
|
|
210
284
|
pass
|
|
211
|
-
return
|
|
285
|
+
return "COMPLETED"
|
|
212
286
|
except NoSuchFileException:
|
|
213
287
|
try:
|
|
214
|
-
with jobstore.read_shared_file_stream(
|
|
288
|
+
with jobstore.read_shared_file_stream("failed.log") as failed:
|
|
215
289
|
pass
|
|
216
|
-
return
|
|
290
|
+
return "ERROR"
|
|
217
291
|
except NoSuchFileException:
|
|
218
292
|
pass
|
|
219
|
-
return
|
|
293
|
+
return "RUNNING"
|
|
220
294
|
|
|
221
|
-
def
|
|
295
|
+
def print_running_jobs(self) -> None:
|
|
222
296
|
"""
|
|
223
|
-
Goes through bus messages, returns a list of tuples which have correspondence between
|
|
224
|
-
PID on assigned batch system and
|
|
225
|
-
|
|
226
297
|
Prints a list of the currently running jobs
|
|
227
298
|
"""
|
|
228
299
|
|
|
229
300
|
print("\nMessage bus path: ", self.message_bus_path)
|
|
230
301
|
if self.message_bus_path is not None:
|
|
231
302
|
if os.path.exists(self.message_bus_path):
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
303
|
+
all_job_statuses = replay_message_bus(self.message_bus_path)
|
|
304
|
+
|
|
305
|
+
for job_status in all_job_statuses.values():
|
|
306
|
+
if job_status.is_running():
|
|
307
|
+
status_line = [
|
|
308
|
+
f"Job ID {job_status.job_store_id} with name {job_status.name} is running"
|
|
309
|
+
]
|
|
310
|
+
if job_status.batch_system != "":
|
|
311
|
+
# batch system exists
|
|
312
|
+
status_line.append(
|
|
313
|
+
f" on {job_status.batch_system} as ID {job_status.external_batch_id}"
|
|
314
|
+
)
|
|
315
|
+
status_line.append(".")
|
|
316
|
+
print("".join(status_line))
|
|
236
317
|
else:
|
|
237
318
|
print("Message bus file is missing!")
|
|
238
319
|
|
|
@@ -251,11 +332,14 @@ class ToilStatus:
|
|
|
251
332
|
"""
|
|
252
333
|
try:
|
|
253
334
|
return self.jobStore.load_root_job()
|
|
254
|
-
except JobException:
|
|
255
|
-
|
|
335
|
+
except JobException as e:
|
|
336
|
+
logger.info(e)
|
|
337
|
+
print(
|
|
338
|
+
"Root job is absent. The workflow has may have completed successfully."
|
|
339
|
+
)
|
|
256
340
|
raise
|
|
257
341
|
|
|
258
|
-
def fetchUserJobs(self, jobs:
|
|
342
|
+
def fetchUserJobs(self, jobs: list[str]) -> list[JobDescription]:
|
|
259
343
|
"""
|
|
260
344
|
Takes a user input array of jobs, verifies that they are in the jobStore
|
|
261
345
|
and returns the array of jobsToReport.
|
|
@@ -268,16 +352,16 @@ class ToilStatus:
|
|
|
268
352
|
try:
|
|
269
353
|
jobsToReport.append(self.jobStore.load_job(jobID))
|
|
270
354
|
except JobException:
|
|
271
|
-
print(
|
|
355
|
+
print("The job %s could not be found." % jobID, file=sys.stderr)
|
|
272
356
|
raise
|
|
273
357
|
return jobsToReport
|
|
274
358
|
|
|
275
359
|
def traverseJobGraph(
|
|
276
360
|
self,
|
|
277
361
|
rootJob: JobDescription,
|
|
278
|
-
jobsToReport: Optional[
|
|
279
|
-
foundJobStoreIDs: Optional[
|
|
280
|
-
) ->
|
|
362
|
+
jobsToReport: Optional[list[JobDescription]] = None,
|
|
363
|
+
foundJobStoreIDs: Optional[set[str]] = None,
|
|
364
|
+
) -> list[JobDescription]:
|
|
281
365
|
"""
|
|
282
366
|
Find all current jobs in the jobStore and return them as an Array.
|
|
283
367
|
|
|
@@ -300,15 +384,24 @@ class ToilStatus:
|
|
|
300
384
|
jobsToReport.append(rootJob)
|
|
301
385
|
# Traverse jobs in stack
|
|
302
386
|
for successorJobStoreID in rootJob.allSuccessors():
|
|
303
|
-
if
|
|
304
|
-
|
|
387
|
+
if (
|
|
388
|
+
successorJobStoreID not in foundJobStoreIDs
|
|
389
|
+
and self.jobStore.job_exists(successorJobStoreID)
|
|
390
|
+
):
|
|
391
|
+
self.traverseJobGraph(
|
|
392
|
+
self.jobStore.load_job(successorJobStoreID),
|
|
393
|
+
jobsToReport,
|
|
394
|
+
foundJobStoreIDs,
|
|
395
|
+
)
|
|
305
396
|
|
|
306
397
|
# Traverse service jobs
|
|
307
398
|
for jobs in rootJob.services:
|
|
308
399
|
for serviceJobStoreID in jobs:
|
|
309
400
|
if self.jobStore.job_exists(serviceJobStoreID):
|
|
310
401
|
if serviceJobStoreID in foundJobStoreIDs:
|
|
311
|
-
raise RuntimeError(
|
|
402
|
+
raise RuntimeError(
|
|
403
|
+
"Service job was unexpectedly found while traversing "
|
|
404
|
+
)
|
|
312
405
|
foundJobStoreIDs.add(serviceJobStoreID)
|
|
313
406
|
jobsToReport.append(self.jobStore.load_job(serviceJobStoreID))
|
|
314
407
|
|
|
@@ -318,37 +411,81 @@ class ToilStatus:
|
|
|
318
411
|
def main() -> None:
|
|
319
412
|
"""Reports the state of a Toil workflow."""
|
|
320
413
|
parser = parser_with_common_options(prog="toil status")
|
|
321
|
-
parser.add_argument(
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
414
|
+
parser.add_argument(
|
|
415
|
+
"--failIfNotComplete",
|
|
416
|
+
action="store_true",
|
|
417
|
+
help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
|
|
418
|
+
default=False,
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
parser.add_argument(
|
|
422
|
+
"--noAggStats",
|
|
423
|
+
dest="stats",
|
|
424
|
+
action="store_false",
|
|
425
|
+
help="Do not print overall, aggregate status of workflow.",
|
|
426
|
+
default=True,
|
|
427
|
+
)
|
|
428
|
+
|
|
429
|
+
parser.add_argument(
|
|
430
|
+
"--dot",
|
|
431
|
+
"--printDot",
|
|
432
|
+
dest="print_dot",
|
|
433
|
+
action="store_true",
|
|
434
|
+
help="Print dot formatted description of the graph. If using --jobs will "
|
|
435
|
+
"restrict to subgraph including only those jobs. default=%(default)s",
|
|
436
|
+
default=False,
|
|
437
|
+
)
|
|
438
|
+
|
|
439
|
+
parser.add_argument(
|
|
440
|
+
"--jobs",
|
|
441
|
+
nargs="+",
|
|
442
|
+
help="Restrict reporting to the following jobs (allows subsetting of the report).",
|
|
443
|
+
default=None,
|
|
444
|
+
)
|
|
445
|
+
|
|
446
|
+
parser.add_argument(
|
|
447
|
+
"--perJob",
|
|
448
|
+
"--printPerJobStats",
|
|
449
|
+
dest="print_per_job_stats",
|
|
450
|
+
action="store_true",
|
|
451
|
+
help="Print info about each job. default=%(default)s",
|
|
452
|
+
default=False,
|
|
453
|
+
)
|
|
454
|
+
|
|
455
|
+
parser.add_argument(
|
|
456
|
+
"--logs",
|
|
457
|
+
"--printLogs",
|
|
458
|
+
dest="print_logs",
|
|
459
|
+
action="store_true",
|
|
460
|
+
help="Print the log files of jobs (if they exist). default=%(default)s",
|
|
461
|
+
default=False,
|
|
462
|
+
)
|
|
463
|
+
|
|
464
|
+
parser.add_argument(
|
|
465
|
+
"--children",
|
|
466
|
+
"--printChildren",
|
|
467
|
+
dest="print_children",
|
|
468
|
+
action="store_true",
|
|
469
|
+
help="Print children of each job. default=%(default)s",
|
|
470
|
+
default=False,
|
|
471
|
+
)
|
|
472
|
+
|
|
473
|
+
parser.add_argument(
|
|
474
|
+
"--status",
|
|
475
|
+
"--printStatus",
|
|
476
|
+
dest="print_status",
|
|
477
|
+
action="store_true",
|
|
478
|
+
help="Determine which jobs are currently running and the associated batch system ID, if any",
|
|
479
|
+
)
|
|
480
|
+
|
|
481
|
+
parser.add_argument(
|
|
482
|
+
"--failed",
|
|
483
|
+
"--printFailed",
|
|
484
|
+
dest="print_failed",
|
|
485
|
+
action="store_true",
|
|
486
|
+
help="List jobs which seem to have failed to run",
|
|
487
|
+
)
|
|
337
488
|
|
|
338
|
-
parser.add_argument("--printPerJobStats", action="store_true",
|
|
339
|
-
help="Print info about each job. default=%(default)s",
|
|
340
|
-
default=False)
|
|
341
|
-
|
|
342
|
-
parser.add_argument("--printLogs", action="store_true",
|
|
343
|
-
help="Print the log files of jobs (if they exist). default=%(default)s",
|
|
344
|
-
default=False)
|
|
345
|
-
|
|
346
|
-
parser.add_argument("--printChildren", action="store_true",
|
|
347
|
-
help="Print children of each job. default=%(default)s",
|
|
348
|
-
default=False)
|
|
349
|
-
|
|
350
|
-
parser.add_argument("--printStatus", action="store_true",
|
|
351
|
-
help="Determine which jobs are currently running and the associated batch system ID")
|
|
352
489
|
options = parser.parse_args()
|
|
353
490
|
set_logging_from_options(options)
|
|
354
491
|
|
|
@@ -356,13 +493,10 @@ def main() -> None:
|
|
|
356
493
|
parser.print_help()
|
|
357
494
|
sys.exit(0)
|
|
358
495
|
|
|
359
|
-
config = Config()
|
|
360
|
-
config.setOptions(options)
|
|
361
|
-
|
|
362
496
|
try:
|
|
363
|
-
status = ToilStatus(
|
|
497
|
+
status = ToilStatus(options.jobStore, options.jobs)
|
|
364
498
|
except NoSuchJobStoreException:
|
|
365
|
-
print(
|
|
499
|
+
print(f"The job store {options.jobStore} was not found.")
|
|
366
500
|
return
|
|
367
501
|
except JobException: # Workflow likely complete, user informed in ToilStatus()
|
|
368
502
|
return
|
|
@@ -370,36 +504,55 @@ def main() -> None:
|
|
|
370
504
|
jobStats = status.report_on_jobs()
|
|
371
505
|
|
|
372
506
|
# Info to be reported.
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
507
|
+
# These are lists of matching jobs.
|
|
508
|
+
hasChildren = jobStats["hasChildren"]
|
|
509
|
+
readyToRun = jobStats["readyToRun"]
|
|
510
|
+
zombies = jobStats["zombies"]
|
|
511
|
+
hasServices = jobStats["hasServices"]
|
|
512
|
+
services = jobStats["services"]
|
|
513
|
+
hasLogFile = jobStats["hasLogFile"]
|
|
514
|
+
completely_failed = jobStats["completelyFailed"]
|
|
515
|
+
# These are results for corresponding jobs in status.jobsToReport
|
|
516
|
+
properties = jobStats["properties"]
|
|
517
|
+
childNumber = jobStats["childNumber"]
|
|
518
|
+
|
|
519
|
+
if options.print_per_job_stats:
|
|
383
520
|
status.printAggregateJobStats(properties, childNumber)
|
|
384
|
-
if options.
|
|
521
|
+
if options.print_logs:
|
|
385
522
|
status.printJobLog()
|
|
386
|
-
if options.
|
|
523
|
+
if options.print_children:
|
|
387
524
|
status.printJobChildren()
|
|
388
|
-
if options.
|
|
525
|
+
if options.print_dot:
|
|
389
526
|
status.print_dot_chart()
|
|
527
|
+
if options.print_failed:
|
|
528
|
+
print("Failed jobs:")
|
|
529
|
+
for job in completely_failed:
|
|
530
|
+
print(job)
|
|
390
531
|
if options.stats:
|
|
391
|
-
print(
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
532
|
+
print(
|
|
533
|
+
"Of the %i jobs considered, "
|
|
534
|
+
"there are "
|
|
535
|
+
"%i completely failed jobs, "
|
|
536
|
+
"%i jobs with children, "
|
|
537
|
+
"%i jobs ready to run, "
|
|
538
|
+
"%i zombie jobs, "
|
|
539
|
+
"%i jobs with services, "
|
|
540
|
+
"%i services, "
|
|
541
|
+
"and %i jobs with log files currently in %s."
|
|
542
|
+
% (
|
|
543
|
+
len(status.jobsToReport),
|
|
544
|
+
len(completely_failed),
|
|
545
|
+
len(hasChildren),
|
|
546
|
+
len(readyToRun),
|
|
547
|
+
len(zombies),
|
|
548
|
+
len(hasServices),
|
|
549
|
+
len(services),
|
|
550
|
+
len(hasLogFile),
|
|
551
|
+
status.jobStore,
|
|
552
|
+
)
|
|
553
|
+
)
|
|
554
|
+
if options.print_status:
|
|
555
|
+
status.print_running_jobs()
|
|
402
556
|
if len(status.jobsToReport) > 0 and options.failIfNotComplete:
|
|
403
557
|
# Upon workflow completion, all jobs will have been removed from job store
|
|
404
558
|
exit(1)
|
|
405
|
-
|
|
@@ -31,7 +31,9 @@ def internet_connection() -> bool:
|
|
|
31
31
|
|
|
32
32
|
def main() -> None:
|
|
33
33
|
if not internet_connection():
|
|
34
|
-
raise RuntimeError(
|
|
34
|
+
raise RuntimeError(
|
|
35
|
+
"No internet. Updating the EC2 Instance list requires internet."
|
|
36
|
+
)
|
|
35
37
|
updateStaticEC2Instances()
|
|
36
38
|
|
|
37
39
|
|
toil/version.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
baseVersion = '
|
|
1
|
+
baseVersion = '8.0.0'
|
|
2
2
|
cgcloudVersion = '1.6.0a1.dev393'
|
|
3
|
-
version = '
|
|
4
|
-
cacheTag = 'cache-local-py3.
|
|
5
|
-
mainCacheTag = 'cache-master-py3.
|
|
6
|
-
distVersion = '
|
|
7
|
-
exactPython = 'python3.
|
|
8
|
-
python = 'python3.
|
|
9
|
-
dockerTag = '
|
|
10
|
-
currentCommit = '
|
|
11
|
-
dockerRegistry = 'quay.io/
|
|
3
|
+
version = '8.0.0-d2ae0ea9ab49f238670dbf6aafd20de7afdd8514'
|
|
4
|
+
cacheTag = 'cache-local-py3.13'
|
|
5
|
+
mainCacheTag = 'cache-master-py3.13'
|
|
6
|
+
distVersion = '8.0.0'
|
|
7
|
+
exactPython = 'python3.13'
|
|
8
|
+
python = 'python3.13'
|
|
9
|
+
dockerTag = '8.0.0-d2ae0ea9ab49f238670dbf6aafd20de7afdd8514-py3.13'
|
|
10
|
+
currentCommit = 'd2ae0ea9ab49f238670dbf6aafd20de7afdd8514'
|
|
11
|
+
dockerRegistry = 'quay.io/stxue'
|
|
12
12
|
dockerName = 'toil'
|
|
13
|
-
dirty =
|
|
14
|
-
cwltool_version = '3.1.
|
|
13
|
+
dirty = False
|
|
14
|
+
cwltool_version = '3.1.20250110105449'
|