toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +124 -86
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +39 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +651 -155
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +784 -397
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1137 -534
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +1031 -349
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +772 -412
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +204 -58
- toil/lib/aws/utils.py +290 -213
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/dockstore.py +379 -0
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -105
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/history.py +1271 -0
- toil/lib/history_submission.py +681 -0
- toil/lib/humanize.py +6 -2
- toil/lib/io.py +121 -12
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +83 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +125 -87
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/lib/trs.py +390 -0
- toil/lib/web.py +38 -0
- toil/options/common.py +850 -402
- toil/options/cwl.py +185 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +283 -180
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +84 -55
- toil/server/utils.py +56 -31
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +183 -65
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +265 -49
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +375 -72
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_history.py +212 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/lib/test_trs.py +161 -0
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +6 -6
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3528 -1053
- toil/worker.py +370 -149
- toil-8.1.0b1.dist-info/METADATA +178 -0
- toil-8.1.0b1.dist-info/RECORD +259 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/utils/toilStatus.py
CHANGED
|
@@ -15,13 +15,12 @@
|
|
|
15
15
|
import logging
|
|
16
16
|
import os
|
|
17
17
|
import sys
|
|
18
|
-
from typing import Any,
|
|
18
|
+
from typing import Any, Optional
|
|
19
19
|
|
|
20
20
|
from toil.bus import replay_message_bus
|
|
21
|
-
from toil.common import
|
|
21
|
+
from toil.common import Toil, parser_with_common_options
|
|
22
22
|
from toil.job import JobDescription, JobException, ServiceJobDescription
|
|
23
|
-
from toil.jobStores.abstractJobStore import
|
|
24
|
-
NoSuchJobStoreException)
|
|
23
|
+
from toil.jobStores.abstractJobStore import NoSuchFileException, NoSuchJobStoreException
|
|
25
24
|
from toil.statsAndLogging import StatsAndLogging, set_logging_from_options
|
|
26
25
|
|
|
27
26
|
logger = logging.getLogger(__name__)
|
|
@@ -30,33 +29,59 @@ logger = logging.getLogger(__name__)
|
|
|
30
29
|
class ToilStatus:
|
|
31
30
|
"""Tool for reporting on job status."""
|
|
32
31
|
|
|
33
|
-
def __init__(self, jobStoreName: str, specifiedJobs: Optional[
|
|
32
|
+
def __init__(self, jobStoreName: str, specifiedJobs: Optional[list[str]] = None):
|
|
34
33
|
self.jobStoreName = jobStoreName
|
|
35
34
|
self.jobStore = Toil.resumeJobStore(jobStoreName)
|
|
36
35
|
|
|
37
36
|
if specifiedJobs is None:
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
37
|
+
try:
|
|
38
|
+
rootJob = self.fetchRootJob()
|
|
39
|
+
logger.info(
|
|
40
|
+
"Traversing the job graph gathering jobs. This may take a couple of minutes."
|
|
41
|
+
)
|
|
42
|
+
self.jobsToReport = self.traverseJobGraph(rootJob)
|
|
43
|
+
except JobException:
|
|
44
|
+
# Root job isn't set.
|
|
45
|
+
logger.warning("Workflow does not have a root job (yet? anymore?). Cannot look for jobs.")
|
|
46
|
+
self.jobsToReport = []
|
|
47
|
+
|
|
41
48
|
else:
|
|
42
49
|
self.jobsToReport = self.fetchUserJobs(specifiedJobs)
|
|
43
50
|
|
|
44
51
|
self.message_bus_path = self.jobStore.config.write_messages
|
|
52
|
+
|
|
45
53
|
def print_dot_chart(self) -> None:
|
|
46
54
|
"""Print a dot output graph representing the workflow."""
|
|
47
55
|
print("digraph toil_graph {")
|
|
48
56
|
print("# This graph was created from job-store: %s" % self.jobStoreName)
|
|
49
57
|
|
|
50
58
|
# Make job IDs to node names map
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
59
|
+
def id_to_name(job_id: str) -> str:
|
|
60
|
+
"""
|
|
61
|
+
Change a job ID into a GraphViz node name.
|
|
62
|
+
"""
|
|
63
|
+
replacements = [
|
|
64
|
+
("_", "_u_"),
|
|
65
|
+
("/", "_s_"),
|
|
66
|
+
("-", "_d_")
|
|
67
|
+
]
|
|
68
|
+
result = job_id
|
|
69
|
+
for char, replacement in replacements:
|
|
70
|
+
result = result.replace(char, replacement)
|
|
71
|
+
return result
|
|
72
|
+
id_strings = [str(job.jobStoreID) for job in self.jobsToReport]
|
|
73
|
+
jobsToNodeNames = {
|
|
74
|
+
s: id_to_name(s) for s in id_strings
|
|
75
|
+
}
|
|
54
76
|
|
|
55
77
|
# Print the nodes
|
|
56
78
|
for job in set(self.jobsToReport):
|
|
57
79
|
print(
|
|
58
|
-
'{} [label="{} {}"];'.format(
|
|
59
|
-
jobsToNodeNames[str(job.jobStoreID)],
|
|
80
|
+
'{} [label="{} {}" color="{}"];'.format(
|
|
81
|
+
jobsToNodeNames[str(job.jobStoreID)],
|
|
82
|
+
job.jobName,
|
|
83
|
+
job.displayName,
|
|
84
|
+
"black" if job.has_body() else "green"
|
|
60
85
|
)
|
|
61
86
|
)
|
|
62
87
|
|
|
@@ -82,7 +107,11 @@ class ToilStatus:
|
|
|
82
107
|
with job.getLogFileHandle(self.jobStore) as fH:
|
|
83
108
|
# TODO: This looks intended to be machine-readable, but the format is
|
|
84
109
|
# unspecified and no escaping is done. But keep these tags around.
|
|
85
|
-
print(
|
|
110
|
+
print(
|
|
111
|
+
StatsAndLogging.formatLogStream(
|
|
112
|
+
fH, stream_name=f"LOG_FILE_OF_JOB:{job} LOG:"
|
|
113
|
+
)
|
|
114
|
+
)
|
|
86
115
|
else:
|
|
87
116
|
print(f"LOG_FILE_OF_JOB: {job} LOG: Job has no log file")
|
|
88
117
|
|
|
@@ -94,22 +123,33 @@ class ToilStatus:
|
|
|
94
123
|
children += "\t(CHILD_JOB:%s,PRECEDENCE:%i)" % (childJob, level)
|
|
95
124
|
print(children)
|
|
96
125
|
|
|
97
|
-
def printAggregateJobStats(
|
|
126
|
+
def printAggregateJobStats(
|
|
127
|
+
self, properties: list[set[str]], childNumber: list[int]
|
|
128
|
+
) -> None:
|
|
98
129
|
"""
|
|
99
130
|
Prints each job's ID, log file, remaining tries, and other properties.
|
|
100
131
|
|
|
101
132
|
:param properties: A set of string flag names for each job in self.jobsToReport.
|
|
102
133
|
:param childNumber: A list of child counts for each job in self.jobsToReport.
|
|
103
134
|
"""
|
|
104
|
-
for job, job_properties, job_child_number in zip(
|
|
135
|
+
for job, job_properties, job_child_number in zip(
|
|
136
|
+
self.jobsToReport, properties, childNumber
|
|
137
|
+
):
|
|
105
138
|
|
|
106
139
|
def lf(x: str) -> str:
|
|
107
140
|
return f"{x}:{str(x in job_properties)}"
|
|
141
|
+
|
|
108
142
|
# We use a sort of not-really-machine-readable key:value TSV format here.
|
|
109
143
|
# But we only include important keys to help the humans, and flags
|
|
110
144
|
# don't have a value, just a key.
|
|
111
145
|
parts = [f"JOB:{job}"]
|
|
112
|
-
for flag in [
|
|
146
|
+
for flag in [
|
|
147
|
+
"COMPLETELY_FAILED",
|
|
148
|
+
"READY_TO_RUN",
|
|
149
|
+
"IS_ZOMBIE",
|
|
150
|
+
"HAS_SERVICES",
|
|
151
|
+
"IS_SERVICE",
|
|
152
|
+
]:
|
|
113
153
|
if flag in job_properties:
|
|
114
154
|
parts.append(flag)
|
|
115
155
|
if job.logJobStoreFileID:
|
|
@@ -121,7 +161,7 @@ class ToilStatus:
|
|
|
121
161
|
|
|
122
162
|
print("\t".join(parts))
|
|
123
163
|
|
|
124
|
-
def report_on_jobs(self) ->
|
|
164
|
+
def report_on_jobs(self) -> dict[str, Any]:
|
|
125
165
|
"""
|
|
126
166
|
Gathers information about jobs such as its child jobs and status.
|
|
127
167
|
|
|
@@ -132,20 +172,20 @@ class ToilStatus:
|
|
|
132
172
|
hasChildren = []
|
|
133
173
|
readyToRun = []
|
|
134
174
|
zombies = []
|
|
135
|
-
hasLogFile:
|
|
175
|
+
hasLogFile: list[JobDescription] = []
|
|
136
176
|
hasServices = []
|
|
137
|
-
services:
|
|
177
|
+
services: list[ServiceJobDescription] = []
|
|
138
178
|
completely_failed = []
|
|
139
179
|
|
|
140
180
|
# These are stats for jobs in self.jobsToReport
|
|
141
|
-
child_number:
|
|
142
|
-
properties:
|
|
181
|
+
child_number: list[int] = []
|
|
182
|
+
properties: list[set[str]] = []
|
|
143
183
|
|
|
144
184
|
# TODO: This mix of semantics is confusing and made per-job status be
|
|
145
185
|
# wrong for multiple years because it was not understood. Redesign it!
|
|
146
186
|
|
|
147
187
|
for job in self.jobsToReport:
|
|
148
|
-
job_properties:
|
|
188
|
+
job_properties: set[str] = set()
|
|
149
189
|
if job.logJobStoreFileID is not None:
|
|
150
190
|
hasLogFile.append(job)
|
|
151
191
|
|
|
@@ -176,16 +216,16 @@ class ToilStatus:
|
|
|
176
216
|
|
|
177
217
|
jobStats = {
|
|
178
218
|
# These are lists of the mathcing jobs
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
219
|
+
"hasChildren": hasChildren,
|
|
220
|
+
"readyToRun": readyToRun,
|
|
221
|
+
"zombies": zombies,
|
|
222
|
+
"hasServices": hasServices,
|
|
223
|
+
"services": services,
|
|
224
|
+
"hasLogFile": hasLogFile,
|
|
225
|
+
"completelyFailed": completely_failed,
|
|
186
226
|
# These are stats for jobs in self.jobsToReport
|
|
187
|
-
|
|
188
|
-
|
|
227
|
+
"properties": properties,
|
|
228
|
+
"childNumber": child_number,
|
|
189
229
|
}
|
|
190
230
|
return jobStats
|
|
191
231
|
|
|
@@ -202,21 +242,21 @@ class ToilStatus:
|
|
|
202
242
|
try:
|
|
203
243
|
jobstore = Toil.resumeJobStore(jobStoreName)
|
|
204
244
|
except NoSuchJobStoreException:
|
|
205
|
-
return
|
|
245
|
+
return "QUEUED"
|
|
206
246
|
except NoSuchFileException:
|
|
207
|
-
return
|
|
247
|
+
return "QUEUED"
|
|
208
248
|
|
|
209
249
|
try:
|
|
210
250
|
pid = jobstore.read_leader_pid()
|
|
211
251
|
try:
|
|
212
252
|
os.kill(pid, 0) # Does not kill process when 0 is passed.
|
|
213
253
|
except OSError: # Process not found, must be done.
|
|
214
|
-
return
|
|
254
|
+
return "COMPLETED"
|
|
215
255
|
else:
|
|
216
|
-
return
|
|
256
|
+
return "RUNNING"
|
|
217
257
|
except NoSuchFileException:
|
|
218
258
|
pass
|
|
219
|
-
return
|
|
259
|
+
return "QUEUED"
|
|
220
260
|
|
|
221
261
|
@staticmethod
|
|
222
262
|
def getStatus(jobStoreName: str) -> str:
|
|
@@ -235,38 +275,45 @@ class ToilStatus:
|
|
|
235
275
|
try:
|
|
236
276
|
jobstore = Toil.resumeJobStore(jobStoreName)
|
|
237
277
|
except NoSuchJobStoreException:
|
|
238
|
-
return
|
|
278
|
+
return "QUEUED"
|
|
239
279
|
except NoSuchFileException:
|
|
240
|
-
return
|
|
280
|
+
return "QUEUED"
|
|
241
281
|
|
|
242
282
|
try:
|
|
243
|
-
with jobstore.read_shared_file_stream(
|
|
283
|
+
with jobstore.read_shared_file_stream("succeeded.log") as successful:
|
|
244
284
|
pass
|
|
245
|
-
return
|
|
285
|
+
return "COMPLETED"
|
|
246
286
|
except NoSuchFileException:
|
|
247
287
|
try:
|
|
248
|
-
with jobstore.read_shared_file_stream(
|
|
288
|
+
with jobstore.read_shared_file_stream("failed.log") as failed:
|
|
249
289
|
pass
|
|
250
|
-
return
|
|
290
|
+
return "ERROR"
|
|
251
291
|
except NoSuchFileException:
|
|
252
292
|
pass
|
|
253
|
-
return
|
|
293
|
+
return "RUNNING"
|
|
254
294
|
|
|
255
|
-
def
|
|
295
|
+
def print_running_jobs(self) -> None:
|
|
256
296
|
"""
|
|
257
|
-
Goes through bus messages, returns a list of tuples which have correspondence between
|
|
258
|
-
PID on assigned batch system and
|
|
259
|
-
|
|
260
297
|
Prints a list of the currently running jobs
|
|
261
298
|
"""
|
|
262
299
|
|
|
263
300
|
print("\nMessage bus path: ", self.message_bus_path)
|
|
264
301
|
if self.message_bus_path is not None:
|
|
265
302
|
if os.path.exists(self.message_bus_path):
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
303
|
+
all_job_statuses = replay_message_bus(self.message_bus_path)
|
|
304
|
+
|
|
305
|
+
for job_status in all_job_statuses.values():
|
|
306
|
+
if job_status.is_running():
|
|
307
|
+
status_line = [
|
|
308
|
+
f"Job ID {job_status.job_store_id} with name {job_status.name} is running"
|
|
309
|
+
]
|
|
310
|
+
if job_status.batch_system != "":
|
|
311
|
+
# batch system exists
|
|
312
|
+
status_line.append(
|
|
313
|
+
f" on {job_status.batch_system} as ID {job_status.external_batch_id}"
|
|
314
|
+
)
|
|
315
|
+
status_line.append(".")
|
|
316
|
+
print("".join(status_line))
|
|
270
317
|
else:
|
|
271
318
|
print("Message bus file is missing!")
|
|
272
319
|
|
|
@@ -287,10 +334,12 @@ class ToilStatus:
|
|
|
287
334
|
return self.jobStore.load_root_job()
|
|
288
335
|
except JobException as e:
|
|
289
336
|
logger.info(e)
|
|
290
|
-
print(
|
|
337
|
+
print(
|
|
338
|
+
"Root job is absent. The workflow has may have completed successfully."
|
|
339
|
+
)
|
|
291
340
|
raise
|
|
292
341
|
|
|
293
|
-
def fetchUserJobs(self, jobs:
|
|
342
|
+
def fetchUserJobs(self, jobs: list[str]) -> list[JobDescription]:
|
|
294
343
|
"""
|
|
295
344
|
Takes a user input array of jobs, verifies that they are in the jobStore
|
|
296
345
|
and returns the array of jobsToReport.
|
|
@@ -303,16 +352,16 @@ class ToilStatus:
|
|
|
303
352
|
try:
|
|
304
353
|
jobsToReport.append(self.jobStore.load_job(jobID))
|
|
305
354
|
except JobException:
|
|
306
|
-
print(
|
|
355
|
+
print("The job %s could not be found." % jobID, file=sys.stderr)
|
|
307
356
|
raise
|
|
308
357
|
return jobsToReport
|
|
309
358
|
|
|
310
359
|
def traverseJobGraph(
|
|
311
360
|
self,
|
|
312
361
|
rootJob: JobDescription,
|
|
313
|
-
jobsToReport: Optional[
|
|
314
|
-
foundJobStoreIDs: Optional[
|
|
315
|
-
) ->
|
|
362
|
+
jobsToReport: Optional[list[JobDescription]] = None,
|
|
363
|
+
foundJobStoreIDs: Optional[set[str]] = None,
|
|
364
|
+
) -> list[JobDescription]:
|
|
316
365
|
"""
|
|
317
366
|
Find all current jobs in the jobStore and return them as an Array.
|
|
318
367
|
|
|
@@ -335,15 +384,24 @@ class ToilStatus:
|
|
|
335
384
|
jobsToReport.append(rootJob)
|
|
336
385
|
# Traverse jobs in stack
|
|
337
386
|
for successorJobStoreID in rootJob.allSuccessors():
|
|
338
|
-
if
|
|
339
|
-
|
|
387
|
+
if (
|
|
388
|
+
successorJobStoreID not in foundJobStoreIDs
|
|
389
|
+
and self.jobStore.job_exists(successorJobStoreID)
|
|
390
|
+
):
|
|
391
|
+
self.traverseJobGraph(
|
|
392
|
+
self.jobStore.load_job(successorJobStoreID),
|
|
393
|
+
jobsToReport,
|
|
394
|
+
foundJobStoreIDs,
|
|
395
|
+
)
|
|
340
396
|
|
|
341
397
|
# Traverse service jobs
|
|
342
398
|
for jobs in rootJob.services:
|
|
343
399
|
for serviceJobStoreID in jobs:
|
|
344
400
|
if self.jobStore.job_exists(serviceJobStoreID):
|
|
345
401
|
if serviceJobStoreID in foundJobStoreIDs:
|
|
346
|
-
raise RuntimeError(
|
|
402
|
+
raise RuntimeError(
|
|
403
|
+
"Service job was unexpectedly found while traversing "
|
|
404
|
+
)
|
|
347
405
|
foundJobStoreIDs.add(serviceJobStoreID)
|
|
348
406
|
jobsToReport.append(self.jobStore.load_job(serviceJobStoreID))
|
|
349
407
|
|
|
@@ -353,40 +411,80 @@ class ToilStatus:
|
|
|
353
411
|
def main() -> None:
|
|
354
412
|
"""Reports the state of a Toil workflow."""
|
|
355
413
|
parser = parser_with_common_options(prog="toil status")
|
|
356
|
-
parser.add_argument(
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
parser.add_argument(
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
parser.add_argument(
|
|
389
|
-
|
|
414
|
+
parser.add_argument(
|
|
415
|
+
"--failIfNotComplete",
|
|
416
|
+
action="store_true",
|
|
417
|
+
help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
|
|
418
|
+
default=False,
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
parser.add_argument(
|
|
422
|
+
"--noAggStats",
|
|
423
|
+
dest="stats",
|
|
424
|
+
action="store_false",
|
|
425
|
+
help="Do not print overall, aggregate status of workflow.",
|
|
426
|
+
default=True,
|
|
427
|
+
)
|
|
428
|
+
|
|
429
|
+
parser.add_argument(
|
|
430
|
+
"--dot",
|
|
431
|
+
"--printDot",
|
|
432
|
+
dest="print_dot",
|
|
433
|
+
action="store_true",
|
|
434
|
+
help="Print dot formatted description of the graph. If using --jobs will "
|
|
435
|
+
"restrict to subgraph including only those jobs. default=%(default)s",
|
|
436
|
+
default=False,
|
|
437
|
+
)
|
|
438
|
+
|
|
439
|
+
parser.add_argument(
|
|
440
|
+
"--jobs",
|
|
441
|
+
nargs="+",
|
|
442
|
+
help="Restrict reporting to the following jobs (allows subsetting of the report).",
|
|
443
|
+
default=None,
|
|
444
|
+
)
|
|
445
|
+
|
|
446
|
+
parser.add_argument(
|
|
447
|
+
"--perJob",
|
|
448
|
+
"--printPerJobStats",
|
|
449
|
+
dest="print_per_job_stats",
|
|
450
|
+
action="store_true",
|
|
451
|
+
help="Print info about each job. default=%(default)s",
|
|
452
|
+
default=False,
|
|
453
|
+
)
|
|
454
|
+
|
|
455
|
+
parser.add_argument(
|
|
456
|
+
"--logs",
|
|
457
|
+
"--printLogs",
|
|
458
|
+
dest="print_logs",
|
|
459
|
+
action="store_true",
|
|
460
|
+
help="Print the log files of jobs (if they exist). default=%(default)s",
|
|
461
|
+
default=False,
|
|
462
|
+
)
|
|
463
|
+
|
|
464
|
+
parser.add_argument(
|
|
465
|
+
"--children",
|
|
466
|
+
"--printChildren",
|
|
467
|
+
dest="print_children",
|
|
468
|
+
action="store_true",
|
|
469
|
+
help="Print children of each job. default=%(default)s",
|
|
470
|
+
default=False,
|
|
471
|
+
)
|
|
472
|
+
|
|
473
|
+
parser.add_argument(
|
|
474
|
+
"--status",
|
|
475
|
+
"--printStatus",
|
|
476
|
+
dest="print_status",
|
|
477
|
+
action="store_true",
|
|
478
|
+
help="Determine which jobs are currently running and the associated batch system ID, if any",
|
|
479
|
+
)
|
|
480
|
+
|
|
481
|
+
parser.add_argument(
|
|
482
|
+
"--failed",
|
|
483
|
+
"--printFailed",
|
|
484
|
+
dest="print_failed",
|
|
485
|
+
action="store_true",
|
|
486
|
+
help="List jobs which seem to have failed to run",
|
|
487
|
+
)
|
|
390
488
|
|
|
391
489
|
options = parser.parse_args()
|
|
392
490
|
set_logging_from_options(options)
|
|
@@ -398,7 +496,7 @@ def main() -> None:
|
|
|
398
496
|
try:
|
|
399
497
|
status = ToilStatus(options.jobStore, options.jobs)
|
|
400
498
|
except NoSuchJobStoreException:
|
|
401
|
-
print(f
|
|
499
|
+
print(f"The job store {options.jobStore} was not found.")
|
|
402
500
|
return
|
|
403
501
|
except JobException: # Workflow likely complete, user informed in ToilStatus()
|
|
404
502
|
return
|
|
@@ -407,16 +505,16 @@ def main() -> None:
|
|
|
407
505
|
|
|
408
506
|
# Info to be reported.
|
|
409
507
|
# These are lists of matching jobs.
|
|
410
|
-
hasChildren = jobStats[
|
|
411
|
-
readyToRun = jobStats[
|
|
412
|
-
zombies = jobStats[
|
|
413
|
-
hasServices = jobStats[
|
|
414
|
-
services = jobStats[
|
|
415
|
-
hasLogFile = jobStats[
|
|
416
|
-
completely_failed = jobStats[
|
|
508
|
+
hasChildren = jobStats["hasChildren"]
|
|
509
|
+
readyToRun = jobStats["readyToRun"]
|
|
510
|
+
zombies = jobStats["zombies"]
|
|
511
|
+
hasServices = jobStats["hasServices"]
|
|
512
|
+
services = jobStats["services"]
|
|
513
|
+
hasLogFile = jobStats["hasLogFile"]
|
|
514
|
+
completely_failed = jobStats["completelyFailed"]
|
|
417
515
|
# These are results for corresponding jobs in status.jobsToReport
|
|
418
|
-
properties = jobStats[
|
|
419
|
-
childNumber = jobStats[
|
|
516
|
+
properties = jobStats["properties"]
|
|
517
|
+
childNumber = jobStats["childNumber"]
|
|
420
518
|
|
|
421
519
|
if options.print_per_job_stats:
|
|
422
520
|
status.printAggregateJobStats(properties, childNumber)
|
|
@@ -431,21 +529,30 @@ def main() -> None:
|
|
|
431
529
|
for job in completely_failed:
|
|
432
530
|
print(job)
|
|
433
531
|
if options.stats:
|
|
434
|
-
print(
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
532
|
+
print(
|
|
533
|
+
"Of the %i jobs considered, "
|
|
534
|
+
"there are "
|
|
535
|
+
"%i completely failed jobs, "
|
|
536
|
+
"%i jobs with children, "
|
|
537
|
+
"%i jobs ready to run, "
|
|
538
|
+
"%i zombie jobs, "
|
|
539
|
+
"%i jobs with services, "
|
|
540
|
+
"%i services, "
|
|
541
|
+
"and %i jobs with log files currently in %s."
|
|
542
|
+
% (
|
|
543
|
+
len(status.jobsToReport),
|
|
544
|
+
len(completely_failed),
|
|
545
|
+
len(hasChildren),
|
|
546
|
+
len(readyToRun),
|
|
547
|
+
len(zombies),
|
|
548
|
+
len(hasServices),
|
|
549
|
+
len(services),
|
|
550
|
+
len(hasLogFile),
|
|
551
|
+
status.jobStore,
|
|
552
|
+
)
|
|
553
|
+
)
|
|
446
554
|
if options.print_status:
|
|
447
|
-
status.
|
|
555
|
+
status.print_running_jobs()
|
|
448
556
|
if len(status.jobsToReport) > 0 and options.failIfNotComplete:
|
|
449
557
|
# Upon workflow completion, all jobs will have been removed from job store
|
|
450
558
|
exit(1)
|
|
451
|
-
|
|
@@ -31,7 +31,9 @@ def internet_connection() -> bool:
|
|
|
31
31
|
|
|
32
32
|
def main() -> None:
|
|
33
33
|
if not internet_connection():
|
|
34
|
-
raise RuntimeError(
|
|
34
|
+
raise RuntimeError(
|
|
35
|
+
"No internet. Updating the EC2 Instance list requires internet."
|
|
36
|
+
)
|
|
35
37
|
updateStaticEC2Instances()
|
|
36
38
|
|
|
37
39
|
|
toil/version.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
baseVersion = '
|
|
1
|
+
baseVersion = '8.1.0b1'
|
|
2
2
|
cgcloudVersion = '1.6.0a1.dev393'
|
|
3
|
-
version = '
|
|
3
|
+
version = '8.1.0b1-4bb05349c027096ab4785259e39b2648118b5dd7'
|
|
4
4
|
cacheTag = 'cache-local-py3.9'
|
|
5
5
|
mainCacheTag = 'cache-master-py3.9'
|
|
6
|
-
distVersion = '
|
|
6
|
+
distVersion = '8.1.0b1'
|
|
7
7
|
exactPython = 'python3.9'
|
|
8
8
|
python = 'python3.9'
|
|
9
|
-
dockerTag = '
|
|
10
|
-
currentCommit = '
|
|
9
|
+
dockerTag = '8.1.0b1-4bb05349c027096ab4785259e39b2648118b5dd7-py3.9'
|
|
10
|
+
currentCommit = '4bb05349c027096ab4785259e39b2648118b5dd7'
|
|
11
11
|
dockerRegistry = 'quay.io/ucsc_cgl'
|
|
12
12
|
dockerName = 'toil'
|
|
13
13
|
dirty = False
|
|
14
|
-
cwltool_version = '3.1.
|
|
14
|
+
cwltool_version = '3.1.20250110105449'
|
toil/wdl/utils.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
from
|
|
14
|
+
from collections.abc import Iterable
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def get_version(iterable: Iterable[str]) -> str:
|
|
@@ -22,14 +22,14 @@ def get_version(iterable: Iterable[str]) -> str:
|
|
|
22
22
|
:return: The WDL version used in the workflow.
|
|
23
23
|
"""
|
|
24
24
|
if isinstance(iterable, str):
|
|
25
|
-
iterable = iterable.split(
|
|
25
|
+
iterable = iterable.split("\n")
|
|
26
26
|
|
|
27
27
|
for line in iterable:
|
|
28
28
|
line = line.strip()
|
|
29
29
|
# check if the first non-empty, non-comment line is the version statement
|
|
30
|
-
if line and not line.startswith(
|
|
31
|
-
if line.startswith(
|
|
30
|
+
if line and not line.startswith("#"):
|
|
31
|
+
if line.startswith("version "):
|
|
32
32
|
return line[8:].strip()
|
|
33
33
|
break
|
|
34
34
|
# only draft-2 doesn't contain the version declaration
|
|
35
|
-
return
|
|
35
|
+
return "draft-2"
|