toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/utils/toilStatus.py CHANGED
@@ -15,13 +15,12 @@
15
15
  import logging
16
16
  import os
17
17
  import sys
18
- from typing import Any, Dict, List, Optional, Set
18
+ from typing import Any, Optional
19
19
 
20
20
  from toil.bus import replay_message_bus
21
- from toil.common import Config, Toil, parser_with_common_options
21
+ from toil.common import Toil, parser_with_common_options
22
22
  from toil.job import JobDescription, JobException, ServiceJobDescription
23
- from toil.jobStores.abstractJobStore import (NoSuchFileException,
24
- NoSuchJobStoreException)
23
+ from toil.jobStores.abstractJobStore import NoSuchFileException, NoSuchJobStoreException
25
24
  from toil.statsAndLogging import StatsAndLogging, set_logging_from_options
26
25
 
27
26
  logger = logging.getLogger(__name__)
@@ -30,33 +29,59 @@ logger = logging.getLogger(__name__)
30
29
  class ToilStatus:
31
30
  """Tool for reporting on job status."""
32
31
 
33
- def __init__(self, jobStoreName: str, specifiedJobs: Optional[List[str]] = None):
32
+ def __init__(self, jobStoreName: str, specifiedJobs: Optional[list[str]] = None):
34
33
  self.jobStoreName = jobStoreName
35
34
  self.jobStore = Toil.resumeJobStore(jobStoreName)
36
35
 
37
36
  if specifiedJobs is None:
38
- rootJob = self.fetchRootJob()
39
- logger.info('Traversing the job graph gathering jobs. This may take a couple of minutes.')
40
- self.jobsToReport = self.traverseJobGraph(rootJob)
37
+ try:
38
+ rootJob = self.fetchRootJob()
39
+ logger.info(
40
+ "Traversing the job graph gathering jobs. This may take a couple of minutes."
41
+ )
42
+ self.jobsToReport = self.traverseJobGraph(rootJob)
43
+ except JobException:
44
+ # Root job isn't set.
45
+ logger.warning("Workflow does not have a root job (yet? anymore?). Cannot look for jobs.")
46
+ self.jobsToReport = []
47
+
41
48
  else:
42
49
  self.jobsToReport = self.fetchUserJobs(specifiedJobs)
43
50
 
44
51
  self.message_bus_path = self.jobStore.config.write_messages
52
+
45
53
  def print_dot_chart(self) -> None:
46
54
  """Print a dot output graph representing the workflow."""
47
55
  print("digraph toil_graph {")
48
56
  print("# This graph was created from job-store: %s" % self.jobStoreName)
49
57
 
50
58
  # Make job IDs to node names map
51
- jobsToNodeNames: Dict[str, str] = dict(
52
- map(lambda job: (str(job.jobStoreID), job.jobName), self.jobsToReport)
53
- )
59
+ def id_to_name(job_id: str) -> str:
60
+ """
61
+ Change a job ID into a GraphViz node name.
62
+ """
63
+ replacements = [
64
+ ("_", "_u_"),
65
+ ("/", "_s_"),
66
+ ("-", "_d_")
67
+ ]
68
+ result = job_id
69
+ for char, replacement in replacements:
70
+ result = result.replace(char, replacement)
71
+ return result
72
+ id_strings = [str(job.jobStoreID) for job in self.jobsToReport]
73
+ jobsToNodeNames = {
74
+ s: id_to_name(s) for s in id_strings
75
+ }
54
76
 
55
77
  # Print the nodes
56
78
  for job in set(self.jobsToReport):
57
79
  print(
58
- '{} [label="{} {}"];'.format(
59
- jobsToNodeNames[str(job.jobStoreID)], job.jobName, job.jobStoreID
80
+ '{} [label="{} {}" color="{}"];'.format(
81
+ jobsToNodeNames[str(job.jobStoreID)],
82
+ job.jobName,
83
+ job.displayName,
84
+ "black" if job.has_body() else "green"
60
85
  )
61
86
  )
62
87
 
@@ -82,7 +107,11 @@ class ToilStatus:
82
107
  with job.getLogFileHandle(self.jobStore) as fH:
83
108
  # TODO: This looks intended to be machine-readable, but the format is
84
109
  # unspecified and no escaping is done. But keep these tags around.
85
- print(StatsAndLogging.formatLogStream(fH, job_name=f"LOG_FILE_OF_JOB:{job} LOG:"))
110
+ print(
111
+ StatsAndLogging.formatLogStream(
112
+ fH, stream_name=f"LOG_FILE_OF_JOB:{job} LOG:"
113
+ )
114
+ )
86
115
  else:
87
116
  print(f"LOG_FILE_OF_JOB: {job} LOG: Job has no log file")
88
117
 
@@ -94,65 +123,110 @@ class ToilStatus:
94
123
  children += "\t(CHILD_JOB:%s,PRECEDENCE:%i)" % (childJob, level)
95
124
  print(children)
96
125
 
97
- def printAggregateJobStats(self, properties: List[str], childNumber: int) -> None:
98
- """Prints a job's ID, log file, remaining tries, and other properties."""
99
- for job in self.jobsToReport:
126
+ def printAggregateJobStats(
127
+ self, properties: list[set[str]], childNumber: list[int]
128
+ ) -> None:
129
+ """
130
+ Prints each job's ID, log file, remaining tries, and other properties.
131
+
132
+ :param properties: A set of string flag names for each job in self.jobsToReport.
133
+ :param childNumber: A list of child counts for each job in self.jobsToReport.
134
+ """
135
+ for job, job_properties, job_child_number in zip(
136
+ self.jobsToReport, properties, childNumber
137
+ ):
100
138
 
101
139
  def lf(x: str) -> str:
102
- return f"{x}:{str(x in properties)}"
103
- print("\t".join(("JOB:%s" % job,
104
- "LOG_FILE:%s" % job.logJobStoreFileID,
105
- "TRYS_REMAINING:%i" % job.remainingTryCount,
106
- "CHILD_NUMBER:%s" % childNumber,
107
- lf("READY_TO_RUN"), lf("IS_ZOMBIE"),
108
- lf("HAS_SERVICES"), lf("IS_SERVICE"))))
109
-
110
- def report_on_jobs(self) -> Dict[str, Any]:
140
+ return f"{x}:{str(x in job_properties)}"
141
+
142
+ # We use a sort of not-really-machine-readable key:value TSV format here.
143
+ # But we only include important keys to help the humans, and flags
144
+ # don't have a value, just a key.
145
+ parts = [f"JOB:{job}"]
146
+ for flag in [
147
+ "COMPLETELY_FAILED",
148
+ "READY_TO_RUN",
149
+ "IS_ZOMBIE",
150
+ "HAS_SERVICES",
151
+ "IS_SERVICE",
152
+ ]:
153
+ if flag in job_properties:
154
+ parts.append(flag)
155
+ if job.logJobStoreFileID:
156
+ parts.append(f"LOG_FILE:{job.logJobStoreFileID}")
157
+ if job.remainingTryCount > 0:
158
+ parts.append(f"TRYS_REMAINING:{job.remainingTryCount}")
159
+ if job_child_number > 0:
160
+ parts.append(f"CHILD_NUMBER:{job_child_number}")
161
+
162
+ print("\t".join(parts))
163
+
164
+ def report_on_jobs(self) -> dict[str, Any]:
111
165
  """
112
166
  Gathers information about jobs such as its child jobs and status.
113
167
 
114
- :returns jobStats: Pairings of a useful category and a list of jobs which fall into it.
115
- :rtype dict:
168
+ :returns jobStats: Dict containing some lists of jobs by category, and
169
+ some lists of job properties for each job in self.jobsToReport.
116
170
  """
171
+ # These are lists of the matching jobs
117
172
  hasChildren = []
118
173
  readyToRun = []
119
174
  zombies = []
120
- hasLogFile: List[JobDescription] = []
175
+ hasLogFile: list[JobDescription] = []
121
176
  hasServices = []
122
- services: List[ServiceJobDescription] = []
123
- properties = set()
177
+ services: list[ServiceJobDescription] = []
178
+ completely_failed = []
179
+
180
+ # These are stats for jobs in self.jobsToReport
181
+ child_number: list[int] = []
182
+ properties: list[set[str]] = []
183
+
184
+ # TODO: This mix of semantics is confusing and made per-job status be
185
+ # wrong for multiple years because it was not understood. Redesign it!
124
186
 
125
187
  for job in self.jobsToReport:
188
+ job_properties: set[str] = set()
126
189
  if job.logJobStoreFileID is not None:
127
190
  hasLogFile.append(job)
128
191
 
129
- childNumber = len(list(job.allSuccessors()))
130
- if childNumber > 0: # Total number of successors > 0
192
+ job_child_number = len(list(job.allSuccessors()))
193
+ child_number.append(job_child_number)
194
+ if job_child_number > 0: # Total number of successors > 0
131
195
  hasChildren.append(job)
132
- properties.add("HAS_CHILDREN")
133
- elif job.command is not None:
134
- # Job has no children and a command to run. Indicates job could be run.
196
+ job_properties.add("HAS_CHILDREN")
197
+ elif job.has_body():
198
+ # Job has no children and a body to run. Indicates job could be run.
135
199
  readyToRun.append(job)
136
- properties.add("READY_TO_RUN")
200
+ job_properties.add("READY_TO_RUN")
137
201
  else:
138
202
  # Job has no successors and no command, so is a zombie job.
139
203
  zombies.append(job)
140
- properties.add("IS_ZOMBIE")
204
+ job_properties.add("IS_ZOMBIE")
141
205
  if job.services:
142
206
  hasServices.append(job)
143
- properties.add("HAS_SERVICES")
207
+ job_properties.add("HAS_SERVICES")
144
208
  if isinstance(job, ServiceJobDescription):
145
209
  services.append(job)
146
- properties.add("IS_SERVICE")
147
-
148
- jobStats = {'hasChildren': hasChildren,
149
- 'readyToRun': readyToRun,
150
- 'zombies': zombies,
151
- 'hasServices': hasServices,
152
- 'services': services,
153
- 'hasLogFile': hasLogFile,
154
- 'properties': properties,
155
- 'childNumber': childNumber}
210
+ job_properties.add("IS_SERVICE")
211
+ if job.remainingTryCount == 0:
212
+ # Job is out of tries (and thus completely failed)
213
+ job_properties.add("COMPLETELY_FAILED")
214
+ completely_failed.append(job)
215
+ properties.append(job_properties)
216
+
217
+ jobStats = {
218
+ # These are lists of the mathcing jobs
219
+ "hasChildren": hasChildren,
220
+ "readyToRun": readyToRun,
221
+ "zombies": zombies,
222
+ "hasServices": hasServices,
223
+ "services": services,
224
+ "hasLogFile": hasLogFile,
225
+ "completelyFailed": completely_failed,
226
+ # These are stats for jobs in self.jobsToReport
227
+ "properties": properties,
228
+ "childNumber": child_number,
229
+ }
156
230
  return jobStats
157
231
 
158
232
  @staticmethod
@@ -168,21 +242,21 @@ class ToilStatus:
168
242
  try:
169
243
  jobstore = Toil.resumeJobStore(jobStoreName)
170
244
  except NoSuchJobStoreException:
171
- return 'QUEUED'
245
+ return "QUEUED"
172
246
  except NoSuchFileException:
173
- return 'QUEUED'
247
+ return "QUEUED"
174
248
 
175
249
  try:
176
250
  pid = jobstore.read_leader_pid()
177
251
  try:
178
252
  os.kill(pid, 0) # Does not kill process when 0 is passed.
179
253
  except OSError: # Process not found, must be done.
180
- return 'COMPLETED'
254
+ return "COMPLETED"
181
255
  else:
182
- return 'RUNNING'
256
+ return "RUNNING"
183
257
  except NoSuchFileException:
184
258
  pass
185
- return 'QUEUED'
259
+ return "QUEUED"
186
260
 
187
261
  @staticmethod
188
262
  def getStatus(jobStoreName: str) -> str:
@@ -201,38 +275,45 @@ class ToilStatus:
201
275
  try:
202
276
  jobstore = Toil.resumeJobStore(jobStoreName)
203
277
  except NoSuchJobStoreException:
204
- return 'QUEUED'
278
+ return "QUEUED"
205
279
  except NoSuchFileException:
206
- return 'QUEUED'
280
+ return "QUEUED"
207
281
 
208
282
  try:
209
- with jobstore.read_shared_file_stream('succeeded.log') as successful:
283
+ with jobstore.read_shared_file_stream("succeeded.log") as successful:
210
284
  pass
211
- return 'COMPLETED'
285
+ return "COMPLETED"
212
286
  except NoSuchFileException:
213
287
  try:
214
- with jobstore.read_shared_file_stream('failed.log') as failed:
288
+ with jobstore.read_shared_file_stream("failed.log") as failed:
215
289
  pass
216
- return 'ERROR'
290
+ return "ERROR"
217
291
  except NoSuchFileException:
218
292
  pass
219
- return 'RUNNING'
293
+ return "RUNNING"
220
294
 
221
- def print_bus_messages(self) -> None:
295
+ def print_running_jobs(self) -> None:
222
296
  """
223
- Goes through bus messages, returns a list of tuples which have correspondence between
224
- PID on assigned batch system and
225
-
226
297
  Prints a list of the currently running jobs
227
298
  """
228
299
 
229
300
  print("\nMessage bus path: ", self.message_bus_path)
230
301
  if self.message_bus_path is not None:
231
302
  if os.path.exists(self.message_bus_path):
232
- replayed_messages = replay_message_bus(self.message_bus_path)
233
- for key in replayed_messages:
234
- if replayed_messages[key].exit_code != 0:
235
- print(replayed_messages[key])
303
+ all_job_statuses = replay_message_bus(self.message_bus_path)
304
+
305
+ for job_status in all_job_statuses.values():
306
+ if job_status.is_running():
307
+ status_line = [
308
+ f"Job ID {job_status.job_store_id} with name {job_status.name} is running"
309
+ ]
310
+ if job_status.batch_system != "":
311
+ # batch system exists
312
+ status_line.append(
313
+ f" on {job_status.batch_system} as ID {job_status.external_batch_id}"
314
+ )
315
+ status_line.append(".")
316
+ print("".join(status_line))
236
317
  else:
237
318
  print("Message bus file is missing!")
238
319
 
@@ -251,11 +332,14 @@ class ToilStatus:
251
332
  """
252
333
  try:
253
334
  return self.jobStore.load_root_job()
254
- except JobException:
255
- print('Root job is absent. The workflow has may have completed successfully.', file=sys.stderr)
335
+ except JobException as e:
336
+ logger.info(e)
337
+ print(
338
+ "Root job is absent. The workflow has may have completed successfully."
339
+ )
256
340
  raise
257
341
 
258
- def fetchUserJobs(self, jobs: List[str]) -> List[JobDescription]:
342
+ def fetchUserJobs(self, jobs: list[str]) -> list[JobDescription]:
259
343
  """
260
344
  Takes a user input array of jobs, verifies that they are in the jobStore
261
345
  and returns the array of jobsToReport.
@@ -268,16 +352,16 @@ class ToilStatus:
268
352
  try:
269
353
  jobsToReport.append(self.jobStore.load_job(jobID))
270
354
  except JobException:
271
- print('The job %s could not be found.' % jobID, file=sys.stderr)
355
+ print("The job %s could not be found." % jobID, file=sys.stderr)
272
356
  raise
273
357
  return jobsToReport
274
358
 
275
359
  def traverseJobGraph(
276
360
  self,
277
361
  rootJob: JobDescription,
278
- jobsToReport: Optional[List[JobDescription]] = None,
279
- foundJobStoreIDs: Optional[Set[str]] = None,
280
- ) -> List[JobDescription]:
362
+ jobsToReport: Optional[list[JobDescription]] = None,
363
+ foundJobStoreIDs: Optional[set[str]] = None,
364
+ ) -> list[JobDescription]:
281
365
  """
282
366
  Find all current jobs in the jobStore and return them as an Array.
283
367
 
@@ -300,15 +384,24 @@ class ToilStatus:
300
384
  jobsToReport.append(rootJob)
301
385
  # Traverse jobs in stack
302
386
  for successorJobStoreID in rootJob.allSuccessors():
303
- if successorJobStoreID not in foundJobStoreIDs and self.jobStore.job_exists(successorJobStoreID):
304
- self.traverseJobGraph(self.jobStore.load_job(successorJobStoreID), jobsToReport, foundJobStoreIDs)
387
+ if (
388
+ successorJobStoreID not in foundJobStoreIDs
389
+ and self.jobStore.job_exists(successorJobStoreID)
390
+ ):
391
+ self.traverseJobGraph(
392
+ self.jobStore.load_job(successorJobStoreID),
393
+ jobsToReport,
394
+ foundJobStoreIDs,
395
+ )
305
396
 
306
397
  # Traverse service jobs
307
398
  for jobs in rootJob.services:
308
399
  for serviceJobStoreID in jobs:
309
400
  if self.jobStore.job_exists(serviceJobStoreID):
310
401
  if serviceJobStoreID in foundJobStoreIDs:
311
- raise RuntimeError('Service job was unexpectedly found while traversing ')
402
+ raise RuntimeError(
403
+ "Service job was unexpectedly found while traversing "
404
+ )
312
405
  foundJobStoreIDs.add(serviceJobStoreID)
313
406
  jobsToReport.append(self.jobStore.load_job(serviceJobStoreID))
314
407
 
@@ -318,37 +411,81 @@ class ToilStatus:
318
411
  def main() -> None:
319
412
  """Reports the state of a Toil workflow."""
320
413
  parser = parser_with_common_options(prog="toil status")
321
- parser.add_argument("--failIfNotComplete", action="store_true",
322
- help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
323
- default=False)
324
-
325
- parser.add_argument("--noAggStats", dest="stats", action="store_false",
326
- help="Do not print overall, aggregate status of workflow.",
327
- default=True)
328
-
329
- parser.add_argument("--printDot", action="store_true",
330
- help="Print dot formatted description of the graph. If using --jobs will "
331
- "restrict to subgraph including only those jobs. default=%(default)s",
332
- default=False)
333
-
334
- parser.add_argument("--jobs", nargs='+',
335
- help="Restrict reporting to the following jobs (allows subsetting of the report).",
336
- default=None)
414
+ parser.add_argument(
415
+ "--failIfNotComplete",
416
+ action="store_true",
417
+ help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
418
+ default=False,
419
+ )
420
+
421
+ parser.add_argument(
422
+ "--noAggStats",
423
+ dest="stats",
424
+ action="store_false",
425
+ help="Do not print overall, aggregate status of workflow.",
426
+ default=True,
427
+ )
428
+
429
+ parser.add_argument(
430
+ "--dot",
431
+ "--printDot",
432
+ dest="print_dot",
433
+ action="store_true",
434
+ help="Print dot formatted description of the graph. If using --jobs will "
435
+ "restrict to subgraph including only those jobs. default=%(default)s",
436
+ default=False,
437
+ )
438
+
439
+ parser.add_argument(
440
+ "--jobs",
441
+ nargs="+",
442
+ help="Restrict reporting to the following jobs (allows subsetting of the report).",
443
+ default=None,
444
+ )
445
+
446
+ parser.add_argument(
447
+ "--perJob",
448
+ "--printPerJobStats",
449
+ dest="print_per_job_stats",
450
+ action="store_true",
451
+ help="Print info about each job. default=%(default)s",
452
+ default=False,
453
+ )
454
+
455
+ parser.add_argument(
456
+ "--logs",
457
+ "--printLogs",
458
+ dest="print_logs",
459
+ action="store_true",
460
+ help="Print the log files of jobs (if they exist). default=%(default)s",
461
+ default=False,
462
+ )
463
+
464
+ parser.add_argument(
465
+ "--children",
466
+ "--printChildren",
467
+ dest="print_children",
468
+ action="store_true",
469
+ help="Print children of each job. default=%(default)s",
470
+ default=False,
471
+ )
472
+
473
+ parser.add_argument(
474
+ "--status",
475
+ "--printStatus",
476
+ dest="print_status",
477
+ action="store_true",
478
+ help="Determine which jobs are currently running and the associated batch system ID, if any",
479
+ )
480
+
481
+ parser.add_argument(
482
+ "--failed",
483
+ "--printFailed",
484
+ dest="print_failed",
485
+ action="store_true",
486
+ help="List jobs which seem to have failed to run",
487
+ )
337
488
 
338
- parser.add_argument("--printPerJobStats", action="store_true",
339
- help="Print info about each job. default=%(default)s",
340
- default=False)
341
-
342
- parser.add_argument("--printLogs", action="store_true",
343
- help="Print the log files of jobs (if they exist). default=%(default)s",
344
- default=False)
345
-
346
- parser.add_argument("--printChildren", action="store_true",
347
- help="Print children of each job. default=%(default)s",
348
- default=False)
349
-
350
- parser.add_argument("--printStatus", action="store_true",
351
- help="Determine which jobs are currently running and the associated batch system ID")
352
489
  options = parser.parse_args()
353
490
  set_logging_from_options(options)
354
491
 
@@ -356,13 +493,10 @@ def main() -> None:
356
493
  parser.print_help()
357
494
  sys.exit(0)
358
495
 
359
- config = Config()
360
- config.setOptions(options)
361
-
362
496
  try:
363
- status = ToilStatus(config.jobStore, options.jobs)
497
+ status = ToilStatus(options.jobStore, options.jobs)
364
498
  except NoSuchJobStoreException:
365
- print('No job store found.')
499
+ print(f"The job store {options.jobStore} was not found.")
366
500
  return
367
501
  except JobException: # Workflow likely complete, user informed in ToilStatus()
368
502
  return
@@ -370,36 +504,55 @@ def main() -> None:
370
504
  jobStats = status.report_on_jobs()
371
505
 
372
506
  # Info to be reported.
373
- hasChildren = jobStats['hasChildren']
374
- readyToRun = jobStats['readyToRun']
375
- zombies = jobStats['zombies']
376
- hasServices = jobStats['hasServices']
377
- services = jobStats['services']
378
- hasLogFile = jobStats['hasLogFile']
379
- properties = jobStats['properties']
380
- childNumber = jobStats['childNumber']
381
-
382
- if options.printPerJobStats:
507
+ # These are lists of matching jobs.
508
+ hasChildren = jobStats["hasChildren"]
509
+ readyToRun = jobStats["readyToRun"]
510
+ zombies = jobStats["zombies"]
511
+ hasServices = jobStats["hasServices"]
512
+ services = jobStats["services"]
513
+ hasLogFile = jobStats["hasLogFile"]
514
+ completely_failed = jobStats["completelyFailed"]
515
+ # These are results for corresponding jobs in status.jobsToReport
516
+ properties = jobStats["properties"]
517
+ childNumber = jobStats["childNumber"]
518
+
519
+ if options.print_per_job_stats:
383
520
  status.printAggregateJobStats(properties, childNumber)
384
- if options.printLogs:
521
+ if options.print_logs:
385
522
  status.printJobLog()
386
- if options.printChildren:
523
+ if options.print_children:
387
524
  status.printJobChildren()
388
- if options.printDot:
525
+ if options.print_dot:
389
526
  status.print_dot_chart()
527
+ if options.print_failed:
528
+ print("Failed jobs:")
529
+ for job in completely_failed:
530
+ print(job)
390
531
  if options.stats:
391
- print('Of the %i jobs considered, '
392
- 'there are %i jobs with children, '
393
- '%i jobs ready to run, '
394
- '%i zombie jobs, '
395
- '%i jobs with services, '
396
- '%i services, '
397
- 'and %i jobs with log files currently in %s.' %
398
- (len(status.jobsToReport), len(hasChildren), len(readyToRun), len(zombies),
399
- len(hasServices), len(services), len(hasLogFile), status.jobStore))
400
- if options.printStatus:
401
- status.print_bus_messages()
532
+ print(
533
+ "Of the %i jobs considered, "
534
+ "there are "
535
+ "%i completely failed jobs, "
536
+ "%i jobs with children, "
537
+ "%i jobs ready to run, "
538
+ "%i zombie jobs, "
539
+ "%i jobs with services, "
540
+ "%i services, "
541
+ "and %i jobs with log files currently in %s."
542
+ % (
543
+ len(status.jobsToReport),
544
+ len(completely_failed),
545
+ len(hasChildren),
546
+ len(readyToRun),
547
+ len(zombies),
548
+ len(hasServices),
549
+ len(services),
550
+ len(hasLogFile),
551
+ status.jobStore,
552
+ )
553
+ )
554
+ if options.print_status:
555
+ status.print_running_jobs()
402
556
  if len(status.jobsToReport) > 0 and options.failIfNotComplete:
403
557
  # Upon workflow completion, all jobs will have been removed from job store
404
558
  exit(1)
405
-
@@ -31,7 +31,9 @@ def internet_connection() -> bool:
31
31
 
32
32
  def main() -> None:
33
33
  if not internet_connection():
34
- raise RuntimeError('No internet. Updating the EC2 Instance list requires internet.')
34
+ raise RuntimeError(
35
+ "No internet. Updating the EC2 Instance list requires internet."
36
+ )
35
37
  updateStaticEC2Instances()
36
38
 
37
39
 
toil/version.py CHANGED
@@ -1,14 +1,14 @@
1
- baseVersion = '6.1.0a1'
1
+ baseVersion = '8.0.0'
2
2
  cgcloudVersion = '1.6.0a1.dev393'
3
- version = '6.1.0a1-04b966f2417ebf2752e6f216e7fbda3ce20b4a37-dirty'
4
- cacheTag = 'cache-local-py3.11'
5
- mainCacheTag = 'cache-master-py3.11'
6
- distVersion = '6.1.0a1'
7
- exactPython = 'python3.11'
8
- python = 'python3.11'
9
- dockerTag = '6.1.0a1-04b966f2417ebf2752e6f216e7fbda3ce20b4a37-dirty-py3.11'
10
- currentCommit = '04b966f2417ebf2752e6f216e7fbda3ce20b4a37'
11
- dockerRegistry = 'quay.io/ucsc_cgl'
3
+ version = '8.0.0-d2ae0ea9ab49f238670dbf6aafd20de7afdd8514'
4
+ cacheTag = 'cache-local-py3.13'
5
+ mainCacheTag = 'cache-master-py3.13'
6
+ distVersion = '8.0.0'
7
+ exactPython = 'python3.13'
8
+ python = 'python3.13'
9
+ dockerTag = '8.0.0-d2ae0ea9ab49f238670dbf6aafd20de7afdd8514-py3.13'
10
+ currentCommit = 'd2ae0ea9ab49f238670dbf6aafd20de7afdd8514'
11
+ dockerRegistry = 'quay.io/stxue'
12
12
  dockerName = 'toil'
13
- dirty = True
14
- cwltool_version = '3.1.20240112164112'
13
+ dirty = False
14
+ cwltool_version = '3.1.20250110105449'