toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/bus.py CHANGED
@@ -20,7 +20,7 @@ functions to "handle" different things happening. Over time, it has become very
20
20
  brittle: exactly the right handling functions need to be called in exactly the
21
21
  right order, or it gets confused and does the wrong thing.
22
22
 
23
- The MessageBus is meant to let the leader avoid this by more losely coupling
23
+ The MessageBus is meant to let the leader avoid this by more loosely coupling
24
24
  its components together, by having them communicate by sending messages instead
25
25
  of by calling functions.
26
26
 
@@ -67,33 +67,66 @@ import os
67
67
  import queue
68
68
  import tempfile
69
69
  import threading
70
+ from collections.abc import Iterator
70
71
  from dataclasses import dataclass
71
- from typing import (IO,
72
- Any,
73
- Callable,
74
- Dict,
75
- Iterator,
76
- List,
77
- NamedTuple,
78
- Optional,
79
- Type,
80
- TypeVar,
81
- cast)
72
+ from typing import IO, Any, Callable, NamedTuple, Optional, TypeVar, cast
82
73
 
83
74
  from pubsub.core import Publisher
84
75
  from pubsub.core.listener import Listener
85
76
  from pubsub.core.topicobj import Topic
86
77
  from pubsub.core.topicutils import ALL_TOPICS
87
78
 
88
- logger = logging.getLogger( __name__ )
79
+ logger = logging.getLogger(__name__)
80
+
81
+ # We define some ways to talk about jobs.
82
+
83
+
84
+ class Names(NamedTuple):
85
+ """
86
+ Stores all the kinds of name a job can have.
87
+ """
88
+
89
+ # Name of the kind of job this is
90
+ job_name: str
91
+ # Name of this particular work unit
92
+ unit_name: str
93
+ # Human-readable name for the job
94
+ display_name: str
95
+ # What the job prints as, used for stats-and-logging log management
96
+ stats_name: str
97
+ # Job store ID of the job for the work unit
98
+ job_store_id: str
99
+
100
+
101
+ def get_job_kind(names: Names) -> str:
102
+ """
103
+ Return an identifying string for the job.
104
+
105
+ The result may contain spaces.
106
+
107
+ Returns: Either the unit name, job name, or display name, which identifies
108
+ the kind of job it is to toil.
109
+ Otherwise "Unknown Job" in case no identifier is available
110
+ """
111
+ if names.unit_name:
112
+ return names.unit_name
113
+ elif names.job_name:
114
+ return names.job_name
115
+ elif names.display_name:
116
+ return names.display_name
117
+ else:
118
+ return "Unknown Job"
119
+
89
120
 
90
121
  # We define a bunch of named tuple message types.
91
122
  # These all need to be plain data: only hold ints, strings, etc.
92
123
 
124
+
93
125
  class JobIssuedMessage(NamedTuple):
94
126
  """
95
127
  Produced when a job is issued to run on the batch system.
96
128
  """
129
+
97
130
  # The kind of job issued, for statistics aggregation
98
131
  job_type: str
99
132
  # The job store ID of the job
@@ -101,20 +134,24 @@ class JobIssuedMessage(NamedTuple):
101
134
  # The toil batch ID of the job
102
135
  toil_batch_id: int
103
136
 
137
+
104
138
  class JobUpdatedMessage(NamedTuple):
105
139
  """
106
140
  Produced when a job is "updated" and ready to have something happen to it.
107
141
  """
142
+
108
143
  # The job store ID of the job
109
144
  job_id: str
110
145
  # The error code/return code for the job, which is nonzero if something has
111
146
  # gone wrong, and 0 otherwise.
112
147
  result_status: int
113
148
 
149
+
114
150
  class JobCompletedMessage(NamedTuple):
115
151
  """
116
152
  Produced when a job is completed, whether successful or not.
117
153
  """
154
+
118
155
  # The kind of job issued, for statistics aggregation
119
156
  job_type: str
120
157
  # The job store ID of the job
@@ -122,27 +159,33 @@ class JobCompletedMessage(NamedTuple):
122
159
  # Exit code for job_id
123
160
  exit_code: int
124
161
 
162
+
125
163
  class JobFailedMessage(NamedTuple):
126
164
  """
127
165
  Produced when a job is completely failed, and will not be retried again.
128
166
  """
167
+
129
168
  # The kind of job issued, for statistics aggregation
130
169
  job_type: str
131
170
  # The job store ID of the job
132
171
  job_id: str
133
172
 
173
+
134
174
  class JobMissingMessage(NamedTuple):
135
175
  """
136
176
  Produced when a job goes missing and should be in the batch system but isn't.
137
177
  """
178
+
138
179
  # The job store ID of the job
139
180
  job_id: str
140
181
 
182
+
141
183
  class JobAnnotationMessage(NamedTuple):
142
184
  """
143
185
  Produced when extra information (such as an AWS Batch job ID from the
144
186
  AWSBatchBatchSystem) is available that goes with a job.
145
187
  """
188
+
146
189
  # The job store ID of the job
147
190
  job_id: str
148
191
  # The name of the annotation
@@ -150,50 +193,60 @@ class JobAnnotationMessage(NamedTuple):
150
193
  # The annotation data
151
194
  annotation_value: str
152
195
 
196
+
153
197
  class ExternalBatchIdMessage(NamedTuple):
154
198
  """
155
199
  Produced when using a batch system, links toil assigned batch ID to
156
200
  Batch system ID (Whatever's returned by local implementation, PID, batch ID, etc)
157
201
  """
158
- #Assigned toil batch job id
202
+
203
+ # Assigned toil batch job id
159
204
  toil_batch_id: int
160
- #Batch system scheduler identity
205
+ # Batch system scheduler identity
161
206
  external_batch_id: str
162
- #Batch system name
207
+ # Batch system name
163
208
  batch_system: str
164
209
 
210
+
165
211
  class QueueSizeMessage(NamedTuple):
166
212
  """
167
213
  Produced to describe the size of the queue of jobs issued but not yet
168
214
  completed. Theoretically recoverable from other messages.
169
215
  """
216
+
170
217
  # The size of the queue
171
218
  queue_size: int
172
219
 
220
+
173
221
  class ClusterSizeMessage(NamedTuple):
174
222
  """
175
223
  Produced by the Toil-integrated autoscaler describe the number of
176
224
  instances of a certain type in a cluster.
177
225
  """
226
+
178
227
  # The instance type name, like t4g.medium
179
228
  instance_type: str
180
229
  # The number of instances of that type that the Toil autoscaler thinks
181
230
  # there are
182
231
  current_size: int
183
232
 
233
+
184
234
  class ClusterDesiredSizeMessage(NamedTuple):
185
235
  """
186
236
  Produced by the Toil-integrated autoscaler to describe the number of
187
237
  instances of a certain type that it thinks will be needed.
188
238
  """
239
+
189
240
  # The instance type name, like t4g.medium
190
241
  instance_type: str
191
242
  # The number of instances of that type that the Toil autoscaler wants there
192
243
  # to be
193
244
  desired_size: int
194
245
 
246
+
195
247
  # Then we define a serialization format.
196
248
 
249
+
197
250
  def message_to_bytes(message: NamedTuple) -> bytes:
198
251
  """
199
252
  Convert a plain-old-data named tuple into a byte string.
@@ -203,32 +256,39 @@ def message_to_bytes(message: NamedTuple) -> bytes:
203
256
  if isinstance(item, (int, float, bool)) or item is None:
204
257
  # This also handles e.g. values from an IntEnum, where the type extends int.
205
258
  # They might replace __str__() but we hope they use a compatible __format__()
206
- parts.append(f"{item}".encode('utf-8'))
259
+ parts.append(f"{item}".encode())
207
260
  elif isinstance(item, str):
208
- parts.append(item.encode('unicode_escape'))
261
+ parts.append(item.encode("unicode_escape"))
209
262
  else:
210
263
  # We haven't implemented this type yet.
211
- raise RuntimeError(f"Cannot store message argument of type {type(item)}: {item}")
212
- return b'\t'.join(parts)
264
+ raise RuntimeError(
265
+ f"Cannot store message argument of type {type(item)}: {item}"
266
+ )
267
+ return b"\t".join(parts)
213
268
 
214
269
 
215
270
  # TODO: Messages have to be named tuple types.
216
- MessageType = TypeVar('MessageType')
217
- def bytes_to_message(message_type: Type[MessageType], data: bytes) -> MessageType:
271
+ MessageType = TypeVar("MessageType")
272
+
273
+
274
+ def bytes_to_message(message_type: type[MessageType], data: bytes) -> MessageType:
218
275
  """
219
276
  Convert bytes from message_to_bytes back to a message of the given type.
220
277
  """
221
- parts = data.split(b'\t')
278
+ parts = data.split(b"\t")
222
279
 
223
280
  # Get a mapping from field name to type in the named tuple.
224
281
  # We need to check a couple different fields because this moved in a recent
225
282
  # Python 3 release.
226
- field_to_type: Optional[Dict[str, type]] = cast(Optional[Dict[str, type]],
227
- getattr(message_type, '__annotations__',
228
- getattr(message_type, '_field_types', None)))
283
+ field_to_type: Optional[dict[str, type]] = cast(
284
+ Optional[dict[str, type]],
285
+ getattr(
286
+ message_type, "__annotations__", getattr(message_type, "_field_types", None)
287
+ ),
288
+ )
229
289
  if field_to_type is None:
230
290
  raise RuntimeError(f"Cannot get field types from {message_type}")
231
- field_names: List[str] = getattr(message_type, '_fields')
291
+ field_names: list[str] = getattr(message_type, "_fields")
232
292
 
233
293
  if len(field_names) != len(parts):
234
294
  raise RuntimeError(f"Cannot parse {field_names} from {parts}")
@@ -239,10 +299,10 @@ def bytes_to_message(message_type: Type[MessageType], data: bytes) -> MessageTyp
239
299
  for name, part in zip(field_names, parts):
240
300
  field_type = field_to_type[name]
241
301
  if field_type in [int, float, bool]:
242
- typed_parts.append(field_type(part.decode('utf-8')))
302
+ typed_parts.append(field_type(part.decode("utf-8")))
243
303
  elif field_type == str:
244
304
  # Decode, accounting for escape sequences
245
- typed_parts.append(part.decode('unicode_escape'))
305
+ typed_parts.append(part.decode("unicode_escape"))
246
306
  else:
247
307
  raise RuntimeError(f"Cannot read message argument of type {field_type}")
248
308
 
@@ -250,8 +310,6 @@ def bytes_to_message(message_type: Type[MessageType], data: bytes) -> MessageTyp
250
310
  return message_type(*typed_parts)
251
311
 
252
312
 
253
-
254
-
255
313
  class MessageBus:
256
314
  """
257
315
  Holds messages that should cause jobs to change their scheduling states.
@@ -280,7 +338,7 @@ class MessageBus:
280
338
  characters, hierarchically dotted).
281
339
  """
282
340
 
283
- return '.'.join([message_type.__module__, message_type.__name__])
341
+ return ".".join([message_type.__module__, message_type.__name__])
284
342
 
285
343
  # All our messages are NamedTuples, but NamedTuples don't actually inherit
286
344
  # from NamedTupe, so MyPy complains if we require that here.
@@ -323,13 +381,16 @@ class MessageBus:
323
381
  Runs only in the owning thread. Delivers a message to its listeners.
324
382
  """
325
383
  topic = self._type_to_name(type(message))
326
- logger.debug('Notifying %s with message: %s', topic, message)
384
+ logger.debug("Notifying %s with message: %s", topic, message)
327
385
  self._pubsub.sendMessage(topic, message=message)
328
386
 
329
387
  # This next function takes callables that take things of the type that was passed in as a
330
388
  # runtime argument, which we can explain to MyPy using a TypeVar and Type[]
331
- MessageType = TypeVar('MessageType', bound='NamedTuple')
332
- def subscribe(self, message_type: Type[MessageType], handler: Callable[[MessageType], Any]) -> Listener:
389
+ MessageType = TypeVar("MessageType", bound="NamedTuple")
390
+
391
+ def subscribe(
392
+ self, message_type: type[MessageType], handler: Callable[[MessageType], Any]
393
+ ) -> Listener:
333
394
  """
334
395
  Register the given callable to be called when messages of the given type are sent.
335
396
  It will be called with messages sent after the subscription is created.
@@ -337,7 +398,7 @@ class MessageBus:
337
398
  """
338
399
 
339
400
  topic = self._type_to_name(message_type)
340
- logger.debug('Listening for message topic: %s', topic)
401
+ logger.debug("Listening for message topic: %s", topic)
341
402
 
342
403
  # Make sure to wrap the handler so we get the right argument name and
343
404
  # we can control lifetime.
@@ -350,10 +411,10 @@ class MessageBus:
350
411
  # Hide the handler function in the pubsub listener to keep it alive.
351
412
  # If it goes out of scope the subscription expires, and the pubsub
352
413
  # system only uses weak references.
353
- setattr(listener, 'handler_wrapper', handler_wraper)
414
+ setattr(listener, "handler_wrapper", handler_wraper)
354
415
  return listener
355
416
 
356
- def connect(self, wanted_types: List[type]) -> 'MessageBusConnection':
417
+ def connect(self, wanted_types: list[type]) -> "MessageBusConnection":
357
418
  """
358
419
  Get a connection object that serves as an inbox for messages of the
359
420
  given types.
@@ -365,7 +426,7 @@ class MessageBus:
365
426
  connection._set_bus_and_message_types(self, wanted_types)
366
427
  return connection
367
428
 
368
- def outbox(self) -> 'MessageOutbox':
429
+ def outbox(self) -> "MessageOutbox":
369
430
  """
370
431
  Get a connection object that only allows sending messages.
371
432
  """
@@ -383,24 +444,27 @@ class MessageBus:
383
444
  somewhere or delete it.
384
445
  """
385
446
 
386
-
387
- stream = open(file_path, 'wb')
447
+ stream = open(file_path, "wb")
388
448
 
389
449
  # Type of the ** is the value type of the dictionary; key type is always string.
390
- def handler(topic_object: Topic = Listener.AUTO_TOPIC, **message_data: NamedTuple) -> None:
450
+ def handler(
451
+ topic_object: Topic = Listener.AUTO_TOPIC, **message_data: NamedTuple
452
+ ) -> None:
391
453
  """
392
454
  Log the message in the given message data, associated with the
393
455
  given topic.
394
456
  """
395
457
  # There should always be a "message"
396
- if len(message_data) != 1 or 'message' not in message_data:
397
- raise RuntimeError("Cannot log the bus message. The message is either empty/malformed or there are too many messages provided.")
398
- message = message_data['message']
458
+ if len(message_data) != 1 or "message" not in message_data:
459
+ raise RuntimeError(
460
+ "Cannot log the bus message. The message is either empty/malformed or there are too many messages provided."
461
+ )
462
+ message = message_data["message"]
399
463
  topic = topic_object.getName()
400
- stream.write(topic.encode('utf-8'))
401
- stream.write(b'\t')
464
+ stream.write(topic.encode("utf-8"))
465
+ stream.write(b"\t")
402
466
  stream.write(message_to_bytes(message))
403
- stream.write(b'\n')
467
+ stream.write(b"\n")
404
468
  stream.flush()
405
469
 
406
470
  listener, _ = self._pubsub.subscribe(handler, ALL_TOPICS)
@@ -409,7 +473,6 @@ class MessageBus:
409
473
  # want the pypubsub Listener.
410
474
  return (handler, listener)
411
475
 
412
-
413
476
  # TODO: If we annotate this as returning an Iterator[NamedTuple], MyPy
414
477
  # complains when we loop over it that the loop variable is a <nothing>,
415
478
  # ifen in code protected by isinstance(). Using a typevar makes it complain
@@ -419,7 +482,9 @@ class MessageBus:
419
482
  # union of the types passed in message_types, in a way that MyPy can
420
483
  # understand.
421
484
  @classmethod
422
- def scan_bus_messages(cls, stream: IO[bytes], message_types: List[Type[NamedTuple]]) -> Iterator[Any]:
485
+ def scan_bus_messages(
486
+ cls, stream: IO[bytes], message_types: list[type[NamedTuple]]
487
+ ) -> Iterator[Any]:
423
488
  """
424
489
  Get an iterator over all messages in the given log stream of the given
425
490
  types, in order. Discard any trailing partial messages.
@@ -429,15 +494,15 @@ class MessageBus:
429
494
  name_to_type = {cls._type_to_name(t): t for t in message_types}
430
495
 
431
496
  for line in stream:
432
- logger.debug('Got message: %s', line)
433
- if not line.endswith(b'\n'):
497
+ logger.debug("Got message: %s", line)
498
+ if not line.endswith(b"\n"):
434
499
  # Skip unterminated line
435
500
  continue
436
501
  # Drop the newline and split on first tab
437
- parts = line[:-1].split(b'\t', 1)
502
+ parts = line[:-1].split(b"\t", 1)
438
503
 
439
504
  # Get the type of the message
440
- message_type = name_to_type.get(parts[0].decode('utf-8'))
505
+ message_type = name_to_type.get(parts[0].decode("utf-8"))
441
506
  if message_type is None:
442
507
  # We aren't interested in this kind of message.
443
508
  continue
@@ -448,6 +513,7 @@ class MessageBus:
448
513
  # And produce it
449
514
  yield message
450
515
 
516
+
451
517
  class MessageBusClient:
452
518
  """
453
519
  Base class for clients (inboxes and outboxes) of a message bus. Handles
@@ -470,6 +536,7 @@ class MessageBusClient:
470
536
  """
471
537
  self._bus = bus
472
538
 
539
+
473
540
  class MessageInbox(MessageBusClient):
474
541
  """
475
542
  A buffered connection to a message bus that lets us receive messages.
@@ -485,16 +552,19 @@ class MessageInbox(MessageBusClient):
485
552
  super().__init__()
486
553
 
487
554
  # This holds all the messages on the bus, organized by type.
488
- self._messages_by_type: Dict[type, List[Any]] = {}
555
+ self._messages_by_type: dict[type, list[Any]] = {}
489
556
  # This holds listeners for all the types, when we connect to a bus
490
- self._listeners_by_type: Dict[type, Listener] = {}
557
+ self._listeners_by_type: dict[type, Listener] = {}
491
558
 
492
559
  # We define a handler for messages
493
560
  def on_message(message: Any) -> None:
494
561
  self._messages_by_type[type(message)].append(message)
562
+
495
563
  self._handler = on_message
496
564
 
497
- def _set_bus_and_message_types(self, bus: MessageBus, wanted_types: List[type]) -> None:
565
+ def _set_bus_and_message_types(
566
+ self, bus: MessageBus, wanted_types: list[type]
567
+ ) -> None:
498
568
  """
499
569
  Connect to the given bus and collect the given message types.
500
570
 
@@ -539,8 +609,9 @@ class MessageInbox(MessageBusClient):
539
609
 
540
610
  # This next function returns things of the type that was passed in as a
541
611
  # runtime argument, which we can explain to MyPy using a TypeVar and Type[]
542
- MessageType = TypeVar('MessageType')
543
- def for_each(self, message_type: Type[MessageType]) -> Iterator[MessageType]:
612
+ MessageType = TypeVar("MessageType")
613
+
614
+ def for_each(self, message_type: type[MessageType]) -> Iterator[MessageType]:
544
615
  """
545
616
  Loop over all messages currently pending of the given type. Each that
546
617
  is handled without raising an exception will be removed.
@@ -570,7 +641,9 @@ class MessageInbox(MessageBusClient):
570
641
  try:
571
642
  # Emit the message
572
643
  if not isinstance(message, message_type):
573
- raise RuntimeError(f"Unacceptable message type {type(message)} in list for type {message_type}")
644
+ raise RuntimeError(
645
+ f"Unacceptable message type {type(message)} in list for type {message_type}"
646
+ )
574
647
  yield message
575
648
  # If we get here it was handled without error.
576
649
  handled = True
@@ -585,7 +658,10 @@ class MessageInbox(MessageBusClient):
585
658
  # Dump anything remaining in our buffer back into the main buffer,
586
659
  # in the right order, and before the later messages.
587
660
  message_list.reverse()
588
- self._messages_by_type[message_type] = message_list + self._messages_by_type[message_type]
661
+ self._messages_by_type[message_type] = (
662
+ message_list + self._messages_by_type[message_type]
663
+ )
664
+
589
665
 
590
666
  class MessageOutbox(MessageBusClient):
591
667
  """
@@ -608,6 +684,7 @@ class MessageOutbox(MessageBusClient):
608
684
  raise RuntimeError("Cannot send message when not connected to a bus")
609
685
  self._bus.publish(message)
610
686
 
687
+
611
688
  class MessageBusConnection(MessageInbox, MessageOutbox):
612
689
  """
613
690
  A two-way connection to a message bus. Buffers incoming messages until you
@@ -620,7 +697,9 @@ class MessageBusConnection(MessageInbox, MessageOutbox):
620
697
  """
621
698
  super().__init__()
622
699
 
623
- def _set_bus_and_message_types(self, bus: MessageBus, wanted_types: List[type]) -> None:
700
+ def _set_bus_and_message_types(
701
+ self, bus: MessageBus, wanted_types: list[type]
702
+ ) -> None:
624
703
  """
625
704
  Connect to the given bus and collect the given message types.
626
705
 
@@ -636,19 +715,28 @@ class MessageBusConnection(MessageInbox, MessageOutbox):
636
715
  class JobStatus:
637
716
  """
638
717
  Records the status of a job.
718
+
719
+ When exit_code is -1, this means the job is either not observed or currently running.
639
720
  """
640
721
 
641
722
  job_store_id: str
642
723
  name: str
643
724
  exit_code: int
644
- annotations: Dict[str, str]
725
+ annotations: dict[str, str]
645
726
  toil_batch_id: int
646
727
  external_batch_id: str
647
728
  batch_system: str
648
729
 
649
730
  def __repr__(self) -> str:
650
- return json.dumps(self, default= lambda o: o.__dict__, indent=4)
651
- def replay_message_bus(path: str) -> Dict[str, JobStatus]:
731
+ return json.dumps(self, default=lambda o: o.__dict__, indent=4)
732
+
733
+ def is_running(self) -> bool:
734
+ return (
735
+ self.exit_code < 0 and self.job_store_id != ""
736
+ ) # if the exit code is -1 and the job id is specified, we assume the job is running
737
+
738
+
739
+ def replay_message_bus(path: str) -> dict[str, JobStatus]:
652
740
  """
653
741
  Replay all the messages and work out what they mean for jobs.
654
742
 
@@ -664,15 +752,26 @@ def replay_message_bus(path: str) -> Dict[str, JobStatus]:
664
752
  is running.
665
753
  """
666
754
 
667
- job_statuses: Dict[str, JobStatus] = collections.defaultdict(lambda: JobStatus('', '', -1, {}, -1, '', ''))
755
+ job_statuses: dict[str, JobStatus] = collections.defaultdict(
756
+ lambda: JobStatus("", "", -1, {}, -1, "", "")
757
+ )
668
758
  batch_to_job_id = {}
669
759
  try:
670
- with open(path, 'rb') as log_stream:
760
+ with open(path, "rb") as log_stream:
671
761
  # Read all the full, properly-terminated messages about job updates
672
- for event in MessageBus.scan_bus_messages(log_stream, [JobUpdatedMessage, JobIssuedMessage, JobCompletedMessage,
673
- JobFailedMessage, JobAnnotationMessage, ExternalBatchIdMessage]):
762
+ for event in MessageBus.scan_bus_messages(
763
+ log_stream,
764
+ [
765
+ JobUpdatedMessage,
766
+ JobIssuedMessage,
767
+ JobCompletedMessage,
768
+ JobFailedMessage,
769
+ JobAnnotationMessage,
770
+ ExternalBatchIdMessage,
771
+ ],
772
+ ):
674
773
  # And for each of them
675
- logger.info('Got message from workflow: %s', event)
774
+ logger.debug("Got message from workflow: %s", event)
676
775
 
677
776
  if isinstance(event, JobUpdatedMessage):
678
777
  # Apply the latest return code from the job with this ID.
@@ -693,22 +792,33 @@ def replay_message_bus(path: str) -> Dict[str, JobStatus]:
693
792
  job_statuses[event.job_id].exit_code = 1
694
793
  elif isinstance(event, JobAnnotationMessage):
695
794
  # Remember the last value of any annotation that is set
696
- job_statuses[event.job_id].annotations[event.annotation_name] = event.annotation_value
795
+ job_statuses[event.job_id].annotations[
796
+ event.annotation_name
797
+ ] = event.annotation_value
697
798
  elif isinstance(event, ExternalBatchIdMessage):
698
799
  if event.toil_batch_id in batch_to_job_id:
699
- job_statuses[batch_to_job_id[event.toil_batch_id]].external_batch_id = event.external_batch_id
700
- job_statuses[batch_to_job_id[event.toil_batch_id]].batch_system = event.batch_system
800
+ job_statuses[
801
+ batch_to_job_id[event.toil_batch_id]
802
+ ].external_batch_id = event.external_batch_id
803
+ job_statuses[
804
+ batch_to_job_id[event.toil_batch_id]
805
+ ].batch_system = event.batch_system
701
806
  except FileNotFoundError:
702
807
  logger.warning("We were unable to access the file")
703
808
 
704
809
  return job_statuses
705
810
 
706
- def gen_message_bus_path() -> str:
811
+
812
+ def gen_message_bus_path(tmpdir: Optional[str] = None) -> str:
707
813
  """
708
814
  Return a file path in tmp to store the message bus at.
709
815
  Calling function is responsible for cleaning the generated file.
816
+
817
+ The tmpdir argument will override the directory that the
818
+ message bus will be made in. If not provided, the standard tempfile
819
+ order will be used.
710
820
  """
711
- fd, path = tempfile.mkstemp()
821
+ fd, path = tempfile.mkstemp(dir=tmpdir)
712
822
  os.close(fd)
713
823
  return path
714
- #TODO Might want to clean up the tmpfile at some point after running the workflow
824
+ # TODO Might want to clean up the tmpfile at some point after running the workflow