toil-7.0.0-py3-none-any.whl → toil-8.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. toil/__init__.py +121 -83
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +137 -77
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
  5. toil/batchSystems/awsBatch.py +237 -128
  6. toil/batchSystems/cleanup_support.py +22 -16
  7. toil/batchSystems/contained_executor.py +30 -26
  8. toil/batchSystems/gridengine.py +85 -49
  9. toil/batchSystems/htcondor.py +164 -87
  10. toil/batchSystems/kubernetes.py +622 -386
  11. toil/batchSystems/local_support.py +17 -12
  12. toil/batchSystems/lsf.py +132 -79
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +288 -149
  16. toil/batchSystems/mesos/executor.py +77 -49
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +38 -29
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +293 -123
  21. toil/batchSystems/slurm.py +489 -137
  22. toil/batchSystems/torque.py +46 -32
  23. toil/bus.py +141 -73
  24. toil/common.py +630 -359
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1114 -532
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +62 -41
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +88 -57
  32. toil/fileStores/cachingFileStore.py +711 -247
  33. toil/fileStores/nonCachingFileStore.py +113 -75
  34. toil/job.py +988 -315
  35. toil/jobStores/abstractJobStore.py +387 -243
  36. toil/jobStores/aws/jobStore.py +727 -403
  37. toil/jobStores/aws/utils.py +161 -109
  38. toil/jobStores/conftest.py +1 -0
  39. toil/jobStores/fileJobStore.py +289 -151
  40. toil/jobStores/googleJobStore.py +137 -70
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +614 -269
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +55 -28
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +193 -58
  49. toil/lib/aws/utils.py +238 -218
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +83 -49
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +322 -209
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +4 -2
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +99 -11
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +65 -18
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +19 -7
  71. toil/lib/retry.py +115 -77
  72. toil/lib/threading.py +282 -80
  73. toil/lib/throttle.py +15 -14
  74. toil/options/common.py +834 -401
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +70 -19
  78. toil/provisioners/__init__.py +111 -46
  79. toil/provisioners/abstractProvisioner.py +322 -157
  80. toil/provisioners/aws/__init__.py +62 -30
  81. toil/provisioners/aws/awsProvisioner.py +980 -627
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +147 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +127 -61
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +148 -64
  98. toil/test/__init__.py +263 -179
  99. toil/test/batchSystems/batchSystemTest.py +438 -195
  100. toil/test/batchSystems/batch_system_plugin_test.py +18 -7
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +93 -47
  104. toil/test/cactus/test_cactus_integration.py +20 -22
  105. toil/test/cwl/cwlTest.py +271 -71
  106. toil/test/cwl/measure_default_memory.cwl +12 -0
  107. toil/test/cwl/not_run_required_input.cwl +29 -0
  108. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  109. toil/test/docs/scriptsTest.py +60 -34
  110. toil/test/jobStores/jobStoreTest.py +412 -235
  111. toil/test/lib/aws/test_iam.py +116 -48
  112. toil/test/lib/aws/test_s3.py +16 -9
  113. toil/test/lib/aws/test_utils.py +5 -6
  114. toil/test/lib/dockerTest.py +118 -141
  115. toil/test/lib/test_conversions.py +113 -115
  116. toil/test/lib/test_ec2.py +57 -49
  117. toil/test/lib/test_integration.py +104 -0
  118. toil/test/lib/test_misc.py +12 -5
  119. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  120. toil/test/mesos/helloWorld.py +7 -6
  121. toil/test/mesos/stress.py +25 -20
  122. toil/test/options/options.py +7 -2
  123. toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
  124. toil/test/provisioners/clusterScalerTest.py +440 -250
  125. toil/test/provisioners/clusterTest.py +81 -42
  126. toil/test/provisioners/gceProvisionerTest.py +174 -100
  127. toil/test/provisioners/provisionerTest.py +25 -13
  128. toil/test/provisioners/restartScript.py +5 -4
  129. toil/test/server/serverTest.py +188 -141
  130. toil/test/sort/restart_sort.py +137 -68
  131. toil/test/sort/sort.py +134 -66
  132. toil/test/sort/sortTest.py +91 -49
  133. toil/test/src/autoDeploymentTest.py +140 -100
  134. toil/test/src/busTest.py +20 -18
  135. toil/test/src/checkpointTest.py +8 -2
  136. toil/test/src/deferredFunctionTest.py +49 -35
  137. toil/test/src/dockerCheckTest.py +33 -26
  138. toil/test/src/environmentTest.py +20 -10
  139. toil/test/src/fileStoreTest.py +538 -271
  140. toil/test/src/helloWorldTest.py +7 -4
  141. toil/test/src/importExportFileTest.py +61 -31
  142. toil/test/src/jobDescriptionTest.py +32 -17
  143. toil/test/src/jobEncapsulationTest.py +2 -0
  144. toil/test/src/jobFileStoreTest.py +74 -50
  145. toil/test/src/jobServiceTest.py +187 -73
  146. toil/test/src/jobTest.py +120 -70
  147. toil/test/src/miscTests.py +19 -18
  148. toil/test/src/promisedRequirementTest.py +82 -36
  149. toil/test/src/promisesTest.py +7 -6
  150. toil/test/src/realtimeLoggerTest.py +6 -6
  151. toil/test/src/regularLogTest.py +71 -37
  152. toil/test/src/resourceTest.py +80 -49
  153. toil/test/src/restartDAGTest.py +36 -22
  154. toil/test/src/resumabilityTest.py +9 -2
  155. toil/test/src/retainTempDirTest.py +45 -14
  156. toil/test/src/systemTest.py +12 -8
  157. toil/test/src/threadingTest.py +44 -25
  158. toil/test/src/toilContextManagerTest.py +10 -7
  159. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  160. toil/test/src/workerTest.py +33 -16
  161. toil/test/utils/toilDebugTest.py +70 -58
  162. toil/test/utils/toilKillTest.py +4 -5
  163. toil/test/utils/utilsTest.py +239 -102
  164. toil/test/wdl/wdltoil_test.py +789 -148
  165. toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
  166. toil/toilState.py +52 -26
  167. toil/utils/toilConfig.py +13 -4
  168. toil/utils/toilDebugFile.py +44 -27
  169. toil/utils/toilDebugJob.py +85 -25
  170. toil/utils/toilDestroyCluster.py +11 -6
  171. toil/utils/toilKill.py +8 -3
  172. toil/utils/toilLaunchCluster.py +251 -145
  173. toil/utils/toilMain.py +37 -16
  174. toil/utils/toilRsyncCluster.py +27 -14
  175. toil/utils/toilSshCluster.py +45 -22
  176. toil/utils/toilStats.py +75 -36
  177. toil/utils/toilStatus.py +226 -119
  178. toil/utils/toilUpdateEC2Instances.py +3 -1
  179. toil/version.py +11 -11
  180. toil/wdl/utils.py +5 -5
  181. toil/wdl/wdltoil.py +3513 -1052
  182. toil/worker.py +269 -128
  183. toil-8.0.0.dist-info/METADATA +173 -0
  184. toil-8.0.0.dist-info/RECORD +253 -0
  185. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  186. toil-7.0.0.dist-info/METADATA +0 -158
  187. toil-7.0.0.dist-info/RECORD +0 -244
  188. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/LICENSE +0 -0
  189. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  190. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/job.py CHANGED
@@ -11,6 +11,8 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+ from __future__ import annotations
+
  import collections
  import copy
  import importlib
@@ -27,56 +29,59 @@ from abc import ABCMeta, abstractmethod
  from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser, Namespace
  from contextlib import contextmanager
  from io import BytesIO
- from typing import (TYPE_CHECKING,
- Any,
- Callable,
- Dict,
- Iterator,
- List,
- Mapping,
- NamedTuple,
- Optional,
- Sequence,
- Set,
- Tuple,
- TypeVar,
- Union,
- cast,
- overload)
+ from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Dict,
+ Iterator,
+ List,
+ Mapping,
+ NamedTuple,
+ Optional,
+ Sequence,
+ Tuple,
+ TypeVar,
+ Union,
+ cast,
+ overload,
+ TypedDict,
+ Literal,
+ )
+ from urllib.error import HTTPError
+ from urllib.parse import urlsplit, unquote, urljoin
+
+ from toil import memoize

+ import dill
  from configargparse import ArgParser

- from toil.bus import Names
- from toil.lib.compatibility import deprecated
-
- if sys.version_info >= (3, 8):
- from typing import TypedDict
- else:
- from typing_extensions import TypedDict
-
- import dill
- # TODO: When this gets into the standard library, get it from there and drop
- # typing-extensions dependency on Pythons that are new enough.
- from typing_extensions import NotRequired
+ from toil.lib.io import is_remote_url

- if sys.version_info >= (3, 8):
- from typing import Literal
+ if sys.version_info < (3, 11):
+ from typing_extensions import NotRequired
  else:
- from typing_extensions import Literal
+ from typing import NotRequired

+ from toil.bus import Names
  from toil.common import Config, Toil, addOptions, safeUnpickleFromStream
  from toil.deferred import DeferredFunction
  from toil.fileStores import FileID
+ from toil.lib.compatibility import deprecated
  from toil.lib.conversions import bytes2human, human2bytes
  from toil.lib.expando import Expando
  from toil.lib.resources import ResourceMonitor
  from toil.resource import ModuleDescriptor
  from toil.statsAndLogging import set_logging_from_options

+ from toil.lib.exceptions import UnimplementedURLException
+
  if TYPE_CHECKING:
  from optparse import OptionParser

- from toil.batchSystems.abstractBatchSystem import BatchJobExitReason
+ from toil.batchSystems.abstractBatchSystem import (
+ BatchJobExitReason
+ )
  from toil.fileStores.abstractFileStore import AbstractFileStore
  from toil.jobStores.abstractJobStore import AbstractJobStore

@@ -122,24 +127,28 @@ class ConflictingPredecessorError(Exception):
  f'The given job: "{predecessor.description}" is already a predecessor of job: "{successor.description}".'
  )

+
  class DebugStoppingPointReached(BaseException):
  """
  Raised when a job reaches a point at which it has been instructed to stop for debugging.
  """
- pass
+

  class FilesDownloadedStoppingPointReached(DebugStoppingPointReached):
  """
  Raised when a job stops because it was asked to download its files, and the files are downloaded.
  """

- def __init__(self, message, host_and_job_paths: Optional[List[Tuple[str, str]]] = None):
+ def __init__(
+ self, message, host_and_job_paths: Optional[list[tuple[str, str]]] = None
+ ):
  super().__init__(message)

  # Save the host and user-code-visible paths of files, in case we're
  # using a container and they are different.
  self.host_and_job_paths = host_and_job_paths

+
  class TemporaryID:
  """
  Placeholder for a unregistered job ID used by a JobDescription.
@@ -161,7 +170,7 @@ class TemporaryID:
  return self.__repr__()

  def __repr__(self) -> str:
- return f'TemporaryID({self._value})'
+ return f"TemporaryID({self._value})"

  def __hash__(self) -> int:
  return hash(self._value)
@@ -172,6 +181,7 @@ class TemporaryID:
  def __ne__(self, other: Any) -> bool:
  return not isinstance(other, TemporaryID) or self._value != other._value

+
  class AcceleratorRequirement(TypedDict):
  """Requirement for one or more computational accelerators, like a GPU or FPGA."""

@@ -210,7 +220,10 @@ class AcceleratorRequirement(TypedDict):

  # TODO: support requesting any GPU with X amount of vram

- def parse_accelerator(spec: Union[int, str, Dict[str, Union[str, int]]]) -> AcceleratorRequirement:
+
+ def parse_accelerator(
+ spec: Union[int, str, dict[str, Union[str, int]]]
+ ) -> AcceleratorRequirement:
  """
  Parse an AcceleratorRequirement specified by user code.

@@ -247,16 +260,16 @@ def parse_accelerator(spec: Union[int, str, Dict[str, Union[str, int]]]) -> Acce
  :raises ValueError: if it gets something it can't parse
  :raises TypeError: if it gets something it can't parse because it's the wrong type.
  """
- KINDS = {'gpu'}
- BRANDS = {'nvidia', 'amd'}
- APIS = {'cuda', 'rocm', 'opencl'}
+ KINDS = {"gpu"}
+ BRANDS = {"nvidia", "amd"}
+ APIS = {"cuda", "rocm", "opencl"}

- parsed: AcceleratorRequirement = {'count': 1, 'kind': 'gpu'}
+ parsed: AcceleratorRequirement = {"count": 1, "kind": "gpu"}

  if isinstance(spec, int):
- parsed['count'] = spec
+ parsed["count"] = spec
  elif isinstance(spec, str):
- parts = spec.split(':')
+ parts = spec.split(":")

  if len(parts) > 2:
  raise ValueError("Could not parse AcceleratorRequirement: " + spec)
@@ -265,7 +278,7 @@ def parse_accelerator(spec: Union[int, str, Dict[str, Union[str, int]]]) -> Acce

  try:
  # If they have : and then a count, or just a count, handle that.
- parsed['count'] = int(possible_count)
+ parsed["count"] = int(possible_count)
  if len(parts) > 1:
  # Then we take whatever was before the colon as text
  possible_description = parts[0]
@@ -275,73 +288,97 @@ def parse_accelerator(spec: Union[int, str, Dict[str, Union[str, int]]]) -> Acce
  # It doesn't end with a number
  if len(parts) == 2:
  # We should have a number though.
- raise ValueError("Could not parse AcceleratorRequirement count in: " + spec)
+ raise ValueError(
+ "Could not parse AcceleratorRequirement count in: " + spec
+ )
  else:
  # Must be just the description
  possible_description = possible_count

  # Determine if we have a kind, brand, API, or (by default) model
  if possible_description in KINDS:
- parsed['kind'] = possible_description
+ parsed["kind"] = possible_description
  elif possible_description in BRANDS:
- parsed['brand'] = possible_description
+ parsed["brand"] = possible_description
  elif possible_description in APIS:
- parsed['api'] = possible_description
+ parsed["api"] = possible_description
  else:
  if possible_description is not None:
- parsed['model'] = possible_description
+ parsed["model"] = possible_description
  elif isinstance(spec, dict):
  # It's a dict, so merge with the defaults.
  parsed.update(spec)
  # TODO: make sure they didn't misspell keys or something
  else:
- raise TypeError(f"Cannot parse value of type {type(spec)} as an AcceleratorRequirement")
+ raise TypeError(
+ f"Cannot parse value of type {type(spec)} as an AcceleratorRequirement"
+ )

- if parsed['kind'] == 'gpu':
+ if parsed["kind"] == "gpu":
  # Use some smarts about what current GPUs are like to elaborate the
  # description.

- if 'brand' not in parsed and 'model' in parsed:
+ if "brand" not in parsed and "model" in parsed:
  # Try to guess the brand from the model
  for brand in BRANDS:
- if parsed['model'].startswith(brand):
+ if parsed["model"].startswith(brand):
  # The model often starts with the brand
- parsed['brand'] = brand
+ parsed["brand"] = brand
  break

- if 'brand' not in parsed and 'api' in parsed:
+ if "brand" not in parsed and "api" in parsed:
  # Try to guess the brand from the API
- if parsed['api'] == 'cuda':
+ if parsed["api"] == "cuda":
  # Only nvidia makes cuda cards
- parsed['brand'] = 'nvidia'
- elif parsed['api'] == 'rocm':
+ parsed["brand"] = "nvidia"
+ elif parsed["api"] == "rocm":
  # Only amd makes rocm cards
- parsed['brand'] = 'amd'
+ parsed["brand"] = "amd"

  return parsed

- def accelerator_satisfies(candidate: AcceleratorRequirement, requirement: AcceleratorRequirement, ignore: List[str] = []) -> bool:
+
+ def accelerator_satisfies(
+ candidate: AcceleratorRequirement,
+ requirement: AcceleratorRequirement,
+ ignore: list[str] = [],
+ ) -> bool:
  """
  Test if candidate partially satisfies the given requirement.

  :returns: True if the given candidate at least partially satisfies the
  given requirement (i.e. check all fields other than count).
  """
- for key in ['kind', 'brand', 'api', 'model']:
+ for key in ["kind", "brand", "api", "model"]:
  if key in ignore:
  # Skip this aspect.
  continue
  if key in requirement:
  if key not in candidate:
- logger.debug('Candidate %s does not satisfy requirement %s because it does not have a %s', candidate, requirement, key)
+ logger.debug(
+ "Candidate %s does not satisfy requirement %s because it does not have a %s",
+ candidate,
+ requirement,
+ key,
+ )
  return False
  if candidate[key] != requirement[key]:
- logger.debug('Candidate %s does not satisfy requirement %s because it does not have the correct %s', candidate, requirement, key)
+ logger.debug(
+ "Candidate %s does not satisfy requirement %s because it does not have the correct %s",
+ candidate,
+ requirement,
+ key,
+ )
  return False
  # If all these match or are more specific than required, we match!
  return True

- def accelerators_fully_satisfy(candidates: Optional[List[AcceleratorRequirement]], requirement: AcceleratorRequirement, ignore: List[str] = []) -> bool:
+
+ def accelerators_fully_satisfy(
+ candidates: Optional[list[AcceleratorRequirement]],
+ requirement: AcceleratorRequirement,
+ ignore: list[str] = [],
+ ) -> bool:
  """
  Determine if a set of accelerators satisfy a requirement.

@@ -352,21 +389,22 @@ def accelerators_fully_satisfy(candidates: Optional[List[AcceleratorRequirement]
  together (i.e. check all fields including count).
  """

- count_remaining = requirement['count']
+ count_remaining = requirement["count"]

  if candidates:
  for candidate in candidates:
  if accelerator_satisfies(candidate, requirement, ignore=ignore):
- if candidate['count'] > count_remaining:
+ if candidate["count"] > count_remaining:
  # We found all the matching accelerators we need
  count_remaining = 0
  break
  else:
- count_remaining -= candidate['count']
+ count_remaining -= candidate["count"]

  # If we have no count left we are fully satisfied
  return count_remaining == 0

+
  class RequirementsDict(TypedDict):
  """
  Typed storage for requirements for a job.
@@ -377,22 +415,35 @@ class RequirementsDict(TypedDict):
  cores: NotRequired[Union[int, float]]
  memory: NotRequired[int]
  disk: NotRequired[int]
- accelerators: NotRequired[List[AcceleratorRequirement]]
+ accelerators: NotRequired[list[AcceleratorRequirement]]
  preemptible: NotRequired[bool]

+
  # These must be all the key names in RequirementsDict
  REQUIREMENT_NAMES = ["disk", "memory", "cores", "accelerators", "preemptible"]

  # This is the supertype of all value types in RequirementsDict
- ParsedRequirement = Union[int, float, bool, List[AcceleratorRequirement]]
+ ParsedRequirement = Union[int, float, bool, list[AcceleratorRequirement]]

  # We define some types for things we can parse into different kind of requirements
  ParseableIndivisibleResource = Union[str, int]
  ParseableDivisibleResource = Union[str, int, float]
  ParseableFlag = Union[str, int, bool]
- ParseableAcceleratorRequirement = Union[str, int, Mapping[str, Any], AcceleratorRequirement, Sequence[Union[str, int, Mapping[str, Any], AcceleratorRequirement]]]
+ ParseableAcceleratorRequirement = Union[
+ str,
+ int,
+ Mapping[str, Any],
+ AcceleratorRequirement,
+ Sequence[Union[str, int, Mapping[str, Any], AcceleratorRequirement]],
+ ]
+
+ ParseableRequirement = Union[
+ ParseableIndivisibleResource,
+ ParseableDivisibleResource,
+ ParseableFlag,
+ ParseableAcceleratorRequirement,
+ ]

- ParseableRequirement = Union[ParseableIndivisibleResource, ParseableDivisibleResource, ParseableFlag, ParseableAcceleratorRequirement]

  class Requirer:
  """
@@ -403,9 +454,7 @@ class Requirer:

  _requirementOverrides: RequirementsDict

- def __init__(
- self, requirements: Mapping[str, ParseableRequirement]
- ) -> None:
+ def __init__(self, requirements: Mapping[str, ParseableRequirement]) -> None:
  """
  Parse and save the given requirements.

@@ -446,12 +495,11 @@ class Requirer:
  raise RuntimeError(f"Config assigned multiple times to {self}")
  self._config = config

-
- def __getstate__(self) -> Dict[str, Any]:
+ def __getstate__(self) -> dict[str, Any]:
  """Return the dict to use as the instance's __dict__ when pickling."""
  # We want to exclude the config from pickling.
  state = self.__dict__.copy()
- state['_config'] = None
+ state["_config"] = None
  return state

  def __copy__(self) -> "Requirer":
@@ -492,37 +540,29 @@ class Requirer:
  @overload
  @staticmethod
  def _parseResource(
- name: Union[Literal["memory"], Literal["disks"]], value: ParseableIndivisibleResource
- ) -> int:
- ...
+ name: Union[Literal["memory"], Literal["disks"]],
+ value: ParseableIndivisibleResource,
+ ) -> int: ...

  @overload
  @staticmethod
  def _parseResource(
  name: Literal["cores"], value: ParseableDivisibleResource
- ) -> Union[int, float]:
- ...
+ ) -> Union[int, float]: ...

  @overload
  @staticmethod
  def _parseResource(
  name: Literal["accelerators"], value: ParseableAcceleratorRequirement
- ) -> List[AcceleratorRequirement]:
- ...
+ ) -> list[AcceleratorRequirement]: ...

  @overload
  @staticmethod
- def _parseResource(
- name: str, value: ParseableRequirement
- ) -> ParsedRequirement:
- ...
+ def _parseResource(name: str, value: ParseableRequirement) -> ParsedRequirement: ...

  @overload
  @staticmethod
- def _parseResource(
- name: str, value: None
- ) -> None:
- ...
+ def _parseResource(name: str, value: None) -> None: ...

  @staticmethod
  def _parseResource(
@@ -559,43 +599,53 @@ class Requirer:
559
599
  # Anything can be None.
560
600
  return value
561
601
 
562
- if name in ('memory', 'disk', 'cores'):
602
+ if name in ("memory", "disk", "cores"):
563
603
  # These should be numbers that accept things like "5G".
564
604
  if isinstance(value, (str, bytes)):
565
605
  value = human2bytes(value)
566
606
  if isinstance(value, int):
567
607
  return value
568
- elif isinstance(value, float) and name == 'cores':
608
+ elif isinstance(value, float) and name == "cores":
569
609
  # But only cores can be fractional.
570
610
  return value
571
611
  else:
572
- raise TypeError(f"The '{name}' requirement does not accept values that are of type {type(value)}")
573
- elif name == 'preemptible':
612
+ raise TypeError(
613
+ f"The '{name}' requirement does not accept values that are of type {type(value)}"
614
+ )
615
+ elif name == "preemptible":
574
616
  if isinstance(value, str):
575
617
  if value.lower() == "true":
576
618
  return True
577
619
  elif value.lower() == "false":
578
620
  return False
579
621
  else:
580
- raise ValueError(f"The '{name}' requirement, as a string, must be 'true' or 'false' but is {value}")
622
+ raise ValueError(
623
+ f"The '{name}' requirement, as a string, must be 'true' or 'false' but is {value}"
624
+ )
581
625
  elif isinstance(value, int):
582
626
  if value == 1:
583
627
  return True
584
628
  if value == 0:
585
629
  return False
586
630
  else:
587
- raise ValueError(f"The '{name}' requirement, as an int, must be 1 or 0 but is {value}")
631
+ raise ValueError(
632
+ f"The '{name}' requirement, as an int, must be 1 or 0 but is {value}"
633
+ )
588
634
  elif isinstance(value, bool):
589
635
  return value
590
636
  else:
591
- raise TypeError(f"The '{name}' requirement does not accept values that are of type {type(value)}")
592
- elif name == 'accelerators':
637
+ raise TypeError(
638
+ f"The '{name}' requirement does not accept values that are of type {type(value)}"
639
+ )
640
+ elif name == "accelerators":
593
641
  # The type checking for this is delegated to the
594
642
  # AcceleratorRequirement class.
595
643
  if isinstance(value, list):
596
- return [parse_accelerator(v) for v in value] #accelerators={'kind': 'gpu', 'brand': 'nvidia', 'count': 2}
644
+ return [
645
+ parse_accelerator(v) for v in value
646
+ ] # accelerators={'kind': 'gpu', 'brand': 'nvidia', 'count': 2}
597
647
  else:
598
- return [parse_accelerator(value)] #accelerators=1
648
+ return [parse_accelerator(value)] # accelerators=1
599
649
  else:
600
650
  # Anything else we just pass along without opinons
601
651
  return cast(ParsedRequirement, value)
@@ -618,7 +668,10 @@ class Requirer:
618
668
  )
619
669
  return value
620
670
  elif self._config is not None:
621
- values = [getattr(self._config, 'default_' + requirement, None), getattr(self._config, 'default' + requirement.capitalize(), None)]
671
+ values = [
672
+ getattr(self._config, "default_" + requirement, None),
673
+ getattr(self._config, "default" + requirement.capitalize(), None),
674
+ ]
622
675
  value = values[0] if values[0] is not None else values[1]
623
676
  if value is None:
624
677
  raise AttributeError(
@@ -679,10 +732,13 @@ class Requirer:
679
732
  self._requirementOverrides["preemptible"] = Requirer._parseResource(
680
733
  "preemptible", val
681
734
  )
735
+
682
736
  @property
683
- def accelerators(self) -> List[AcceleratorRequirement]:
737
+ def accelerators(self) -> list[AcceleratorRequirement]:
684
738
  """Any accelerators, such as GPUs, that are needed."""
685
- return cast(List[AcceleratorRequirement], self._fetchRequirement("accelerators"))
739
+ return cast(
740
+ list[AcceleratorRequirement], self._fetchRequirement("accelerators")
741
+ )
686
742
 
687
743
  @accelerators.setter
688
744
  def accelerators(self, val: ParseableAcceleratorRequirement) -> None:
@@ -705,7 +761,7 @@ class Requirer:
705
761
  if isinstance(original_value, (int, float)):
706
762
  # This is something we actually can scale up and down
707
763
  new_value = original_value * factor
708
- if requirement in ('memory', 'disk'):
764
+ if requirement in ("memory", "disk"):
709
765
  # Must round to an int
710
766
  new_value = math.ceil(new_value)
711
767
  setattr(scaled, requirement, new_value)
@@ -723,29 +779,32 @@ class Requirer:
723
779
  if isinstance(v, (int, float)) and v > 1000:
724
780
  # Make large numbers readable
725
781
  v = bytes2human(v)
726
- parts.append(f'{k}: {v}')
782
+ parts.append(f"{k}: {v}")
727
783
  if len(parts) == 0:
728
- parts = ['no requirements']
729
- return ', '.join(parts)
784
+ parts = ["no requirements"]
785
+ return ", ".join(parts)
786
+
730
787
 
731
788
  class JobBodyReference(NamedTuple):
732
789
  """
733
790
  Reference from a job description to its body.
734
791
  """
792
+
735
793
  file_store_id: str
736
794
  """File ID (or special shared file name for the root job) of the job's body."""
737
- module_string: str
795
+ module_string: str
738
796
  """Stringified description of the module needed to load the body."""
739
797
 
798
+
740
799
  class JobDescription(Requirer):
741
800
  """
742
801
  Stores all the information that the Toil Leader ever needs to know about a Job.
743
-
802
+
744
803
  This includes:
745
804
  * Resource requirements.
746
805
  * Which jobs are children or follow-ons or predecessors of this job.
747
806
  * A reference to the Job object in the job store.
748
-
807
+
749
808
  Can be obtained from an actual (i.e. executable) Job object, and can be
750
809
  used to obtain the Job object from the JobStore.
751
810
 
@@ -760,8 +819,9 @@ class JobDescription(Requirer):
760
819
  requirements: Mapping[str, Union[int, str, bool]],
761
820
  jobName: str,
762
821
  unitName: Optional[str] = "",
763
- displayName: Optional[str] = "",
764
- local: Optional[bool] = None
822
+ displayName: Optional[str] = "",
823
+ local: Optional[bool] = None,
824
+ files: Optional[set[FileID]] = None,
765
825
  ) -> None:
766
826
  """
767
827
  Create a new JobDescription.
@@ -784,6 +844,7 @@ class JobDescription(Requirer):
784
844
  :param local: If True, the job is meant to use minimal resources but is
785
845
  sensitive to execution latency, and so should be executed by the
786
846
  leader.
847
+ :param files: Set of FileID objects that the job plans to use.
787
848
  """
788
849
  # Set requirements
789
850
  super().__init__(requirements)
@@ -794,10 +855,11 @@ class JobDescription(Requirer):
794
855
  # Save names, making sure they are strings and not e.g. bytes or None.
795
856
  def makeString(x: Union[str, bytes, None]) -> str:
796
857
  if isinstance(x, bytes):
797
- return x.decode('utf-8', errors='replace')
858
+ return x.decode("utf-8", errors="replace")
798
859
  if x is None:
799
860
  return ""
800
861
  return x
862
+
801
863
  self.jobName = makeString(jobName)
802
864
  self.unitName = makeString(unitName)
803
865
  self.displayName = makeString(displayName)
@@ -844,7 +906,7 @@ class JobDescription(Requirer):
844
906
  # chained-in job with its original ID, and also this job's ID with its
845
907
  # original names, or is empty if no chaining has happened.
846
908
  # The first job in the chain comes first in the list.
847
- self._merged_job_names: List[Names] = []
909
+ self._merged_job_names: list[Names] = []
848
910
 
849
911
  # The number of direct predecessors of the job. Needs to be stored at
850
912
  # the JobDescription to support dynamically-created jobs with multiple
@@ -867,17 +929,17 @@ class JobDescription(Requirer):
867
929
 
868
930
  # The IDs of all child jobs of the described job.
869
931
  # Children which are done must be removed with filterSuccessors.
870
- self.childIDs: Set[str] = set()
932
+ self.childIDs: set[str] = set()
871
933
 
872
934
  # The IDs of all follow-on jobs of the described job.
873
935
  # Follow-ons which are done must be removed with filterSuccessors.
874
- self.followOnIDs: Set[str] = set()
936
+ self.followOnIDs: set[str] = set()
875
937
 
876
938
  # We keep our own children and follow-ons in a list of successor
877
939
  # phases, along with any successors adopted from jobs we have chained
878
940
  # from. When we finish our own children and follow-ons, we may have to
879
941
  # go back and finish successors for those jobs.
880
- self.successor_phases: List[Set[str]] = [self.followOnIDs, self.childIDs]
942
+ self.successor_phases: list[set[str]] = [self.followOnIDs, self.childIDs]
881
943
 
882
944
  # Dict from ServiceHostJob ID to list of child ServiceHostJobs that start after it.
883
945
  # All services must have an entry, if only to an empty list.
@@ -893,13 +955,24 @@ class JobDescription(Requirer):
893
955
  # And we log who made the version (by PID)
894
956
  self._job_version_writer = 0
895
957
 
958
+ # Store FileIDs that the Job will want to use
959
+ # This currently does not serve much of a purpose except for debugging
960
+ # In the future, this can be used to improve job scheduling, see https://github.com/DataBiosphere/toil/issues/3071
961
+ self.files_to_use = files or set()
962
+
896
963
  def get_names(self) -> Names:
897
964
  """
898
965
  Get the names and ID of this job as a named tuple.
899
966
  """
900
- return Names(self.jobName, self.unitName, self.displayName, self.displayName, str(self.jobStoreID))
967
+ return Names(
968
+ self.jobName,
969
+ self.unitName,
970
+ self.displayName,
971
+ self.displayName,
972
+ str(self.jobStoreID),
973
+ )
901
974
 
902
- def get_chain(self) -> List[Names]:
975
+ def get_chain(self) -> list[Names]:
903
976
  """
904
977
  Get all the jobs that executed in this job's chain, in order.
905
978
 
@@ -914,7 +987,7 @@ class JobDescription(Requirer):
914
987
  else:
915
988
  return list(self._merged_job_names)
916
989
 
917
- def serviceHostIDsInBatches(self) -> Iterator[List[str]]:
990
+ def serviceHostIDsInBatches(self) -> Iterator[list[str]]:
918
991
  """
919
992
  Find all batches of service host job IDs that can be started at the same time.
920
993
 
@@ -955,14 +1028,13 @@ class JobDescription(Requirer):
955
1028
  """
956
1029
 
957
1030
  for phase in self.successor_phases:
958
- for successor in phase:
959
- yield successor
1031
+ yield from phase
960
1032
 
961
- def successors_by_phase(self) -> Iterator[Tuple[int, str]]:
1033
+ def successors_by_phase(self) -> Iterator[tuple[int, str]]:
962
1034
  """
963
- Get an iterator over all child/follow-on/chained inherited successor job IDs, along with their phase numbere on the stack.
1035
+ Get an iterator over all child/follow-on/chained inherited successor job IDs, along with their phase number on the stack.
964
1036
 
965
- Phases ececute higher numbers to lower numbers.
1037
+ Phases execute higher numbers to lower numbers.
966
1038
  """
967
1039
 
968
1040
  for i, phase in enumerate(self.successor_phases):
@@ -1003,7 +1075,7 @@ class JobDescription(Requirer):
1003
1075
  """
1004
1076
  self._body = None
1005
1077
 
1006
- def get_body(self) -> Tuple[str, ModuleDescriptor]:
1078
+ def get_body(self) -> tuple[str, ModuleDescriptor]:
1007
1079
  """
1008
1080
  Get the information needed to load the job body.
1009
1081
 
@@ -1016,9 +1088,11 @@ class JobDescription(Requirer):
1016
1088
  if not self.has_body():
1017
1089
  raise RuntimeError(f"Cannot load the body of a job {self} without one")
1018
1090
 
1019
- return self._body.file_store_id, ModuleDescriptor.fromCommand(self._body.module_string)
1091
+ return self._body.file_store_id, ModuleDescriptor.fromCommand(
1092
+ self._body.module_string
1093
+ )
1020
1094
 
1021
- def nextSuccessors(self) -> Optional[Set[str]]:
1095
+ def nextSuccessors(self) -> Optional[set[str]]:
1022
1096
  """
1023
1097
  Return the collection of job IDs for the successors of this job that are ready to run.
1024
1098
 
@@ -1101,7 +1175,9 @@ class JobDescription(Requirer):
1101
1175
  :returns: True if the job appears to be done, and all related child,
1102
1176
  follow-on, and service jobs appear to be finished and removed.
1103
1177
  """
1104
- return not self.has_body() and next(self.successorsAndServiceHosts(), None) is None
1178
+ return (
1179
+ not self.has_body() and next(self.successorsAndServiceHosts(), None) is None
1180
+ )
1105
1181
 
1106
1182
  def replace(self, other: "JobDescription") -> None:
1107
1183
  """
@@ -1120,11 +1196,15 @@ class JobDescription(Requirer):
1120
1196
  # TODO: We can't join the job graphs with Job._jobGraphsJoined, is that a problem?
1121
1197
 
1122
1198
  # Take all the successors other than this one
1123
- old_phases = [{i for i in p if i != self.jobStoreID} for p in other.successor_phases]
1199
+ old_phases = [
1200
+ {i for i in p if i != self.jobStoreID} for p in other.successor_phases
1201
+ ]
1124
1202
  # And drop empty phases
1125
1203
  old_phases = [p for p in old_phases if len(p) > 0]
1126
1204
  # And put in front of our existing phases
1127
- logger.debug('%s is adopting successor phases from %s of: %s', self, other, old_phases)
1205
+ logger.debug(
1206
+ "%s is adopting successor phases from %s of: %s", self, other, old_phases
1207
+ )
1128
1208
  self.successor_phases = old_phases + self.successor_phases
1129
1209
 
1130
1210
  # When deleting, we need to delete the files for our old ID, and also
@@ -1148,9 +1228,13 @@ class JobDescription(Requirer):
1148
1228
  self.jobStoreID = other.jobStoreID
1149
1229
 
1150
1230
  if len(other.filesToDelete) > 0:
1151
- raise RuntimeError("Trying to take on the ID of a job that is in the process of being committed!")
1231
+ raise RuntimeError(
1232
+ "Trying to take on the ID of a job that is in the process of being committed!"
1233
+ )
1152
1234
  if len(self.filesToDelete) > 0:
1153
- raise RuntimeError("Trying to take on the ID of anothe job while in the process of being committed!")
1235
+ raise RuntimeError(
1236
+ "Trying to take on the ID of anothe job while in the process of being committed!"
1237
+ )
1154
1238
 
1155
1239
  self._job_version = other._job_version
1156
1240
  self._job_version_writer = os.getpid()
@@ -1160,7 +1244,9 @@ class JobDescription(Requirer):
1160
1244
  Make sure this JobDescription is not newer than a prospective new version of the JobDescription.
1161
1245
  """
1162
1246
  if other._job_version < self._job_version:
1163
- raise RuntimeError(f"Cannot replace {self} from PID {self._job_version_writer} with older version {other} from PID {other._job_version_writer}")
1247
+ raise RuntimeError(
1248
+ f"Cannot replace {self} from PID {self._job_version_writer} with older version {other} from PID {other._job_version_writer}"
1249
+ )
1164
1250
 
1165
1251
  def is_updated_by(self, other: "JobDescription") -> bool:
1166
1252
  """
@@ -1177,7 +1263,7 @@ class JobDescription(Requirer):
1177
1263
  other._job_version_writer,
1178
1264
  self.jobStoreID,
1179
1265
  self,
1180
- self._job_version_writer
1266
+ self._job_version_writer,
1181
1267
  )
1182
1268
  return False
1183
1269
 
@@ -1189,7 +1275,7 @@ class JobDescription(Requirer):
1189
1275
  other,
1190
1276
  other._job_version_writer,
1191
1277
  self,
1192
- self._job_version_writer
1278
+ self._job_version_writer,
1193
1279
  )
1194
1280
  return False
1195
1281
 
@@ -1229,7 +1315,7 @@ class JobDescription(Requirer):
1229
1315
  """Test if the ServiceHostJob is a service of the described job."""
1230
1316
  return serviceID in self.serviceTree
1231
1317
 
1232
- def renameReferences(self, renames: Dict[TemporaryID, str]) -> None:
1318
+ def renameReferences(self, renames: dict[TemporaryID, str]) -> None:
1233
1319
  """
1234
1320
  Apply the given dict of ID renames to all references to jobs.
1235
1321
 
@@ -1245,8 +1331,12 @@ class JobDescription(Requirer):
1245
1331
  # Replace each renamed item one at a time to preserve set identity
1246
1332
  phase.remove(item)
1247
1333
  phase.add(renames[item])
1248
- self.serviceTree = {renames.get(parent, parent): [renames.get(child, child) for child in children]
1249
- for parent, children in self.serviceTree.items()}
1334
+ self.serviceTree = {
1335
+ renames.get(parent, parent): [
1336
+ renames.get(child, child) for child in children
1337
+ ]
1338
+ for parent, children in self.serviceTree.items()
1339
+ }
1250
1340
 
1251
1341
  def addPredecessor(self) -> None:
1252
1342
  """Notify the JobDescription that a predecessor has been added to its Job."""
@@ -1264,7 +1354,11 @@ class JobDescription(Requirer):
1264
1354
  :param jobStore: The job store we are being placed into
1265
1355
  """
1266
1356
 
1267
- def setupJobAfterFailure(self, exit_status: Optional[int] = None, exit_reason: Optional["BatchJobExitReason"] = None) -> None:
1357
+ def setupJobAfterFailure(
1358
+ self,
1359
+ exit_status: Optional[int] = None,
1360
+ exit_reason: Optional["BatchJobExitReason"] = None,
1361
+ ) -> None:
1268
1362
  """
1269
1363
  Configure job after a failure.
1270
1364
 
@@ -1287,30 +1381,49 @@ class JobDescription(Requirer):
1287
1381
  if self._config is None:
1288
1382
  raise RuntimeError("The job's config is not assigned.")
1289
1383
 
1290
- if self._config.enableUnlimitedPreemptibleRetries and exit_reason == BatchJobExitReason.LOST:
1291
- logger.info("*Not* reducing try count (%s) of job %s with ID %s",
1292
- self.remainingTryCount, self, self.jobStoreID)
1384
+ if (
1385
+ self._config.enableUnlimitedPreemptibleRetries
1386
+ and exit_reason == BatchJobExitReason.LOST
1387
+ ):
1388
+ logger.info(
1389
+ "*Not* reducing try count (%s) of job %s with ID %s",
1390
+ self.remainingTryCount,
1391
+ self,
1392
+ self.jobStoreID,
1393
+ )
1293
1394
  else:
1294
1395
  self.remainingTryCount = max(0, self.remainingTryCount - 1)
1295
- logger.warning("Due to failure we are reducing the remaining try count of job %s with ID %s to %s",
1296
- self, self.jobStoreID, self.remainingTryCount)
1396
+ logger.warning(
1397
+ "Due to failure we are reducing the remaining try count of job %s with ID %s to %s",
1398
+ self,
1399
+ self.jobStoreID,
1400
+ self.remainingTryCount,
1401
+ )
1297
1402
  # Set the default memory to be at least as large as the default, in
1298
1403
  # case this was a malloc failure (we do this because of the combined
1299
1404
  # batch system)
1300
1405
  if exit_reason == BatchJobExitReason.MEMLIMIT and self._config.doubleMem:
1301
1406
  self.memory = self.memory * 2
1302
- logger.warning("We have doubled the memory of the failed job %s to %s bytes due to doubleMem flag",
1303
- self, self.memory)
1407
+ logger.warning(
1408
+ "We have doubled the memory of the failed job %s to %s bytes due to doubleMem flag",
1409
+ self,
1410
+ self.memory,
1411
+ )
1304
1412
  if self.memory < self._config.defaultMemory:
1305
1413
  self.memory = self._config.defaultMemory
1306
- logger.warning("We have increased the default memory of the failed job %s to %s bytes",
1307
- self, self.memory)
1414
+ logger.warning(
1415
+ "We have increased the default memory of the failed job %s to %s bytes",
1416
+ self,
1417
+ self.memory,
1418
+ )
1308
1419
 
1309
1420
  if self.disk < self._config.defaultDisk:
1310
1421
  self.disk = self._config.defaultDisk
1311
- logger.warning("We have increased the disk of the failed job %s to the default of %s bytes",
1312
- self, self.disk)
1313
-
1422
+ logger.warning(
1423
+ "We have increased the disk of the failed job %s to the default of %s bytes",
1424
+ self,
1425
+ self.disk,
1426
+ )
1314
1427
 
1315
1428
  def getLogFileHandle(self, jobStore):
1316
1429
  """
@@ -1360,12 +1473,12 @@ class JobDescription(Requirer):
1360
1473
  """Produce a useful logging string identifying this job."""
1361
1474
  printedName = "'" + self.jobName + "'"
1362
1475
  if self.unitName:
1363
- printedName += ' ' + self.unitName
1476
+ printedName += " " + self.unitName
1364
1477
 
1365
1478
  if self.jobStoreID is not None:
1366
- printedName += ' ' + str(self.jobStoreID)
1479
+ printedName += " " + str(self.jobStoreID)
1367
1480
 
1368
- printedName += ' v' + str(self._job_version)
1481
+ printedName += " v" + str(self._job_version)
1369
1482
 
1370
1483
  return printedName
1371
1484
 
@@ -1374,7 +1487,7 @@ class JobDescription(Requirer):
1374
1487
  # a time, keyed by jobStoreID.
1375
1488
 
1376
1489
  def __repr__(self):
1377
- return f'{self.__class__.__name__}( **{self.__dict__!r} )'
1490
+ return f"{self.__class__.__name__}( **{self.__dict__!r} )"
1378
1491
 
1379
1492
  def reserve_versions(self, count: int) -> None:
1380
1493
  """
@@ -1394,6 +1507,7 @@ class JobDescription(Requirer):
1394
1507
  self._job_version_writer = os.getpid()
1395
1508
  logger.debug("New job version: %s", self)
1396
1509
 
1510
+
1397
1511
  class ServiceJobDescription(JobDescription):
1398
1512
  """A description of a job that hosts a service."""
1399
1513
 
@@ -1464,7 +1578,7 @@ class CheckpointJobDescription(JobDescription):
1464
1578
  raise RuntimeError(f"Cannot restore an empty checkpoint for a job {self}")
1465
1579
  self._body = self.checkpoint
1466
1580
 
1467
- def restartCheckpoint(self, jobStore: "AbstractJobStore") -> List[str]:
1581
+ def restartCheckpoint(self, jobStore: "AbstractJobStore") -> list[str]:
1468
1582
  """
1469
1583
  Restart a checkpoint after the total failure of jobs in its subtree.
1470
1584
 
@@ -1475,24 +1589,30 @@ class CheckpointJobDescription(JobDescription):
1475
1589
  Returns a list with the IDs of any successors deleted.
1476
1590
  """
1477
1591
  if self.checkpoint is None:
1478
- raise RuntimeError("Cannot restart a checkpoint job. The checkpoint was never set.")
1592
+ raise RuntimeError(
1593
+ "Cannot restart a checkpoint job. The checkpoint was never set."
1594
+ )
1479
1595
  successorsDeleted = []
1480
1596
  all_successors = list(self.allSuccessors())
1481
1597
  if len(all_successors) > 0 or self.serviceTree or self.has_body():
1482
1598
  if self.has_body():
1483
1599
  if self._body != self.checkpoint:
1484
- raise RuntimeError("The stored body reference and checkpoint are not the same.")
1600
+ raise RuntimeError(
1601
+ "The stored body reference and checkpoint are not the same."
1602
+ )
1485
1603
  logger.debug("Checkpoint job already has body set to run")
1486
1604
  else:
1487
1605
  self.restore_checkpoint()
1488
1606
 
1489
- jobStore.update_job(self) # Update immediately to ensure that checkpoint
1607
+ jobStore.update_job(self) # Update immediately to ensure that checkpoint
1490
1608
  # is made before deleting any remaining successors
1491
1609
 
1492
1610
  if len(all_successors) > 0 or self.serviceTree:
1493
1611
  # If the subtree of successors is not complete restart everything
1494
- logger.debug("Checkpoint job has unfinished successor jobs, deleting successors: %s, services: %s " %
1495
- (all_successors, self.serviceTree.keys()))
1612
+ logger.debug(
1613
+ "Checkpoint job has unfinished successor jobs, deleting successors: %s, services: %s "
1614
+ % (all_successors, self.serviceTree.keys())
1615
+ )
1496
1616
 
1497
1617
  # Delete everything on the stack, as these represent successors to clean
1498
1618
  # up as we restart the queue
@@ -1505,9 +1625,13 @@ class CheckpointJobDescription(JobDescription):
1505
1625
  logger.debug("Job %s has already been deleted", otherJobID)
1506
1626
  if jobDesc.jobStoreID != self.jobStoreID:
1507
1627
  # Delete everything under us except us.
1508
- logger.debug("Checkpoint is deleting old successor job: %s", jobDesc.jobStoreID)
1628
+ logger.debug(
1629
+ "Checkpoint is deleting old successor job: %s",
1630
+ jobDesc.jobStoreID,
1631
+ )
1509
1632
  jobStore.delete_job(jobDesc.jobStoreID)
1510
1633
  successorsDeleted.append(jobDesc.jobStoreID)
1634
+
1511
1635
  recursiveDelete(self)
1512
1636
 
1513
1637
  # Cut links to the jobs we deleted.
@@ -1536,6 +1660,7 @@ class Job:
1536
1660
  displayName: Optional[str] = "",
1537
1661
  descriptionClass: Optional[type] = None,
1538
1662
  local: Optional[bool] = None,
1663
+ files: Optional[set[FileID]] = None,
1539
1664
  ) -> None:
1540
1665
  """
1541
1666
  Job initializer.
@@ -1556,6 +1681,7 @@ class Job:
1556
1681
  :param displayName: Human-readable job type display name.
1557
1682
  :param descriptionClass: Override for the JobDescription class used to describe the job.
1558
1683
  :param local: if the job can be run on the leader.
1684
+ :param files: Set of Files that the job will want to use.
1559
1685
 
1560
1686
  :type memory: int or string convertible by toil.lib.conversions.human2bytes to an int
1561
1687
  :type cores: float, int, or string convertible by toil.lib.conversions.human2bytes to an int
@@ -1571,14 +1697,20 @@ class Job:
1571
1697
  jobName = self.__class__.__name__
1572
1698
  displayName = displayName if displayName else jobName
1573
1699
 
1574
- #Some workflows use preemptable instead of preemptible
1700
+ # Some workflows use preemptable instead of preemptible
1575
1701
  if preemptable and not preemptible:
1576
- logger.warning("Preemptable as a keyword has been deprecated, please use preemptible.")
1702
+ logger.warning(
1703
+ "Preemptable as a keyword has been deprecated, please use preemptible."
1704
+ )
1577
1705
  preemptible = preemptable
1578
1706
  # Build a requirements dict for the description
1579
- requirements = {'memory': memory, 'cores': cores, 'disk': disk,
1580
- 'accelerators': accelerators,
1581
- 'preemptible': preemptible}
1707
+ requirements = {
1708
+ "memory": memory,
1709
+ "cores": cores,
1710
+ "disk": disk,
1711
+ "accelerators": accelerators,
1712
+ "preemptible": preemptible,
1713
+ }
1582
1714
  if descriptionClass is None:
1583
1715
  if checkpoint:
1584
1716
  # Actually describe as a checkpoint job
@@ -1594,7 +1726,8 @@ class Job:
1594
1726
  jobName,
1595
1727
  unitName=unitName,
1596
1728
  displayName=displayName,
1597
- local=local
1729
+ local=local,
1730
+ files=files,
1598
1731
  )
1599
1732
 
1600
1733
  # Private class variables needed to actually execute a job, in the worker.
@@ -1617,7 +1750,9 @@ class Job:
1617
1750
  # Note that self.__module__ is not necessarily this module, i.e. job.py. It is the module
1618
1751
  # defining the class self is an instance of, which may be a subclass of Job that may be
1619
1752
  # defined in a different module.
1620
- self.userModule: ModuleDescriptor = ModuleDescriptor.forModule(self.__module__).globalize()
1753
+ self.userModule: ModuleDescriptor = ModuleDescriptor.forModule(
1754
+ self.__module__
1755
+ ).globalize()
1621
1756
  # Maps index paths into composite return values to lists of IDs of files containing
1622
1757
  # promised values for those return value items. An index path is a tuple of indices that
1623
1758
  # traverses a nested data structure of lists, dicts, tuples or any other type supporting
@@ -1630,7 +1765,7 @@ class Job:
1630
1765
  self._tempDir = None
1631
1766
 
1632
1767
  # Holds flags set by set_debug_flag()
1633
- self._debug_flags: Set[str] = set()
1768
+ self._debug_flags: set[str] = set()
1634
1769
 
1635
1770
  def __str__(self):
1636
1771
  """
@@ -1640,7 +1775,7 @@ class Job:
1640
1775
  if self.description is None:
1641
1776
  return repr(self)
1642
1777
  else:
1643
- return 'Job(' + str(self.description) + ')'
1778
+ return "Job(" + str(self.description) + ")"
1644
1779
 
1645
1780
  def check_initialized(self) -> None:
1646
1781
  """
@@ -1652,8 +1787,10 @@ class Job:
1652
1787
  If __init__() has not been called, raise an error.
1653
1788
  """
1654
1789
  if not hasattr(self, "_description"):
1655
- raise ValueError(f"Job instance of type {type(self)} has not been initialized. super().__init__() may not "
1656
- f"have been called.")
1790
+ raise ValueError(
1791
+ f"Job instance of type {type(self)} has not been initialized. super().__init__() may not "
1792
+ f"have been called."
1793
+ )
1657
1794
 
1658
1795
  @property
1659
1796
  def jobStoreID(self) -> Union[str, TemporaryID]:
@@ -1673,33 +1810,37 @@ class Job:
1673
1810
  def disk(self) -> int:
1674
1811
  """The maximum number of bytes of disk the job will require to run."""
1675
1812
  return self.description.disk
1813
+
1676
1814
  @disk.setter
1677
1815
  def disk(self, val):
1678
- self.description.disk = val
1816
+ self.description.disk = val
1679
1817
 
1680
1818
  @property
1681
1819
  def memory(self):
1682
1820
  """The maximum number of bytes of memory the job will require to run."""
1683
1821
  return self.description.memory
1822
+
1684
1823
  @memory.setter
1685
1824
  def memory(self, val):
1686
- self.description.memory = val
1825
+ self.description.memory = val
1687
1826
 
1688
1827
  @property
1689
1828
  def cores(self) -> Union[int, float]:
1690
1829
  """The number of CPU cores required."""
1691
1830
  return self.description.cores
1831
+
1692
1832
  @cores.setter
1693
1833
  def cores(self, val):
1694
- self.description.cores = val
1834
+ self.description.cores = val
1695
1835
 
1696
1836
  @property
1697
- def accelerators(self) -> List[AcceleratorRequirement]:
1837
+ def accelerators(self) -> list[AcceleratorRequirement]:
1698
1838
  """Any accelerators, such as GPUs, that are needed."""
1699
1839
  return self.description.accelerators
1840
+
1700
1841
  @accelerators.setter
1701
- def accelerators(self, val: List[ParseableAcceleratorRequirement]) -> None:
1702
- self.description.accelerators = val
1842
+ def accelerators(self, val: list[ParseableAcceleratorRequirement]) -> None:
1843
+ self.description.accelerators = val
1703
1844
 
1704
1845
  @property
1705
1846
  def preemptible(self) -> bool:
@@ -1709,15 +1850,30 @@ class Job:
1709
1850
  @deprecated(new_function_name="preemptible")
1710
1851
  def preemptable(self):
1711
1852
  return self.description.preemptible
1853
+
1712
1854
  @preemptible.setter
1713
1855
  def preemptible(self, val):
1714
- self.description.preemptible = val
1856
+ self.description.preemptible = val
1715
1857
 
1716
1858
  @property
1717
1859
  def checkpoint(self) -> bool:
1718
1860
  """Determine if the job is a checkpoint job or not."""
1719
1861
  return isinstance(self._description, CheckpointJobDescription)
1720
1862
 
1863
+ @property
1864
+ def files_to_use(self) -> set[FileID]:
1865
+ return self.description.files_to_use
1866
+
1867
+ @files_to_use.setter
1868
+ def files_to_use(self, val: set[FileID]):
1869
+ self.description.files_to_use = val
1870
+
1871
+ def add_to_files_to_use(self, val: FileID):
1872
+ self.description.files_to_use.add(val)
1873
+
1874
+ def remove_from_files_to_use(self, val: FileID):
1875
+ self.description.files_to_use.remove(val)
1876
+
1721
1877
  def assignConfig(self, config: Config) -> None:
1722
1878
  """
1723
1879
  Assign the given config object.
@@ -1831,7 +1987,7 @@ class Job:
1831
1987
 
1832
1988
  return followOnJob
1833
1989
 
1834
- def hasPredecessor(self, job: 'Job') -> bool:
1990
+ def hasPredecessor(self, job: "Job") -> bool:
1835
1991
  """Check if a given job is already a predecessor of this job."""
1836
1992
  return job in self._directPredecessors
1837
1993
 
@@ -1893,7 +2049,9 @@ class Job:
1893
2049
 
1894
2050
  def hasService(self, service: "Job.Service") -> bool:
1895
2051
  """Return True if the given Service is a service of this job, and False otherwise."""
1896
- return service.hostID is None or self._description.hasServiceHostJob(service.hostID)
2052
+ return service.hostID is None or self._description.hasServiceHostJob(
2053
+ service.hostID
2054
+ )
1897
2055
 
1898
2056
  # Convenience functions for creating jobs
1899
2057
 
@@ -1941,7 +2099,9 @@ class Job:
1941
2099
  :return: The new child job that wraps fn.
1942
2100
  """
1943
2101
  if PromisedRequirement.convertPromises(kwargs):
1944
- return self.addChild(PromisedRequirementJobFunctionWrappingJob.create(fn, *args, **kwargs))
2102
+ return self.addChild(
2103
+ PromisedRequirementJobFunctionWrappingJob.create(fn, *args, **kwargs)
2104
+ )
1945
2105
  else:
1946
2106
  return self.addChild(JobFunctionWrappingJob(fn, *args, **kwargs))
1947
2107
 
@@ -1957,7 +2117,9 @@ class Job:
1957
2117
  :return: The new follow-on job that wraps fn.
1958
2118
  """
1959
2119
  if PromisedRequirement.convertPromises(kwargs):
1960
- return self.addFollowOn(PromisedRequirementJobFunctionWrappingJob.create(fn, *args, **kwargs))
2120
+ return self.addFollowOn(
2121
+ PromisedRequirementJobFunctionWrappingJob.create(fn, *args, **kwargs)
2122
+ )
1961
2123
  else:
1962
2124
  return self.addFollowOn(JobFunctionWrappingJob(fn, *args, **kwargs))
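addChildJobFn and addFollowOnJobFn wrap a plain function whose first argument is the wrapping job, switching to the PromisedRequirement variant above when any requirement is itself a promise. A small sketch of the usual pattern; count_lines and report are illustrative user functions:

    from toil.job import Job

    def count_lines(job, path):
        with open(path) as f:
            return sum(1 for _ in f)

    def report(job, n):
        job.log("saw %d lines" % n)

    def make_root(path):
        root = Job.wrapJobFn(count_lines, path, memory="512M", cores=1)
        # root.rv() is a Promise that the follow-on receives as a real int.
        root.addFollowOnJobFn(report, root.rv(), memory="256M")
        return root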
1963
2125
 
@@ -2059,8 +2221,12 @@ class Job:
2059
2221
  raise JobPromiseConstraintError(self)
2060
2222
  # TODO: can we guarantee self.jobStoreID is populated and so pass that here?
2061
2223
  with self._promiseJobStore.write_file_stream() as (fileHandle, jobStoreFileID):
2062
- promise = UnfulfilledPromiseSentinel(str(self.description), jobStoreFileID, False)
2063
- logger.debug('Issuing promise %s for result of %s', jobStoreFileID, self.description)
2224
+ promise = UnfulfilledPromiseSentinel(
2225
+ str(self.description), jobStoreFileID, False
2226
+ )
2227
+ logger.debug(
2228
+ "Issuing promise %s for result of %s", jobStoreFileID, self.description
2229
+ )
2064
2230
  pickle.dump(promise, fileHandle, pickle.HIGHEST_PROTOCOL)
2065
2231
  self._rvs[path].append(jobStoreFileID)
2066
2232
  return self._promiseJobStore.config.jobStore, jobStoreFileID
@@ -2110,7 +2276,7 @@ class Job:
2110
2276
  self.checkJobGraphAcylic()
2111
2277
  self.checkNewCheckpointsAreLeafVertices()
2112
2278
 
2113
- def getRootJobs(self) -> Set['Job']:
2279
+ def getRootJobs(self) -> set["Job"]:
2114
2280
  """
2115
2281
  Return the set of root job objects that contain this job.
2116
2282
 
@@ -2142,8 +2308,9 @@ class Job:
2142
2308
  """
2143
2309
  rootJobs = self.getRootJobs()
2144
2310
  if len(rootJobs) != 1:
2145
- raise JobGraphDeadlockException("Graph does not contain exactly one"
2146
- " root job: %s" % rootJobs)
2311
+ raise JobGraphDeadlockException(
2312
+ "Graph does not contain exactly one" " root job: %s" % rootJobs
2313
+ )
2147
2314
 
2148
2315
  def checkJobGraphAcylic(self):
2149
2316
  """
@@ -2163,15 +2330,15 @@ class Job:
2163
2330
 
2164
2331
  Only deals with jobs created here, rather than loaded from the job store.
2165
2332
  """
2166
- #Get the root jobs
2333
+ # Get the root jobs
2167
2334
  roots = self.getRootJobs()
2168
2335
  if len(roots) == 0:
2169
2336
  raise JobGraphDeadlockException("Graph contains no root jobs due to cycles")
2170
2337
 
2171
- #Get implied edges
2338
+ # Get implied edges
2172
2339
  extraEdges = self._getImpliedEdges(roots)
2173
2340
 
2174
- #Check for directed cycles in the augmented graph
2341
+ # Check for directed cycles in the augmented graph
2175
2342
  visited = set()
2176
2343
  for root in roots:
2177
2344
  root._checkJobGraphAcylicDFS([], visited, extraEdges)
@@ -2181,17 +2348,23 @@ class Job:
2181
2348
  if self not in visited:
2182
2349
  visited.add(self)
2183
2350
  stack.append(self)
2184
- for successor in [self._registry[jID] for jID in self.description.allSuccessors() if jID in self._registry] + extraEdges[self]:
2351
+ for successor in [
2352
+ self._registry[jID]
2353
+ for jID in self.description.allSuccessors()
2354
+ if jID in self._registry
2355
+ ] + extraEdges[self]:
2185
2356
  # Grab all the successors in the current registry (i.e. added from this node) and look at them.
2186
2357
  successor._checkJobGraphAcylicDFS(stack, visited, extraEdges)
2187
2358
  if stack.pop() != self:
2188
2359
  raise RuntimeError("The stack ordering/elements was changed.")
2189
2360
  if self in stack:
2190
2361
  stack.append(self)
2191
- raise JobGraphDeadlockException("A cycle of job dependencies has been detected '%s'" % stack)
2362
+ raise JobGraphDeadlockException(
2363
+ "A cycle of job dependencies has been detected '%s'" % stack
2364
+ )
2192
2365
 
2193
2366
  @staticmethod
2194
- def _getImpliedEdges(roots) -> Dict["Job", List["Job"]]:
2367
+ def _getImpliedEdges(roots) -> dict["Job", list["Job"]]:
2195
2368
  """
2196
2369
  Gets the set of implied edges (between children and follow-ons of a common job).
2197
2370
 
@@ -2201,17 +2374,17 @@ class Job:
2201
2374
 
2202
2375
  :returns: dict from Job object to list of Job objects that must be done before it can start.
2203
2376
  """
2204
- #Get nodes (Job objects) in job graph
2377
+ # Get nodes (Job objects) in job graph
2205
2378
  nodes = set()
2206
2379
  for root in roots:
2207
2380
  root._collectAllSuccessors(nodes)
2208
2381
 
2209
2382
  ##For each follow-on edge calculate the extra implied edges
2210
- #Adjacency list of implied edges, i.e. map of jobs to lists of jobs
2211
- #connected by an implied edge
2383
+ # Adjacency list of implied edges, i.e. map of jobs to lists of jobs
2384
+ # connected by an implied edge
2212
2385
  extraEdges = {n: [] for n in nodes}
2213
2386
  for job in nodes:
2214
- # Get all the nonempty successor phases
2387
+ # Get all the nonempty successor phases
2215
2388
  phases = [p for p in job.description.successor_phases if len(p) > 0]
2216
2389
  for depth in range(1, len(phases)):
2217
2390
  # Add edges from all jobs in the earlier/upper subtrees to all
@@ -2231,7 +2404,11 @@ class Job:
2231
2404
  for inUpper in reacheable:
2232
2405
  # Add extra edges to the roots of all the lower subtrees
2233
2406
  # But skip anything in the lower subtree not in the current _registry (i.e. not created here)
2234
- extraEdges[inUpper] += [job._registry[lowerID] for lowerID in lower if lowerID in job._registry]
2407
+ extraEdges[inUpper] += [
2408
+ job._registry[lowerID]
2409
+ for lowerID in lower
2410
+ if lowerID in job._registry
2411
+ ]
2235
2412
 
2236
2413
  return extraEdges
2237
2414
 
@@ -2251,17 +2428,21 @@ class Job:
2251
2428
  :raises toil.job.JobGraphDeadlockException: if there exists a job being added to the graph for which \
2252
2429
  checkpoint=True and which is not a leaf.
2253
2430
  """
2254
- roots = self.getRootJobs() # Roots jobs of component, these are preexisting jobs in the graph
2431
+ roots = (
2432
+ self.getRootJobs()
2433
+ ) # Root jobs of the component; these are preexisting jobs in the graph
2255
2434
 
2256
2435
  # All jobs in the component of the job graph containing self
2257
2436
  jobs = set()
2258
- list(map(lambda x : x._collectAllSuccessors(jobs), roots))
2437
+ list(map(lambda x: x._collectAllSuccessors(jobs), roots))
2259
2438
 
2260
2439
  # Check for each job for which checkpoint is true that it is a cut vertex or leaf
2261
2440
  for y in [x for x in jobs if x.checkpoint]:
2262
- if y not in roots: # The roots are the prexisting jobs
2441
+ if y not in roots:  # The roots are the preexisting jobs
2263
2442
  if not Job._isLeafVertex(y):
2264
- raise JobGraphDeadlockException("New checkpoint job %s is not a leaf in the job graph" % y)
2443
+ raise JobGraphDeadlockException(
2444
+ "New checkpoint job %s is not a leaf in the job graph" % y
2445
+ )
2265
2446
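checkJobGraphForDeadlocks and the helpers above reject graphs with more than one root, with cycles, or with checkpoint jobs that are not leaves. A hedged sketch of that last failure mode; the unit names are placeholders:

    from toil.job import Job, JobGraphDeadlockException

    a = Job(unitName="a")
    b = Job(unitName="b", checkpoint=True)
    a.addChild(b)
    b.addChild(Job(unitName="c"))  # b is a checkpoint but now has a child

    try:
        a.checkJobGraphForDeadlocks()
    except JobGraphDeadlockException as e:
        print("rejected:", e)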
 
2266
2447
  ####################################################
2267
2448
  # Deferred function system
@@ -2290,7 +2471,9 @@ class Job:
2290
2471
  :param dict kwargs: The keyword arguments to the function
2291
2472
  """
2292
2473
  if self._defer is None:
2293
- raise Exception('A deferred function may only be registered with a job while that job is running.')
2474
+ raise Exception(
2475
+ "A deferred function may only be registered with a job while that job is running."
2476
+ )
2294
2477
  self._defer(DeferredFunction.create(function, *args, **kwargs))
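defer() queues a cleanup call that the worker runs once the job's body finishes or fails; as the guard above shows, it may only be used while the job is running. A minimal sketch; do_work_in is an illustrative helper, not part of Toil:

    import shutil
    import tempfile

    from toil.job import Job

    class ScratchJob(Job):
        def run(self, fileStore):
            scratch = tempfile.mkdtemp()
            # Registered now, executed by the worker even if run() raises below.
            self.defer(shutil.rmtree, scratch, ignore_errors=True)
            do_work_in(scratch)  # illustrative helper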
2295
2478
 
2296
2479
  ####################################################
@@ -2299,7 +2482,7 @@ class Job:
2299
2482
  # and defining a service (Job.Service)
2300
2483
  ####################################################
2301
2484
 
2302
- class Runner():
2485
+ class Runner:
2303
2486
  """Used to setup and run Toil workflow."""
2304
2487
 
2305
2488
  @staticmethod
@@ -2315,7 +2498,9 @@ class Job:
2315
2498
  return parser
2316
2499
 
2317
2500
  @staticmethod
2318
- def getDefaultOptions(jobStore: Optional[str] = None, jobstore_as_flag: bool = False) -> Namespace:
2501
+ def getDefaultOptions(
2502
+ jobStore: Optional[str] = None, jobstore_as_flag: bool = False
2503
+ ) -> Namespace:
2319
2504
  """
2320
2505
  Get default options for a toil workflow.
2321
2506
 
@@ -2326,9 +2511,13 @@ class Job:
2326
2511
  """
2327
2512
  # setting jobstore_as_flag to True allows the user to declare the jobstore in the config file instead
2328
2513
  if not jobstore_as_flag and jobStore is None:
2329
- raise RuntimeError("The jobstore argument cannot be missing if the jobstore_as_flag argument is set "
2330
- "to False!")
2331
- parser = Job.Runner.getDefaultArgumentParser(jobstore_as_flag=jobstore_as_flag)
2514
+ raise RuntimeError(
2515
+ "The jobstore argument cannot be missing if the jobstore_as_flag argument is set "
2516
+ "to False!"
2517
+ )
2518
+ parser = Job.Runner.getDefaultArgumentParser(
2519
+ jobstore_as_flag=jobstore_as_flag
2520
+ )
2332
2521
  arguments = []
2333
2522
  if jobstore_as_flag and jobStore is not None:
2334
2523
  arguments = ["--jobstore", jobStore]
@@ -2337,7 +2526,10 @@ class Job:
2337
2526
  return parser.parse_args(args=arguments)
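getDefaultOptions returns an argparse Namespace pre-populated with Toil's defaults for the given job store locator (or for a --jobstore flag when jobstore_as_flag=True). A typical launch sketch; "file:my-jobstore" and HelloJob are placeholders:

    from toil.common import Toil
    from toil.job import Job

    if __name__ == "__main__":
        options = Job.Runner.getDefaultOptions("file:my-jobstore")
        options.logLevel = "INFO"
        options.clean = "always"
        with Toil(options) as workflow:
            workflow.start(HelloJob())  # HelloJob: any Job subclass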
2338
2527
 
2339
2528
  @staticmethod
2340
- def addToilOptions(parser: Union["OptionParser", ArgumentParser], jobstore_as_flag: bool = False) -> None:
2529
+ def addToilOptions(
2530
+ parser: Union["OptionParser", ArgumentParser],
2531
+ jobstore_as_flag: bool = False,
2532
+ ) -> None:
2341
2533
  """
2342
2534
  Adds the default toil options to an :mod:`optparse` or :mod:`argparse`
2343
2535
  parser object.
@@ -2377,19 +2569,29 @@ class Job:
2377
2569
  Is not executed as a job; runs within a ServiceHostJob.
2378
2570
  """
2379
2571
 
2380
- def __init__(self, memory=None, cores=None, disk=None, accelerators=None, preemptible=None, unitName=None):
2572
+ def __init__(
2573
+ self,
2574
+ memory=None,
2575
+ cores=None,
2576
+ disk=None,
2577
+ accelerators=None,
2578
+ preemptible=None,
2579
+ unitName=None,
2580
+ ):
2381
2581
  """
2382
2582
  Memory, core and disk requirements are specified identically to those in \
2383
2583
  :func:`toil.job.Job.__init__`.
2384
2584
  """
2385
2585
  # Save the requirements in ourselves so they are visible on `self` to user code.
2386
- super().__init__({
2387
- 'memory': memory,
2388
- 'cores': cores,
2389
- 'disk': disk,
2390
- 'accelerators': accelerators,
2391
- 'preemptible': preemptible
2392
- })
2586
+ super().__init__(
2587
+ {
2588
+ "memory": memory,
2589
+ "cores": cores,
2590
+ "disk": disk,
2591
+ "accelerators": accelerators,
2592
+ "preemptible": preemptible,
2593
+ }
2594
+ )
2393
2595
 
2394
2596
  # And the unit name
2395
2597
  self.unitName = unitName
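A Job.Service subclass describes a long-running process: the hosting ServiceHostJob calls start() before dependent jobs run, polls check() while they do, and calls stop() at teardown. A hedged sketch; start_database and the connection handling are placeholders, and the requirement figures are illustrative:

    from toil.job import Job

    class DBService(Job.Service):
        def __init__(self):
            super().__init__(memory="1G", cores=1, disk="2G")

        def start(self, job):
            # Whatever start() returns is handed back to the workflow (via the
            # promise returned by addService) so clients can connect.
            self.proc = start_database()          # placeholder
            return self.proc.connection_string    # placeholder

        def check(self):
            # True while healthy; raising signals abnormal termination.
            return self.proc.poll() is None

        def stop(self, job):
            self.proc.terminate()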
@@ -2467,15 +2669,19 @@ class Job:
2467
2669
 
2468
2670
  def filter_main(module_name, class_name):
2469
2671
  try:
2470
- if module_name == '__main__':
2672
+ if module_name == "__main__":
2471
2673
  return getattr(userModule, class_name)
2472
2674
  else:
2473
2675
  return getattr(importlib.import_module(module_name), class_name)
2474
2676
  except:
2475
- if module_name == '__main__':
2476
- logger.debug('Failed getting %s from module %s.', class_name, userModule)
2677
+ if module_name == "__main__":
2678
+ logger.debug(
2679
+ "Failed getting %s from module %s.", class_name, userModule
2680
+ )
2477
2681
  else:
2478
- logger.debug('Failed getting %s from module %s.', class_name, module_name)
2682
+ logger.debug(
2683
+ "Failed getting %s from module %s.", class_name, module_name
2684
+ )
2479
2685
  raise
2480
2686
 
2481
2687
  class FilteredUnpickler(pickle.Unpickler):
@@ -2485,7 +2691,9 @@ class Job:
2485
2691
  unpickler = FilteredUnpickler(fileHandle)
2486
2692
 
2487
2693
  runnable = unpickler.load()
2488
- if requireInstanceOf is not None and not isinstance(runnable, requireInstanceOf):
2694
+ if requireInstanceOf is not None and not isinstance(
2695
+ runnable, requireInstanceOf
2696
+ ):
2489
2697
  raise RuntimeError(f"Did not find a {requireInstanceOf} when expected")
2490
2698
 
2491
2699
  return runnable
@@ -2518,15 +2726,28 @@ class Job:
2518
2726
  # File may be gone if the job is a service being re-run and the accessing job is
2519
2727
  # already complete.
2520
2728
  if jobStore.file_exists(promiseFileStoreID):
2521
- logger.debug("Resolve promise %s from %s with a %s", promiseFileStoreID, self, type(promisedValue))
2729
+ logger.debug(
2730
+ "Resolve promise %s from %s with a %s",
2731
+ promiseFileStoreID,
2732
+ self,
2733
+ type(promisedValue),
2734
+ )
2522
2735
  with jobStore.update_file_stream(promiseFileStoreID) as fileHandle:
2523
2736
  try:
2524
- pickle.dump(promisedValue, fileHandle, pickle.HIGHEST_PROTOCOL)
2737
+ pickle.dump(
2738
+ promisedValue, fileHandle, pickle.HIGHEST_PROTOCOL
2739
+ )
2525
2740
  except AttributeError:
2526
- logger.exception("Could not pickle promise result %s", promisedValue)
2741
+ logger.exception(
2742
+ "Could not pickle promise result %s", promisedValue
2743
+ )
2527
2744
  raise
2528
2745
  else:
2529
- logger.debug("Do not resolve promise %s from %s because it is no longer needed", promiseFileStoreID, self)
2746
+ logger.debug(
2747
+ "Do not resolve promise %s from %s because it is no longer needed",
2748
+ promiseFileStoreID,
2749
+ self,
2750
+ )
2530
2751
 
2531
2752
  # Functions associated with Job.checkJobGraphAcyclic to establish that the job graph does not
2532
2753
  # contain any cycles of dependencies:
@@ -2551,7 +2772,7 @@ class Job:
2551
2772
  # We added this successor locally
2552
2773
  todo.append(self._registry[successorID])
2553
2774
 
2554
- def getTopologicalOrderingOfJobs(self) -> List["Job"]:
2775
+ def getTopologicalOrderingOfJobs(self) -> list["Job"]:
2555
2776
  """
2556
2777
  :returns: a list of jobs such that for all pairs of indices i, j for which i < j, \
2557
2778
  the job at index i can be run before the job at index j.
@@ -2573,8 +2794,8 @@ class Job:
2573
2794
  job = todo[-1]
2574
2795
  todo.pop()
2575
2796
 
2576
- #Do not add the job to the ordering until all its predecessors have been
2577
- #added to the ordering
2797
+ # Do not add the job to the ordering until all its predecessors have been
2798
+ # added to the ordering
2578
2799
  outstandingPredecessor = False
2579
2800
  for predJob in job._directPredecessors:
2580
2801
  if predJob.jobStoreID not in visited:
@@ -2599,7 +2820,7 @@ class Job:
2599
2820
  # Storing Jobs into the JobStore
2600
2821
  ####################################################
2601
2822
 
2602
- def _register(self, jobStore) -> List[Tuple[TemporaryID, str]]:
2823
+ def _register(self, jobStore) -> list[tuple[TemporaryID, str]]:
2603
2824
  """
2604
2825
  If this job lacks a JobStore-assigned ID, assign this job an ID.
2605
2826
  Must be called for each job before it is saved to the JobStore for the first time.
@@ -2628,7 +2849,7 @@ class Job:
2628
2849
  # We already have an ID. No assignment or reference rewrite necessary.
2629
2850
  return []
2630
2851
 
2631
- def _renameReferences(self, renames: Dict[TemporaryID, str]) -> None:
2852
+ def _renameReferences(self, renames: dict[TemporaryID, str]) -> None:
2632
2853
  """
2633
2854
  Apply the given dict of ID renames to all references to other jobs.
2634
2855
 
@@ -2664,8 +2885,8 @@ class Job:
2664
2885
 
2665
2886
  # Clear out old Cactus compatibility fields that don't need to be
2666
2887
  # preserved and shouldn't be serialized.
2667
- if hasattr(self, '_services'):
2668
- delattr(self, '_services')
2888
+ if hasattr(self, "_services"):
2889
+ delattr(self, "_services")
2669
2890
 
2670
2891
  # Remember fields we will overwrite
2671
2892
  description = self._description
@@ -2683,7 +2904,9 @@ class Job:
2683
2904
  self._directPredecessors = set()
2684
2905
 
2685
2906
  # Save the body of the job
2686
- with jobStore.write_file_stream(description.jobStoreID, cleanup=True) as (fileHandle, fileStoreID):
2907
+ with jobStore.write_file_stream(
2908
+ description.jobStoreID, cleanup=True
2909
+ ) as (fileHandle, fileStoreID):
2687
2910
  pickle.dump(self, fileHandle, pickle.HIGHEST_PROTOCOL)
2688
2911
  finally:
2689
2912
  # Restore important fields (before handling errors)
@@ -2709,7 +2932,12 @@ class Job:
2709
2932
  # Connect the body of the job to the JobDescription
2710
2933
  self._description.attach_body(fileStoreID, userScript)
2711
2934
 
2712
- def _saveJobGraph(self, jobStore: "AbstractJobStore", saveSelf: bool = False, returnValues: bool = None):
2935
+ def _saveJobGraph(
2936
+ self,
2937
+ jobStore: "AbstractJobStore",
2938
+ saveSelf: bool = False,
2939
+ returnValues: bool = None,
2940
+ ):
2713
2941
  """
2714
2942
  Save job data and new JobDescriptions to the given job store for this
2715
2943
  job and all descending jobs, including services.
@@ -2760,7 +2988,12 @@ class Job:
2760
2988
  # Set up to save last job first, so promises flow the right way
2761
2989
  ordering.reverse()
2762
2990
 
2763
- logger.debug("Saving graph of %d jobs, %d non-service, %d new", len(allJobs), len(ordering), len(fakeToReal))
2991
+ logger.debug(
2992
+ "Saving graph of %d jobs, %d non-service, %d new",
2993
+ len(allJobs),
2994
+ len(ordering),
2995
+ len(fakeToReal),
2996
+ )
2764
2997
 
2765
2998
  # Make sure we're the root
2766
2999
  if ordering[-1] != self:
@@ -2773,15 +3006,15 @@ class Job:
2773
3006
  if not isinstance(j, ServiceHostJob) and j.jobStoreID not in ordered_ids:
2774
3007
  raise RuntimeError(f"{j} not found in ordering {ordering}")
2775
3008
 
2776
-
2777
-
2778
3009
  if not saveSelf:
2779
3010
  # Fulfil promises for return values (even if value is None)
2780
3011
  self._fulfillPromises(returnValues, jobStore)
2781
3012
 
2782
3013
  for job in ordering:
2783
3014
  logger.debug("Processing job %s", job.description)
2784
- for serviceBatch in reversed(list(job.description.serviceHostIDsInBatches())):
3015
+ for serviceBatch in reversed(
3016
+ list(job.description.serviceHostIDsInBatches())
3017
+ ):
2785
3018
  # For each batch of service host jobs in reverse order they start
2786
3019
  for serviceID in serviceBatch:
2787
3020
  logger.debug("Processing service %s", serviceID)
@@ -2819,7 +3052,8 @@ class Job:
2819
3052
  # All other job vertices in the graph are checked by checkNewCheckpointsAreLeafVertices
2820
3053
  if self.checkpoint and not Job._isLeafVertex(self):
2821
3054
  raise JobGraphDeadlockException(
2822
- 'New checkpoint job %s is not a leaf in the job graph' % self)
3055
+ "New checkpoint job %s is not a leaf in the job graph" % self
3056
+ )
2823
3057
 
2824
3058
  # Save the root job and all descendants and services
2825
3059
  self._saveJobGraph(jobStore, saveSelf=True)
@@ -2845,19 +3079,19 @@ class Job:
2845
3079
  :param job_description: the JobDescription of the job to retrieve.
2846
3080
  :returns: The job referenced by the JobDescription.
2847
3081
  """
2848
-
3082
+
2849
3083
  file_store_id, user_module_descriptor = job_description.get_body()
2850
- logger.debug('Loading user module %s.', user_module_descriptor)
3084
+ logger.debug("Loading user module %s.", user_module_descriptor)
2851
3085
  user_module = cls._loadUserModule(user_module_descriptor)
2852
3086
 
2853
- #Loads context manager using file stream
3087
+ # Loads context manager using file stream
2854
3088
  if file_store_id == "firstJob":
2855
3089
  # This one is actually a shared file name and not a file ID.
2856
3090
  manager = job_store.read_shared_file_stream(file_store_id)
2857
3091
  else:
2858
3092
  manager = job_store.read_file_stream(file_store_id)
2859
3093
 
2860
- #Open and unpickle
3094
+ # Open and unpickle
2861
3095
  with manager as file_handle:
2862
3096
 
2863
3097
  job = cls._unpickle(user_module, file_handle, requireInstanceOf=Job)
@@ -2869,7 +3103,6 @@ class Job:
2869
3103
 
2870
3104
  return job
2871
3105
 
2872
-
2873
3106
  def _run(self, jobGraph=None, fileStore=None, **kwargs):
2874
3107
  """
2875
3108
  Function which worker calls to ultimately invoke
@@ -2934,7 +3167,9 @@ class Job:
2934
3167
  os.chdir(baseDir)
2935
3168
  # Finish up the stats
2936
3169
  if stats is not None:
2937
- totalCpuTime, totalMemoryUsage = ResourceMonitor.get_total_cpu_time_and_memory_usage()
3170
+ totalCpuTime, totalMemoryUsage = (
3171
+ ResourceMonitor.get_total_cpu_time_and_memory_usage()
3172
+ )
2938
3173
  stats.jobs.append(
2939
3174
  Expando(
2940
3175
  time=str(time.time() - startTime),
@@ -2942,7 +3177,7 @@ class Job:
2942
3177
  class_name=self._jobName(),
2943
3178
  memory=str(totalMemoryUsage),
2944
3179
  requested_cores=str(self.cores),
2945
- disk=str(fileStore.get_disk_usage())
3180
+ disk=str(fileStore.get_disk_usage()),
2946
3181
  )
2947
3182
  )
2948
3183
 
@@ -2987,13 +3222,12 @@ class Job:
2987
3222
  self._defer = None
2988
3223
  self._fileStore = None
2989
3224
 
2990
-
2991
3225
  # Serialize the new Jobs defined by the run method to the jobStore
2992
3226
  self._saveJobGraph(jobStore, saveSelf=False, returnValues=returnValues)
2993
3227
 
2994
3228
  # Clear out the body, because the job is done.
2995
3229
  self.description.detach_body()
2996
-
3230
+
2997
3231
  # That and the new child/follow-on relationships will need to be
2998
3232
  # recorded later by an update() of the JobDescription.
2999
3233
 
@@ -3016,7 +3250,9 @@ class Job:
3016
3250
 
3017
3251
  return flag in self._debug_flags
3018
3252
 
3019
- def files_downloaded_hook(self, host_and_job_paths: Optional[List[Tuple[str, str]]] = None) -> None:
3253
+ def files_downloaded_hook(
3254
+ self, host_and_job_paths: Optional[list[tuple[str, str]]] = None
3255
+ ) -> None:
3020
3256
  """
3021
3257
  Function that subclasses can call when they have downloaded their input files.
3022
3258
 
@@ -3031,7 +3267,10 @@ class Job:
3031
3267
  # Stop the worker!
3032
3268
  logger.info("Job has downloaded its files. Stopping.")
3033
3269
  # Send off the path mapping for the debugging wrapper.
3034
- raise FilesDownloadedStoppingPointReached("Files downloaded", host_and_job_paths=host_and_job_paths)
3270
+ raise FilesDownloadedStoppingPointReached(
3271
+ "Files downloaded", host_and_job_paths=host_and_job_paths
3272
+ )
3273
+
3035
3274
 
3036
3275
  class JobException(Exception):
3037
3276
  """General job exception."""
@@ -3045,6 +3284,7 @@ class JobGraphDeadlockException(JobException):
3045
3284
  An exception raised in the event that a workflow contains an unresolvable \
3046
3285
  dependency, such as a cycle. See :func:`toil.job.Job.checkJobGraphForDeadlocks`.
3047
3286
  """
3287
+
3048
3288
  def __init__(self, string):
3049
3289
  super().__init__(string)
3050
3290
 
@@ -3053,6 +3293,7 @@ class FunctionWrappingJob(Job):
3053
3293
  """
3054
3294
  Job used to wrap a function. In its `run` method the wrapped function is called.
3055
3295
  """
3296
+
3056
3297
  def __init__(self, userFunction, *args, **kwargs):
3057
3298
  """
3058
3299
  :param callable userFunction: The function to wrap. It will be called with ``*args`` and
@@ -3072,7 +3313,9 @@ class FunctionWrappingJob(Job):
3072
3313
  if argSpec.defaults is None:
3073
3314
  argDict = {}
3074
3315
  else:
3075
- argDict = dict(list(zip(argSpec.args[-len(argSpec.defaults):], argSpec.defaults)))
3316
+ argDict = dict(
3317
+ list(zip(argSpec.args[-len(argSpec.defaults) :], argSpec.defaults))
3318
+ )
3076
3319
 
3077
3320
  def resolve(key, default=None, dehumanize=False):
3078
3321
  try:
@@ -3090,36 +3333,48 @@ class FunctionWrappingJob(Job):
3090
3333
  value = human2bytes(value)
3091
3334
  return value
3092
3335
 
3093
- super().__init__(memory=resolve('memory', dehumanize=True),
3094
- cores=resolve('cores', dehumanize=True),
3095
- disk=resolve('disk', dehumanize=True),
3096
- accelerators=resolve('accelerators'),
3097
- preemptible=resolve('preemptible'),
3098
- checkpoint=resolve('checkpoint', default=False),
3099
- unitName=resolve('name', default=None))
3336
+ super().__init__(
3337
+ memory=resolve("memory", dehumanize=True),
3338
+ cores=resolve("cores", dehumanize=True),
3339
+ disk=resolve("disk", dehumanize=True),
3340
+ accelerators=resolve("accelerators"),
3341
+ preemptible=resolve("preemptible"),
3342
+ checkpoint=resolve("checkpoint", default=False),
3343
+ unitName=resolve("name", default=None),
3344
+ )
3100
3345
 
3101
- self.userFunctionModule = ModuleDescriptor.forModule(userFunction.__module__).globalize()
3346
+ self.userFunctionModule = ModuleDescriptor.forModule(
3347
+ userFunction.__module__
3348
+ ).globalize()
3102
3349
  self.userFunctionName = str(userFunction.__name__)
3103
3350
  self.description.jobName = self.userFunctionName
3104
3351
  self._args = args
3105
3352
  self._kwargs = kwargs
3106
3353
 
3107
3354
  def _getUserFunction(self):
3108
- logger.debug('Loading user function %s from module %s.',
3109
- self.userFunctionName,
3110
- self.userFunctionModule)
3355
+ logger.debug(
3356
+ "Loading user function %s from module %s.",
3357
+ self.userFunctionName,
3358
+ self.userFunctionModule,
3359
+ )
3111
3360
  userFunctionModule = self._loadUserModule(self.userFunctionModule)
3112
3361
  return getattr(userFunctionModule, self.userFunctionName)
3113
3362
 
3114
- def run(self,fileStore):
3115
- userFunction = self._getUserFunction( )
3363
+ def run(self, fileStore):
3364
+ userFunction = self._getUserFunction()
3116
3365
  return userFunction(*self._args, **self._kwargs)
3117
3366
 
3118
3367
  def getUserScript(self):
3119
3368
  return self.userFunctionModule
3120
3369
 
3121
3370
  def _jobName(self):
3122
- return ".".join((self.__class__.__name__, self.userFunctionModule.name, self.userFunctionName))
3371
+ return ".".join(
3372
+ (
3373
+ self.__class__.__name__,
3374
+ self.userFunctionModule.name,
3375
+ self.userFunctionName,
3376
+ )
3377
+ )
3123
3378
 
3124
3379
 
3125
3380
  class JobFunctionWrappingJob(FunctionWrappingJob):
@@ -3165,10 +3420,20 @@ class PromisedRequirementFunctionWrappingJob(FunctionWrappingJob):
3165
3420
  Spawns child function using parent function parameters and fulfilled promised
3166
3421
  resource requirements.
3167
3422
  """
3423
+
3168
3424
  def __init__(self, userFunction, *args, **kwargs):
3169
3425
  self._promisedKwargs = kwargs.copy()
3170
3426
  # Replace resource requirements in intermediate job with small values.
3171
- kwargs.update(dict(disk='1M', memory='32M', cores=0.1, accelerators=[], preemptible=True, preemptable=True))
3427
+ kwargs.update(
3428
+ dict(
3429
+ disk="1M",
3430
+ memory="32M",
3431
+ cores=0.1,
3432
+ accelerators=[],
3433
+ preemptible=True,
3434
+ preemptable=True,
3435
+ )
3436
+ )
3172
3437
  super().__init__(userFunction, *args, **kwargs)
3173
3438
 
3174
3439
  @classmethod
@@ -3193,7 +3458,9 @@ class PromisedRequirementFunctionWrappingJob(FunctionWrappingJob):
3193
3458
  for requirement in REQUIREMENT_NAMES:
3194
3459
  try:
3195
3460
  if isinstance(self._promisedKwargs[requirement], PromisedRequirement):
3196
- self._promisedKwargs[requirement] = self._promisedKwargs[requirement].getValue()
3461
+ self._promisedKwargs[requirement] = self._promisedKwargs[
3462
+ requirement
3463
+ ].getValue()
3197
3464
  except KeyError:
3198
3465
  pass
3199
3466
 
@@ -3207,7 +3474,9 @@ class PromisedRequirementJobFunctionWrappingJob(PromisedRequirementFunctionWrapp
3207
3474
  def run(self, fileStore):
3208
3475
  self.evaluatePromisedRequirements()
3209
3476
  userFunction = self._getUserFunction()
3210
- return self.addChildJobFn(userFunction, *self._args, **self._promisedKwargs).rv()
3477
+ return self.addChildJobFn(
3478
+ userFunction, *self._args, **self._promisedKwargs
3479
+ ).rv()
3211
3480
 
3212
3481
 
3213
3482
  class EncapsulatedJob(Job):
@@ -3234,6 +3503,7 @@ class EncapsulatedJob(Job):
3234
3503
  is the return value of the root job, e.g. A().encapsulate().rv() and A().rv() will resolve to
3235
3504
  the same value after A or A.encapsulate() has been run.
3236
3505
  """
3506
+
3237
3507
  def __init__(self, job, unitName=None):
3238
3508
  """
3239
3509
  :param toil.job.Job job: the job to encapsulate.
@@ -3253,7 +3523,12 @@ class EncapsulatedJob(Job):
3253
3523
  Job.addChild(self, job)
3254
3524
  # Use small resource requirements for dummy Job instance.
3255
3525
  # But not too small, or the job won't have enough resources to safely start up Toil.
3256
- self.encapsulatedFollowOn = Job(disk='100M', memory='512M', cores=0.1, unitName=None if unitName is None else unitName + '-followOn')
3526
+ self.encapsulatedFollowOn = Job(
3527
+ disk="100M",
3528
+ memory="512M",
3529
+ cores=0.1,
3530
+ unitName=None if unitName is None else unitName + "-followOn",
3531
+ )
3257
3532
  Job.addFollowOn(self, self.encapsulatedFollowOn)
3258
3533
  else:
3259
3534
  # Unpickling on the worker, to be run as a no-op.
@@ -3265,17 +3540,25 @@ class EncapsulatedJob(Job):
3265
3540
 
3266
3541
  def addChild(self, childJob):
3267
3542
  if self.encapsulatedFollowOn is None:
3268
- raise RuntimeError("Children cannot be added to EncapsulatedJob while it is running")
3543
+ raise RuntimeError(
3544
+ "Children cannot be added to EncapsulatedJob while it is running"
3545
+ )
3269
3546
  return Job.addChild(self.encapsulatedFollowOn, childJob)
3270
3547
 
3271
3548
  def addService(self, service, parentService=None):
3272
3549
  if self.encapsulatedFollowOn is None:
3273
- raise RuntimeError("Services cannot be added to EncapsulatedJob while it is running")
3274
- return Job.addService(self.encapsulatedFollowOn, service, parentService=parentService)
3550
+ raise RuntimeError(
3551
+ "Services cannot be added to EncapsulatedJob while it is running"
3552
+ )
3553
+ return Job.addService(
3554
+ self.encapsulatedFollowOn, service, parentService=parentService
3555
+ )
3275
3556
 
3276
3557
  def addFollowOn(self, followOnJob):
3277
3558
  if self.encapsulatedFollowOn is None:
3278
- raise RuntimeError("Follow-ons cannot be added to EncapsulatedJob while it is running")
3559
+ raise RuntimeError(
3560
+ "Follow-ons cannot be added to EncapsulatedJob while it is running"
3561
+ )
3279
3562
  return Job.addFollowOn(self.encapsulatedFollowOn, followOnJob)
3280
3563
 
3281
3564
  def rv(self, *path) -> "Promise":
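EncapsulatedJob treats a whole subgraph as a single vertex: children and follow-ons attached to the wrapper only run after the wrapped job and everything it adds at runtime have finished, as the forwarding methods above show. A short sketch; A and B stand for any Job subclasses:

    from toil.job import Job, EncapsulatedJob

    a = A()                 # placeholder Job subclass
    wrapped = EncapsulatedJob(a)
    wrapped.addChild(B())   # runs only after a and all jobs a creates finish
    # wrapped.rv() resolves to the same value as a.rv() once a has run.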
@@ -3318,6 +3601,7 @@ class ServiceHostJob(Job):
3318
3601
  """
3319
3602
  Job that runs a service. Used internally by Toil. Users should subclass Service instead of using this.
3320
3603
  """
3604
+
3321
3605
  def __init__(self, service):
3322
3606
  """
3323
3607
  This constructor should not be called by a user.
@@ -3328,12 +3612,17 @@ class ServiceHostJob(Job):
3328
3612
 
3329
3613
  # Make sure the service hasn't been given a host already.
3330
3614
  if service.hostID is not None:
3331
- raise RuntimeError("Cannot set the host. The service has already been given a host.")
3615
+ raise RuntimeError(
3616
+ "Cannot set the host. The service has already been given a host."
3617
+ )
3332
3618
 
3333
3619
  # Make ourselves with name info from the Service and a
3334
3620
  # ServiceJobDescription that has the service control flags.
3335
- super().__init__(**service.requirements,
3336
- unitName=service.unitName, descriptionClass=ServiceJobDescription)
3621
+ super().__init__(
3622
+ **service.requirements,
3623
+ unitName=service.unitName,
3624
+ descriptionClass=ServiceJobDescription,
3625
+ )
3337
3626
 
3338
3627
  # Make sure the service knows it has a host now
3339
3628
  service.hostID = self.jobStoreID
@@ -3371,13 +3660,19 @@ class ServiceHostJob(Job):
3371
3660
  # stuff onto us.
3372
3661
 
3373
3662
  def addChild(self, child):
3374
- raise RuntimeError("Service host jobs cannot have children, follow-ons, or services")
3663
+ raise RuntimeError(
3664
+ "Service host jobs cannot have children, follow-ons, or services"
3665
+ )
3375
3666
 
3376
3667
  def addFollowOn(self, followOn):
3377
- raise RuntimeError("Service host jobs cannot have children, follow-ons, or services")
3668
+ raise RuntimeError(
3669
+ "Service host jobs cannot have children, follow-ons, or services"
3670
+ )
3378
3671
 
3379
3672
  def addService(self, service, parentService=None):
3380
- raise RuntimeError("Service host jobs cannot have children, follow-ons, or services")
3673
+ raise RuntimeError(
3674
+ "Service host jobs cannot have children, follow-ons, or services"
3675
+ )
3381
3676
 
3382
3677
  def saveBody(self, jobStore):
3383
3678
  """
@@ -3386,7 +3681,9 @@ class ServiceHostJob(Job):
3386
3681
  # Save unpickled service
3387
3682
  service = self.service
3388
3683
  # Serialize service
3389
- self.pickledService = pickle.dumps(self.service, protocol=pickle.HIGHEST_PROTOCOL)
3684
+ self.pickledService = pickle.dumps(
3685
+ self.service, protocol=pickle.HIGHEST_PROTOCOL
3686
+ )
3390
3687
  # Clear real service until we have the module to load it back
3391
3688
  self.service = None
3392
3689
  # Save body as normal
@@ -3397,24 +3694,30 @@ class ServiceHostJob(Job):
3397
3694
 
3398
3695
  def run(self, fileStore):
3399
3696
  # Unpickle the service
3400
- logger.debug('Loading service module %s.', self.serviceModule)
3697
+ logger.debug("Loading service module %s.", self.serviceModule)
3401
3698
  userModule = self._loadUserModule(self.serviceModule)
3402
- service = self._unpickle(userModule, BytesIO(self.pickledService), requireInstanceOf=Job.Service)
3699
+ service = self._unpickle(
3700
+ userModule, BytesIO(self.pickledService), requireInstanceOf=Job.Service
3701
+ )
3403
3702
  self.pickledService = None
3404
3703
  # Make sure it has the config, since it wasn't load()-ed via the JobStore
3405
3704
  service.assignConfig(fileStore.jobStore.config)
3406
- #Start the service
3705
+ # Start the service
3407
3706
  startCredentials = service.start(self)
3408
3707
  try:
3409
- #The start credentials must be communicated to processes connecting to
3410
- #the service, to do this while the run method is running we
3411
- #cheat and set the return value promise within the run method
3708
+ # The start credentials must be communicated to processes connecting to
3709
+ # the service, to do this while the run method is running we
3710
+ # cheat and set the return value promise within the run method
3412
3711
  self._fulfillPromises(startCredentials, fileStore.jobStore)
3413
- self._rvs = {} # Set this to avoid the return values being updated after the
3414
- #run method has completed!
3712
+ self._rvs = (
3713
+ {}
3714
+ ) # Set this to avoid the return values being updated after the
3715
+ # run method has completed!
3415
3716
 
3416
- #Now flag that the service is running jobs can connect to it
3417
- logger.debug("Removing the start jobStoreID to indicate that establishment of the service")
3717
+ # Now flag that the service is running so jobs can connect to it
3718
+ logger.debug(
3719
+ "Removing the start jobStoreID to indicate that establishment of the service"
3720
+ )
3418
3721
  if self.description.startJobStoreID is None:
3419
3722
  raise RuntimeError("No start jobStoreID to remove.")
3420
3723
  if fileStore.jobStore.file_exists(self.description.startJobStoreID):
@@ -3422,23 +3725,33 @@ class ServiceHostJob(Job):
3422
3725
  if fileStore.jobStore.file_exists(self.description.startJobStoreID):
3423
3726
  raise RuntimeError("The start jobStoreID is not a file.")
3424
3727
 
3425
- #Now block until we are told to stop, which is indicated by the removal
3426
- #of a file
3728
+ # Now block until we are told to stop, which is indicated by the removal
3729
+ # of a file
3427
3730
  if self.description.terminateJobStoreID is None:
3428
3731
  raise RuntimeError("No terminate jobStoreID to use.")
3429
3732
  while True:
3430
3733
  # Check for the terminate signal
3431
- if not fileStore.jobStore.file_exists(self.description.terminateJobStoreID):
3432
- logger.debug("Detected that the terminate jobStoreID has been removed so exiting")
3433
- if not fileStore.jobStore.file_exists(self.description.errorJobStoreID):
3434
- raise RuntimeError("Detected the error jobStoreID has been removed so exiting with an error")
3734
+ if not fileStore.jobStore.file_exists(
3735
+ self.description.terminateJobStoreID
3736
+ ):
3737
+ logger.debug(
3738
+ "Detected that the terminate jobStoreID has been removed so exiting"
3739
+ )
3740
+ if not fileStore.jobStore.file_exists(
3741
+ self.description.errorJobStoreID
3742
+ ):
3743
+ raise RuntimeError(
3744
+ "Detected the error jobStoreID has been removed so exiting with an error"
3745
+ )
3435
3746
  break
3436
3747
 
3437
3748
  # Check the service's status and exit if failed or complete
3438
3749
  try:
3439
3750
  if not service.check():
3440
- logger.debug("The service has finished okay, but we have not been told to terminate. "
3441
- "Waiting for leader to tell us to come back.")
3751
+ logger.debug(
3752
+ "The service has finished okay, but we have not been told to terminate. "
3753
+ "Waiting for leader to tell us to come back."
3754
+ )
3442
3755
  # TODO: Adjust leader so that it keys on something
3443
3756
  # other than the services finishing (assumed to be
3444
3757
  # after the children) to know when to run follow-on
@@ -3449,7 +3762,9 @@ class ServiceHostJob(Job):
3449
3762
  logger.debug("Detected abnormal termination of the service")
3450
3763
  raise
3451
3764
 
3452
- time.sleep(fileStore.jobStore.config.servicePollingInterval) #Avoid excessive polling
3765
+ time.sleep(
3766
+ fileStore.jobStore.config.servicePollingInterval
3767
+ ) # Avoid excessive polling
3453
3768
 
3454
3769
  logger.debug("Service is done")
3455
3770
  finally:
@@ -3460,6 +3775,354 @@ class ServiceHostJob(Job):
3460
3775
  return self.serviceModule
3461
3776
 
3462
3777
 
3778
+ class FileMetadata(NamedTuple):
3779
+ """
3780
+ Metadata for a file.
3781
+ source is the URL to grab the file from
3782
+ parent_dir is the parent directory of the source
3783
+ size is the size of the file. Is None if the file size cannot be retrieved.
3784
+ """
3785
+
3786
+ source: str
3787
+ parent_dir: str
3788
+ size: Optional[int]
3789
+
3790
+
3791
+ def potential_absolute_uris(
3792
+ uri: str,
3793
+ path: list[str],
3794
+ importer: Optional[str] = None,
3795
+ execution_dir: Optional[str] = None,
3796
+ ) -> Iterator[str]:
3797
+ """
3798
+ Get potential absolute URIs to check for an imported file.
3799
+
3800
+ Given a URI or bare path, yield in turn all the URIs, with schemes, where we
3801
+ should actually try to find it, given that we want to search under/against
3802
+ the given paths or URIs, the current directory, and the given importing WDL
3803
+ document if any.
3804
+ """
3805
+
3806
+ if uri == "":
3807
+ # Empty URIs can't come from anywhere.
3808
+ return
3809
+
3810
+ # We need to brute-force find this URI relative to:
3811
+ #
3812
+ # 1. Itself if a full URI.
3813
+ #
3814
+ # 2. Importer's URL, if importer is a URL and this is a
3815
+ # host-root-relative URL starting with / or scheme-relative
3816
+ # starting with //, or just plain relative.
3817
+ #
3818
+ # 3. Current directory, if a relative path.
3819
+ #
3820
+ # 4. All the prefixes in "path".
3821
+ #
3822
+ # If it can't be found anywhere, we ought to (probably) throw
3823
+ # FileNotFoundError like the MiniWDL implementation does, with a
3824
+ # correct errno.
3825
+ #
3826
+ # To do this, we have AbstractFileStore.read_from_url, which can read a
3827
+ # URL into a binary-mode writable, or throw some kind of unspecified
3828
+ # exception if the source doesn't exist or can't be fetched.
3829
+
3830
+ # This holds scheme-applied full URIs for all the places to search.
3831
+ full_path_list = []
3832
+
3833
+ if importer is not None:
3834
+ # Add the place the imported file came from, to search first.
3835
+ full_path_list.append(Toil.normalize_uri(importer))
3836
+
3837
+ # Then the current directory. We need to make sure to include a filename component here or it will treat the current directory with no trailing / as a document and relative paths will look 1 level up.
3838
+ # When importing on a worker, the cwd will be a tmpdir and will result in FileNotFoundError after os.path.abspath, so override with the execution dir
3839
+ full_path_list.append(Toil.normalize_uri(execution_dir or ".") + "/.")
3840
+
3841
+ # Then the specified paths.
3842
+ # TODO:
3843
+ # https://github.com/chanzuckerberg/miniwdl/blob/e3e8ef74e80fbe59f137b0ad40b354957915c345/WDL/Tree.py#L1479-L1482
3844
+ # seems backward actually and might do these first!
3845
+ full_path_list += [Toil.normalize_uri(p) for p in path]
3846
+
3847
+ # This holds all the URIs we tried and failed with.
3848
+ failures: set[str] = set()
3849
+
3850
+ for candidate_base in full_path_list:
3851
+ # Try fetching based off each base URI
3852
+ candidate_uri = urljoin(candidate_base, uri)
3853
+ if candidate_uri in failures:
3854
+ # Already tried this one, maybe we have an absolute uri input.
3855
+ continue
3856
+ logger.debug(
3857
+ "Consider %s which is %s off of %s", candidate_uri, uri, candidate_base
3858
+ )
3859
+
3860
+ # Try it
3861
+ yield candidate_uri
3862
+ # If we come back it didn't work
3863
+ failures.add(candidate_uri)
3864
+
3865
+
3866
+ def get_file_sizes(
3867
+ filenames: List[str],
3868
+ file_source: AbstractJobStore,
3869
+ search_paths: Optional[List[str]] = None,
3870
+ include_remote_files: bool = True,
3871
+ execution_dir: Optional[str] = None,
3872
+ ) -> Dict[str, FileMetadata]:
3873
+ """
3874
+ Resolve relative-URI files in the given environment and turn them into absolute normalized URIs. Returns a dictionary of the *string values* from the WDL file values
3875
+ to a tuple of the normalized URI, parent directory ID, and size of the file. The size of the file may be None, which means unknown size.
3876
+
3877
+ :param filenames: list of filenames to evaluate on
3878
+ :param file_source: Context to search for files with
3879
+ :param task_path: Dotted WDL name of the user-level code doing the
3880
+ importing (probably the workflow name).
3881
+ :param search_paths: If set, try resolving input location relative to the URLs or
3882
+ directories in this list.
3883
+ :param include_remote_files: If set, import files from remote locations. Else leave them as URI references.
3884
+ """
3885
+
3886
+ @memoize
3887
+ def get_filename_size(filename: str) -> FileMetadata:
3888
+ tried = []
3889
+ for candidate_uri in potential_absolute_uris(
3890
+ filename,
3891
+ search_paths if search_paths is not None else [],
3892
+ execution_dir=execution_dir,
3893
+ ):
3894
+ tried.append(candidate_uri)
3895
+ try:
3896
+ if not include_remote_files and is_remote_url(candidate_uri):
3897
+ # Use remote URIs in place. But we need to find the one that exists.
3898
+ if not file_source.url_exists(candidate_uri):
3899
+ # Wasn't found there
3900
+ continue
3901
+
3902
+ # Now we know this exists, so pass it through
3903
+ # Get filesizes
3904
+ filesize = file_source.get_size(candidate_uri)
3905
+ except UnimplementedURLException as e:
3906
+ # We can't find anything that can even support this URL scheme.
3907
+ # Report to the user, they are probably missing an extra.
3908
+ logger.critical("Error: " + str(e))
3909
+ raise
3910
+ except HTTPError as e:
3911
+ # Something went wrong looking for it there.
3912
+ logger.warning(
3913
+ "Checked URL %s but got HTTP status %s", candidate_uri, e.code
3914
+ )
3915
+ if e.code == 405:
3916
+ # 405 Method not allowed, maybe HEAD requests are not supported
3917
+ filesize = None
3918
+ else:
3919
+ # Try the next location.
3920
+ continue
3921
+ except FileNotFoundError:
3922
+ # Wasn't found there
3923
+ continue
3924
+ except Exception:
3925
+ # Something went wrong besides the file not being found. Maybe
3926
+ # we have no auth.
3927
+ logger.error(
3928
+ "Something went wrong when testing for existence of %s",
3929
+ candidate_uri,
3930
+ )
3931
+ raise
3932
+
3933
+ # Work out what the basename for the file was
3934
+ file_basename = os.path.basename(urlsplit(candidate_uri).path)
3935
+
3936
+ if file_basename == "":
3937
+ # We can't have files with no basename because we need to
3938
+ # download them at that basename later in WDL.
3939
+ raise RuntimeError(
3940
+ f"File {candidate_uri} has no basename"
3941
+ )
3942
+
3943
+ # Was actually found
3944
+ if is_remote_url(candidate_uri):
3945
+ # Might be a file URI or other URI.
3946
+ # We need to make sure file URIs and local paths that point to
3947
+ # the same place are treated the same.
3948
+ parsed = urlsplit(candidate_uri)
3949
+ if parsed.scheme == "file:":
3950
+ # This is a local file URI. Convert to a path for source directory tracking.
3951
+ parent_dir = os.path.dirname(unquote(parsed.path))
3952
+ else:
3953
+ # This is some other URL. Get the URL to the parent directory and use that.
3954
+ parent_dir = urljoin(candidate_uri, ".")
3955
+ else:
3956
+ # Must be a local path
3957
+ parent_dir = os.path.dirname(candidate_uri)
3958
+
3959
+ return cast(FileMetadata, (candidate_uri, parent_dir, filesize))
3960
+ # Not found
3961
+ raise RuntimeError(
3962
+ f"Could not find {filename} at any of: {list(potential_absolute_uris(filename, search_paths if search_paths is not None else []))}"
3963
+ )
3964
+
3965
+ return {k: get_filename_size(k) for k in filenames}
3966
+
3967
+
3968
+ class CombineImportsJob(Job):
3969
+ """
3970
+ Combine the outputs of multiple WorkerImportsJob into one promise
3971
+ """
3972
+
3973
+ def __init__(self, d: Sequence[Promised[Dict[str, FileID]]], **kwargs):
3974
+ """
3975
+ :param d: Sequence of dictionaries to merge
3976
+ """
3977
+ self._d = d
3978
+ super().__init__(**kwargs)
3979
+
3980
+ def run(self, file_store: AbstractFileStore) -> Promised[Dict[str, FileID]]:
3981
+ """
3982
+ Merge the dicts
3983
+ """
3984
+ d = unwrap_all(self._d)
3985
+ return {k: v for item in d for k, v in item.items()}
3986
+
3987
+
3988
+ class WorkerImportJob(Job):
3989
+ """
3990
+ Job to do file imports on a worker instead of a leader. Assumes all local and cloud files are accessible.
3991
+
3992
+ For the CWL/WDL runners, this class is only used when runImportsOnWorkers is enabled.
3993
+ """
3994
+
3995
+ def __init__(
3996
+ self,
3997
+ filenames: List[str],
3998
+ local: bool = False,
3999
+ **kwargs: Any
4000
+ ):
4001
+ """
4002
+ Setup importing files on a worker.
4003
+ :param filenames: List of file URIs to import
4004
+ :param kwargs: args for the superclass
4005
+ """
4006
+ self.filenames = filenames
4007
+ super().__init__(local=local, **kwargs)
4008
+
4009
+ @staticmethod
4010
+ def import_files(
4011
+ files: List[str], file_source: "AbstractJobStore"
4012
+ ) -> Dict[str, FileID]:
4013
+ """
4014
+ Import a list of files into the jobstore. Returns a mapping of the filename to the associated FileIDs
4015
+
4016
+ When stream is true but the import is not streamable, the worker will run out of
4017
+ disk space and run a new import job with enough disk space instead.
4018
+ :param files: list of files to import
4019
+ :param file_source: AbstractJobStore
4020
+ :return: Dictionary mapping filenames to associated jobstore FileID
4021
+ """
4022
+ # todo: make the import ensure streaming is done instead of relying on running out of disk space
4023
+ path_to_fileid = {}
4024
+
4025
+ @memoize
4026
+ def import_filename(filename: str) -> Optional[FileID]:
4027
+ return file_source.import_file(filename, symlink=True)
4028
+
4029
+ for file in files:
4030
+ imported = import_filename(file)
4031
+ if imported is not None:
4032
+ path_to_fileid[file] = imported
4033
+ return path_to_fileid
4034
+
4035
+ def run(self, file_store: AbstractFileStore) -> Promised[Dict[str, FileID]]:
4036
+ """
4037
+ Import this job's batch of files into the job store.
4038
+ :return: Mapping from each imported filename to its FileID
4039
+ """
4040
+ return self.import_files(self.filenames, file_store.jobStore)
4041
+
4042
+
4043
+ class ImportsJob(Job):
4044
+ """
4045
+ Job to organize and delegate files to individual WorkerImportJobs.
4046
+
4047
+ For the CWL/WDL runners, this is only used when runImportsOnWorkers is enabled
4048
+ """
4049
+
4050
+ def __init__(
4051
+ self,
4052
+ file_to_data: Dict[str, FileMetadata],
4053
+ max_batch_size: ParseableIndivisibleResource,
4054
+ import_worker_disk: ParseableIndivisibleResource,
4055
+ **kwargs: Any,
4056
+ ):
4057
+ """
4058
+ Job to take the inputs for a workflow and import them on a worker instead of a leader. Assumes all local and cloud files are accessible.
4059
+
4060
+ This class is only used when runImportsOnWorkers is enabled.
4061
+
4062
+ :param file_to_data: mapping of file source name to file metadata
4063
+ :param max_batch_size: maximum cumulative file size of a batched import
4064
+ """
4065
+ super().__init__(local=True, **kwargs)
4066
+ self._file_to_data = file_to_data
4067
+ self._max_batch_size = max_batch_size
4068
+ self._import_worker_disk = import_worker_disk
4069
+
4070
+ def run(
4071
+ self, file_store: AbstractFileStore
4072
+ ) -> Tuple[Promised[Dict[str, FileID]], Dict[str, FileMetadata]]:
4073
+ """
4074
+ Schedule the batched worker import jobs for this workflow's input files.
4075
+ :return: Tuple of (a promised mapping from candidate URI to FileID, and a mapping from each source filename to its metadata). The candidate URI is a field in the file metadata.
4076
+ """
4077
+ max_batch_size = self._max_batch_size
4078
+ file_to_data = self._file_to_data
4079
+ # Run WDL imports on a worker instead
4080
+
4081
+ filenames = list(file_to_data.keys())
4082
+
4083
+ import_jobs = []
4084
+
4085
+ # This list will hold lists of batched filenames
4086
+ file_batches = []
4087
+
4088
+ # List of filenames for each batch
4089
+ per_batch_files = []
4090
+ per_batch_size = 0
4091
+ while len(filenames) > 0:
4092
+ filename = filenames.pop(0)
4093
+ # See if adding this to the queue will make the batch job too big
4094
+ filesize = file_to_data[filename][2]
4095
+ if per_batch_size + filesize >= max_batch_size:
4096
+ # batch is too big now, store to schedule the batch
4097
+ if len(per_batch_files) == 0:
4098
+ # schedule the individual file
4099
+ per_batch_files.append(filename)
4100
+ file_batches.append(per_batch_files)
4101
+ # reset batching calculation
4102
+ per_batch_size = 0
4103
+ else:
4104
+ per_batch_size += filesize
4105
+ per_batch_files.append(filename)
4106
+
4107
+ if per_batch_files:
4108
+ file_batches.append(per_batch_files)
4109
+
4110
+ # Create batch import jobs for each group of files
4111
+ for batch in file_batches:
4112
+ candidate_uris = [file_to_data[filename][0] for filename in batch]
4113
+ import_jobs.append(WorkerImportJob(candidate_uris, disk=self._import_worker_disk))
4114
+
4115
+ for job in import_jobs:
4116
+ self.addChild(job)
4117
+
4118
+ combine_imports_job = CombineImportsJob([job.rv() for job in import_jobs])
4119
+ for job in import_jobs:
4120
+ job.addFollowOn(combine_imports_job)
4121
+ self.addChild(combine_imports_job)
4122
+
4123
+ return combine_imports_job.rv(), file_to_data
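Taken together, get_file_sizes, ImportsJob, WorkerImportJob and CombineImportsJob move input imports off the leader and batch them by size. A hedged sketch of how a runner might wire this up when runImportsOnWorkers is enabled; the byte figures are illustrative and make_import_root is not part of the package:

    from toil.job import ImportsJob, get_file_sizes

    def make_import_root(job_store, uris):
        # Gather sizes/metadata on the leader first...
        file_to_data = get_file_sizes(uris, job_store, include_remote_files=True)
        # ...then fan the actual imports out to workers in batches whose
        # cumulative size stays under max_batch_size.
        return ImportsJob(
            file_to_data,
            max_batch_size=1 * 1024**3,
            import_worker_disk=8 * 1024**3,
        )

    # The returned job's rv() promises the (file mapping, metadata) tuple above.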
4124
+
4125
+
3463
4126
  class Promise:
3464
4127
  """
3465
4128
  References a return value from a method as a *promise* before the method itself is run.
@@ -3520,7 +4183,9 @@ class Promise:
3520
4183
  def __new__(cls, *args) -> "Promise":
3521
4184
  """Instantiate this Promise."""
3522
4185
  if len(args) != 2:
3523
- raise RuntimeError("Cannot instantiate promise. Invalid number of arguments given (Expected 2).")
4186
+ raise RuntimeError(
4187
+ "Cannot instantiate promise. Invalid number of arguments given (Expected 2)."
4188
+ )
3524
4189
  if isinstance(args[0], Job):
3525
4190
  # Regular instantiation when promise is created, before it is being pickled
3526
4191
  return super().__new__(cls)
@@ -3541,6 +4206,7 @@ class Promise:
3541
4206
  value = safeUnpickleFromStream(fileHandle)
3542
4207
  return value
3543
4208
 
4209
+
3544
4210
  # Machinery for type-safe-ish Toil Python workflows.
3545
4211
  #
3546
4212
  # TODO: Until we make Promise generic on the promised type, and work out how to
@@ -3548,12 +4214,13 @@ class Promise:
3548
4214
  # method returns, this won't actually be type-safe, because any Promise will be
3549
4215
  # a Promised[] for any type.
3550
4216
 
3551
- T = TypeVar('T')
4217
+ T = TypeVar("T")
3552
4218
  # We have type shorthand for a promised value.
3553
4219
  # Uses a generic type alias, so you can have a Promised[T]. See <https://github.com/python/mypy/pull/2378>.
3554
4220
 
3555
4221
  Promised = Union[Promise, T]
3556
4222
 
4223
+
3557
4224
  def unwrap(p: Promised[T]) -> T:
3558
4225
  """
3559
4226
  Function for ensuring you actually have a promised value, and not just a promise.
@@ -3562,9 +4229,10 @@ def unwrap(p: Promised[T]) -> T:
3562
4229
  The "unwrap" terminology is borrowed from Rust.
3563
4230
  """
3564
4231
  if isinstance(p, Promise):
3565
- raise TypeError(f'Attempted to unwrap a value that is still a Promise: {p}')
4232
+ raise TypeError(f"Attempted to unwrap a value that is still a Promise: {p}")
3566
4233
  return p
3567
4234
 
4235
+
3568
4236
  def unwrap_all(p: Sequence[Promised[T]]) -> Sequence[T]:
3569
4237
  """
3570
4238
  Function for ensuring you actually have a collection of promised values,
@@ -3574,9 +4242,12 @@ def unwrap_all(p: Sequence[Promised[T]]) -> Sequence[T]:
3574
4242
  """
3575
4243
  for i, item in enumerate(p):
3576
4244
  if isinstance(item, Promise):
3577
- raise TypeError(f'Attempted to unwrap a value at index {i} that is still a Promise: {item}')
4245
+ raise TypeError(
4246
+ f"Attempted to unwrap a value at index {i} that is still a Promise: {item}"
4247
+ )
3578
4248
  return p
3579
4249
 
4250
+
3580
4251
  class PromisedRequirement:
3581
4252
  """
3582
4253
  Class for dynamically allocating job function resource requirements.
@@ -3603,13 +4274,15 @@ class PromisedRequirement:
3603
4274
  :param args: variable length argument list
3604
4275
  :type args: int or .Promise
3605
4276
  """
3606
- if hasattr(valueOrCallable, '__call__'):
4277
+ if hasattr(valueOrCallable, "__call__"):
3607
4278
  if len(args) == 0:
3608
- raise RuntimeError('Need parameters for PromisedRequirement function.')
4279
+ raise RuntimeError("Need parameters for PromisedRequirement function.")
3609
4280
  func = valueOrCallable
3610
4281
  else:
3611
4282
  if len(args) != 0:
3612
- raise RuntimeError('Define a PromisedRequirement function to handle multiple arguments.')
4283
+ raise RuntimeError(
4284
+ "Define a PromisedRequirement function to handle multiple arguments."
4285
+ )
3613
4286
  func = lambda x: x
3614
4287
  args = [valueOrCallable]
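PromisedRequirement defers a resource requirement until the promises it depends on are fulfilled: either a single promise, or a function applied to one or more promises, as the constructor above distinguishes. A short sketch; get_size and process are illustrative functions:

    import os

    from toil.job import Job, PromisedRequirement

    def get_size(job, path):
        return os.path.getsize(path)

    def process(job, path):
        return open(path).read()

    def make_root(path):
        root = Job.wrapJobFn(get_size, path)
        # Give the child twice the input's size as its disk requirement.
        root.addChildJobFn(
            process, path,
            disk=PromisedRequirement(lambda s: 2 * s, root.rv()),
        )
        return root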
3615
4288
 
@@ -3622,7 +4295,7 @@ class PromisedRequirement:
3622
4295
  return func(*self._args)
3623
4296
 
3624
4297
  @staticmethod
3625
- def convertPromises(kwargs: Dict[str, Any]) -> bool:
4298
+ def convertPromises(kwargs: dict[str, Any]) -> bool:
3626
4299
  """
3627
4300
  Return True if reserved resource keyword is a Promise or PromisedRequirement instance.
3628
4301
 
@@ -3651,15 +4324,15 @@ class UnfulfilledPromiseSentinel:
3651
4324
  self.file_id = file_id
3652
4325
 
3653
4326
  @staticmethod
3654
- def __setstate__(stateDict: Dict[str, Any]) -> None:
4327
+ def __setstate__(stateDict: dict[str, Any]) -> None:
3655
4328
  """
3656
4329
  Only called when unpickling.
3657
4330
 
3658
4331
  This won't be unpickled unless the promise wasn't resolved, so we throw
3659
4332
  an exception.
3660
4333
  """
3661
- jobName = stateDict['fulfillingJobName']
3662
- file_id = stateDict['file_id']
4334
+ jobName = stateDict["fulfillingJobName"]
4335
+ file_id = stateDict["file_id"]
3663
4336
  raise RuntimeError(
3664
4337
  f"This job was passed promise {file_id} that wasn't yet resolved when it "
3665
4338
  f"ran. The job {jobName} that fulfills this promise hasn't yet "