toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/common.py CHANGED
@@ -23,76 +23,68 @@ import tempfile
 import time
 import uuid
 import warnings
-
-from ruamel.yaml import YAML
-from ruamel.yaml.comments import CommentedMap
-from configargparse import ArgParser, YAMLConfigFileParser
-from argparse import (SUPPRESS,
-                      ArgumentDefaultsHelpFormatter,
-                      ArgumentParser,
-                      Namespace,
-                      _ArgumentGroup, Action, _StoreFalseAction, _StoreTrueAction, _AppendAction)
+from argparse import (
+    SUPPRESS,
+    ArgumentDefaultsHelpFormatter,
+    ArgumentParser,
+    Namespace,
+    _ArgumentGroup,
+    _StoreFalseAction,
+    _StoreTrueAction,
+)
 from functools import lru_cache
 from types import TracebackType
-from typing import (IO,
-                    TYPE_CHECKING,
-                    Any,
-                    Callable,
-                    ContextManager,
-                    Dict,
-                    List,
-                    Optional,
-                    Set,
-                    Tuple,
-                    Type,
-                    TypeVar,
-                    Union,
-                    cast,
-                    overload)
-from urllib.parse import urlparse, unquote, quote
+from typing import (
+    IO,
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    ContextManager,
+    Literal,
+    Optional,
+    TypeVar,
+    Union,
+    cast,
+    overload,
+)
+from urllib.parse import quote, unquote, urlparse
 
 import requests
-
-from toil.options.common import add_base_toil_options, JOBSTORE_HELP
-from toil.options.cwl import add_cwl_options
-from toil.options.wdl import add_wdl_options
-
-if sys.version_info >= (3, 8):
-    from typing import Literal
-else:
-    from typing_extensions import Literal
+from configargparse import ArgParser, YAMLConfigFileParser
+from ruamel.yaml import YAML
+from ruamel.yaml.comments import CommentedMap
 
 from toil import logProcessContext, lookupEnvVar
-from toil.batchSystems.options import (add_all_batchsystem_options,
-                                       set_batchsystem_options)
-from toil.bus import (ClusterDesiredSizeMessage,
-                      ClusterSizeMessage,
-                      JobCompletedMessage,
-                      JobFailedMessage,
-                      JobIssuedMessage,
-                      JobMissingMessage,
-                      MessageBus,
-                      QueueSizeMessage)
+from toil.batchSystems.options import set_batchsystem_options
+from toil.bus import (
+    ClusterDesiredSizeMessage,
+    ClusterSizeMessage,
+    JobCompletedMessage,
+    JobFailedMessage,
+    JobIssuedMessage,
+    JobMissingMessage,
+    MessageBus,
+    QueueSizeMessage,
+    gen_message_bus_path,
+)
 from toil.fileStores import FileID
-from toil.lib.aws import zone_to_region, build_tag_dict_from_env
 from toil.lib.compatibility import deprecated
-from toil.lib.io import try_path, AtomicFileCreate
+from toil.lib.io import AtomicFileCreate, try_path
 from toil.lib.retry import retry
-from toil.provisioners import (add_provisioner_options,
-                               cluster_factory,
-                               parse_node_types)
+from toil.lib.threading import ensure_filesystem_lockable
+from toil.options.common import JOBSTORE_HELP, add_base_toil_options
+from toil.options.cwl import add_cwl_options
+from toil.options.runner import add_runner_options
+from toil.options.wdl import add_wdl_options
+from toil.provisioners import add_provisioner_options, cluster_factory
 from toil.realtimeLogger import RealtimeLogger
-from toil.statsAndLogging import (add_logging_options,
-                                  set_logging_from_options)
-from toil.version import dockerRegistry, dockerTag, version, baseVersion
+from toil.statsAndLogging import add_logging_options, set_logging_from_options
+from toil.version import dockerRegistry, dockerTag, version
 
 if TYPE_CHECKING:
     from toil.batchSystems.abstractBatchSystem import AbstractBatchSystem
     from toil.batchSystems.options import OptionSetter
-    from toil.job import (AcceleratorRequirement,
-                          Job,
-                          JobDescription,
-                          TemporaryID)
+    from toil.job import AcceleratorRequirement, Job, JobDescription, TemporaryID
     from toil.jobStores.abstractJobStore import AbstractJobStore
     from toil.provisioners.abstractProvisioner import AbstractProvisioner
     from toil.resource import ModuleDescriptor
@@ -108,6 +100,7 @@ DEFAULT_CONFIG_FILE: str = os.path.join(TOIL_HOME_DIR, "default.yaml")
 
 class Config:
     """Class to represent configuration operations for a toil workflow run."""
+
     logFile: Optional[str]
     logRotating: bool
     cleanWorkDir: str
@@ -124,6 +117,7 @@ class Config:
     kubernetes_owner: Optional[str]
     kubernetes_service_account: Optional[str]
     kubernetes_pod_timeout: float
+    kubernetes_privileged: bool
     tes_endpoint: str
     tes_user: str
     tes_password: str
@@ -137,6 +131,7 @@ class Config:
     """The backing scheduler will be instructed, if possible, to save logs
     to this directory, where the leader can read them."""
     statePollingWait: int
+    state_polling_timeout: int
     disableAutoDeployment: bool
 
     # Core options
@@ -148,6 +143,7 @@ class Config:
     workflowAttemptNumber: int
     jobStore: str
     logLevel: str
+    colored_logs: bool
     workDir: Optional[str]
     coordination_dir: Optional[str]
     noStdOutErr: bool
@@ -167,26 +163,26 @@ class Config:
     caching: Optional[bool]
     symlinkImports: bool
     moveOutputs: bool
+    symlink_job_store_reads: bool
 
     # Autoscaling options
     provisioner: Optional[str]
-    nodeTypes: List[Tuple[Set[str], Optional[float]]]
-    minNodes: List[int]
-    maxNodes: List[int]
+    nodeTypes: list[tuple[set[str], Optional[float]]]
+    minNodes: list[int]
+    maxNodes: list[int]
     targetTime: float
     betaInertia: float
     scaleInterval: int
     preemptibleCompensation: float
     nodeStorage: int
-    nodeStorageOverrides: List[str]
+    nodeStorageOverrides: list[str]
     metrics: bool
     assume_zero_overhead: bool
 
     # Parameters to limit service jobs, so preventing deadlock scheduling scenarios
     maxPreemptibleServiceJobs: int
     maxServiceJobs: int
-    deadlockWait: Union[
-        float, int]
+    deadlockWait: Union[float, int]
     deadlockCheckInterval: Union[float, int]
 
     # Resource requirements
@@ -197,7 +193,7 @@ class Config:
     # TODO: These names are generated programmatically in
     # Requirer._fetchRequirement so we can't use snake_case until we fix
     # that (and add compatibility getters/setters?)
-    defaultAccelerators: List['AcceleratorRequirement']
+    defaultAccelerators: list["AcceleratorRequirement"]
     maxCores: int
     maxMemory: int
     maxDisk: int
@@ -208,6 +204,7 @@ class Config:
     doubleMem: bool
     maxJobDuration: int
     rescueJobsFrequency: int
+    job_store_timeout: float
 
     # Log management
     maxLogFileSize: int
@@ -218,7 +215,7 @@ class Config:
     realTimeLogging: bool
 
     # Misc
-    environment: Dict[str, str]
+    environment: dict[str, str]
     disableChaining: bool
     disableJobStoreChecksumVerification: bool
     sseKey: Optional[str]
@@ -239,6 +236,8 @@ class Config:
     # CWL
     cwl: bool
 
+    memory_is_product: bool
+
    def __init__(self) -> None:
        # only default options that are not CLI options defined here (thus CLI options are centralized)
        self.cwl = False  # will probably remove later
@@ -276,8 +275,7 @@ class Config:
     def setOptions(self, options: Namespace) -> None:
         """Creates a config object from the options object."""
 
-        def set_option(option_name: str,
-                       old_names: Optional[List[str]] = None) -> None:
+        def set_option(option_name: str, old_names: Optional[list[str]] = None) -> None:
             """
             Determine the correct value for the given option.
 
@@ -285,8 +283,6 @@ class Config:
 
             1. options object under option_name
             2. options object under old_names
-            3. environment variables in env
-            4. provided default value
 
             Selected option value is run through parsing_funtion if it is set.
             Then the parsed value is run through check_function to check it for
@@ -302,15 +298,21 @@ class Config:
                 for old_name in old_names:
                     # If the option is already set with the new name and not the old name
                     # prioritize the new name over the old name and break
-                    if option_value is not None and option_value != [] and option_value != {}:
+                    if (
+                        option_value is not None
+                        and option_value != []
+                        and option_value != {}
+                    ):
                         break
                     # Try all the old names in case user code is setting them
                     # in an options object.
                     # This does assume that all deprecated options have a default value of None
                     if getattr(options, old_name, None) is not None:
-                        warnings.warn(f'Using deprecated option field {old_name} to '
-                                      f'provide value for config field {option_name}',
-                                      DeprecationWarning)
+                        warnings.warn(
+                            f"Using deprecated option field {old_name} to "
+                            f"provide value for config field {option_name}",
+                            DeprecationWarning,
+                        )
                         option_value = getattr(options, old_name)
             if option_value is not None or not hasattr(self, option_name):
                 setattr(self, option_name, option_value)
@@ -325,18 +327,20 @@ class Config:
         set_option("stats")
         set_option("cleanWorkDir")
         set_option("clean")
-        set_option('clusterStats')
+        set_option("clusterStats")
         set_option("restart")
 
         # Batch system options
         set_option("batchSystem")
-        set_batchsystem_options(None, cast("OptionSetter",
-                                           set_option))  # None as that will make set_batchsystem_options iterate through all batch systems and set their corresponding values
+        set_batchsystem_options(
+            None, cast("OptionSetter", set_option)
+        )  # None as that will make set_batchsystem_options iterate through all batch systems and set their corresponding values
 
         # File store options
         set_option("symlinkImports", old_names=["linkImports"])
         set_option("moveOutputs", old_names=["moveExports"])
         set_option("caching", old_names=["enableCaching"])
+        set_option("symlink_job_store_reads")
 
         # Autoscaling options
         set_option("provisioner")
@@ -375,6 +379,7 @@ class Config:
         set_option("doubleMem")
         set_option("maxJobDuration")
         set_option("rescueJobsFrequency")
+        set_option("job_store_timeout")
 
         # Log management
         set_option("maxLogFileSize")
@@ -383,6 +388,16 @@ class Config:
         set_option("writeLogsFromAllJobs")
         set_option("write_messages")
 
+        if self.write_messages is None:
+            # The user hasn't specified a place for the message bus so we
+            # should make one.
+            # pass in coordination_dir for toil-cwl-runner; we want to obey --tmpdir-prefix
+            # from cwltool and we change the coordination_dir when detected. we don't want
+            # to make another config attribute so put the message bus in the already prefixed dir
+            # if a coordination_dir is provided normally, we can still put the bus in there
+            # as the coordination dir should serve a similar purpose to the tmp directory
+            self.write_messages = gen_message_bus_path(self.coordination_dir)
+
         # Misc
         set_option("environment")
 
@@ -401,28 +416,45 @@ class Config:
         set_option("badWorker")
         set_option("badWorkerFailInterval")
         set_option("logLevel")
+        set_option("colored_logs")
 
-        self.check_configuration_consistency()
+        set_option("memory_is_product")
 
-        logger.debug("Loaded configuration: %s", vars(options))
+        # Apply overrides as highest priority
+        # Override workDir with value of TOIL_WORKDIR_OVERRIDE if it exists
+        if os.getenv("TOIL_WORKDIR_OVERRIDE") is not None:
+            self.workDir = os.getenv("TOIL_WORKDIR_OVERRIDE")
+        # Override coordination_dir with value of TOIL_COORDINATION_DIR_OVERRIDE if it exists
+        if os.getenv("TOIL_COORDINATION_DIR_OVERRIDE") is not None:
+            self.coordination_dir = os.getenv("TOIL_COORDINATION_DIR_OVERRIDE")
+
+        self.check_configuration_consistency()
 
     def check_configuration_consistency(self) -> None:
         """Old checks that cannot be fit into an action class for argparse"""
         if self.writeLogs and self.writeLogsGzip:
-            raise ValueError("Cannot use both --writeLogs and --writeLogsGzip at the same time.")
+            raise ValueError(
+                "Cannot use both --writeLogs and --writeLogsGzip at the same time."
+            )
         if self.writeLogsFromAllJobs and not self.writeLogs and not self.writeLogsGzip:
-            raise ValueError("To enable --writeLogsFromAllJobs, either --writeLogs or --writeLogsGzip must be set.")
+            raise ValueError(
+                "To enable --writeLogsFromAllJobs, either --writeLogs or --writeLogsGzip must be set."
+            )
         for override in self.nodeStorageOverrides:
             tokens = override.split(":")
             if not any(tokens[0] in n[0] for n in self.nodeTypes):
-                raise ValueError("Instance type in --nodeStorageOverrides must be in --nodeTypes")
+                raise ValueError(
+                    "Instance type in --nodeStorageOverrides must be in --nodeTypes"
+                )
 
         if self.stats:
             if self.clean != "never" and self.clean is not None:
-                logger.warning("Contradicting options passed: Clean flag is set to %s "
-                               "despite the stats flag requiring "
-                               "the jobStore to be intact at the end of the run. "
-                               "Setting clean to \'never\'." % self.clean)
+                logger.warning(
+                    "Contradicting options passed: Clean flag is set to %s "
+                    "despite the stats flag requiring "
+                    "the jobStore to be intact at the end of the run. "
+                    "Setting clean to 'never'." % self.clean
+                )
                 self.clean = "never"
 
     def __eq__(self, other: object) -> bool:
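Aside on the hunk above: the consistency check now runs after a new override block, so the `TOIL_WORKDIR_OVERRIDE` and `TOIL_COORDINATION_DIR_OVERRIDE` environment variables win over anything set on the command line or in the config file. A minimal standalone sketch of that precedence order (illustrative names, not Toil's API):

```python
import os

def resolve_workdir(cli_value, config_value):
    # Config file first, CLI overrides it, and the environment override
    # wins over both, mirroring the TOIL_WORKDIR_OVERRIDE handling above.
    value = config_value
    if cli_value is not None:
        value = cli_value
    override = os.getenv("TOIL_WORKDIR_OVERRIDE")
    if override is not None:
        value = override
    return value

print(resolve_workdir("/cli/work", "/config/work"))
```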
@@ -442,7 +474,9 @@ def check_and_create_toil_home_dir() -> None:
 
     dir_path = try_path(TOIL_HOME_DIR)
     if dir_path is None:
-        raise RuntimeError(f"Cannot create or access Toil configuration directory {TOIL_HOME_DIR}")
+        raise RuntimeError(
+            f"Cannot create or access Toil configuration directory {TOIL_HOME_DIR}"
+        )
 
 
 def check_and_create_default_config_file() -> None:
@@ -500,9 +534,23 @@ def generate_config(filepath: str) -> None:
     # and --caching respectively
     # Skip StoreTrue and StoreFalse options that have opposite defaults as including it in the config would
     # override those defaults
-    deprecated_or_redundant_options = ("help", "config", "logCritical", "logDebug", "logError", "logInfo", "logOff",
-                                       "logWarning", "linkImports", "noLinkImports", "moveExports", "noMoveExports",
-                                       "enableCaching", "disableCaching", "version")
+    deprecated_or_redundant_options = (
+        "help",
+        "config",
+        "logCritical",
+        "logDebug",
+        "logError",
+        "logInfo",
+        "logOff",
+        "logWarning",
+        "linkImports",
+        "noLinkImports",
+        "moveExports",
+        "noMoveExports",
+        "enableCaching",
+        "disableCaching",
+        "version",
+    )
 
     def create_config_dict_from_parser(parser: ArgumentParser) -> CommentedMap:
         """
@@ -513,9 +561,12 @@ def generate_config(filepath: str) -> None:
         :return: CommentedMap of what to put into the config file
         """
         data = CommentedMap()  # to preserve order
-        group_title_key: Dict[str, str] = dict()
+        group_title_key: dict[str, str] = dict()
         for action in parser._actions:
-            if any(s.replace("-", "") in deprecated_or_redundant_options for s in action.option_strings):
+            if any(
+                s.replace("-", "") in deprecated_or_redundant_options
+                for s in action.option_strings
+            ):
                 continue
             # if action is StoreFalse and default is True then don't include
             if isinstance(action, _StoreFalseAction) and action.default is True:
@@ -527,8 +578,11 @@ def generate_config(filepath: str) -> None:
             if len(action.option_strings) == 0:
                 continue
 
-            option_string = action.option_strings[0] if action.option_strings[0].find("--") != -1 else \
-                action.option_strings[1]
+            option_string = (
+                action.option_strings[0]
+                if action.option_strings[0].find("--") != -1
+                else action.option_strings[1]
+            )
             option = option_string[2:]
 
             default = action.default
@@ -551,12 +605,20 @@ def generate_config(filepath: str) -> None:
     add_base_toil_options(parser, jobstore_as_flag=True, cwl=False)
     toil_base_data = create_config_dict_from_parser(parser)
 
-    toil_base_data.yaml_set_start_comment("This is the configuration file for Toil. To set an option, uncomment an "
-                                          "existing option and set its value. The current values are the defaults. "
-                                          "If the default configuration file is outdated, it can be refreshed with "
-                                          "`toil config ~/.toil/default.yaml`.\n\nBASE TOIL OPTIONS\n")
+    toil_base_data.yaml_set_start_comment(
+        "This is the configuration file for Toil. To set an option, uncomment an "
+        "existing option and set its value. The current values are the defaults. "
+        "If the default configuration file is outdated, it can be refreshed with "
+        "`toil config ~/.toil/default.yaml`.\n\nBASE TOIL OPTIONS\n"
+    )
     all_data.append(toil_base_data)
 
+    parser = ArgParser(YAMLConfigFileParser())
+    add_runner_options(parser)
+    toil_cwl_data = create_config_dict_from_parser(parser)
+    toil_cwl_data.yaml_set_start_comment("\nTOIL SHARED CWL AND WDL RUNNER OPTIONS")
+    all_data.append(toil_cwl_data)
+
     parser = ArgParser(YAMLConfigFileParser())
     add_cwl_options(parser)
     toil_cwl_data = create_config_dict_from_parser(parser)
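Aside on the hunks above (see also the `yaml.dump` change in the next hunk): `generate_config` no longer post-processes `yaml.dump_to_string` output line by line; it hands ruamel's `dump` a `transform` that prefixes every non-empty line with `#`, so the generated default config ships fully commented out. A standalone sketch of the same trick (assumes only `ruamel.yaml` is installed):

```python
import re
import sys

from ruamel.yaml import YAML

# Toy stand-in for one of the option dicts collected in all_data.
data = {"maxCores": 8, "workDir": None}

yaml = YAML(typ="rt")
# re.MULTILINE makes ^ match at every line start, so each non-empty
# line of the YAML dump gains a leading "#".
yaml.dump(
    data,
    sys.stdout,
    transform=lambda s: re.sub(r"^(.)", r"#\1", s, flags=re.MULTILINE),
)
```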
@@ -580,38 +642,52 @@ def generate_config(filepath: str) -> None:
     with AtomicFileCreate(filepath) as temp_path:
         with open(temp_path, "w") as f:
             f.write("config_version: 1.0\n")
-            yaml = YAML(typ=['rt', 'string'])
+            yaml = YAML(typ="rt")
             for data in all_data:
-                if "config_version" in data:
-                    del data["config_version"]
-                for line in yaml.dump_to_string(data).split("\n"):  # type: ignore[attr-defined]
-                    if line:
-                        f.write("#")
-                    f.write(line)
-                    f.write("\n")
+                data.pop("config_version", None)
+                yaml.dump(
+                    data,
+                    f,
+                    transform=lambda s: re.sub(r"^(.)", r"#\1", s, flags=re.MULTILINE),
+                )
 
 
 def parser_with_common_options(
-    provisioner_options: bool = False, jobstore_option: bool = True, prog: Optional[str] = None
+    provisioner_options: bool = False,
+    jobstore_option: bool = True,
+    prog: Optional[str] = None,
+    default_log_level: Optional[int] = None,
 ) -> ArgParser:
-    parser = ArgParser(prog=prog or "Toil", formatter_class=ArgumentDefaultsHelpFormatter)
+    parser = ArgParser(
+        prog=prog or "Toil", formatter_class=ArgumentDefaultsHelpFormatter
+    )
 
     if provisioner_options:
         add_provisioner_options(parser)
 
     if jobstore_option:
-        parser.add_argument('jobStore', type=str, help=JOBSTORE_HELP)
+        parser.add_argument("jobStore", type=str, help=JOBSTORE_HELP)
 
     # always add these
-    add_logging_options(parser)
-    parser.add_argument("--version", action='version', version=version)
-    parser.add_argument("--tempDirRoot", dest="tempDirRoot", type=str, default=tempfile.gettempdir(),
-                        help="Path to where temporary directory containing all temp files are created, "
-                             "by default generates a fresh tmp dir with 'tempfile.gettempdir()'.")
+    add_logging_options(parser, default_log_level)
+    parser.add_argument("--version", action="version", version=version)
+    parser.add_argument(
+        "--tempDirRoot",
+        dest="tempDirRoot",
+        type=str,
+        default=tempfile.gettempdir(),
+        help="Path to where temporary directory containing all temp files are created, "
+        "by default generates a fresh tmp dir with 'tempfile.gettempdir()'.",
+    )
     return parser
 
 
-def addOptions(parser: ArgumentParser, jobstore_as_flag: bool = False, cwl: bool = False, wdl: bool = False) -> None:
+def addOptions(
+    parser: ArgumentParser,
+    jobstore_as_flag: bool = False,
+    cwl: bool = False,
+    wdl: bool = False,
+) -> None:
     """
     Add all Toil command line options to a parser.
 
@@ -624,10 +700,13 @@ def addOptions(parser: ArgumentParser, jobstore_as_flag: bool = False, cwl: bool
     :param wdl: Whether WDL options are expected. If so, WDL options won't be suppressed.
     """
     if cwl and wdl:
-        raise RuntimeError("CWL and WDL cannot both be true at the same time when adding options.")
+        raise RuntimeError(
+            "CWL and WDL cannot both be true at the same time when adding options."
+        )
     if not (isinstance(parser, ArgumentParser) or isinstance(parser, _ArgumentGroup)):
         raise ValueError(
-            f"Unanticipated class: {parser.__class__}. Must be: argparse.ArgumentParser or ArgumentGroup.")
+            f"Unanticipated class: {parser.__class__}. Must be: argparse.ArgumentParser or ArgumentGroup."
+        )
 
     if isinstance(parser, ArgParser):
         # in case the user passes in their own configargparse instance instead of calling getDefaultArgumentParser()
@@ -637,10 +716,12 @@ def addOptions(parser: ArgumentParser, jobstore_as_flag: bool = False, cwl: bool
     else:
         # configargparse advertises itself as a drag and drop replacement, and running the normal argparse ArgumentParser
         # through this code still seems to work (with the exception of --config and environmental variables)
-        warnings.warn(f'Using deprecated library argparse for options parsing.'
-                      f'This will not parse config files or use environment variables.'
-                      f'Use configargparse instead or call Job.Runner.getDefaultArgumentParser()',
-                      DeprecationWarning)
+        warnings.warn(
+            f"Using deprecated library argparse for options parsing."
+            f"This will not parse config files or use environment variables."
+            f"Use configargparse instead or call Job.Runner.getDefaultArgumentParser()",
+            DeprecationWarning,
+        )
 
     check_and_create_default_config_file()
     # Check on the config file to make sure it is sensible
@@ -649,16 +730,17 @@ def addOptions(parser: ArgumentParser, jobstore_as_flag: bool = False, cwl: bool
         # If we have an empty config file, someone has to manually delete
         # it before we will work again.
         raise RuntimeError(
-            f"Config file {DEFAULT_CONFIG_FILE} exists but is empty. Delete it! Stat says: {config_status}")
+            f"Config file {DEFAULT_CONFIG_FILE} exists but is empty. Delete it! Stat says: {config_status}"
+        )
     try:
-        with open(DEFAULT_CONFIG_FILE, "r") as f:
+        with open(DEFAULT_CONFIG_FILE) as f:
             yaml = YAML(typ="safe")
             s = yaml.load(f)
             logger.debug("Initialized default configuration: %s", json.dumps(s))
     except:
         # Something went wrong reading the default config, so dump its
         # contents to the log.
-        logger.info("Configuration file contents: %s", open(DEFAULT_CONFIG_FILE, 'r').read())
+        logger.info("Configuration file contents: %s", open(DEFAULT_CONFIG_FILE).read())
         raise
 
     # Add base toil options
@@ -667,6 +749,8 @@ def addOptions(parser: ArgumentParser, jobstore_as_flag: bool = False, cwl: bool
     # This is done so the config file can hold all available options
     add_cwl_options(parser, suppress=not cwl)
     add_wdl_options(parser, suppress=not wdl)
+    # Add shared runner options
+    add_runner_options(parser, cwl=cwl, wdl=wdl)
 
     def check_arguments(typ: str) -> None:
         """
@@ -680,36 +764,69 @@ def addOptions(parser: ArgumentParser, jobstore_as_flag: bool = False, cwl: bool
         add_cwl_options(check_parser)
         if typ == "cwl":
             add_wdl_options(check_parser)
+
         for action in check_parser._actions:
             action.default = SUPPRESS
-        other_options, _ = check_parser.parse_known_args(sys.argv[1:], ignore_help_args=True)
+        other_options, _ = check_parser.parse_known_args(
+            sys.argv[1:], ignore_help_args=True
+        )
         if len(vars(other_options)) != 0:
-            raise parser.error(f"{'WDL' if typ == 'cwl' else 'CWL'} options are not allowed on the command line.")
+            raise parser.error(
+                f"{'WDL' if typ == 'cwl' else 'CWL'} options are not allowed on the command line."
+            )
 
     # if cwl is set, format the namespace for cwl and check that wdl options are not set on the command line
     if cwl:
-        parser.add_argument("cwltool", type=str, help="CWL file to run.")
-        parser.add_argument("cwljob", nargs="*", help="Input file or CWL options. If CWL workflow takes an input, "
-                                                      "the name of the input can be used as an option. "
-                                                      "For example: \"%(prog)s workflow.cwl --file1 file\". "
-                                                      "If an input has the same name as a Toil option, pass '--' before it.")
+        # So we can manually write out the help for this and the inputs
+        # file/workflow options in the argument parser description, we suppress
+        # help for this option.
+        parser.add_argument("cwltool", metavar="WORKFLOW", type=str, help=SUPPRESS)
+        # We also need a "cwljob" command line argument, holding possibly a
+        # positional input file and possibly a whole string of option flags
+        # only known to the workflow.
+        #
+        # We don't want to try and parse out the positional argument here
+        # since, on Python 3.12, we can grab what's really supposed to be an
+        # argument to a workflow-defined option.
+        #
+        # We don't want to use the undocumented argparse.REMAINDER, since that
+        # will eat any Toil-defined option flags after the first positional
+        # argument.
+        #
+        # So we just use parse_known_args and dump all unknown args into it,
+        # and manually write help text in the argparse description. So don't
+        # define it here.
         check_arguments(typ="cwl")
 
     # if wdl is set, format the namespace for wdl and check that cwl options are not set on the command line
     if wdl:
-        parser.add_argument("wdl_uri", type=str,
-                            help="WDL document URI")
-        parser.add_argument("inputs_uri", type=str, nargs='?',
-                            help="WDL input JSON URI")
-        parser.add_argument("--input", "-i", dest="inputs_uri", type=str,
-                            help="WDL input JSON URI")
+        parser.add_argument("wdl_uri", type=str, help="WDL document URI")
+        # We want to have an inputs_url that can be either a positional or a flag.
+        # We can't just have them share a single-item dest in Python 3.12;
+        # argparse does not guarantee that will work, and we can get the
+        # positional default value clobbering the flag. See
+        # <https://stackoverflow.com/a/60531838>.
+        # So we make them accumulate to the same list.
+        # Note that we will get a None in the list when there's no positional inputs.
+        parser.add_argument(
+            "inputs_uri", type=str, nargs='?', action="append", help="WDL input JSON URI"
+        )
+        parser.add_argument(
+            "--input",
+            "--inputs",
+            "-i",
+            dest="inputs_uri",
+            type=str,
+            action="append",
+            help="WDL input JSON URI",
+        )
        check_arguments(typ="wdl")
 
 
 @lru_cache(maxsize=None)
 def getNodeID() -> str:
     """
-    Return unique ID of the current node (host). The resulting string will be convertable to a uuid.UUID.
+    Return unique ID of the current node (host). The resulting string will be convertible to a uuid.UUID.
 
     Tries several methods until success. The returned ID should be identical across calls from different processes on
     the same node at least until the next OS reboot.
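Aside on the hunk above: for WDL runs, the inputs JSON may now arrive either positionally or via `--input`/`--inputs`/`-i`, with both routes accumulating into one `inputs_uri` list (and, per the comment in the diff, a `None` appended when the positional is absent). A standalone sketch of parsing and then resolving that list (the `pick_inputs` helper is illustrative, not Toil code):

```python
from argparse import ArgumentParser

parser = ArgumentParser()
# Positional and flag share one dest by both appending to it.
parser.add_argument("inputs_uri", type=str, nargs="?", action="append")
parser.add_argument("--input", "--inputs", "-i", dest="inputs_uri",
                    type=str, action="append")

def pick_inputs(accumulated):
    # Take the last real value; an absent positional contributes None.
    return next((u for u in reversed(accumulated or []) if u is not None), None)

ns = parser.parse_args(["--input", "inputs.json"])
print(pick_inputs(ns.inputs_uri))  # inputs.json
```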
@@ -725,15 +842,20 @@ def getNodeID() -> str:
             with open(idSourceFile) as inp:
                 nodeID = inp.readline().strip()
         except OSError:
-            logger.warning(f"Exception when trying to read ID file {idSourceFile}. "
-                           f"Will try next method to get node ID.", exc_info=True)
+            logger.warning(
+                f"Exception when trying to read ID file {idSourceFile}. "
+                f"Will try next method to get node ID.",
+                exc_info=True,
+            )
         else:
             if len(nodeID.split()) == 1:
                 logger.debug(f"Obtained node ID {nodeID} from file {idSourceFile}")
                 break
             else:
-                logger.warning(f"Node ID {nodeID} from file {idSourceFile} contains spaces. "
-                               f"Will try next method to get node ID.")
+                logger.warning(
+                    f"Node ID {nodeID} from file {idSourceFile} contains spaces. "
+                    f"Will try next method to get node ID."
+                )
     else:
         nodeIDs = []
         for i_call in range(2):
@@ -747,18 +869,22 @@ def getNodeID() -> str:
         if nodeIDs[0] == nodeIDs[1]:
             nodeID = nodeIDs[0]
         else:
-            logger.warning(f"Different node IDs {nodeIDs} received from repeated calls to uuid.getnode(). "
-                           f"You should use another method to generate node ID.")
+            logger.warning(
+                f"Different node IDs {nodeIDs} received from repeated calls to uuid.getnode(). "
+                f"You should use another method to generate node ID."
+            )
 
         logger.debug(f"Obtained node ID {nodeID} from uuid.getnode()")
     if not nodeID:
-        logger.warning("Failed to generate stable node ID, returning empty string. If you see this message with a "
-                       "work dir on a shared file system when using workers running on multiple nodes, you might "
-                       "experience cryptic job failures")
-    if len(nodeID.replace('-', '')) < UUID_LENGTH:
+        logger.warning(
+            "Failed to generate stable node ID, returning empty string. If you see this message with a "
+            "work dir on a shared file system when using workers running on multiple nodes, you might "
+            "experience cryptic job failures"
+        )
+    if len(nodeID.replace("-", "")) < UUID_LENGTH:
         # Some platforms (Mac) give us not enough actual hex characters.
-        # Repeat them so the result is convertable to a uuid.UUID
-        nodeID = nodeID.replace('-', '')
+        # Repeat them so the result is convertible to a uuid.UUID
+        nodeID = nodeID.replace("-", "")
         num_repeats = UUID_LENGTH // len(nodeID) + 1
         nodeID = nodeID * num_repeats
         nodeID = nodeID[:UUID_LENGTH]
@@ -771,6 +897,7 @@ class Toil(ContextManager["Toil"]):
 
     Specifically the batch system, job store, and its configuration.
     """
+
     config: Config
     _jobStore: "AbstractJobStore"
     _batchSystem: "AbstractBatchSystem"
@@ -787,7 +914,7 @@ class Toil(ContextManager["Toil"]):
         """
         super().__init__()
         self.options = options
-        self._jobCache: Dict[Union[str, "TemporaryID"], "JobDescription"] = {}
+        self._jobCache: dict[Union[str, "TemporaryID"], "JobDescription"] = {}
         self._inContextManager: bool = False
         self._inRestart: bool = False
 
@@ -801,6 +928,7 @@ class Toil(ContextManager["Toil"]):
         set_logging_from_options(self.options)
         config = Config()
         config.setOptions(self.options)
+        logger.debug("Loaded configuration: %s", vars(self.options))
         if config.jobStore is None:
             raise RuntimeError("No jobstore provided!")
         jobStore = self.getJobStore(config.jobStore)
@@ -829,10 +957,10 @@ class Toil(ContextManager["Toil"]):
         return self
 
     def __exit__(
-            self,
-            exc_type: Optional[Type[BaseException]],
-            exc_val: Optional[BaseException],
-            exc_tb: Optional[TracebackType],
+        self,
+        exc_type: Optional[type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
     ) -> Literal[False]:
         """
         Clean up after a workflow invocation.
@@ -840,24 +968,33 @@ class Toil(ContextManager["Toil"]):
         Depending on the configuration, delete the job store.
         """
         try:
-            if (exc_type is not None and self.config.clean == "onError" or
-                    exc_type is None and self.config.clean == "onSuccess" or
-                    self.config.clean == "always"):
+            if (
+                exc_type is not None
+                and self.config.clean == "onError"
+                or exc_type is None
+                and self.config.clean == "onSuccess"
+                or self.config.clean == "always"
+            ):
 
                 try:
                     if self.config.restart and not self._inRestart:
                         pass
                     else:
                         self._jobStore.destroy()
-                        logger.info("Successfully deleted the job store: %s" % str(self._jobStore))
+                        logger.info(
+                            "Successfully deleted the job store: %s"
+                            % str(self._jobStore)
+                        )
                 except:
-                    logger.info("Failed to delete the job store: %s" % str(self._jobStore))
+                    logger.info(
+                        "Failed to delete the job store: %s" % str(self._jobStore)
+                    )
                     raise
         except Exception as e:
             if exc_type is None:
                 raise
             else:
-                logger.exception('The following error was raised during clean up:')
+                logger.exception("The following error was raised during clean up:")
         self._inContextManager = False
         self._inRestart = False
         return False  # let exceptions through
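Aside on the hunk above: the reformatted condition keeps the original mixed `and`/`or` chain, which reads correctly only because `and` binds tighter than `or`. A parenthesized restatement with quick self-checks:

```python
def should_clean(failed: bool, clean: str) -> bool:
    # Explicit grouping of the __exit__ condition above.
    return (
        (failed and clean == "onError")
        or (not failed and clean == "onSuccess")
        or clean == "always"
    )

assert should_clean(True, "onError") and not should_clean(False, "onError")
assert should_clean(False, "onSuccess") and not should_clean(True, "onSuccess")
assert should_clean(True, "always") and should_clean(False, "always")
```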
@@ -875,13 +1012,24 @@ class Toil(ContextManager["Toil"]):
         """
         self._assertContextManagerUsed()
 
+        from toil.job import Job
+
+        # Check that the rootJob is an instance of the Job class
+        if not isinstance(rootJob, Job):
+            raise RuntimeError("The type of the root job is not a job.")
+
+        # Check that the rootJob has been initialized
+        rootJob.check_initialized()
+
         # Write shared files to the job store
         self._jobStore.write_leader_pid()
         self._jobStore.write_leader_node_id()
 
         if self.config.restart:
-            raise ToilRestartException('A Toil workflow can only be started once. Use '
-                                       'Toil.restart() to resume it.')
+            raise ToilRestartException(
+                "A Toil workflow can only be started once. Use "
+                "Toil.restart() to resume it."
+            )
 
         self._batchSystem = self.createBatchSystem(self.config)
         self._setupAutoDeployment(rootJob.getUserScript())
@@ -894,7 +1042,7 @@ class Toil(ContextManager["Toil"]):
         # a shared file, where we can find and unpickle it at the end of the workflow.
         # Unpickling the promise will automatically substitute the promise for the actual
         # return value.
-        with self._jobStore.write_shared_file_stream('rootJobReturnValue') as fH:
+        with self._jobStore.write_shared_file_stream("rootJobReturnValue") as fH:
             rootJob.prepareForPromiseRegistration(self._jobStore)
             promise = rootJob.rv()
             pickle.dump(promise, fH, protocol=pickle.HIGHEST_PROTOCOL)
@@ -922,15 +1070,18 @@ class Toil(ContextManager["Toil"]):
         self._jobStore.write_leader_node_id()
 
         if not self.config.restart:
-            raise ToilRestartException('A Toil workflow must be initiated with Toil.start(), '
-                                       'not restart().')
+            raise ToilRestartException(
+                "A Toil workflow must be initiated with Toil.start(), " "not restart()."
+            )
 
         from toil.job import JobException
+
         try:
             self._jobStore.load_root_job()
         except JobException:
             logger.warning(
-                'Requested restart but the workflow has already been completed; allowing exports to rerun.')
+                "Requested restart but the workflow has already been completed; allowing exports to rerun."
+            )
             return self._jobStore.get_root_job_return_value()
 
         self._batchSystem = self.createBatchSystem(self.config)
@@ -949,12 +1100,14 @@ class Toil(ContextManager["Toil"]):
         if self.config.provisioner is None:
             self._provisioner = None
         else:
-            self._provisioner = cluster_factory(provisioner=self.config.provisioner,
-                                                clusterName=None,
-                                                zone=None,  # read from instance meta-data
-                                                nodeStorage=self.config.nodeStorage,
-                                                nodeStorageOverrides=self.config.nodeStorageOverrides,
-                                                sseKey=self.config.sseKey)
+            self._provisioner = cluster_factory(
+                provisioner=self.config.provisioner,
+                clusterName=None,
+                zone=None,  # read from instance meta-data
+                nodeStorage=self.config.nodeStorage,
+                nodeStorageOverrides=self.config.nodeStorageOverrides,
+                sseKey=self.config.sseKey,
+            )
             self._provisioner.setAutoscaledNodeTypes(self.config.nodeTypes)
 
     @classmethod
@@ -967,27 +1120,30 @@ class Toil(ContextManager["Toil"]):
         :return: an instance of a concrete subclass of AbstractJobStore
         """
         name, rest = cls.parseLocator(locator)
-        if name == 'file':
+        if name == "file":
             from toil.jobStores.fileJobStore import FileJobStore
+
             return FileJobStore(rest)
-        elif name == 'aws':
+        elif name == "aws":
             from toil.jobStores.aws.jobStore import AWSJobStore
+
             return AWSJobStore(rest)
-        elif name == 'google':
+        elif name == "google":
             from toil.jobStores.googleJobStore import GoogleJobStore
+
             return GoogleJobStore(rest)
         else:
             raise RuntimeError("Unknown job store implementation '%s'" % name)
 
     @staticmethod
-    def parseLocator(locator: str) -> Tuple[str, str]:
-        if locator[0] in '/.' or ':' not in locator:
-            return 'file', locator
+    def parseLocator(locator: str) -> tuple[str, str]:
+        if locator[0] in "/." or ":" not in locator:
+            return "file", locator
         else:
             try:
-                name, rest = locator.split(':', 1)
+                name, rest = locator.split(":", 1)
             except ValueError:
-                raise RuntimeError('Invalid job store locator syntax.')
+                raise RuntimeError("Invalid job store locator syntax.")
             else:
                 return name, rest
 
@@ -995,7 +1151,7 @@ class Toil(ContextManager["Toil"]):
     def buildLocator(name: str, rest: str) -> str:
         if ":" in name:
             raise ValueError(f"Can't have a ':' in the name: '{name}'.")
-        return f'{name}:{rest}'
+        return f"{name}:{rest}"
 
     @classmethod
     def resumeJobStore(cls, locator: str) -> "AbstractJobStore":
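Aside on the hunks above: apart from quote style, `parseLocator` is unchanged; paths and bare, colon-free names resolve to the `file` job store, everything else splits on the first `:`. A standalone sketch mirroring that rule (helper name illustrative):

```python
def parse_locator(locator: str) -> tuple[str, str]:
    # Paths ("/..." or "./...") and colon-free names are file job stores.
    if locator[0] in "/." or ":" not in locator:
        return "file", locator
    name, rest = locator.split(":", 1)
    return name, rest

print(parse_locator("/tmp/my-store"))           # ('file', '/tmp/my-store')
print(parse_locator("my-store"))                # ('file', 'my-store')
print(parse_locator("aws:us-west-2:my-store"))  # ('aws', 'us-west-2:my-store')
```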
@@ -1012,30 +1168,39 @@ class Toil(ContextManager["Toil"]):
 
         :return: an instance of a concrete subclass of AbstractBatchSystem
         """
-        kwargs = dict(config=config,
-                      maxCores=config.maxCores,
-                      maxMemory=config.maxMemory,
-                      maxDisk=config.maxDisk)
+        kwargs = dict(
+            config=config,
+            maxCores=config.maxCores,
+            maxMemory=config.maxMemory,
+            maxDisk=config.maxDisk,
+        )
 
         from toil.batchSystems.registry import get_batch_system, get_batch_systems
 
         try:
             batch_system = get_batch_system(config.batchSystem)
         except KeyError:
-            raise RuntimeError(f'Unrecognized batch system: {config.batchSystem} '
-                               f'(choose from: {", ".join(get_batch_systems())})')
+            raise RuntimeError(
+                f"Unrecognized batch system: {config.batchSystem} "
+                f'(choose from: {", ".join(get_batch_systems())})'
+            )
 
         if config.caching and not batch_system.supportsWorkerCleanup():
-            raise RuntimeError(f'{config.batchSystem} currently does not support shared caching, because it '
-                               'does not support cleaning up a worker after the last job finishes. Set '
-                               '--caching=false')
-
-        logger.debug('Using the %s' % re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", batch_system.__name__).lower())
+            raise RuntimeError(
+                f"{config.batchSystem} currently does not support shared caching, because it "
+                "does not support cleaning up a worker after the last job finishes. Set "
+                "--caching=false"
+            )
+
+        logger.debug(
+            "Using the %s"
+            % re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", batch_system.__name__).lower()
+        )
 
         return batch_system(**kwargs)
 
     def _setupAutoDeployment(
-            self, userScript: Optional["ModuleDescriptor"] = None
+        self, userScript: Optional["ModuleDescriptor"] = None
     ) -> None:
         """
         Determine the user script, save it to the job store and inject a reference to the saved copy into the batch system.
@@ -1048,86 +1213,113 @@ class Toil(ContextManager["Toil"]):
         if userScript is not None:
             # This branch is hit when a workflow is being started
             if userScript.belongsToToil:
-                logger.debug('User script %s belongs to Toil. No need to auto-deploy it.', userScript)
+                logger.debug(
+                    "User script %s belongs to Toil. No need to auto-deploy it.",
+                    userScript,
+                )
                 userScript = None
             else:
-                if (self._batchSystem.supportsAutoDeployment() and
-                        not self.config.disableAutoDeployment):
+                if (
+                    self._batchSystem.supportsAutoDeployment()
+                    and not self.config.disableAutoDeployment
+                ):
                     # Note that by saving the ModuleDescriptor, and not the Resource we allow for
                     # redeploying a potentially modified user script on workflow restarts.
-                    with self._jobStore.write_shared_file_stream('userScript') as f:
+                    with self._jobStore.write_shared_file_stream("userScript") as f:
                         pickle.dump(userScript, f, protocol=pickle.HIGHEST_PROTOCOL)
                 else:
-                    from toil.batchSystems.singleMachine import \
-                        SingleMachineBatchSystem
+                    from toil.batchSystems.singleMachine import SingleMachineBatchSystem
+
                     if not isinstance(self._batchSystem, SingleMachineBatchSystem):
-                        logger.warning('Batch system does not support auto-deployment. The user script '
-                                       '%s will have to be present at the same location on every worker.', userScript)
+                        logger.warning(
+                            "Batch system does not support auto-deployment. The user script "
+                            "%s will have to be present at the same location on every worker.",
+                            userScript,
+                        )
                     userScript = None
         else:
             # This branch is hit on restarts
-            if self._batchSystem.supportsAutoDeployment() and not self.config.disableAutoDeployment:
+            if (
+                self._batchSystem.supportsAutoDeployment()
+                and not self.config.disableAutoDeployment
+            ):
                 # We could deploy a user script
                 from toil.jobStores.abstractJobStore import NoSuchFileException
+
                 try:
-                    with self._jobStore.read_shared_file_stream('userScript') as f:
+                    with self._jobStore.read_shared_file_stream("userScript") as f:
                         userScript = safeUnpickleFromStream(f)
                 except NoSuchFileException:
-                    logger.debug('User script neither set explicitly nor present in the job store.')
+                    logger.debug(
+                        "User script neither set explicitly nor present in the job store."
+                    )
                     userScript = None
         if userScript is None:
-            logger.debug('No user script to auto-deploy.')
+            logger.debug("No user script to auto-deploy.")
         else:
-            logger.debug('Saving user script %s as a resource', userScript)
+            logger.debug("Saving user script %s as a resource", userScript)
             userScriptResource = userScript.saveAsResourceTo(self._jobStore)
-            logger.debug('Injecting user script %s into batch system.', userScriptResource)
+            logger.debug(
+                "Injecting user script %s into batch system.", userScriptResource
+            )
             self._batchSystem.setUserScript(userScriptResource)
 
+    def url_exists(self, src_uri: str) -> bool:
+        return self._jobStore.url_exists(self.normalize_uri(src_uri))
+
     # Importing a file with a shared file name returns None, but without one it
     # returns a file ID. Explain this to MyPy.
 
     @overload
-    def importFile(self,
-                   srcUrl: str,
-                   sharedFileName: str,
-                   symlink: bool = True) -> None:
-        ...
+    def importFile(
+        self, srcUrl: str, sharedFileName: str, symlink: bool = True
+    ) -> None: ...
 
     @overload
-    def importFile(self,
-                   srcUrl: str,
-                   sharedFileName: None = None,
-                   symlink: bool = True) -> FileID:
-        ...
-
-    @deprecated(new_function_name='import_file')
-    def importFile(self,
-                   srcUrl: str,
-                   sharedFileName: Optional[str] = None,
-                   symlink: bool = True) -> Optional[FileID]:
+    def importFile(
+        self, srcUrl: str, sharedFileName: None = None, symlink: bool = True
+    ) -> FileID: ...
+
+    @deprecated(new_function_name="import_file")
+    def importFile(
+        self, srcUrl: str, sharedFileName: Optional[str] = None, symlink: bool = True
+    ) -> Optional[FileID]:
         return self.import_file(srcUrl, sharedFileName, symlink)
 
     @overload
-    def import_file(self,
-                    src_uri: str,
-                    shared_file_name: str,
-                    symlink: bool = True,
-                    check_existence: bool = True) -> None:
-        ...
+    def import_file(
+        self,
+        src_uri: str,
+        shared_file_name: str,
+        symlink: bool = True,
+        check_existence: bool = True,
+    ) -> None: ...
 
     @overload
-    def import_file(self,
-                    src_uri: str,
-                    shared_file_name: None = None,
-                    symlink: bool = True,
-                    check_existence: bool = True) -> FileID:
-        ...
-
-    def import_file(self,
-                    src_uri: str,
-                    shared_file_name: Optional[str] = None,
-                    symlink: bool = True,
-                    check_existence: bool = True) -> Optional[FileID]:
+    def import_file(
+        self,
+        src_uri: str,
+        shared_file_name: None = None,
+        symlink: bool = True,
+        check_existence: Literal[True] = True
+    ) -> FileID: ...
+
+    @overload
+    def import_file(
+        self,
+        src_uri: str,
+        shared_file_name: None = None,
+        symlink: bool = True,
+        check_existence: bool = True
+    ) -> Optional[FileID]: ...
+
+    def import_file(
+        self,
+        src_uri: str,
+        shared_file_name: Optional[str] = None,
+        symlink: bool = True,
+        check_existence: bool = True
+    ) -> Optional[FileID]:
         """
         Import the file at the given URL into the job store.
 
@@ -1143,7 +1335,9 @@ class Toil(ContextManager["Toil"]):
         self._assertContextManagerUsed()
         full_uri = self.normalize_uri(src_uri, check_existence=check_existence)
         try:
-            imported = self._jobStore.import_file(full_uri, shared_file_name=shared_file_name, symlink=symlink)
+            imported = self._jobStore.import_file(
+                full_uri, shared_file_name=shared_file_name, symlink=symlink
+            )
         except FileNotFoundError:
             # TODO: I thought we refactored the different job store import
             # methods to not raise and instead return None, but that looks to
@@ -1160,10 +1354,10 @@ class Toil(ContextManager["Toil"]):
             # We need to protect the caller from missing files.
             # We think a file was missing, and we got None because of it.
             # We didn't get None instead because of using a shared file name.
-            raise FileNotFoundError(f'Could not find file {src_uri}')
+            raise FileNotFoundError(f"Could not find file {src_uri}")
         return imported
 
-    @deprecated(new_function_name='export_file')
+    @deprecated(new_function_name="export_file")
     def exportFile(self, jobStoreFileID: FileID, dstUrl: str) -> None:
         return self.export_file(jobStoreFileID, dstUrl)
 
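For orientation, a minimal sketch (not part of the diff; the job store path and file URIs are hypothetical) of how these overloads look to a caller: without shared_file_name the import returns a FileID, with one it returns None.

    from toil.common import Toil
    from toil.job import Job

    options = Job.Runner.getDefaultOptions("./demo-jobstore")
    with Toil(options) as workflow:
        # No shared_file_name: a FileID comes back (FileNotFoundError if the
        # source is missing and check_existence stays True).
        file_id = workflow.import_file("file:///tmp/input.txt")
        # With shared_file_name: the file is addressed by name, return is None.
        workflow.import_file("file:///tmp/config.json", shared_file_name="config")
        workflow.export_file(file_id, "file:///tmp/output.txt")
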
@@ -1186,18 +1380,21 @@ class Toil(ContextManager["Toil"]):
         :param check_existence: If set, raise FileNotFoundError if a URI points to
             a local file that does not exist.
         """
-        if urlparse(uri).scheme == 'file':
-            uri = unquote(urlparse(uri).path)  # this should strip off the local file scheme; it will be added back
+        if urlparse(uri).scheme == "file":
+            uri = unquote(
+                urlparse(uri).path
+            )  # this should strip off the local file scheme; it will be added back
 
         # account for the scheme-less case, which should be coerced to a local absolute path
-        if urlparse(uri).scheme == '':
+        if urlparse(uri).scheme == "":
             abs_path = os.path.abspath(uri)
             if not os.path.exists(abs_path) and check_existence:
                 raise FileNotFoundError(
                     f'Could not find local file "{abs_path}" when importing "{uri}".\n'
                     f'Make sure paths are relative to "{os.getcwd()}" or use absolute paths.\n'
-                    f'If this is not a local file, please include the scheme (s3:/, gs:/, ftp://, etc.).')
-            return f'file://{quote(abs_path)}'
+                    f"If this is not a local file, please include the scheme (s3:/, gs:/, ftp://, etc.)."
+                )
+            return f"file://{quote(abs_path)}"
         return uri
 
     def _setBatchSystemEnvVars(self) -> None:
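A standalone sketch of the coercion normalize_uri performs above (stdlib only; the helper name is made up for illustration):

    import os
    from urllib.parse import quote, unquote, urlparse

    def to_file_uri(uri: str) -> str:
        if urlparse(uri).scheme == "file":
            uri = unquote(urlparse(uri).path)  # strip the scheme; re-added below
        if urlparse(uri).scheme == "":
            # Scheme-less input is treated as a local path.
            return f"file://{quote(os.path.abspath(uri))}"
        return uri  # s3://, gs://, ftp://, etc. pass through untouched

    print(to_file_uri("data/input.txt"))  # e.g. file:///home/user/data/input.txt
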
@@ -1209,15 +1406,19 @@ class Toil(ContextManager["Toil"]):
     def _serialiseEnv(self) -> None:
         """Put the environment in a globally accessible pickle file."""
         # Dump out the environment of this process in the environment pickle file.
-        with self._jobStore.write_shared_file_stream("environment.pickle") as fileHandle:
+        with self._jobStore.write_shared_file_stream(
+            "environment.pickle"
+        ) as fileHandle:
             pickle.dump(dict(os.environ), fileHandle, pickle.HIGHEST_PROTOCOL)
         logger.debug("Written the environment for the jobs to the environment file")
 
     def _cacheAllJobs(self) -> None:
         """Download all jobs in the current job store into self.jobCache."""
-        logger.debug('Caching all jobs in job store')
-        self._jobCache = {jobDesc.jobStoreID: jobDesc for jobDesc in self._jobStore.jobs()}
-        logger.debug(f'{len(self._jobCache)} jobs downloaded.')
+        logger.debug("Caching all jobs in job store")
+        self._jobCache = {
+            jobDesc.jobStoreID: jobDesc for jobDesc in self._jobStore.jobs()
+        }
+        logger.debug(f"{len(self._jobCache)} jobs downloaded.")
 
     def _cacheJob(self, job: "JobDescription") -> None:
         """
@@ -1239,14 +1440,22 @@ class Toil(ContextManager["Toil"]):
         :param configWorkDir: Value passed to the program using the --workDir flag
         :return: Path to the Toil work directory, constant across all machines
         """
-        workDir = os.getenv('TOIL_WORKDIR_OVERRIDE') or configWorkDir or os.getenv(
-            'TOIL_WORKDIR') or tempfile.gettempdir()
+        workDir = (
+            os.getenv("TOIL_WORKDIR_OVERRIDE")
+            or configWorkDir
+            or os.getenv("TOIL_WORKDIR")
+            or tempfile.gettempdir()
+        )
         if not os.path.exists(workDir):
-            raise RuntimeError(f'The directory specified by --workDir or TOIL_WORKDIR ({workDir}) does not exist.')
+            raise RuntimeError(
+                f"The directory specified by --workDir or TOIL_WORKDIR ({workDir}) does not exist."
+            )
         return workDir
 
     @classmethod
-    def get_toil_coordination_dir(cls, config_work_dir: Optional[str], config_coordination_dir: Optional[str]) -> str:
+    def get_toil_coordination_dir(
+        cls, config_work_dir: Optional[str], config_coordination_dir: Optional[str]
+    ) -> str:
         """
         Return a path to a writable directory, which will be in memory if
         convenient. Ought to be used for file locking and coordination.
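The or-chain in getToilWorkDir encodes a strict precedence: the first truthy candidate wins. A sketch with hypothetical values:

    import os
    import tempfile

    def pick_work_dir(config_work_dir=None):
        return (
            os.getenv("TOIL_WORKDIR_OVERRIDE")  # 1. override env var
            or config_work_dir                  # 2. --workDir from the config
            or os.getenv("TOIL_WORKDIR")        # 3. ordinary env var
            or tempfile.gettempdir()            # 4. system temp dir fallback
        )

    os.environ.pop("TOIL_WORKDIR_OVERRIDE", None)
    os.environ["TOIL_WORKDIR"] = "/scratch"
    print(pick_work_dir())           # /scratch
    print(pick_work_dir("/custom"))  # /custom: config beats TOIL_WORKDIR
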
@@ -1255,51 +1464,61 @@ class Toil(ContextManager["Toil"]):
            --workDir flag
         :param config_coordination_dir: Value passed to the program using the
            --coordinationDir flag
+        :param workflow_id: Used if a tmpdir_prefix exists to create full
+            directory paths unique per workflow
 
         :return: Path to the Toil coordination directory. Ought to be on a
            POSIX filesystem that allows directories containing open files to be
            deleted.
         """
 
-        if 'XDG_RUNTIME_DIR' in os.environ and not os.path.exists(os.environ['XDG_RUNTIME_DIR']):
-            # Slurm has been observed providing this variable but not keeping
-            # the directory live as long as we run for.
-            logger.warning('XDG_RUNTIME_DIR is set to nonexistent directory %s; your environment may be out of spec!',
-                           os.environ['XDG_RUNTIME_DIR'])
-
         # Go get a coordination directory, using a lot of short-circuiting of
         # or and the fact that and returns its second argument when it
         # succeeds.
         coordination_dir: Optional[str] = (
             # First try an override env var
-            os.getenv('TOIL_COORDINATION_DIR_OVERRIDE') or
-            # Then the value from the config
-            config_coordination_dir or
-            # Then a normal env var
-            # TODO: why/how would this propagate when not using single machine?
-            os.getenv('TOIL_COORDINATION_DIR') or
-            # Then try a `toil` subdirectory of the XDG runtime directory
-            # (often /var/run/users/<UID>). But only if we are actually in a
-            # session that has the env var set. Otherwise it might belong to a
-            # different set of sessions and get cleaned up out from under us
-            # when that session ends.
-            # We don't think Slurm XDG sessions are trustworthy, depending on
-            # the cluster's PAM configuration, so don't use them.
-            ('XDG_RUNTIME_DIR' in os.environ and 'SLURM_JOBID' not in os.environ and try_path(
-                os.path.join(os.environ['XDG_RUNTIME_DIR'], 'toil'))) or
-            # Try under /run/lock. It might be a temp dir style sticky directory.
-            try_path('/run/lock') or
-            # Finally, fall back on the work dir and hope it's a legit filesystem.
-            cls.getToilWorkDir(config_work_dir)
+            os.getenv("TOIL_COORDINATION_DIR_OVERRIDE")
+            or
+            # Then the value from the config
+            config_coordination_dir
+            or
+            # Then a normal env var
+            # TODO: why/how would this propagate when not using single machine?
+            os.getenv("TOIL_COORDINATION_DIR")
+            or
+            # Then try a `toil` subdirectory of the XDG runtime directory
+            # (often /var/run/users/<UID>). But only if we are actually in a
+            # session that has the env var set. Otherwise it might belong to a
+            # different set of sessions and get cleaned up out from under us
+            # when that session ends.
+            # We don't think Slurm XDG sessions are trustworthy, depending on
+            # the cluster's PAM configuration, so don't use them.
+            (
+                "XDG_RUNTIME_DIR" in os.environ
+                and "SLURM_JOBID" not in os.environ
+                and try_path(os.path.join(os.environ["XDG_RUNTIME_DIR"], "toil"))
+            )
+            or
+            # Try under /run/lock. It might be a temp dir style sticky directory.
+            try_path("/run/lock")
+            or
+            # Try all possible temp directories, falling back to the current working
+            # directory
+            tempfile.gettempdir()
+            or
+            # Finally, fall back on the work dir and hope it's a legit filesystem.
+            cls.getToilWorkDir(config_work_dir)
         )
 
         if coordination_dir is None:
-            raise RuntimeError("Could not determine a coordination directory by any method!")
+            raise RuntimeError(
+                "Could not determine a coordination directory by any method!"
+            )
 
         return coordination_dir
 
     @staticmethod
-    def _get_workflow_path_component(workflow_id: str) -> str:
+    def get_workflow_path_component(workflow_id: str) -> str:
         """
         Get a safe filesystem path component for a workflow.
 
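The comment about "and returns its second argument" is doing real work in that chain: a guarded candidate can sit inside an or-fallback because a truthy left side makes `and` yield the right side. A sketch, with try_path as a stand-in for the real helper (assumed to return the path if usable, else None):

    import os

    def try_path(path):
        return path if os.path.isdir(path) and os.access(path, os.W_OK) else None

    candidate = (
        "XDG_RUNTIME_DIR" in os.environ
        and "SLURM_JOBID" not in os.environ
        and try_path(os.path.join(os.environ["XDG_RUNTIME_DIR"], "toil"))
    ) or try_path("/run/lock")
    print(candidate)  # first usable location, or None if neither works
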
@@ -1308,11 +1527,13 @@ class Toil(ContextManager["Toil"]):
 
         :param workflow_id: The ID of the current Toil workflow.
         """
-        return str(uuid.uuid5(uuid.UUID(getNodeID()), workflow_id)).replace('-', '')
+        return "toilwf-" + str(uuid.uuid5(uuid.UUID(getNodeID()), workflow_id)).replace(
+            "-", ""
+        )
 
     @classmethod
     def getLocalWorkflowDir(
-            cls, workflowID: str, configWorkDir: Optional[str] = None
+        cls, workflowID: str, configWorkDir: Optional[str] = None
     ) -> str:
         """
         Return the directory where worker directories and the cache will be located for this workflow on this machine.
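The new "toilwf-" prefix aside, the path component above is a deterministic uuid5: hashing the workflow ID under the node's UUID namespace gives every node/workflow pair a stable directory name. A sketch with a made-up node UUID standing in for getNodeID():

    import uuid

    node_id = "1b4e28ba-2fa1-11d2-883f-0016d3cca427"  # hypothetical node UUID
    workflow_id = "my-workflow"

    component = "toilwf-" + str(
        uuid.uuid5(uuid.UUID(node_id), workflow_id)
    ).replace("-", "")
    print(component)  # same output on every run for the same node and workflow
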
@@ -1325,7 +1546,9 @@ class Toil(ContextManager["Toil"]):
 
         # Create a directory unique to each host in case workDir is on a shared FS.
         # This prevents workers on different nodes from erasing each other's directories.
-        workflowDir: str = os.path.join(base, cls._get_workflow_path_component(workflowID))
+        workflowDir: str = os.path.join(
+            base, cls.get_workflow_path_component(workflowID)
+        )
         try:
             # Directory creation is atomic
             os.mkdir(workflowDir)
@@ -1334,15 +1557,17 @@ class Toil(ContextManager["Toil"]):
                 # The directory exists if a previous worker set it up.
                 raise
         else:
-            logger.debug('Created the workflow directory for this machine at %s' % workflowDir)
+            logger.debug(
+                "Created the workflow directory for this machine at %s" % workflowDir
+            )
         return workflowDir
 
     @classmethod
     def get_local_workflow_coordination_dir(
-            cls,
-            workflow_id: str,
-            config_work_dir: Optional[str],
-            config_coordination_dir: Optional[str]
+        cls,
+        workflow_id: str,
+        config_work_dir: Optional[str],
+        config_coordination_dir: Optional[str],
     ) -> str:
         """
         Return the directory where coordination files should be located for
@@ -1367,10 +1592,18 @@ class Toil(ContextManager["Toil"]):
         base = cls.get_toil_coordination_dir(config_work_dir, config_coordination_dir)
 
         # Make a per-workflow and node subdirectory
-        subdir = os.path.join(base, cls._get_workflow_path_component(workflow_id))
+        subdir = os.path.join(base, cls.get_workflow_path_component(workflow_id))
+
         # Make it exist
         os.makedirs(subdir, exist_ok=True)
-        # TODO: May interfere with workflow directory creation logging if it's the same directory.
+        # TODO: May interfere with workflow directory creation logging if it's
+        # the same directory.
+
+        # Don't let it out if it smells like an unacceptable filesystem for locks
+        ensure_filesystem_lockable(
+            subdir, hint="Use --coordinationDir to provide a different location."
+        )
+
         # Return it
         return subdir
 
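ensure_filesystem_lockable guards against filesystems where POSIX advisory locking misbehaves (some NFS mounts, for example). The kind of locking the coordination directory must support, sketched with flock (the lock path is hypothetical):

    import fcntl
    import os

    lock_path = os.path.join("/tmp", "toil-demo.lock")
    with open(lock_path, "w") as lock_file:
        fcntl.flock(lock_file, fcntl.LOCK_EX)  # blocks until the lock is free
        # critical section: one process on this machine at a time
        fcntl.flock(lock_file, fcntl.LOCK_UN)
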
@@ -1382,24 +1615,31 @@ class Toil(ContextManager["Toil"]):
         """
         logProcessContext(self.config)
 
-        with RealtimeLogger(self._batchSystem,
-                            level=self.options.logLevel if self.options.realTimeLogging else None):
+        with RealtimeLogger(
+            self._batchSystem,
+            level=self.options.logLevel if self.options.realTimeLogging else "INFO",
+        ):
             # FIXME: common should not import from leader
             from toil.leader import Leader
-            return Leader(config=self.config,
-                          batchSystem=self._batchSystem,
-                          provisioner=self._provisioner,
-                          jobStore=self._jobStore,
-                          rootJob=rootJob,
-                          jobCache=self._jobCache).run()
+
+            return Leader(
+                config=self.config,
+                batchSystem=self._batchSystem,
+                provisioner=self._provisioner,
+                jobStore=self._jobStore,
+                rootJob=rootJob,
+                jobCache=self._jobCache,
+            ).run()
 
     def _shutdownBatchSystem(self) -> None:
         """Shuts down current batch system if it has been created."""
         startTime = time.time()
-        logger.debug('Shutting down batch system ...')
+        logger.debug("Shutting down batch system ...")
         self._batchSystem.shutdown()
-        logger.debug('... finished shutting down the batch system in %s seconds.'
-                     % (time.time() - startTime))
+        logger.debug(
+            "... finished shutting down the batch system in %s seconds."
+            % (time.time() - startTime)
+        )
 
     def _assertContextManagerUsed(self) -> None:
         if not self._inContextManager:
@@ -1414,25 +1654,33 @@ class ToilRestartException(Exception):
 class ToilContextManagerException(Exception):
     def __init__(self) -> None:
         super().__init__(
-            'This method cannot be called outside the "with Toil(...)" context manager.')
+            'This method cannot be called outside the "with Toil(...)" context manager.'
+        )
 
 
 class ToilMetrics:
-    def __init__(self, bus: MessageBus, provisioner: Optional["AbstractProvisioner"] = None) -> None:
+    def __init__(
+        self, bus: MessageBus, provisioner: Optional["AbstractProvisioner"] = None
+    ) -> None:
         clusterName = "none"
         region = "us-west-2"
         if provisioner is not None:
             clusterName = str(provisioner.clusterName)
             if provisioner._zone is not None:
-                if provisioner.cloud == 'aws':
+                if provisioner.cloud == "aws":
+                    # lazy import to avoid AWS dependency if the aws extra is not installed
+                    from toil.lib.aws import zone_to_region
+
                     # Remove AZ name
                     region = zone_to_region(provisioner._zone)
                 else:
                     region = provisioner._zone
 
-        registry = lookupEnvVar(name='docker registry',
-                                envName='TOIL_DOCKER_REGISTRY',
-                                defaultValue=dockerRegistry)
+        registry = lookupEnvVar(
+            name="docker registry",
+            envName="TOIL_DOCKER_REGISTRY",
+            defaultValue=dockerRegistry,
+        )
 
         self.mtailImage = f"{registry}/toil-mtail:{dockerTag}"
         self.grafanaImage = f"{registry}/toil-grafana:{dockerTag}"
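zone_to_region is assumed here to map an availability zone to its region by dropping the trailing zone letter; a rough stand-in (the real helper lives in toil.lib.aws and may be stricter):

    def zone_to_region_sketch(zone: str) -> str:
        return zone[:-1] if zone and zone[-1].isalpha() else zone

    print(zone_to_region_sketch("us-west-2a"))  # us-west-2
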
@@ -1449,14 +1697,21 @@ class ToilMetrics:
 
         try:
             self.mtailProc: Optional[subprocess.Popen[bytes]] = subprocess.Popen(
-                ["docker", "run",
-                 "--rm",
-                 "--interactive",
-                 "--net=host",
-                 "--name", "toil_mtail",
-                 "-p", "3903:3903",
-                 self.mtailImage],
-                stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+                [
+                    "docker",
+                    "run",
+                    "--rm",
+                    "--interactive",
+                    "--net=host",
+                    "--name",
+                    "toil_mtail",
+                    "-p",
+                    "3903:3903",
+                    self.mtailImage,
+                ],
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+            )
         except subprocess.CalledProcessError:
             logger.warning("Couldn't start toil metrics server.")
             self.mtailProc = None
@@ -1469,20 +1724,32 @@ class ToilMetrics:
         if not provisioner:
             try:
                 self.nodeExporterProc = subprocess.Popen(
-                    ["docker", "run",
-                     "--rm",
-                     "--net=host",
-                     "-p", "9100:9100",
-                     "-v", "/proc:/host/proc",
-                     "-v", "/sys:/host/sys",
-                     "-v", "/:/rootfs",
-                     "quay.io/prometheus/node-exporter:v1.3.1",
-                     "-collector.procfs", "/host/proc",
-                     "-collector.sysfs", "/host/sys",
-                     "-collector.filesystem.ignored-mount-points",
-                     "^/(sys|proc|dev|host|etc)($|/)"])
+                    [
+                        "docker",
+                        "run",
+                        "--rm",
+                        "--net=host",
+                        "-p",
+                        "9100:9100",
+                        "-v",
+                        "/proc:/host/proc",
+                        "-v",
+                        "/sys:/host/sys",
+                        "-v",
+                        "/:/rootfs",
+                        "quay.io/prometheus/node-exporter:v1.3.1",
+                        "-collector.procfs",
+                        "/host/proc",
+                        "-collector.sysfs",
+                        "/host/sys",
+                        "-collector.filesystem.ignored-mount-points",
+                        "^/(sys|proc|dev|host|etc)($|/)",
+                    ]
+                )
             except subprocess.CalledProcessError:
-                logger.warning("Couldn't start node exporter, won't get RAM and CPU usage for dashboard.")
+                logger.warning(
+                    "Couldn't start node exporter, won't get RAM and CPU usage for dashboard."
+                )
             except KeyboardInterrupt:
                 if self.nodeExporterProc is not None:
                     self.nodeExporterProc.terminate()
@@ -1499,23 +1766,32 @@ class ToilMetrics:
             JobMissingMessage: self.logMissingJob,
             JobIssuedMessage: self.logIssuedJob,
             JobFailedMessage: self.logFailedJob,
-            JobCompletedMessage: self.logCompletedJob
+            JobCompletedMessage: self.logCompletedJob,
         }
         # The only way to make this intelligible to MyPy is to wrap the dict in
         # a function that can cast.
-        MessageType = TypeVar('MessageType')
+        MessageType = TypeVar("MessageType")
 
-        def get_listener(message_type: Type[MessageType]) -> Callable[[MessageType], None]:
+        def get_listener(
+            message_type: type[MessageType],
+        ) -> Callable[[MessageType], None]:
             return cast(Callable[[MessageType], None], TARGETS[message_type])
 
         # Then set up the listeners.
-        self._listeners = [bus.subscribe(message_type, get_listener(message_type)) for message_type in TARGETS.keys()]
+        self._listeners = [
+            bus.subscribe(message_type, get_listener(message_type))
+            for message_type in TARGETS.keys()
+        ]
 
     @staticmethod
     def _containerRunning(containerName: str) -> bool:
         try:
-            result = subprocess.check_output(["docker", "inspect", "-f",
-                                              "'{{.State.Running}}'", containerName]).decode('utf-8') == "true"
+            result = (
+                subprocess.check_output(
+                    ["docker", "inspect", "-f", "'{{.State.Running}}'", containerName]
+                ).decode("utf-8")
+                == "true"
+            )
         except subprocess.CalledProcessError:
             result = False
         return result
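A hedged sketch of the docker inspect check: `docker inspect -f` output ends with a newline, so a robust comparison strips it first (the quoting and strip handling here differ slightly from the code above).

    import subprocess

    def container_running(name: str) -> bool:
        try:
            out = subprocess.check_output(
                ["docker", "inspect", "-f", "{{.State.Running}}", name]
            )
            return out.decode("utf-8").strip() == "true"
        except subprocess.CalledProcessError:
            return False  # no such container, or docker itself errored

    print(container_running("toil_grafana"))
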
@@ -1527,24 +1803,38 @@ class ToilMetrics:
                    subprocess.check_call(["docker", "rm", "-f", "toil_prometheus"])
                except subprocess.CalledProcessError:
                    pass
-                subprocess.check_call(["docker", "run",
-                                       "--name", "toil_prometheus",
-                                       "--net=host",
-                                       "-d",
-                                       "-p", "9090:9090",
-                                       self.prometheusImage,
-                                       clusterName,
-                                       zone])
+                subprocess.check_call(
+                    [
+                        "docker",
+                        "run",
+                        "--name",
+                        "toil_prometheus",
+                        "--net=host",
+                        "-d",
+                        "-p",
+                        "9090:9090",
+                        self.prometheusImage,
+                        clusterName,
+                        zone,
+                    ]
+                )
 
            if not self._containerRunning("toil_grafana"):
                try:
                    subprocess.check_call(["docker", "rm", "-f", "toil_grafana"])
                except subprocess.CalledProcessError:
                    pass
-                subprocess.check_call(["docker", "run",
-                                       "--name", "toil_grafana",
-                                       "-d", "-p=3000:3000",
-                                       self.grafanaImage])
+                subprocess.check_call(
+                    [
+                        "docker",
+                        "run",
+                        "--name",
+                        "toil_grafana",
+                        "-d",
+                        "-p=3000:3000",
+                        self.grafanaImage,
+                    ]
+                )
        except subprocess.CalledProcessError:
            logger.warning("Could not start prometheus/grafana dashboard.")
            return
@@ -1552,15 +1842,17 @@ class ToilMetrics:
         try:
             self.add_prometheus_data_source()
         except requests.exceptions.ConnectionError:
-            logger.debug("Could not add data source to Grafana dashboard - no metrics will be displayed.")
+            logger.debug(
+                "Could not add data source to Grafana dashboard - no metrics will be displayed."
+            )
 
     @retry(errors=[requests.exceptions.ConnectionError])
     def add_prometheus_data_source(self) -> None:
         requests.post(
-            'http://localhost:3000/api/datasources',
-            auth=('admin', 'admin'),
+            "http://localhost:3000/api/datasources",
+            auth=("admin", "admin"),
             data='{"name":"DS_PROMETHEUS","type":"prometheus", "url":"http://localhost:9090", "access":"direct"}',
-            headers={'content-type': 'application/json', "access": "direct"}
+            headers={"content-type": "application/json", "access": "direct"},
         )
 
     def log(self, message: str) -> None:
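The retry decorator from toil.lib.retry, as used on add_prometheus_data_source above, re-invokes the wrapped callable when one of the listed error types is raised. A small sketch (the helper and URL are hypothetical; retry timing is left at its defaults):

    import requests
    from toil.lib.retry import retry

    @retry(errors=[requests.exceptions.ConnectionError])
    def ping_grafana() -> int:
        return requests.get("http://localhost:3000/api/health", timeout=5).status_code
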
@@ -1571,14 +1863,10 @@ class ToilMetrics:
         # Note: The mtail configuration (dashboard/mtail/toil.mtail) depends on these messages
         # remaining intact
 
-    def logClusterSize(
-        self, m: ClusterSizeMessage
-    ) -> None:
+    def logClusterSize(self, m: ClusterSizeMessage) -> None:
         self.log("current_size '%s' %i" % (m.instance_type, m.current_size))
 
-    def logClusterDesiredSize(
-        self, m: ClusterDesiredSizeMessage
-    ) -> None:
+    def logClusterDesiredSize(self, m: ClusterDesiredSizeMessage) -> None:
         self.log("desired_size '%s' %i" % (m.instance_type, m.desired_size))
 
     def logQueueSize(self, m: QueueSizeMessage) -> None:
@@ -1598,13 +1886,13 @@ class ToilMetrics:
 
     def shutdown(self) -> None:
         if self.mtailProc is not None:
-            logger.debug('Stopping mtail')
+            logger.debug("Stopping mtail")
             self.mtailProc.kill()
-            logger.debug('Stopped mtail')
+            logger.debug("Stopped mtail")
         if self.nodeExporterProc is not None:
-            logger.debug('Stopping node exporter')
+            logger.debug("Stopping node exporter")
             self.nodeExporterProc.kill()
-            logger.debug('Stopped node exporter')
+            logger.debug("Stopped node exporter")
         self._listeners = []
 
 
@@ -1612,7 +1900,7 @@ def cacheDirName(workflowID: str) -> str:
     """
     :return: Name of the cache directory.
     """
-    return f'cache-{workflowID}'
+    return f"cache-{workflowID}"
 
 
 def getDirSizeRecursively(dirPath: str) -> int:
@@ -1638,8 +1926,16 @@ def getDirSizeRecursively(dirPath: str) -> int:
 
     dirPath = os.path.abspath(dirPath)
     try:
-        return int(subprocess.check_output(['du', '-s', dirPath],
-                                           env=dict(os.environ, BLOCKSIZE='512')).decode('utf-8').split()[0]) * 512
+        return (
+            int(
+                subprocess.check_output(
+                    ["du", "-s", dirPath], env=dict(os.environ, BLOCKSIZE="512")
+                )
+                .decode("utf-8")
+                .split()[0]
+            )
+            * 512
+        )
     # The environment variable 'BLOCKSIZE'='512' is set instead of the much cleaner
     # --block-size=1 because Apple can't handle it.
     except (OSError, subprocess.CalledProcessError):
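A self-contained sketch of the same measurement strategy: ask du for 512-byte blocks (portable to macOS, which rejects --block-size), and fall back to summing st_blocks when du is unavailable.

    import os
    import subprocess

    def dir_size_bytes(path: str) -> int:
        path = os.path.abspath(path)
        try:
            output = subprocess.check_output(
                ["du", "-s", path], env=dict(os.environ, BLOCKSIZE="512")
            )
            return int(output.decode("utf-8").split()[0]) * 512
        except (OSError, subprocess.CalledProcessError):
            total = 0
            for root, _, files in os.walk(path):
                for name in files:
                    try:
                        # st_blocks counts 512-byte blocks actually allocated
                        total += os.lstat(os.path.join(root, name)).st_blocks * 512
                    except OSError:
                        pass  # file vanished mid-walk
            return total

    print(dir_size_bytes("."))
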
@@ -1654,7 +1950,7 @@ def getDirSizeRecursively(dirPath: str) -> int:
     return total_size
 
 
-def getFileSystemSize(dirPath: str) -> Tuple[int, int]:
+def getFileSystemSize(dirPath: str) -> tuple[int, int]:
     """
     Return the free space, and total size of the file system hosting `dirPath`.
 
@@ -1662,7 +1958,7 @@ def getFileSystemSize(dirPath: str) -> Tuple[int, int]:
     :return: free space and total size of file system
     """
     if not os.path.exists(dirPath):
-        raise RuntimeError(f'Could not find dir size for non-existent path: {dirPath}')
+        raise RuntimeError(f"Could not find dir size for non-existent path: {dirPath}")
     diskStats = os.statvfs(dirPath)
     freeSpace = diskStats.f_frsize * diskStats.f_bavail
     diskSize = diskStats.f_frsize * diskStats.f_blocks
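The statvfs arithmetic above, as a runnable one-liner pair: fragment size times available/total block counts gives bytes free and bytes total.

    import os

    stats = os.statvfs("/tmp")
    free_bytes = stats.f_frsize * stats.f_bavail   # space available to non-root users
    total_bytes = stats.f_frsize * stats.f_blocks  # total size of the filesystem
    print(f"{free_bytes} bytes free of {total_bytes}")
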