toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/options/common.py CHANGED
@@ -1,18 +1,16 @@
+ import logging
  import os
- from argparse import ArgumentParser, Action, _AppendAction
- from typing import Any, Optional, Union, Type, Callable, List, Dict, TYPE_CHECKING
+ from argparse import Action, ArgumentParser, _AppendAction
+ from typing import TYPE_CHECKING, Any, Callable, Optional, Union

- from distutils.util import strtobool
  from configargparse import SUPPRESS
- import logging
-
  from ruamel.yaml import YAML

- from toil.lib.conversions import bytes2human, human2bytes
-
  from toil.batchSystems.options import add_all_batchsystem_options
+ from toil.lib.conversions import bytes2human, human2bytes, opt_strtobool, strtobool
  from toil.provisioners import parse_node_types
  from toil.statsAndLogging import add_logging_options
+
  if TYPE_CHECKING:
      from toil.job import AcceleratorRequirement
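Context for the import churn above: distutils was deprecated by PEP 632 and removed entirely in Python 3.12, taking distutils.util.strtobool with it, so the boolean parsers now ship in toil.lib.conversions instead of being defined inline (the inline convert_bool/opt_strtobool helpers they replace are deleted further down in this diff). A minimal sketch of what the relocated helpers plausibly look like; the actual toil.lib.conversions implementation may differ:

    from typing import Optional

    def strtobool(val: str) -> bool:
        # Same truth table as the removed distutils.util.strtobool,
        # but returning a real bool instead of 1/0.
        if val.lower() in ("y", "yes", "t", "true", "on", "1"):
            return True
        if val.lower() in ("n", "no", "f", "false", "off", "0"):
            return False
        raise ValueError(f"invalid truth value {val!r}")

    def opt_strtobool(b: Optional[str]) -> Optional[bool]:
        # Pass None through untouched; used for tri-state options such as --caching.
        return b if b is None else strtobool(b)

Defining these as plain str-to-bool callables is what lets the option definitions below say type=strtobool, so --symlinkImports=False parses to False rather than to the truthy non-empty string "False".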
@@ -24,7 +22,8 @@ SYS_MAX_SIZE = 9223372036854775807
  # sys.max_size on 64 bit systems is 9223372036854775807, so that 32-bit systems
  # use the same number

- def parse_set_env(l: List[str]) -> Dict[str, Optional[str]]:
+
+ def parse_set_env(l: list[str]) -> dict[str, Optional[str]]:
      """
      Parse a list of strings of the form "NAME=VALUE" or just "NAME" into a dictionary.
@@ -57,20 +56,20 @@ def parse_set_env(l: List[str]) -> Dict[str, Optional[str]]:
      v: Optional[str] = None
      for i in l:
          try:
-             k, v = i.split('=', 1)
+             k, v = i.split("=", 1)
          except ValueError:
              k, v = i, None
          if not k:
-             raise ValueError('Empty name')
+             raise ValueError("Empty name")
          d[k] = v
      return d


- def parse_str_list(s: str) -> List[str]:
+ def parse_str_list(s: str) -> list[str]:
      return [str(x) for x in s.split(",")]


- def parse_int_list(s: str) -> List[int]:
+ def parse_int_list(s: str) -> list[int]:
      return [int(x) for x in s.split(",")]
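The quote normalization in this hunk does not change behavior, so parse_set_env still works as its docstring describes: NAME=VALUE pairs become entries, a bare NAME maps to None, and an empty name is rejected. For example:

    parse_set_env(["PATH=/usr/bin", "DEBUG", "EMPTY="])
    # -> {'PATH': '/usr/bin', 'DEBUG': None, 'EMPTY': ''}

    parse_set_env(["=oops"])  # raises ValueError("Empty name")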
@@ -92,7 +91,7 @@ def fC(minValue: float, maxValue: Optional[float] = None) -> Callable[[float], b
      return lambda x: minValue <= x < maxValue


- def parse_accelerator_list(specs: Optional[str]) -> List['AcceleratorRequirement']:
+ def parse_accelerator_list(specs: Optional[str]) -> list["AcceleratorRequirement"]:
      """
      Parse a string description of one or more accelerator requirements.
      """
@@ -103,20 +102,22 @@ def parse_accelerator_list(specs: Optional[str]) -> List['AcceleratorRequirement
      # Otherwise parse each requirement.
      from toil.job import parse_accelerator

-     return [parse_accelerator(r) for r in specs.split(',')]
+     return [parse_accelerator(r) for r in specs.split(",")]


  def parseBool(val: str) -> bool:
-     if val.lower() in ['true', 't', 'yes', 'y', 'on', '1']:
+     if val.lower() in ["true", "t", "yes", "y", "on", "1"]:
          return True
-     elif val.lower() in ['false', 'f', 'no', 'n', 'off', '0']:
+     elif val.lower() in ["false", "f", "no", "n", "off", "0"]:
          return False
      else:
-         raise RuntimeError("Could not interpret \"%s\" as a boolean value" % val)
+         raise RuntimeError('Could not interpret "%s" as a boolean value' % val)


  # This is kept in the outer scope as multiple batchsystem files use this
- def make_open_interval_action(min: Union[int, float], max: Optional[Union[int, float]] = None) -> Type[Action]:
+ def make_open_interval_action(
+     min: Union[int, float], max: Optional[Union[int, float]] = None
+ ) -> type[Action]:
      """
      Returns an argparse action class to check if the input is within the given half-open interval.
      ex:
@@ -129,7 +130,9 @@ def make_open_interval_action(min: Union[int, float], max: Optional[Union[int, f
      """

      class IntOrFloatOpenAction(Action):
-         def __call__(self, parser: Any, namespace: Any, values: Any, option_string: Any = None) -> None:
+         def __call__(
+             self, parser: Any, namespace: Any, values: Any, option_string: Any = None
+         ) -> None:
              if isinstance(min, int):
                  if max is not None:  # for mypy
                      assert isinstance(max, int)
@@ -138,10 +141,18 @@ def make_open_interval_action(min: Union[int, float], max: Optional[Union[int, f
              func = fC(min, max)
              try:
                  if not func(values):
-                     raise parser.error(
-                         f"{option_string} ({values}) must be within the range: [{min}, {'infinity' if max is None else max})")
+                     if max is None:
+                         raise parser.error(
+                             f"{option_string} ({values}) must be at least {min}"
+                         )
+                     else:
+                         raise parser.error(
+                             f"{option_string} ({values}) must be at least {min} and strictly less than {max})"
+                         )
              except AssertionError:
-                 raise RuntimeError(f"The {option_string} option has an invalid value: {values}")
+                 raise RuntimeError(
+                     f"The {option_string} option has an invalid value: {values}"
+                 )
              setattr(namespace, self.dest, values)

      return IntOrFloatOpenAction
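make_open_interval_action is a factory: it closes over min and max and returns an Action subclass, so the bounds check runs at argument-parse time. A sketch of how such an action gets wired up; the parser and the --retries option here are hypothetical, for illustration only:

    from argparse import ArgumentParser

    parser = ArgumentParser()
    # Accepts any integer >= 1; the returned Action class enforces the bound.
    parser.add_argument("--retries", type=int, action=make_open_interval_action(1))

    args = parser.parse_args(["--retries", "3"])  # OK: 3 is in [1, infinity)
    # parser.parse_args(["--retries", "0"])       # parser.error: "--retries (0) must be at least 1"

The restructured error messages in this hunk are what such a failing parse now prints: one wording for a missing upper bound, another for the half-open case.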
@@ -159,8 +170,9 @@ def parse_jobstore(jobstore_uri: str) -> str:
      :return: URI of the jobstore
      """
      from toil.common import Toil
+
      name, rest = Toil.parseLocator(jobstore_uri)
-     if name == 'file':
+     if name == "file":
          # We need to resolve relative paths early, on the leader, because the worker process
          # may have a different working directory than the leader, e.g. under Mesos.
          return Toil.buildLocator(name, os.path.abspath(rest))
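parse_jobstore normalizes file-scheme locators by making their paths absolute on the leader, since (as the comment says) workers may run with a different working directory. A rough sketch of the locator convention it relies on; the real parsing is Toil.parseLocator in toil/common.py and may handle more cases:

    def parse_locator_sketch(locator: str) -> tuple[str, str]:
        # Illustrative only:
        # "aws:us-west-2:my-prefix" -> ("aws", "us-west-2:my-prefix")
        # "./store" (no scheme)     -> ("file", "./store")
        if ":" in locator:
            name, rest = locator.split(":", 1)
            return name, rest
        return "file", locator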
@@ -168,22 +180,26 @@ def parse_jobstore(jobstore_uri: str) -> str:
      return jobstore_uri


- JOBSTORE_HELP = ("The location of the job store for the workflow. "
-                  "A job store holds persistent information about the jobs, stats, and files in a "
-                  "workflow. If the workflow is run with a distributed batch system, the job "
-                  "store must be accessible by all worker nodes. Depending on the desired "
-                  "job store implementation, the location should be formatted according to "
-                  "one of the following schemes:\n\n"
-                  "file:<path> where <path> points to a directory on the file systen\n\n"
-                  "aws:<region>:<prefix> where <region> is the name of an AWS region like "
-                  "us-west-2 and <prefix> will be prepended to the names of any top-level "
-                  "AWS resources in use by job store, e.g. S3 buckets.\n\n "
-                  "google:<project_id>:<prefix> TODO: explain\n\n"
-                  "For backwards compatibility, you may also specify ./foo (equivalent to "
-                  "file:./foo or just file:foo) or /bar (equivalent to file:/bar).")
-
-
- def add_base_toil_options(parser: ArgumentParser, jobstore_as_flag: bool = False, cwl: bool = False) -> None:
+ JOBSTORE_HELP = (
+     "The location of the job store for the workflow. "
+     "A job store holds persistent information about the jobs, stats, and files in a "
+     "workflow. If the workflow is run with a distributed batch system, the job "
+     "store must be accessible by all worker nodes. Depending on the desired "
+     "job store implementation, the location should be formatted according to "
+     "one of the following schemes:\n\n"
+     "file:<path> where <path> points to a directory on the file system\n\n"
+     "aws:<region>:<prefix> where <region> is the name of an AWS region like "
+     "us-west-2 and <prefix> will be prepended to the names of any top-level "
+     "AWS resources in use by job store, e.g. S3 buckets.\n\n "
+     "google:<project_id>:<prefix> TODO: explain\n\n"
+     "For backwards compatibility, you may also specify ./foo (equivalent to "
+     "file:./foo or just file:foo) or /bar (equivalent to file:/bar)."
+ )
+
+
+ def add_base_toil_options(
+     parser: ArgumentParser, jobstore_as_flag: bool = False, cwl: bool = False
+ ) -> None:
      """
      Add base Toil command line options to the parser.
      :param parser: Argument parser to add options to
@@ -198,48 +214,58 @@ def add_base_toil_options(parser: ArgumentParser, jobstore_as_flag: bool = False

      # If using argparse instead of configargparse, this should just not parse when calling parse_args()
      # default config value is set to none as defaults should already be populated at config init
-     config.add_argument('--config', dest='config', is_config_file_arg=True, default=None, metavar="PATH",
-                         help="Get options from a config file.")
-
-     def convert_bool(b: str) -> bool:
-         """Convert a string representation of bool to bool"""
-         return bool(strtobool(b))
-
-     def opt_strtobool(b: Optional[str]) -> Optional[bool]:
-         """Convert an optional string representation of bool to None or bool"""
-         return b if b is None else convert_bool(b)
+     config.add_argument(
+         "--config",
+         dest="config",
+         is_config_file_arg=True,
+         default=None,
+         metavar="PATH",
+         help="Get options from a config file.",
+     )

      add_logging_options(parser)
      parser.register("type", "bool", parseBool)  # Custom type for arg=True/False.

      # Core options
      core_options = parser.add_argument_group(
-         title="Toil core options.",
+         title="Toil core options",
          description="Options to specify the location of the Toil workflow and "
-                     "turn on stats collation about the performance of jobs."
+         "turn on stats collation about the performance of jobs.",
      )
      if jobstore_as_flag:
-         core_options.add_argument('--jobstore', '--jobStore', dest='jobStore', type=parse_jobstore, default=None,
-                                   help=JOBSTORE_HELP)
+         core_options.add_argument(
+             "--jobstore",
+             "--jobStore",
+             dest="jobStore",
+             type=parse_jobstore,
+             default=None,
+             help=JOBSTORE_HELP,
+         )
      else:
-         core_options.add_argument('jobStore', type=parse_jobstore, help=JOBSTORE_HELP)
+         core_options.add_argument("jobStore", type=parse_jobstore, help=JOBSTORE_HELP)

      class WorkDirAction(Action):
          """
          Argparse action class to check that the provided --workDir exists
          """

-         def __call__(self, parser: Any, namespace: Any, values: Any, option_string: Any = None) -> None:
+         def __call__(
+             self, parser: Any, namespace: Any, values: Any, option_string: Any = None
+         ) -> None:
              workDir = values
              if workDir is not None:
                  workDir = os.path.abspath(workDir)
                  if not os.path.exists(workDir):
-                     raise RuntimeError(f"The path provided to --workDir ({workDir}) does not exist.")
+                     raise RuntimeError(
+                         f"The path provided to --workDir ({workDir}) does not exist."
+                     )

                  if len(workDir) > 80:
-                     logger.warning(f'Length of workDir path "{workDir}" is {len(workDir)} characters. '
-                                    f'Consider setting a shorter path with --workPath or setting TMPDIR to something '
-                                    f'like "/tmp" to avoid overly long paths.')
+                     logger.warning(
+                         f'Length of workDir path "{workDir}" is {len(workDir)} characters. '
+                         f"Consider setting a shorter path with --workPath or setting TMPDIR to something "
+                         f'like "/tmp" to avoid overly long paths.'
+                     )
              setattr(namespace, self.dest, workDir)

      class CoordinationDirAction(Action):
@@ -247,16 +273,21 @@ def add_base_toil_options(parser: ArgumentParser, jobstore_as_flag: bool = False
          Argparse action class to check that the provided --coordinationDir exists
          """

-         def __call__(self, parser: Any, namespace: Any, values: Any, option_string: Any = None) -> None:
+         def __call__(
+             self, parser: Any, namespace: Any, values: Any, option_string: Any = None
+         ) -> None:
              coordination_dir = values
              if coordination_dir is not None:
                  coordination_dir = os.path.abspath(coordination_dir)
                  if not os.path.exists(coordination_dir):
                      raise RuntimeError(
-                         f"The path provided to --coordinationDir ({coordination_dir}) does not exist.")
+                         f"The path provided to --coordinationDir ({coordination_dir}) does not exist."
+                     )
              setattr(namespace, self.dest, coordination_dir)

-     def make_closed_interval_action(min: Union[int, float], max: Optional[Union[int, float]] = None) -> Type[Action]:
+     def make_closed_interval_action(
+         min: Union[int, float], max: Optional[Union[int, float]] = None
+     ) -> type[Action]:
          """
          Returns an argparse action class to check if the input is within the given half-open interval.
          ex:
@@ -268,7 +299,13 @@ def add_base_toil_options(parser: ArgumentParser, jobstore_as_flag: bool = False
          """

          class ClosedIntOrFloatAction(Action):
-             def __call__(self, parser: Any, namespace: Any, values: Any, option_string: Any = None) -> None:
+             def __call__(
+                 self,
+                 parser: Any,
+                 namespace: Any,
+                 values: Any,
+                 option_string: Any = None,
+             ) -> None:
                  def is_within(x: Union[int, float]) -> bool:
                      if max is None:
                          return min <= x
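A note on semantics: the docstring carried over from make_open_interval_action still says "half-open interval", but the closed variant accepts both endpoints; its is_within check is min <= x when max is None, and its range message (next hunk) uses square brackets on both ends. That distinction matters for options like --betaInertia, whose declared range is [0.0, 0.9]. A boundary-value illustration:

    # fC(minValue, maxValue) builds the half-open check used by make_open_interval_action:
    fC(0.0, 0.9)(0.9)   # False: minValue <= x < maxValue excludes the upper bound
    fC(0.0, 0.9)(0.0)   # True: the lower bound is included
    # make_closed_interval_action's is_within admits both bounds instead,
    # so --betaInertia 0.9 (declared range [0.0, 0.9]) parses successfully.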
@@ -278,132 +315,221 @@ def add_base_toil_options(parser: ArgumentParser, jobstore_as_flag: bool = False
                  try:
                      if not is_within(values):
                          raise parser.error(
-                             f"{option_string} ({values}) must be within the range: [{min}, {'infinity' if max is None else max}]")
+                             f"{option_string} ({values}) must be within the range: [{min}, {'infinity' if max is None else max}]"
+                         )
                  except AssertionError:
-                     raise RuntimeError(f"The {option_string} option has an invalid value: {values}")
+                     raise RuntimeError(
+                         f"The {option_string} option has an invalid value: {values}"
+                     )
                  setattr(namespace, self.dest, values)

          return ClosedIntOrFloatAction

-     core_options.add_argument("--workDir", dest="workDir", default=None, env_var="TOIL_WORKDIR", action=WorkDirAction,
-                               metavar="PATH",
-                               help="Absolute path to directory where temporary files generated during the Toil "
-                                    "run should be placed. Standard output and error from batch system jobs "
-                                    "(unless --noStdOutErr is set) will be placed in this directory. A cache directory "
-                                    "may be placed in this directory. Temp files and folders will be placed in a "
-                                    "directory toil-<workflowID> within workDir. The workflowID is generated by "
-                                    "Toil and will be reported in the workflow logs. Default is determined by the "
-                                    "variables (TMPDIR, TEMP, TMP) via mkdtemp. This directory needs to exist on "
-                                    "all machines running jobs; if capturing standard output and error from batch "
-                                    "system jobs is desired, it will generally need to be on a shared file system. "
-                                    "When sharing a cache between containers on a host, this directory must be "
-                                    "shared between the containers.")
-     core_options.add_argument("--coordinationDir", dest="coordination_dir", default=None,
-                               env_var="TOIL_COORDINATION_DIR", action=CoordinationDirAction, metavar="PATH",
-                               help="Absolute path to directory where Toil will keep state and lock files."
-                                    "When sharing a cache between containers on a host, this directory must be "
-                                    "shared between the containers.")
-     core_options.add_argument("--noStdOutErr", dest="noStdOutErr", default=False, action="store_true",
-                               help="Do not capture standard output and error from batch system jobs.")
-     core_options.add_argument("--stats", dest="stats", default=False, action="store_true",
-                               help="Records statistics about the toil workflow to be used by 'toil stats'.")
-     clean_choices = ['always', 'onError', 'never', 'onSuccess']
-     core_options.add_argument("--clean", dest="clean", choices=clean_choices, default="onSuccess",
-                               help=f"Determines the deletion of the jobStore upon completion of the program. "
-                                    f"Choices: {clean_choices}. The --stats option requires information from the "
-                                    f"jobStore upon completion so the jobStore will never be deleted with that flag. "
-                                    f"If you wish to be able to restart the run, choose \'never\' or \'onSuccess\'. "
-                                    f"Default is \'never\' if stats is enabled, and \'onSuccess\' otherwise.")
-     core_options.add_argument("--cleanWorkDir", dest="cleanWorkDir", choices=clean_choices, default='always',
-                               help=f"Determines deletion of temporary worker directory upon completion of a job. "
-                                    f"Choices: {clean_choices}. Default = always. WARNING: This option should be "
-                                    f"changed for debugging only. Running a full pipeline with this option could "
-                                    f"fill your disk with excessive intermediate data.")
-     core_options.add_argument("--clusterStats", dest="clusterStats", nargs='?', action='store', default=None,
-                               metavar="OPT_PATH", const=os.getcwd(),
-                               help="If enabled, writes out JSON resource usage statistics to a file. "
-                                    "The default location for this file is the current working directory, but an "
-                                    "absolute path can also be passed to specify where this file should be written. "
-                                    "This options only applies when using scalable batch systems.")
+     core_options.add_argument(
+         "--workDir",
+         dest="workDir",
+         default=None,
+         env_var="TOIL_WORKDIR",
+         action=WorkDirAction,
+         metavar="PATH",
+         help="Absolute path to directory where temporary files generated during the Toil "
+         "run should be placed. Standard output and error from batch system jobs "
+         "(unless --noStdOutErr is set) will be placed in this directory. A cache directory "
+         "may be placed in this directory. Temp files and folders will be placed in a "
+         "directory toil-<workflowID> within workDir. The workflowID is generated by "
+         "Toil and will be reported in the workflow logs. Default is determined by the "
+         "variables (TMPDIR, TEMP, TMP) via mkdtemp. This directory needs to exist on "
+         "all machines running jobs; if capturing standard output and error from batch "
+         "system jobs is desired, it will generally need to be on a shared file system. "
+         "When sharing a cache between containers on a host, this directory must be "
+         "shared between the containers.",
+     )
+     core_options.add_argument(
+         "--coordinationDir",
+         dest="coordination_dir",
+         default=None,
+         env_var="TOIL_COORDINATION_DIR",
+         action=CoordinationDirAction,
+         metavar="PATH",
+         help="Absolute path to directory where Toil will keep state and lock files. "
+         "When sharing a cache between containers on a host, this directory must be "
+         "shared between the containers.",
+     )
+     core_options.add_argument(
+         "--noStdOutErr",
+         dest="noStdOutErr",
+         default=False,
+         action="store_true",
+         help="Do not capture standard output and error from batch system jobs.",
+     )
+     core_options.add_argument(
+         "--stats",
+         dest="stats",
+         default=False,
+         action="store_true",
+         help="Records statistics about the toil workflow to be used by 'toil stats'.",
+     )
+     clean_choices = ["always", "onError", "never", "onSuccess"]
+     core_options.add_argument(
+         "--clean",
+         dest="clean",
+         choices=clean_choices,
+         default="onSuccess",
+         help=f"Determines the deletion of the jobStore upon completion of the program. "
+         f"Choices: {clean_choices}. The --stats option requires information from the "
+         f"jobStore upon completion so the jobStore will never be deleted with that flag. "
+         f"If you wish to be able to restart the run, choose 'never' or 'onSuccess'. "
+         f"Default is 'never' if stats is enabled, and 'onSuccess' otherwise.",
+     )
+     core_options.add_argument(
+         "--cleanWorkDir",
+         dest="cleanWorkDir",
+         choices=clean_choices,
+         default="always",
+         help=f"Determines deletion of temporary worker directory upon completion of a job. "
+         f"Choices: {clean_choices}. Default = always. WARNING: This option should be "
+         f"changed for debugging only. Running a full pipeline with this option could "
+         f"fill your disk with excessive intermediate data.",
+     )
+     core_options.add_argument(
+         "--clusterStats",
+         dest="clusterStats",
+         nargs="?",
+         action="store",
+         default=None,
+         metavar="OPT_PATH",
+         const=os.getcwd(),
+         help="If enabled, writes out JSON resource usage statistics to a file. "
+         "The default location for this file is the current working directory, but an "
+         "absolute path can also be passed to specify where this file should be written. "
+         "This options only applies when using scalable batch systems.",
+     )

      # Restarting the workflow options
      restart_options = parser.add_argument_group(
-         title="Toil options for restarting an existing workflow.",
-         description="Allows the restart of an existing workflow"
+         title="Toil options for restarting an existing workflow",
+         description="Allows the restart of an existing workflow",
+     )
+     restart_options.add_argument(
+         "--restart",
+         dest="restart",
+         default=False,
+         action="store_true",
+         help="If --restart is specified then will attempt to restart existing workflow "
+         "at the location pointed to by the --jobStore option. Will raise an exception "
+         "if the workflow does not exist",
      )
-     restart_options.add_argument("--restart", dest="restart", default=False, action="store_true",
-                                  help="If --restart is specified then will attempt to restart existing workflow "
-                                       "at the location pointed to by the --jobStore option. Will raise an exception "
-                                       "if the workflow does not exist")

      # Batch system options
      batchsystem_options = parser.add_argument_group(
-         title="Toil options for specifying the batch system.",
-         description="Allows the specification of the batch system."
+         title="Toil options for specifying the batch system",
+         description="Allows the specification of the batch system.",
      )
      add_all_batchsystem_options(batchsystem_options)

      # File store options
      file_store_options = parser.add_argument_group(
-         title="Toil options for configuring storage.",
-         description="Allows configuring Toil's data storage."
+         title="Toil options for configuring storage",
+         description="Allows configuring Toil's data storage.",
      )
      link_imports = file_store_options.add_mutually_exclusive_group()
-     link_imports_help = ("When using a filesystem based job store, CWL input files are by default symlinked in. "
-                          "Setting this option to True instead copies the files into the job store, which may protect "
-                          "them from being modified externally. When set to False, as long as caching is enabled, "
-                          "Toil will protect the file automatically by changing the permissions to read-only."
-                          "default=%(default)s")
-     link_imports.add_argument("--symlinkImports", dest="symlinkImports", type=convert_bool, default=True,
-                               metavar="BOOL", help=link_imports_help)
+     link_imports_help = (
+         "When using a filesystem based job store, CWL input files are by default symlinked in. "
+         "Setting this option to True instead copies the files into the job store, which may protect "
+         "them from being modified externally. When set to False, as long as caching is enabled, "
+         "Toil will protect the file automatically by changing the permissions to read-only. "
+         "default=%(default)s"
+     )
+     link_imports.add_argument(
+         "--symlinkImports",
+         dest="symlinkImports",
+         type=strtobool,
+         default=True,
+         metavar="BOOL",
+         help=link_imports_help,
+     )
      move_exports = file_store_options.add_mutually_exclusive_group()
-     move_exports_help = ('When using a filesystem based job store, output files are by default moved to the '
-                          'output directory, and a symlink to the moved exported file is created at the initial '
-                          'location. Setting this option to True instead copies the files into the output directory. '
-                          'Applies to filesystem-based job stores only.'
-                          'default=%(default)s')
-     move_exports.add_argument("--moveOutputs", dest="moveOutputs", type=convert_bool, default=False, metavar="BOOL",
-                               help=move_exports_help)
+     move_exports_help = (
+         "When using a filesystem based job store, output files are by default moved to the "
+         "output directory, and a symlink to the moved exported file is created at the initial "
+         "location. Setting this option to True instead copies the files into the output directory. "
+         "Applies to filesystem-based job stores only. "
+         "default=%(default)s"
+     )
+     move_exports.add_argument(
+         "--moveOutputs",
+         dest="moveOutputs",
+         type=strtobool,
+         default=False,
+         metavar="BOOL",
+         help=move_exports_help,
+     )

      caching = file_store_options.add_mutually_exclusive_group()
      caching_help = "Enable or disable caching for your workflow, specifying this overrides default from job store"
-     caching.add_argument('--caching', dest='caching', type=opt_strtobool, default=None, metavar="BOOL",
-                          help=caching_help)
+     caching.add_argument(
+         "--caching",
+         dest="caching",
+         type=opt_strtobool,
+         default=None,
+         metavar="BOOL",
+         help=caching_help,
+     )
      # default is None according to PR 4299, seems to be generated at runtime

+     file_store_options.add_argument(
+         "--symlinkJobStoreReads",
+         dest="symlink_job_store_reads",
+         type=strtobool,
+         default=True,
+         metavar="BOOL",
+         help="Allow reads and container mounts from a JobStore's shared filesystem directly "
+         "via symlink. default=%(default)s",
+     )
+
      # Auto scaling options
      autoscaling_options = parser.add_argument_group(
-         title="Toil options for autoscaling the cluster of worker nodes.",
+         title="Toil options for autoscaling the cluster of worker nodes",
          description="Allows the specification of the minimum and maximum number of nodes in an autoscaled cluster, "
-                     "as well as parameters to control the level of provisioning."
+         "as well as parameters to control the level of provisioning.",
      )
-     provisioner_choices = ['aws', 'gce', None]
+     provisioner_choices = ["aws", "gce", None]

      # TODO: Better consolidate this provisioner arg and the one in provisioners/__init__.py?
-     autoscaling_options.add_argument('--provisioner', '-p', dest="provisioner", choices=provisioner_choices,
-                                      default=None,
-                                      help=f"The provisioner for cluster auto-scaling. This is the main Toil "
-                                           f"'--provisioner' option, and defaults to None for running on single "
-                                           f"machine and non-auto-scaling batch systems. The currently supported "
-                                           f"choices are {provisioner_choices}. The default is %(default)s.")
-     autoscaling_options.add_argument('--nodeTypes', default=[], dest="nodeTypes", type=parse_node_types,
-                                      action="extend",
-                                      help="Specifies a list of comma-separated node types, each of which is "
-                                           "composed of slash-separated instance types, and an optional spot "
-                                           "bid set off by a colon, making the node type preemptible. Instance "
-                                           "types may appear in multiple node types, and the same node type "
-                                           "may appear as both preemptible and non-preemptible.\n"
-                                           "Valid argument specifying two node types:\n"
-                                           "\tc5.4xlarge/c5a.4xlarge:0.42,t2.large\n"
-                                           "Node types:\n"
-                                           "\tc5.4xlarge/c5a.4xlarge:0.42 and t2.large\n"
-                                           "Instance types:\n"
-                                           "\tc5.4xlarge, c5a.4xlarge, and t2.large\n"
-                                           "Semantics:\n"
-                                           "\tBid $0.42/hour for either c5.4xlarge or c5a.4xlarge instances,\n"
-                                           "\ttreated interchangeably, while they are available at that price,\n"
-                                           "\tand buy t2.large instances at full price.\n"
-                                           "default=%(default)s")
+     autoscaling_options.add_argument(
+         "--provisioner",
+         "-p",
+         dest="provisioner",
+         choices=provisioner_choices,
+         default=None,
+         help=f"The provisioner for cluster auto-scaling. This is the main Toil "
+         f"'--provisioner' option, and defaults to None for running on single "
+         f"machine and non-auto-scaling batch systems. The currently supported "
+         f"choices are {provisioner_choices}. The default is %(default)s.",
+     )
+     autoscaling_options.add_argument(
+         "--nodeTypes",
+         default=[],
+         dest="nodeTypes",
+         type=parse_node_types,
+         action="extend",
+         help="Specifies a list of comma-separated node types, each of which is "
+         "composed of slash-separated instance types, and an optional spot "
+         "bid set off by a colon, making the node type preemptible. Instance "
+         "types may appear in multiple node types, and the same node type "
+         "may appear as both preemptible and non-preemptible.\n"
+         "Valid argument specifying two node types:\n"
+         "\tc5.4xlarge/c5a.4xlarge:0.42,t2.large\n"
+         "Node types:\n"
+         "\tc5.4xlarge/c5a.4xlarge:0.42 and t2.large\n"
+         "Instance types:\n"
+         "\tc5.4xlarge, c5a.4xlarge, and t2.large\n"
+         "Semantics:\n"
+         "\tBid $0.42/hour for either c5.4xlarge or c5a.4xlarge instances,\n"
+         "\ttreated interchangeably, while they are available at that price,\n"
+         "\tand buy t2.large instances at full price.\n"
+         "default=%(default)s",
+     )

      class NodeExtendAction(_AppendAction):
          """
@@ -416,246 +542,495 @@ def add_base_toil_options(parser: ArgumentParser, jobstore_as_flag: bool = False
416
542
  super().__init__(option_strings, dest, **kwargs)
417
543
  self.is_default = True
418
544
 
419
- def __call__(self, parser: Any, namespace: Any, values: Any, option_string: Any = None) -> None:
545
+ def __call__(
546
+ self, parser: Any, namespace: Any, values: Any, option_string: Any = None
547
+ ) -> None:
420
548
  if self.is_default:
421
549
  setattr(namespace, self.dest, values)
422
550
  self.is_default = False
423
551
  else:
424
552
  super().__call__(parser, namespace, values, option_string)
425
553
 
426
- autoscaling_options.add_argument('--maxNodes', default=[10], dest="maxNodes", type=parse_int_list,
427
- action=NodeExtendAction, metavar="INT[,INT...]",
428
- help=f"Maximum number of nodes of each type in the cluster, if using autoscaling, "
429
- f"provided as a comma-separated list. The first value is used as a default "
430
- f"if the list length is less than the number of nodeTypes. "
431
- f"default=%(default)s")
432
- autoscaling_options.add_argument('--minNodes', default=[0], dest="minNodes", type=parse_int_list,
433
- action=NodeExtendAction, metavar="INT[,INT...]",
434
- help="Mininum number of nodes of each type in the cluster, if using "
435
- "auto-scaling. This should be provided as a comma-separated list of the "
436
- "same length as the list of node types. default=%(default)s")
437
- autoscaling_options.add_argument("--targetTime", dest="targetTime", default=defaultTargetTime, type=int,
438
- action=make_closed_interval_action(0), metavar="INT",
439
- help=f"Sets how rapidly you aim to complete jobs in seconds. Shorter times mean "
440
- f"more aggressive parallelization. The autoscaler attempts to scale up/down "
441
- f"so that it expects all queued jobs will complete within targetTime "
442
- f"seconds. default=%(default)s")
443
- autoscaling_options.add_argument("--betaInertia", dest="betaInertia", default=0.1, type=float,
444
- action=make_closed_interval_action(0.0, 0.9), metavar="FLOAT",
445
- help=f"A smoothing parameter to prevent unnecessary oscillations in the number "
446
- f"of provisioned nodes. This controls an exponentially weighted moving "
447
- f"average of the estimated number of nodes. A value of 0.0 disables any "
448
- f"smoothing, and a value of 0.9 will smooth so much that few changes will "
449
- f"ever be made. Must be between 0.0 and 0.9. default=%(default)s")
450
- autoscaling_options.add_argument("--scaleInterval", dest="scaleInterval", default=60, type=int, metavar="INT",
451
- help=f"The interval (seconds) between assessing if the scale of "
452
- f"the cluster needs to change. default=%(default)s")
453
- autoscaling_options.add_argument("--preemptibleCompensation", "--preemptableCompensation",
454
- dest="preemptibleCompensation", default=0.0, type=float,
455
- action=make_closed_interval_action(0.0, 1.0), metavar="FLOAT",
456
- help=f"The preference of the autoscaler to replace preemptible nodes with "
457
- f"non-preemptible nodes, when preemptible nodes cannot be started for some "
458
- f"reason. This value must be between 0.0 and 1.0, inclusive. "
459
- f"A value of 0.0 disables such "
460
- f"compensation, a value of 0.5 compensates two missing preemptible nodes "
461
- f"with a non-preemptible one. A value of 1.0 replaces every missing "
462
- f"pre-emptable node with a non-preemptible one. default=%(default)s")
463
- autoscaling_options.add_argument("--nodeStorage", dest="nodeStorage", default=50, type=int, metavar="INT",
464
- help="Specify the size of the root volume of worker nodes when they are launched "
465
- "in gigabytes. You may want to set this if your jobs require a lot of disk "
466
- f"space. (default=%(default)s).")
467
- autoscaling_options.add_argument('--nodeStorageOverrides', dest="nodeStorageOverrides", default=[],
468
- type=parse_str_list, action="extend",
469
- metavar="NODETYPE:NODESTORAGE[,NODETYPE:NODESTORAGE...]",
470
- help="Comma-separated list of nodeType:nodeStorage that are used to override "
471
- "the default value from --nodeStorage for the specified nodeType(s). "
472
- "This is useful for heterogeneous jobs where some tasks require much more "
473
- "disk than others.")
474
-
475
- autoscaling_options.add_argument("--metrics", dest="metrics", default=False, type=convert_bool, metavar="BOOL",
476
- help="Enable the prometheus/grafana dashboard for monitoring CPU/RAM usage, "
477
- "queue size, and issued jobs.")
478
- autoscaling_options.add_argument("--assumeZeroOverhead", dest="assume_zero_overhead", default=False,
479
- type=convert_bool, metavar="BOOL",
480
- help="Ignore scheduler and OS overhead and assume jobs can use every last byte "
481
- "of memory and disk on a node when autoscaling.")
554
+ autoscaling_options.add_argument(
555
+ "--maxNodes",
556
+ default=[10],
557
+ dest="maxNodes",
558
+ type=parse_int_list,
559
+ action=NodeExtendAction,
560
+ metavar="INT[,INT...]",
561
+ help=f"Maximum number of nodes of each type in the cluster, if using autoscaling, "
562
+ f"provided as a comma-separated list. The first value is used as a default "
563
+ f"if the list length is less than the number of nodeTypes. "
564
+ f"default=%(default)s",
565
+ )
566
+ autoscaling_options.add_argument(
567
+ "--minNodes",
568
+ default=[0],
569
+ dest="minNodes",
570
+ type=parse_int_list,
571
+ action=NodeExtendAction,
572
+ metavar="INT[,INT...]",
573
+ help="Mininum number of nodes of each type in the cluster, if using "
574
+ "auto-scaling. This should be provided as a comma-separated list of the "
575
+ "same length as the list of node types. default=%(default)s",
576
+ )
577
+ autoscaling_options.add_argument(
578
+ "--targetTime",
579
+ dest="targetTime",
580
+ default=defaultTargetTime,
581
+ type=int,
582
+ action=make_closed_interval_action(0),
583
+ metavar="INT",
584
+ help=f"Sets how rapidly you aim to complete jobs in seconds. Shorter times mean "
585
+ f"more aggressive parallelization. The autoscaler attempts to scale up/down "
586
+ f"so that it expects all queued jobs will complete within targetTime "
587
+ f"seconds. default=%(default)s",
588
+ )
589
+ autoscaling_options.add_argument(
590
+ "--betaInertia",
591
+ dest="betaInertia",
592
+ default=0.1,
593
+ type=float,
594
+ action=make_closed_interval_action(0.0, 0.9),
595
+ metavar="FLOAT",
596
+ help=f"A smoothing parameter to prevent unnecessary oscillations in the number "
597
+ f"of provisioned nodes. This controls an exponentially weighted moving "
598
+ f"average of the estimated number of nodes. A value of 0.0 disables any "
599
+ f"smoothing, and a value of 0.9 will smooth so much that few changes will "
600
+ f"ever be made. Must be between 0.0 and 0.9. default=%(default)s",
601
+ )
602
+ autoscaling_options.add_argument(
603
+ "--scaleInterval",
604
+ dest="scaleInterval",
605
+ default=60,
606
+ type=int,
607
+ metavar="INT",
608
+ help=f"The interval (seconds) between assessing if the scale of "
609
+ f"the cluster needs to change. default=%(default)s",
610
+ )
611
+ autoscaling_options.add_argument(
612
+ "--preemptibleCompensation",
613
+ "--preemptableCompensation",
614
+ dest="preemptibleCompensation",
615
+ default=0.0,
616
+ type=float,
617
+ action=make_closed_interval_action(0.0, 1.0),
618
+ metavar="FLOAT",
619
+ help=f"The preference of the autoscaler to replace preemptible nodes with "
620
+ f"non-preemptible nodes, when preemptible nodes cannot be started for some "
621
+ f"reason. This value must be between 0.0 and 1.0, inclusive. "
622
+ f"A value of 0.0 disables such "
623
+ f"compensation, a value of 0.5 compensates two missing preemptible nodes "
624
+ f"with a non-preemptible one. A value of 1.0 replaces every missing "
625
+ f"pre-emptable node with a non-preemptible one. default=%(default)s",
626
+ )
627
+ autoscaling_options.add_argument(
628
+ "--nodeStorage",
629
+ dest="nodeStorage",
630
+ default=50,
631
+ type=int,
632
+ metavar="INT",
633
+ help="Specify the size of the root volume of worker nodes when they are launched "
634
+ "in gigabytes. You may want to set this if your jobs require a lot of disk "
635
+ f"space. (default=%(default)s).",
636
+ )
637
+ autoscaling_options.add_argument(
638
+ "--nodeStorageOverrides",
639
+ dest="nodeStorageOverrides",
640
+ default=[],
641
+ type=parse_str_list,
642
+ action="extend",
643
+ metavar="NODETYPE:NODESTORAGE[,NODETYPE:NODESTORAGE...]",
644
+ help="Comma-separated list of nodeType:nodeStorage that are used to override "
645
+ "the default value from --nodeStorage for the specified nodeType(s). "
646
+ "This is useful for heterogeneous jobs where some tasks require much more "
647
+ "disk than others.",
648
+ )
649
+
650
+ autoscaling_options.add_argument(
651
+ "--metrics",
652
+ dest="metrics",
653
+ default=False,
654
+ type=strtobool,
655
+ metavar="BOOL",
656
+ help="Enable the prometheus/grafana dashboard for monitoring CPU/RAM usage, "
657
+ "queue size, and issued jobs.",
658
+ )
659
+ autoscaling_options.add_argument(
660
+ "--assumeZeroOverhead",
661
+ dest="assume_zero_overhead",
662
+ default=False,
663
+ type=strtobool,
664
+ metavar="BOOL",
665
+ help="Ignore scheduler and OS overhead and assume jobs can use every last byte "
666
+ "of memory and disk on a node when autoscaling.",
667
+ )
482
668
 
483
669
  # Parameters to limit service jobs / detect service deadlocks
484
670
  service_options = parser.add_argument_group(
485
671
  title="Toil options for limiting the number of service jobs and detecting service deadlocks",
486
- description="Allows the specification of the maximum number of service jobs in a cluster. By keeping "
487
- "this limited we can avoid nodes occupied with services causing deadlocks."
488
- )
489
- service_options.add_argument("--maxServiceJobs", dest="maxServiceJobs", default=SYS_MAX_SIZE, type=int,
490
- metavar="INT",
491
- help=SUPPRESS if cwl else f"The maximum number of service jobs that can be run "
492
- f"concurrently, excluding service jobs running on "
493
- f"preemptible nodes. default=%(default)s")
494
- service_options.add_argument("--maxPreemptibleServiceJobs", dest="maxPreemptibleServiceJobs",
495
- default=SYS_MAX_SIZE,
496
- type=int, metavar="INT",
497
- help=SUPPRESS if cwl else "The maximum number of service jobs that can run "
498
- "concurrently on preemptible nodes. default=%(default)s")
499
- service_options.add_argument("--deadlockWait", dest="deadlockWait", default=60, type=int, metavar="INT",
500
- help=SUPPRESS if cwl else f"Time, in seconds, to tolerate the workflow running only "
501
- f"the same service jobs, with no jobs to use them, "
502
- f"before declaring the workflow to be deadlocked and "
503
- f"stopping. default=%(default)s")
504
- service_options.add_argument("--deadlockCheckInterval", dest="deadlockCheckInterval", default=30, type=int,
505
- metavar="INT",
506
- help=SUPPRESS if cwl else "Time, in seconds, to wait between checks to see if the "
507
- "workflow is stuck running only service jobs, with no jobs "
508
- "to use them. Should be shorter than --deadlockWait. May "
509
- "need to be increased if the batch system cannot enumerate "
510
- "running jobs quickly enough, or if polling for running "
511
- "jobs is placing an unacceptable load on a shared cluster."
512
- f"default=%(default)s")
672
+ description=(
673
+ SUPPRESS
674
+ if cwl
675
+ else "Allows the specification of the maximum number of service jobs in a cluster. "
676
+ "By keeping this limited we can avoid nodes occupied with services causing "
677
+ "deadlocks."
678
+ ),
679
+ )
680
+ service_options.add_argument(
681
+ "--maxServiceJobs",
682
+ dest="maxServiceJobs",
683
+ default=SYS_MAX_SIZE,
684
+ type=int,
685
+ metavar="INT",
686
+ help=(
687
+ SUPPRESS
688
+ if cwl
689
+ else f"The maximum number of service jobs that can be run "
690
+ f"concurrently, excluding service jobs running on "
691
+ f"preemptible nodes. default=%(default)s"
692
+ ),
693
+ )
694
+ service_options.add_argument(
695
+ "--maxPreemptibleServiceJobs",
696
+ dest="maxPreemptibleServiceJobs",
697
+ default=SYS_MAX_SIZE,
698
+ type=int,
699
+ metavar="INT",
700
+ help=(
701
+ SUPPRESS
702
+ if cwl
703
+ else "The maximum number of service jobs that can run "
704
+ "concurrently on preemptible nodes. default=%(default)s"
705
+ ),
706
+ )
707
+ service_options.add_argument(
708
+ "--deadlockWait",
709
+ dest="deadlockWait",
710
+ default=60,
711
+ type=int,
712
+ metavar="INT",
713
+ help=(
714
+ SUPPRESS
715
+ if cwl
716
+ else f"Time, in seconds, to tolerate the workflow running only "
717
+ f"the same service jobs, with no jobs to use them, "
718
+ f"before declaring the workflow to be deadlocked and "
719
+ f"stopping. default=%(default)s"
720
+ ),
721
+ )
722
+ service_options.add_argument(
723
+ "--deadlockCheckInterval",
724
+ dest="deadlockCheckInterval",
725
+ default=30,
726
+ type=int,
727
+ metavar="INT",
728
+ help=(
729
+ SUPPRESS
730
+ if cwl
731
+ else "Time, in seconds, to wait between checks to see if the "
732
+ "workflow is stuck running only service jobs, with no jobs "
733
+ "to use them. Should be shorter than --deadlockWait. May "
734
+ "need to be increased if the batch system cannot enumerate "
735
+ "running jobs quickly enough, or if polling for running "
736
+ "jobs is placing an unacceptable load on a shared cluster."
737
+ f"default=%(default)s"
738
+ ),
739
+ )
513
740
 
514
741
  # Resource requirements
515
742
  resource_options = parser.add_argument_group(
516
- title="Toil options for cores/memory requirements.",
743
+ title="Toil options for cores/memory requirements",
517
744
  description="The options to specify default cores/memory requirements (if not specified by the jobs "
518
- "themselves), and to limit the total amount of memory/cores requested from the batch system."
519
- )
520
- resource_help_msg = ('The {} amount of {} to request for a job. '
521
- 'Only applicable to jobs that do not specify an explicit value for this requirement. '
522
- '{}. '
523
- 'Default is {}.')
524
- cpu_note = 'Fractions of a core (for example 0.1) are supported on some batch systems [mesos, single_machine]'
525
- disk_mem_note = 'Standard suffixes like K, Ki, M, Mi, G or Gi are supported'
745
+ "themselves), and to limit the total amount of memory/cores requested from the batch system.",
746
+ )
747
+ resource_help_msg = (
748
+ "The {} amount of {} to request for a job. "
749
+ "Only applicable to jobs that do not specify an explicit value for this requirement. "
750
+ "{}. "
751
+ "Default is {}."
752
+ )
753
+ cpu_note = "Fractions of a core (for example 0.1) are supported on some batch systems [mesos, single_machine]"
754
+ disk_mem_note = "Standard suffixes like K, Ki, M, Mi, G or Gi are supported"
526
755
  accelerators_note = (
527
- 'Each accelerator specification can have a type (gpu [default], nvidia, amd, cuda, rocm, opencl, '
528
- 'or a specific model like nvidia-tesla-k80), and a count [default: 1]. If both a type and a count '
529
- 'are used, they must be separated by a colon. If multiple types of accelerators are '
530
- 'used, the specifications are separated by commas')
756
+ "Each accelerator specification can have a type (gpu [default], nvidia, amd, cuda, rocm, opencl, "
757
+ "or a specific model like nvidia-tesla-k80), and a count [default: 1]. If both a type and a count "
758
+ "are used, they must be separated by a colon. If multiple types of accelerators are "
759
+ "used, the specifications are separated by commas"
760
+ )
531
761
 
532
762
  h2b = lambda x: human2bytes(str(x))
533
763
 
534
-    resource_options.add_argument('--defaultMemory', dest='defaultMemory', default="2.0 Gi", type=h2b,
-                                  action=make_open_interval_action(1),
-                                  help=resource_help_msg.format('default', 'memory', disk_mem_note,
-                                                                bytes2human(2147483648)))
-    resource_options.add_argument('--defaultCores', dest='defaultCores', default=1, metavar='FLOAT', type=float,
-                                  action=make_open_interval_action(1.0),
-                                  help=resource_help_msg.format('default', 'cpu', cpu_note, str(1)))
-    resource_options.add_argument('--defaultDisk', dest='defaultDisk', default="2.0 Gi", metavar='INT', type=h2b,
-                                  action=make_open_interval_action(1),
-                                  help=resource_help_msg.format('default', 'disk', disk_mem_note,
-                                                                bytes2human(2147483648)))
-    resource_options.add_argument('--defaultAccelerators', dest='defaultAccelerators', default=[],
-                                  metavar='ACCELERATOR[,ACCELERATOR...]', type=parse_accelerator_list, action="extend",
-                                  help=resource_help_msg.format('default', 'accelerators', accelerators_note, []))
-    resource_options.add_argument('--defaultPreemptible', '--defaultPreemptable', dest='defaultPreemptible',
-                                  metavar='BOOL',
-                                  type=convert_bool, nargs='?', const=True, default=False,
-                                  help='Make all jobs able to run on preemptible (spot) nodes by default.')
-    resource_options.add_argument('--maxCores', dest='maxCores', default=SYS_MAX_SIZE, metavar='INT', type=int,
-                                  action=make_open_interval_action(1),
-                                  help=resource_help_msg.format('max', 'cpu', cpu_note, str(SYS_MAX_SIZE)))
-    resource_options.add_argument('--maxMemory', dest='maxMemory', default=SYS_MAX_SIZE, metavar='INT', type=h2b,
-                                  action=make_open_interval_action(1),
-                                  help=resource_help_msg.format('max', 'memory', disk_mem_note,
-                                                                bytes2human(SYS_MAX_SIZE)))
-    resource_options.add_argument('--maxDisk', dest='maxDisk', default=SYS_MAX_SIZE, metavar='INT', type=h2b,
-                                  action=make_open_interval_action(1),
-                                  help=resource_help_msg.format('max', 'disk', disk_mem_note,
-                                                                bytes2human(SYS_MAX_SIZE)))
+    resource_options.add_argument(
+        "--defaultMemory",
+        dest="defaultMemory",
+        default="2.0 Gi",
+        type=h2b,
+        action=make_open_interval_action(1),
+        help=resource_help_msg.format(
+            "default", "memory", disk_mem_note, bytes2human(2147483648)
+        ),
+    )
+    resource_options.add_argument(
+        "--defaultCores",
+        dest="defaultCores",
+        default=1,
+        metavar="FLOAT",
+        type=float,
+        action=make_open_interval_action(1.0),
+        help=resource_help_msg.format("default", "cpu", cpu_note, str(1)),
+    )
+    resource_options.add_argument(
+        "--defaultDisk",
+        dest="defaultDisk",
+        default="2.0 Gi",
+        metavar="INT",
+        type=h2b,
+        action=make_open_interval_action(1),
+        help=resource_help_msg.format(
+            "default", "disk", disk_mem_note, bytes2human(2147483648)
+        ),
+    )
+    resource_options.add_argument(
+        "--defaultAccelerators",
+        dest="defaultAccelerators",
+        default=[],
+        metavar="ACCELERATOR[,ACCELERATOR...]",
+        type=parse_accelerator_list,
+        action="extend",
+        help=resource_help_msg.format("default", "accelerators", accelerators_note, []),
+    )
+    resource_options.add_argument(
+        "--defaultPreemptible",
+        "--defaultPreemptable",
+        dest="defaultPreemptible",
+        metavar="BOOL",
+        type=strtobool,
+        nargs="?",
+        const=True,
+        default=False,
+        help="Make all jobs able to run on preemptible (spot) nodes by default.",
+    )
+    resource_options.add_argument(
+        "--maxCores",
+        dest="maxCores",
+        default=SYS_MAX_SIZE,
+        metavar="INT",
+        type=int,
+        action=make_open_interval_action(1),
+        help=resource_help_msg.format("max", "cpu", cpu_note, str(SYS_MAX_SIZE)),
+    )
+    resource_options.add_argument(
+        "--maxMemory",
+        dest="maxMemory",
+        default=SYS_MAX_SIZE,
+        metavar="INT",
+        type=h2b,
+        action=make_open_interval_action(1),
+        help=resource_help_msg.format(
+            "max", "memory", disk_mem_note, bytes2human(SYS_MAX_SIZE)
+        ),
+    )
+    resource_options.add_argument(
+        "--maxDisk",
+        dest="maxDisk",
+        default=SYS_MAX_SIZE,
+        metavar="INT",
+        type=h2b,
+        action=make_open_interval_action(1),
+        help=resource_help_msg.format(
+            "max", "disk", disk_mem_note, bytes2human(SYS_MAX_SIZE)
+        ),
+    )

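Editor's note: every numeric option above validates its value through `make_open_interval_action`, whose definition lies outside this hunk. A minimal sketch of the pattern, assuming the factory builds an argparse `Action` that rejects values outside the named interval (the exact boundary handling is toil's and is not shown here):

    # Hypothetical sketch; toil's real factory may treat the bound differently.
    from argparse import Action

    def make_open_interval_action_sketch(low):
        class OpenIntervalAction(Action):
            def __call__(self, parser, namespace, values, option_string=None):
                # `values` arrives already converted by type= (e.g. h2b or float).
                if values <= low:
                    parser.error(f"{option_string} must be greater than {low}")
                setattr(namespace, self.dest, values)

        return OpenIntervalAction

Returning an `Action` subclass from a factory like this is the standard argparse way to parameterize validation per option, which is why each `add_argument` call above can pass a different lower bound.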
     # Retrying/rescuing jobs
     job_options = parser.add_argument_group(
-        title="Toil options for rescuing/killing/restarting jobs.",
-        description="The options for jobs that either run too long/fail or get lost (some batch systems have issues!)."
-    )
-    job_options.add_argument("--retryCount", dest="retryCount", default=1, type=int,
-                             action=make_open_interval_action(0), metavar="INT",
-                             help=f"Number of times to retry a failing job before giving up and "
-                                  f"labeling job failed. default={1}")
-    job_options.add_argument("--enableUnlimitedPreemptibleRetries", "--enableUnlimitedPreemptableRetries",
-                             dest="enableUnlimitedPreemptibleRetries",
-                             type=convert_bool, default=False, metavar="BOOL",
-                             help="If set, preemptible failures (or any failure due to an instance getting "
-                                  "unexpectedly terminated) will not count towards job failures and --retryCount.")
-    job_options.add_argument("--doubleMem", dest="doubleMem", type=convert_bool, default=False, metavar="BOOL",
-                             help="If set, batch jobs which die to reaching memory limit on batch schedulers "
-                                  "will have their memory doubled and they will be retried. The remaining "
-                                  "retry count will be reduced by 1. Currently supported by LSF.")
-    job_options.add_argument("--maxJobDuration", dest="maxJobDuration", default=SYS_MAX_SIZE, type=int,
-                             action=make_open_interval_action(1), metavar="INT",
-                             help=f"Maximum runtime of a job (in seconds) before we kill it (this is a lower bound, "
-                                  f"and the actual time before killing the job may be longer). "
-                                  f"default=%(default)s")
-    job_options.add_argument("--rescueJobsFrequency", dest="rescueJobsFrequency", default=60, type=int,
-                             action=make_open_interval_action(1), metavar="INT",
-                             help=f"Period of time to wait (in seconds) between checking for missing/overlong jobs, "
-                                  f"that is jobs which get lost by the batch system. Expert parameter. "
-                                  f"default=%(default)s")
+        title="Toil options for rescuing/killing/restarting jobs",
+        description="The options for jobs that either run too long/fail or get lost (some batch systems have issues!).",
+    )
+    job_options.add_argument(
+        "--retryCount",
+        dest="retryCount",
+        default=1,
+        type=int,
+        action=make_open_interval_action(0),
+        metavar="INT",
+        help=f"Number of times to retry a failing job before giving up and "
+        f"labeling the job as failed. default={1}",
+    )
+    job_options.add_argument(
+        "--enableUnlimitedPreemptibleRetries",
+        "--enableUnlimitedPreemptableRetries",
+        dest="enableUnlimitedPreemptibleRetries",
+        type=strtobool,
+        default=False,
+        metavar="BOOL",
+        help="If set, preemptible failures (or any failure due to an instance getting "
+        "unexpectedly terminated) will not count towards job failures and --retryCount.",
+    )
+    job_options.add_argument(
+        "--doubleMem",
+        dest="doubleMem",
+        type=strtobool,
+        default=False,
+        metavar="BOOL",
+        help="If set, batch jobs which die due to reaching the memory limit on batch schedulers "
+        "will have their memory doubled and be retried. The remaining "
+        "retry count will be reduced by 1. Currently supported by LSF.",
+    )
+    job_options.add_argument(
+        "--maxJobDuration",
+        dest="maxJobDuration",
+        default=SYS_MAX_SIZE,
+        type=int,
+        action=make_open_interval_action(1),
+        metavar="INT",
+        help=f"Maximum runtime of a job (in seconds) before we kill it (this is a lower bound, "
+        f"and the actual time before killing the job may be longer). "
+        f"default=%(default)s",
+    )
+    job_options.add_argument(
+        "--rescueJobsFrequency",
+        dest="rescueJobsFrequency",
+        default=60,
+        type=int,
+        action=make_open_interval_action(1),
+        metavar="INT",
+        help=f"Period of time to wait (in seconds) between checking for missing/overlong jobs, "
+        f"that is, jobs which get lost by the batch system. Expert parameter. "
+        f"default=%(default)s",
+    )
+    job_options.add_argument(
+        "--jobStoreTimeout",
+        dest="job_store_timeout",
+        default=30,
+        type=float,
+        action=make_open_interval_action(0),
+        metavar="FLOAT",
+        help=f"Maximum time (in seconds) to wait for a job's update to the job store "
+        f"before declaring it failed. default=%(default)s",
+    )

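Editor's note: this group (like several above) swaps `convert_bool` for `strtobool` as the parser behind BOOL-typed options. Toil's `strtobool` is not shown in this hunk; assuming it mirrors the semantics of the classic `distutils.util.strtobool` (removed from the standard library in Python 3.12), the accepted spellings would look like:

    # Sketch modeled on distutils.util.strtobool; toil's actual helper may differ.
    def strtobool_sketch(val: str) -> bool:
        v = val.strip().lower()
        if v in ("y", "yes", "t", "true", "on", "1"):
            return True
        if v in ("n", "no", "f", "false", "off", "0"):
            return False
        raise ValueError(f"invalid truth value {val!r}")

Under these assumed semantics, `--doubleMem=true` and `--doubleMem=0` both parse, while `--doubleMem=maybe` is rejected at the command line.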
     # Log management options
     log_options = parser.add_argument_group(
-        title="Toil log management options.",
-        description="Options for how Toil should manage its logs."
-    )
-    log_options.add_argument("--maxLogFileSize", dest="maxLogFileSize", default=64000, type=h2b,
-                             action=make_open_interval_action(1),
-                             help=f"The maximum size of a job log file to keep (in bytes), log files larger than "
-                                  f"this will be truncated to the last X bytes. Setting this option to zero will "
-                                  f"prevent any truncation. Setting this option to a negative value will truncate "
-                                  f"from the beginning. Default={bytes2human(64000)}")
-    log_options.add_argument("--writeLogs", dest="writeLogs", nargs='?', action='store', default=None,
-                             const=os.getcwd(), metavar="OPT_PATH",
-                             help="Write worker logs received by the leader into their own files at the specified "
-                                  "path. Any non-empty standard output and error from failed batch system jobs will "
-                                  "also be written into files at this path. The current working directory will be "
-                                  "used if a path is not specified explicitly. Note: By default only the logs of "
-                                  "failed jobs are returned to leader. Set log level to 'debug' or enable "
-                                  "'--writeLogsFromAllJobs' to get logs back from successful jobs, and adjust "
-                                  "'maxLogFileSize' to control the truncation limit for worker logs.")
-    log_options.add_argument("--writeLogsGzip", dest="writeLogsGzip", nargs='?', action='store', default=None,
-                             const=os.getcwd(), metavar="OPT_PATH",
-                             help="Identical to --writeLogs except the logs files are gzipped on the leader.")
-    log_options.add_argument("--writeLogsFromAllJobs", dest="writeLogsFromAllJobs", type=convert_bool,
-                             default=False, metavar="BOOL",
-                             help="Whether to write logs from all jobs (including the successful ones) without "
-                                  "necessarily setting the log level to 'debug'. Ensure that either --writeLogs "
-                                  "or --writeLogsGzip is set if enabling this option.")
-    log_options.add_argument("--writeMessages", dest="write_messages", default=None,
-                             type=lambda x: None if x is None else os.path.abspath(x), metavar="PATH",
-                             help="File to send messages from the leader's message bus to.")
-    log_options.add_argument("--realTimeLogging", dest="realTimeLogging", type=convert_bool, default=False,
-                             help="Enable real-time logging from workers to leader")
+        title="Toil log management options",
+        description="Options for how Toil should manage its logs.",
+    )
+    log_options.add_argument(
+        "--maxLogFileSize",
+        dest="maxLogFileSize",
+        default=100 * 1024 * 1024,
+        type=h2b,
+        action=make_open_interval_action(1),
+        help=f"The maximum size of a job log file to keep (in bytes), log files larger than "
+        f"this will be truncated to the last X bytes. Setting this option to zero will "
+        f"prevent any truncation. Setting this option to a negative value will truncate "
+        f"from the beginning. Default={bytes2human(100 * 1024 * 1024)}",
+    )
+    log_options.add_argument(
+        "--writeLogs",
+        dest="writeLogs",
+        nargs="?",
+        action="store",
+        default=None,
+        const=os.getcwd(),
+        metavar="OPT_PATH",
+        help="Write worker logs received by the leader into their own files at the specified "
+        "path. Any non-empty standard output and error from failed batch system jobs will "
+        "also be written into files at this path. The current working directory will be "
+        "used if a path is not specified explicitly. Note: By default only the logs of "
+        "failed jobs are returned to the leader. Set log level to 'debug' or enable "
+        "'--writeLogsFromAllJobs' to get logs back from successful jobs, and adjust "
+        "'maxLogFileSize' to control the truncation limit for worker logs.",
+    )
+    log_options.add_argument(
+        "--writeLogsGzip",
+        dest="writeLogsGzip",
+        nargs="?",
+        action="store",
+        default=None,
+        const=os.getcwd(),
+        metavar="OPT_PATH",
+        help="Identical to --writeLogs except the log files are gzipped on the leader.",
+    )
+    log_options.add_argument(
+        "--writeLogsFromAllJobs",
+        dest="writeLogsFromAllJobs",
+        type=strtobool,
+        default=False,
+        metavar="BOOL",
+        help="Whether to write logs from all jobs (including the successful ones) without "
+        "necessarily setting the log level to 'debug'. Ensure that either --writeLogs "
+        "or --writeLogsGzip is set if enabling this option.",
+    )
+    log_options.add_argument(
+        "--writeMessages",
+        dest="write_messages",
+        default=None,
+        type=lambda x: None if x is None else os.path.abspath(x),
+        metavar="PATH",
+        help="File to send messages from the leader's message bus to.",
+    )
+    log_options.add_argument(
+        "--realTimeLogging",
+        dest="realTimeLogging",
+        type=strtobool,
+        default=False,
+        metavar="BOOL",
+        help="Enable real-time logging from workers to the leader",
+    )

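Editor's note: the `--maxLogFileSize` help string fully specifies the truncation rule, and its default grew from 64000 bytes to 100 MiB in this release. An illustration of the documented semantics (this is not toil's source, just the help text made executable):

    def truncate_log_sketch(data: bytes, limit: int) -> bytes:
        if limit == 0:
            # Documented: zero prevents any truncation.
            return data
        if limit < 0:
            # One reading of "truncate from the beginning": keep the first |limit| bytes.
            return data[:-limit]
        # Positive: keep the last `limit` bytes.
        return data[-limit:]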
     # Misc options
     misc_options = parser.add_argument_group(
-        title="Toil miscellaneous options.",
-        description="Everything else."
-    )
-    misc_options.add_argument('--disableChaining', dest='disableChaining', type=convert_bool, default=False,
-                              metavar="BOOL",
-                              help="Disables chaining of jobs (chaining uses one job's resource allocation "
-                                   "for its successor job if possible).")
-    misc_options.add_argument("--disableJobStoreChecksumVerification", dest="disableJobStoreChecksumVerification",
-                              default=False, type=convert_bool, metavar="BOOL",
-                              help="Disables checksum verification for files transferred to/from the job store. "
-                                   "Checksum verification is a safety check to ensure the data is not corrupted "
-                                   "during transfer. Currently only supported for non-streaming AWS files.")
+        title="Toil miscellaneous options", description="Everything else."
+    )
+    misc_options.add_argument(
+        "--disableChaining",
+        dest="disableChaining",
+        type=strtobool,
+        default=False,
+        metavar="BOOL",
+        help="Disables chaining of jobs (chaining uses one job's resource allocation "
+        "for its successor job if possible).",
+    )
+    misc_options.add_argument(
+        "--disableJobStoreChecksumVerification",
+        dest="disableJobStoreChecksumVerification",
+        default=False,
+        type=strtobool,
+        metavar="BOOL",
+        help="Disables checksum verification for files transferred to/from the job store. "
+        "Checksum verification is a safety check to ensure the data is not corrupted "
+        "during transfer. Currently only supported for non-streaming AWS files.",
+    )

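Editor's note: the checksum verification that `--disableJobStoreChecksumVerification` turns off is, per the help text, an integrity check on job-store transfers. Generically (this is not toil's job-store code), such a check hashes the stream on both ends and compares digests:

    import hashlib

    def stream_digest_sketch(chunks) -> str:
        # The sender records this digest; the receiver recomputes it over the
        # received chunks and rejects the transfer on mismatch.
        h = hashlib.sha256()
        for chunk in chunks:
            h.update(chunk)
        return h.hexdigest()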
     class SSEKeyAction(Action):
-        def __call__(self, parser: Any, namespace: Any, values: Any, option_string: Any = None) -> None:
+        def __call__(
+            self, parser: Any, namespace: Any, values: Any, option_string: Any = None
+        ) -> None:
             if values is not None:
                 sse_key = values
                 if sse_key is None:
                     return
                 with open(sse_key) as f:
-                    assert len(f.readline().rstrip()) == 32, 'SSE key appears to be invalid.'
+                    assert (
+                        len(f.readline().rstrip()) == 32
+                    ), "SSE key appears to be invalid."
             setattr(namespace, self.dest, values)

-    misc_options.add_argument("--sseKey", dest="sseKey", default=None, action=SSEKeyAction, metavar="PATH",
-                              help="Path to file containing 32 character key to be used for server-side encryption on "
-                                   "awsJobStore or googleJobStore. SSE will not be used if this flag is not passed.")
+    misc_options.add_argument(
+        "--sseKey",
+        dest="sseKey",
+        default=None,
+        action=SSEKeyAction,
+        metavar="PATH",
+        help="Path to file containing 32 character key to be used for server-side encryption on "
+        "awsJobStore or googleJobStore. SSE will not be used if this flag is not passed.",
+    )

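Editor's note: `SSEKeyAction` only sanity-checks that the key file's first line is exactly 32 characters long. One way to produce a compatible key file (illustrative, not from toil; the filename is arbitrary):

    import secrets

    # token_hex(16) yields 32 hexadecimal characters, satisfying the assert above.
    with open("sse.key", "w") as f:
        f.write(secrets.token_hex(16))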
     # yaml.safe_load is being deprecated, this is the suggested workaround
     def yaml_safe_load(stream: Any) -> Any:
-        yaml = YAML(typ='safe', pure=True)
+        yaml = YAML(typ="safe", pure=True)
         d = yaml.load(stream)
         if isinstance(d, dict):
             # this means the argument was a dictionary and is valid yaml (for configargparse)
@@ -669,70 +1044,129 @@ def add_base_toil_options(parser: ArgumentParser, jobstore_as_flag: bool = False
         Argparse action class to implement the action="extend" functionality on dictionaries
         """

-        def __call__(self, parser: Any, namespace: Any, values: Any, option_string: Any = None) -> None:
+        def __call__(
+            self, parser: Any, namespace: Any, values: Any, option_string: Any = None
+        ) -> None:
             items = getattr(namespace, self.dest, None)
-            assert items is not None  # for mypy. This should never be None, esp. if called in setEnv
+            assert (
+                items is not None
+            )  # for mypy. This should never be None, esp. if called in setEnv
             # note: this will overwrite existing entries
             items.update(values)

- misc_options.add_argument("--setEnv", '-e', metavar='NAME=VALUE or NAME', dest="environment",
679
- default={}, type=yaml_safe_load, action=ExtendActionDict,
680
- help="Set an environment variable early on in the worker. If VALUE is null, it will "
681
- "be looked up in the current environment. Independently of this option, the worker "
682
- "will try to emulate the leader's environment before running a job, except for "
683
- "some variables known to vary across systems. Using this option, a variable can "
684
- "be injected into the worker process itself before it is started.")
685
- misc_options.add_argument("--servicePollingInterval", dest="servicePollingInterval", default=60.0, type=float,
686
- action=make_open_interval_action(0.0), metavar="FLOAT",
687
- help=f"Interval of time service jobs wait between polling for the existence of the "
688
- f"keep-alive flag. Default: {60.0}")
689
- misc_options.add_argument('--forceDockerAppliance', dest='forceDockerAppliance', type=convert_bool, default=False,
690
- metavar="BOOL",
691
- help='Disables sanity checking the existence of the docker image specified by '
692
- 'TOIL_APPLIANCE_SELF, which Toil uses to provision mesos for autoscaling.')
693
- misc_options.add_argument('--statusWait', dest='statusWait', type=int, default=3600, metavar="INT",
694
- help="Seconds to wait between reports of running jobs.")
695
- misc_options.add_argument('--disableProgress', dest='disableProgress', type=convert_bool, default=False,
696
- metavar="BOOL",
697
- help="Disables the progress bar shown when standard error is a terminal.")
1057
+ misc_options.add_argument(
1058
+ "--setEnv",
1059
+ "-e",
1060
+ metavar="NAME=VALUE or NAME",
1061
+ dest="environment",
1062
+ default={},
1063
+ type=yaml_safe_load,
1064
+ action=ExtendActionDict,
1065
+ help="Set an environment variable early on in the worker. If VALUE is null, it will "
1066
+ "be looked up in the current environment. Independently of this option, the worker "
1067
+ "will try to emulate the leader's environment before running a job, except for "
1068
+ "some variables known to vary across systems. Using this option, a variable can "
1069
+ "be injected into the worker process itself before it is started.",
1070
+ )
1071
+ misc_options.add_argument(
1072
+ "--servicePollingInterval",
1073
+ dest="servicePollingInterval",
1074
+ default=60.0,
1075
+ type=float,
1076
+ action=make_open_interval_action(0.0),
1077
+ metavar="FLOAT",
1078
+ help=f"Interval of time service jobs wait between polling for the existence of the "
1079
+ f"keep-alive flag. Default: {60.0}",
1080
+ )
1081
+ misc_options.add_argument(
1082
+ "--forceDockerAppliance",
1083
+ dest="forceDockerAppliance",
1084
+ type=strtobool,
1085
+ default=False,
1086
+ metavar="BOOL",
1087
+ help="Disables sanity checking the existence of the docker image specified by "
1088
+ "TOIL_APPLIANCE_SELF, which Toil uses to provision mesos for autoscaling.",
1089
+ )
1090
+ misc_options.add_argument(
1091
+ "--statusWait",
1092
+ dest="statusWait",
1093
+ type=int,
1094
+ default=3600,
1095
+ metavar="INT",
1096
+ help="Seconds to wait between reports of running jobs.",
1097
+ )
1098
+ misc_options.add_argument(
1099
+ "--disableProgress",
1100
+ dest="disableProgress",
1101
+ action="store_true",
1102
+ default=False,
1103
+ help="Disables the progress bar shown when standard error is a terminal.",
1104
+ )
698
1105
 
699
1106
  # Debug options
700
1107
  debug_options = parser.add_argument_group(
701
- title="Toil debug options.",
702
- description="Debug options for finding problems or helping with testing."
703
- )
704
- debug_options.add_argument("--debugWorker", dest="debugWorker", default=False, action="store_true",
705
- help="Experimental no forking mode for local debugging. Specifically, workers "
706
- "are not forked and stderr/stdout are not redirected to the log.")
707
- debug_options.add_argument("--disableWorkerOutputCapture", dest="disableWorkerOutputCapture", default=False,
708
- action="store_true",
709
- help="Let worker output go to worker's standard out/error instead of per-job logs.")
710
- debug_options.add_argument("--badWorker", dest="badWorker", default=0.0, type=float,
711
- action=make_closed_interval_action(0.0, 1.0), metavar="FLOAT",
712
- help=f"For testing purposes randomly kill --badWorker proportion of jobs using "
713
- f"SIGKILL. default={0.0}")
714
- debug_options.add_argument("--badWorkerFailInterval", dest="badWorkerFailInterval", default=0.01, type=float,
715
- action=make_open_interval_action(0.0), metavar="FLOAT", # might be cyclical?
716
- help=f"When killing the job pick uniformly within the interval from 0.0 to "
717
- f"--badWorkerFailInterval seconds after the worker starts. "
718
- f"default={0.01}")
1108
+ title="Toil debug options",
1109
+ description="Debug options for finding problems or helping with testing.",
1110
+ )
1111
+ debug_options.add_argument(
1112
+ "--debugWorker",
1113
+ dest="debugWorker",
1114
+ default=False,
1115
+ action="store_true",
1116
+ help="Experimental no forking mode for local debugging. Specifically, workers "
1117
+ "are not forked and stderr/stdout are not redirected to the log.",
1118
+ )
1119
+ debug_options.add_argument(
1120
+ "--disableWorkerOutputCapture",
1121
+ dest="disableWorkerOutputCapture",
1122
+ default=False,
1123
+ action="store_true",
1124
+ help="Let worker output go to worker's standard out/error instead of per-job logs.",
1125
+ )
1126
+ debug_options.add_argument(
1127
+ "--badWorker",
1128
+ dest="badWorker",
1129
+ default=0.0,
1130
+ type=float,
1131
+ action=make_closed_interval_action(0.0, 1.0),
1132
+ metavar="FLOAT",
1133
+ help=f"For testing purposes randomly kill --badWorker proportion of jobs using "
1134
+ f"SIGKILL. default={0.0}",
1135
+ )
1136
+ debug_options.add_argument(
1137
+ "--badWorkerFailInterval",
1138
+ dest="badWorkerFailInterval",
1139
+ default=0.01,
1140
+ type=float,
1141
+ action=make_open_interval_action(0.0),
1142
+ metavar="FLOAT", # might be cyclical?
1143
+ help=f"When killing the job pick uniformly within the interval from 0.0 to "
1144
+ f"--badWorkerFailInterval seconds after the worker starts. "
1145
+ f"default={0.01}",
1146
+ )
719
1147
 
720
1148
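Editor's note: the two failure-injection knobs above fully describe the behavior: a `--badWorker` fraction of jobs is SIGKILLed at a time drawn uniformly from the first `--badWorkerFailInterval` seconds of the worker's life. Illustratively (this is not toil's worker code, and `signal.SIGKILL` is POSIX-only):

    import os
    import random
    import signal
    import threading

    def schedule_bad_worker_kill_sketch(bad_worker: float, fail_interval: float) -> None:
        # With probability `bad_worker`, SIGKILL this process at a uniformly
        # random point within the first `fail_interval` seconds.
        if random.random() < bad_worker:
            delay = random.uniform(0.0, fail_interval)
            threading.Timer(
                delay, lambda: os.kill(os.getpid(), signal.SIGKILL)
            ).start()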
     # All deprecated options:

     # These are deprecated in favor of a simpler option
     # ex: noLinkImports and linkImports can be simplified into a single link_imports argument
-    link_imports.add_argument("--noLinkImports", dest="linkImports", action="store_false",
-                              help=SUPPRESS)
-    link_imports.add_argument("--linkImports", dest="linkImports", action="store_true",
-                              help=SUPPRESS)
+    link_imports.add_argument(
+        "--noLinkImports", dest="linkImports", action="store_false", help=SUPPRESS
+    )
+    link_imports.add_argument(
+        "--linkImports", dest="linkImports", action="store_true", help=SUPPRESS
+    )
     link_imports.set_defaults(linkImports=None)

-    move_exports.add_argument("--moveExports", dest="moveExports", action="store_true",
-                              help=SUPPRESS)
-    move_exports.add_argument("--noMoveExports", dest="moveExports", action="store_false",
-                              help=SUPPRESS)
+    move_exports.add_argument(
+        "--moveExports", dest="moveExports", action="store_true", help=SUPPRESS
+    )
+    move_exports.add_argument(
+        "--noMoveExports", dest="moveExports", action="store_false", help=SUPPRESS
+    )
     link_imports.set_defaults(moveExports=None)

     # dest is set to enableCaching to not conflict with the current --caching destination
-    caching.add_argument('--disableCaching', dest='enableCaching', action='store_false', help=SUPPRESS)
-    caching.set_defaults(disableCaching=None)
+    caching.add_argument(
+        "--disableCaching", dest="enableCaching", action="store_false", help=SUPPRESS
+    )
+    caching.set_defaults(enableCaching=None)
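Editor's note: each deprecated pair above uses the tri-state argparse idiom: paired store_true/store_false flags plus `set_defaults(...=None)`, so downstream code can distinguish "explicitly enabled", "explicitly disabled", and "never mentioned" before reconciling with the replacement option. A sketch of the consuming side (the function and destination names here are hypothetical):

    import warnings

    def reconcile_link_imports_sketch(options) -> None:
        # None means neither deprecated flag was passed.
        if options.linkImports is not None:
            warnings.warn(
                "--linkImports/--noLinkImports are deprecated", DeprecationWarning
            )
            # Hypothetical newer destination name.
            options.symlinkImports = options.linkImports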