toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. toil/__init__.py +18 -13
  2. toil/batchSystems/abstractBatchSystem.py +21 -10
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
  4. toil/batchSystems/awsBatch.py +14 -14
  5. toil/batchSystems/contained_executor.py +3 -3
  6. toil/batchSystems/htcondor.py +0 -1
  7. toil/batchSystems/kubernetes.py +34 -31
  8. toil/batchSystems/local_support.py +3 -1
  9. toil/batchSystems/mesos/batchSystem.py +7 -7
  10. toil/batchSystems/options.py +32 -83
  11. toil/batchSystems/registry.py +104 -23
  12. toil/batchSystems/singleMachine.py +16 -13
  13. toil/batchSystems/slurm.py +3 -3
  14. toil/batchSystems/torque.py +0 -1
  15. toil/bus.py +6 -8
  16. toil/common.py +532 -743
  17. toil/cwl/__init__.py +28 -32
  18. toil/cwl/cwltoil.py +523 -520
  19. toil/cwl/utils.py +55 -10
  20. toil/fileStores/__init__.py +2 -2
  21. toil/fileStores/abstractFileStore.py +36 -11
  22. toil/fileStores/cachingFileStore.py +607 -530
  23. toil/fileStores/nonCachingFileStore.py +43 -10
  24. toil/job.py +140 -75
  25. toil/jobStores/abstractJobStore.py +147 -79
  26. toil/jobStores/aws/jobStore.py +23 -9
  27. toil/jobStores/aws/utils.py +1 -2
  28. toil/jobStores/fileJobStore.py +117 -19
  29. toil/jobStores/googleJobStore.py +16 -7
  30. toil/jobStores/utils.py +5 -6
  31. toil/leader.py +71 -43
  32. toil/lib/accelerators.py +10 -5
  33. toil/lib/aws/__init__.py +3 -14
  34. toil/lib/aws/ami.py +22 -9
  35. toil/lib/aws/iam.py +21 -13
  36. toil/lib/aws/session.py +2 -16
  37. toil/lib/aws/utils.py +4 -5
  38. toil/lib/compatibility.py +1 -1
  39. toil/lib/conversions.py +7 -3
  40. toil/lib/docker.py +22 -23
  41. toil/lib/ec2.py +10 -6
  42. toil/lib/ec2nodes.py +106 -100
  43. toil/lib/encryption/_nacl.py +2 -1
  44. toil/lib/generatedEC2Lists.py +325 -18
  45. toil/lib/io.py +21 -0
  46. toil/lib/misc.py +1 -1
  47. toil/lib/resources.py +1 -1
  48. toil/lib/threading.py +74 -26
  49. toil/options/common.py +738 -0
  50. toil/options/cwl.py +336 -0
  51. toil/options/wdl.py +32 -0
  52. toil/provisioners/abstractProvisioner.py +1 -4
  53. toil/provisioners/aws/__init__.py +3 -6
  54. toil/provisioners/aws/awsProvisioner.py +6 -0
  55. toil/provisioners/clusterScaler.py +3 -2
  56. toil/provisioners/gceProvisioner.py +2 -2
  57. toil/realtimeLogger.py +2 -1
  58. toil/resource.py +24 -18
  59. toil/server/app.py +2 -3
  60. toil/server/cli/wes_cwl_runner.py +4 -4
  61. toil/server/utils.py +1 -1
  62. toil/server/wes/abstract_backend.py +3 -2
  63. toil/server/wes/amazon_wes_utils.py +5 -4
  64. toil/server/wes/tasks.py +2 -3
  65. toil/server/wes/toil_backend.py +2 -10
  66. toil/server/wsgi_app.py +2 -0
  67. toil/serviceManager.py +12 -10
  68. toil/statsAndLogging.py +5 -1
  69. toil/test/__init__.py +29 -54
  70. toil/test/batchSystems/batchSystemTest.py +11 -111
  71. toil/test/batchSystems/test_slurm.py +3 -2
  72. toil/test/cwl/cwlTest.py +213 -90
  73. toil/test/cwl/glob_dir.cwl +15 -0
  74. toil/test/cwl/preemptible.cwl +21 -0
  75. toil/test/cwl/preemptible_expression.cwl +28 -0
  76. toil/test/cwl/revsort.cwl +1 -1
  77. toil/test/cwl/revsort2.cwl +1 -1
  78. toil/test/docs/scriptsTest.py +0 -1
  79. toil/test/jobStores/jobStoreTest.py +27 -16
  80. toil/test/lib/aws/test_iam.py +4 -14
  81. toil/test/lib/aws/test_utils.py +0 -3
  82. toil/test/lib/dockerTest.py +4 -4
  83. toil/test/lib/test_ec2.py +11 -16
  84. toil/test/mesos/helloWorld.py +4 -5
  85. toil/test/mesos/stress.py +1 -1
  86. toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
  87. toil/test/provisioners/clusterScalerTest.py +6 -4
  88. toil/test/provisioners/clusterTest.py +14 -3
  89. toil/test/provisioners/gceProvisionerTest.py +0 -6
  90. toil/test/provisioners/restartScript.py +3 -2
  91. toil/test/server/serverTest.py +1 -1
  92. toil/test/sort/restart_sort.py +2 -1
  93. toil/test/sort/sort.py +2 -1
  94. toil/test/sort/sortTest.py +2 -13
  95. toil/test/src/autoDeploymentTest.py +45 -45
  96. toil/test/src/busTest.py +5 -5
  97. toil/test/src/checkpointTest.py +2 -2
  98. toil/test/src/deferredFunctionTest.py +1 -1
  99. toil/test/src/fileStoreTest.py +32 -16
  100. toil/test/src/helloWorldTest.py +1 -1
  101. toil/test/src/importExportFileTest.py +1 -1
  102. toil/test/src/jobDescriptionTest.py +2 -1
  103. toil/test/src/jobServiceTest.py +1 -1
  104. toil/test/src/jobTest.py +18 -18
  105. toil/test/src/miscTests.py +5 -3
  106. toil/test/src/promisedRequirementTest.py +3 -3
  107. toil/test/src/realtimeLoggerTest.py +1 -1
  108. toil/test/src/resourceTest.py +2 -2
  109. toil/test/src/restartDAGTest.py +1 -1
  110. toil/test/src/resumabilityTest.py +36 -2
  111. toil/test/src/retainTempDirTest.py +1 -1
  112. toil/test/src/systemTest.py +2 -2
  113. toil/test/src/toilContextManagerTest.py +2 -2
  114. toil/test/src/userDefinedJobArgTypeTest.py +1 -1
  115. toil/test/utils/toilDebugTest.py +98 -32
  116. toil/test/utils/toilKillTest.py +2 -2
  117. toil/test/utils/utilsTest.py +20 -0
  118. toil/test/wdl/wdltoil_test.py +148 -45
  119. toil/toilState.py +7 -6
  120. toil/utils/toilClean.py +1 -1
  121. toil/utils/toilConfig.py +36 -0
  122. toil/utils/toilDebugFile.py +60 -33
  123. toil/utils/toilDebugJob.py +39 -12
  124. toil/utils/toilDestroyCluster.py +1 -1
  125. toil/utils/toilKill.py +1 -1
  126. toil/utils/toilLaunchCluster.py +13 -2
  127. toil/utils/toilMain.py +3 -2
  128. toil/utils/toilRsyncCluster.py +1 -1
  129. toil/utils/toilSshCluster.py +1 -1
  130. toil/utils/toilStats.py +240 -143
  131. toil/utils/toilStatus.py +1 -4
  132. toil/version.py +11 -11
  133. toil/wdl/utils.py +2 -122
  134. toil/wdl/wdltoil.py +999 -386
  135. toil/worker.py +25 -31
  136. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
  137. toil-6.1.0a1.dist-info/RECORD +237 -0
  138. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
  139. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
  140. toil/batchSystems/parasol.py +0 -379
  141. toil/batchSystems/tes.py +0 -459
  142. toil/test/batchSystems/parasolTestSupport.py +0 -117
  143. toil/test/wdl/builtinTest.py +0 -506
  144. toil/test/wdl/conftest.py +0 -23
  145. toil/test/wdl/toilwdlTest.py +0 -522
  146. toil/wdl/toilwdl.py +0 -141
  147. toil/wdl/versions/dev.py +0 -107
  148. toil/wdl/versions/draft2.py +0 -980
  149. toil/wdl/versions/v1.py +0 -794
  150. toil/wdl/wdl_analysis.py +0 -116
  151. toil/wdl/wdl_functions.py +0 -997
  152. toil/wdl/wdl_synthesis.py +0 -1011
  153. toil/wdl/wdl_types.py +0 -243
  154. toil-5.12.0.dist-info/RECORD +0 -244
  155. /toil/{wdl/versions → options}/__init__.py +0 -0
  156. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
  157. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
toil/common.py CHANGED
@@ -23,11 +23,15 @@ import tempfile
  import time
  import uuid
  import warnings
- from argparse import (ArgumentDefaultsHelpFormatter,
+
+ from ruamel.yaml import YAML
+ from ruamel.yaml.comments import CommentedMap
+ from configargparse import ArgParser, YAMLConfigFileParser
+ from argparse import (SUPPRESS,
+ ArgumentDefaultsHelpFormatter,
  ArgumentParser,
  Namespace,
- _ArgumentGroup)
- from distutils.util import strtobool
+ _ArgumentGroup, Action, _StoreFalseAction, _StoreTrueAction, _AppendAction)
  from functools import lru_cache
  from types import TracebackType
  from typing import (IO,
@@ -37,7 +41,6 @@ from typing import (IO,
  ContextManager,
  Dict,
  List,
- MutableMapping,
  Optional,
  Set,
  Tuple,
@@ -46,10 +49,14 @@ from typing import (IO,
  Union,
  cast,
  overload)
- from urllib.parse import urlparse
+ from urllib.parse import urlparse, unquote, quote

  import requests

+ from toil.options.common import add_base_toil_options, JOBSTORE_HELP
+ from toil.options.cwl import add_cwl_options
+ from toil.options.wdl import add_wdl_options
+
  if sys.version_info >= (3, 8):
  from typing import Literal
  else:
@@ -57,7 +64,6 @@ else:

  from toil import logProcessContext, lookupEnvVar
  from toil.batchSystems.options import (add_all_batchsystem_options,
- set_batchsystem_config_defaults,
  set_batchsystem_options)
  from toil.bus import (ClusterDesiredSizeMessage,
  ClusterSizeMessage,
@@ -66,22 +72,19 @@ from toil.bus import (ClusterDesiredSizeMessage,
  JobIssuedMessage,
  JobMissingMessage,
  MessageBus,
- QueueSizeMessage,
- gen_message_bus_path)
+ QueueSizeMessage)
  from toil.fileStores import FileID
  from toil.lib.aws import zone_to_region, build_tag_dict_from_env
  from toil.lib.compatibility import deprecated
- from toil.lib.conversions import bytes2human, human2bytes
- from toil.lib.io import try_path
+ from toil.lib.io import try_path, AtomicFileCreate
  from toil.lib.retry import retry
  from toil.provisioners import (add_provisioner_options,
  cluster_factory,
  parse_node_types)
  from toil.realtimeLogger import RealtimeLogger
  from toil.statsAndLogging import (add_logging_options,
- root_logger,
  set_logging_from_options)
- from toil.version import dockerRegistry, dockerTag, version
+ from toil.version import dockerRegistry, dockerTag, version, baseVersion

  if TYPE_CHECKING:
  from toil.batchSystems.abstractBatchSystem import AbstractBatchSystem
@@ -94,14 +97,14 @@ if TYPE_CHECKING:
  from toil.provisioners.abstractProvisioner import AbstractProvisioner
  from toil.resource import ModuleDescriptor

- # aim to pack autoscaling jobs within a 30 minute block before provisioning a new node
- defaultTargetTime = 1800
- SYS_MAX_SIZE = 9223372036854775807
- # sys.max_size on 64 bit systems is 9223372036854775807, so that 32-bit systems
- # use the same number
  UUID_LENGTH = 32
  logger = logging.getLogger(__name__)

+ # TODO: should this use an XDG config directory or ~/.config to not clutter the
+ # base home directory?
+ TOIL_HOME_DIR: str = os.path.join(os.path.expanduser("~"), ".toil")
+ DEFAULT_CONFIG_FILE: str = os.path.join(TOIL_HOME_DIR, "default.yaml")
+

  class Config:
  """Class to represent configuration operations for a toil workflow run."""
@@ -110,118 +113,148 @@ class Config:
110
113
  cleanWorkDir: str
111
114
  max_jobs: int
112
115
  max_local_jobs: int
116
+ manualMemArgs: bool
113
117
  run_local_jobs_on_workers: bool
118
+ coalesceStatusCalls: bool
119
+ mesos_endpoint: Optional[str]
120
+ mesos_framework_id: Optional[str]
121
+ mesos_role: Optional[str]
122
+ mesos_name: str
123
+ kubernetes_host_path: Optional[str]
124
+ kubernetes_owner: Optional[str]
125
+ kubernetes_service_account: Optional[str]
126
+ kubernetes_pod_timeout: float
114
127
  tes_endpoint: str
115
128
  tes_user: str
116
129
  tes_password: str
117
130
  tes_bearer_token: str
118
- jobStore: str
131
+ aws_batch_region: Optional[str]
132
+ aws_batch_queue: Optional[str]
133
+ aws_batch_job_role_arn: Optional[str]
134
+ scale: float
119
135
  batchSystem: str
120
- batch_logs_dir: Optional[str] = None
136
+ batch_logs_dir: Optional[str]
121
137
  """The backing scheduler will be instructed, if possible, to save logs
122
138
  to this directory, where the leader can read them."""
123
- workflowAttemptNumber: int
139
+ statePollingWait: int
124
140
  disableAutoDeployment: bool
125
141
 
126
- def __init__(self) -> None:
127
- # Core options
128
- self.workflowID: Optional[str] = None
129
- """This attribute uniquely identifies the job store and therefore the workflow. It is
130
- necessary in order to distinguish between two consecutive workflows for which
131
- self.jobStore is the same, e.g. when a job store name is reused after a previous run has
132
- finished successfully and its job store has been clean up."""
133
- self.workflowAttemptNumber: int = 0
134
- self.jobStore: Optional[str] = None # type: ignore
135
- self.logLevel: str = logging.getLevelName(root_logger.getEffectiveLevel())
136
- self.workDir: Optional[str] = None
137
- self.coordination_dir: Optional[str] = None
138
- self.noStdOutErr: bool = False
139
- self.stats: bool = False
140
-
141
- # Because the stats option needs the jobStore to persist past the end of the run,
142
- # the clean default value depends the specified stats option and is determined in setOptions
143
- self.clean: Optional[str] = None
144
- self.clusterStats = None
145
-
146
- # Restarting the workflow options
147
- self.restart: bool = False
142
+ # Core options
143
+ workflowID: Optional[str]
144
+ """This attribute uniquely identifies the job store and therefore the workflow. It is
145
+ necessary in order to distinguish between two consecutive workflows for which
146
+ self.jobStore is the same, e.g. when a job store name is reused after a previous run has
147
+ finished successfully and its job store has been cleaned up."""
148
+ workflowAttemptNumber: int
149
+ jobStore: str
150
+ logLevel: str
151
+ workDir: Optional[str]
152
+ coordination_dir: Optional[str]
153
+ noStdOutErr: bool
154
+ stats: bool
148
155
 
149
- # Batch system options
150
- set_batchsystem_config_defaults(self)
156
+ # Because the stats option needs the jobStore to persist past the end of the run,
157
+ # the clean default value depends on the specified stats option and is determined in setOptions
158
+ clean: Optional[str]
159
+ clusterStats: str
151
160
 
152
- # File store options
153
- self.caching: Optional[bool] = None
154
- self.linkImports: bool = True
155
- self.moveExports: bool = False
161
+ # Restarting the workflow options
162
+ restart: bool
156
163
 
157
- # Autoscaling options
158
- self.provisioner: Optional[str] = None
159
- self.nodeTypes: List[Tuple[Set[str], Optional[float]]] = []
160
- self.minNodes = None
161
- self.maxNodes = [10]
162
- self.targetTime: float = defaultTargetTime
163
- self.betaInertia: float = 0.1
164
- self.scaleInterval: int = 60
165
- self.preemptibleCompensation: float = 0.0
166
- self.nodeStorage: int = 50
167
- self.nodeStorageOverrides: List[str] = []
168
- self.metrics: bool = False
169
- self.assume_zero_overhead: bool = False
170
-
171
- # Parameters to limit service jobs, so preventing deadlock scheduling scenarios
172
- self.maxPreemptibleServiceJobs: int = sys.maxsize
173
- self.maxServiceJobs: int = sys.maxsize
174
- self.deadlockWait: Union[float, int] = 60 # Number of seconds we must be stuck with all services before declaring a deadlock
175
- self.deadlockCheckInterval: Union[float, int] = 30 # Minimum polling delay for deadlocks
176
-
177
- # Resource requirements
178
- self.defaultMemory: int = 2147483648
179
- self.defaultCores: Union[float, int] = 1
180
- self.defaultDisk: int = 2147483648
181
- self.defaultPreemptible: bool = False
182
- # TODO: These names are generated programmatically in
183
- # Requirer._fetchRequirement so we can't use snake_case until we fix
184
- # that (and add compatibility getters/setters?)
185
- self.defaultAccelerators: List['AcceleratorRequirement'] = []
186
- self.maxCores: int = SYS_MAX_SIZE
187
- self.maxMemory: int = SYS_MAX_SIZE
188
- self.maxDisk: int = SYS_MAX_SIZE
164
+ # Batch system options
189
165
 
190
- # Retrying/rescuing jobs
191
- self.retryCount: int = 1
192
- self.enableUnlimitedPreemptibleRetries: bool = False
193
- self.doubleMem: bool = False
194
- self.maxJobDuration: int = sys.maxsize
195
- self.rescueJobsFrequency: int = 60
166
+ # File store options
167
+ caching: Optional[bool]
168
+ symlinkImports: bool
169
+ moveOutputs: bool
170
+
171
+ # Autoscaling options
172
+ provisioner: Optional[str]
173
+ nodeTypes: List[Tuple[Set[str], Optional[float]]]
174
+ minNodes: List[int]
175
+ maxNodes: List[int]
176
+ targetTime: float
177
+ betaInertia: float
178
+ scaleInterval: int
179
+ preemptibleCompensation: float
180
+ nodeStorage: int
181
+ nodeStorageOverrides: List[str]
182
+ metrics: bool
183
+ assume_zero_overhead: bool
184
+
185
+ # Parameters to limit service jobs, so preventing deadlock scheduling scenarios
186
+ maxPreemptibleServiceJobs: int
187
+ maxServiceJobs: int
188
+ deadlockWait: Union[
189
+ float, int]
190
+ deadlockCheckInterval: Union[float, int]
196
191
 
197
- # Log management
198
- self.maxLogFileSize: int = 64000
199
- self.writeLogs = None
200
- self.writeLogsGzip = None
201
- self.writeLogsFromAllJobs: bool = False
202
- self.write_messages: Optional[str] = None
192
+ # Resource requirements
193
+ defaultMemory: int
194
+ defaultCores: Union[float, int]
195
+ defaultDisk: int
196
+ defaultPreemptible: bool
197
+ # TODO: These names are generated programmatically in
198
+ # Requirer._fetchRequirement so we can't use snake_case until we fix
199
+ # that (and add compatibility getters/setters?)
200
+ defaultAccelerators: List['AcceleratorRequirement']
201
+ maxCores: int
202
+ maxMemory: int
203
+ maxDisk: int
203
204
 
204
- # Misc
205
- self.environment: Dict[str, str] = {}
206
- self.disableChaining: bool = False
207
- self.disableJobStoreChecksumVerification: bool = False
208
- self.sseKey: Optional[str] = None
209
- self.servicePollingInterval: int = 60
210
- self.useAsync: bool = True
211
- self.forceDockerAppliance: bool = False
212
- self.statusWait: int = 3600
213
- self.disableProgress: bool = False
214
- self.readGlobalFileMutableByDefault: bool = False
215
- self.kill_polling_interval: int = 5
205
+ # Retrying/rescuing jobs
206
+ retryCount: int
207
+ enableUnlimitedPreemptibleRetries: bool
208
+ doubleMem: bool
209
+ maxJobDuration: int
210
+ rescueJobsFrequency: int
211
+
212
+ # Log management
213
+ maxLogFileSize: int
214
+ writeLogs: str
215
+ writeLogsGzip: str
216
+ writeLogsFromAllJobs: bool
217
+ write_messages: Optional[str]
218
+ realTimeLogging: bool
219
+
220
+ # Misc
221
+ environment: Dict[str, str]
222
+ disableChaining: bool
223
+ disableJobStoreChecksumVerification: bool
224
+ sseKey: Optional[str]
225
+ servicePollingInterval: int
226
+ useAsync: bool
227
+ forceDockerAppliance: bool
228
+ statusWait: int
229
+ disableProgress: bool
230
+ readGlobalFileMutableByDefault: bool
216
231
 
217
- # Debug options
218
- self.debugWorker: bool = False
219
- self.disableWorkerOutputCapture: bool = False
220
- self.badWorker = 0.0
221
- self.badWorkerFailInterval = 0.01
232
+ # Debug options
233
+ debugWorker: bool
234
+ disableWorkerOutputCapture: bool
235
+ badWorker: float
236
+ badWorkerFailInterval: float
237
+ kill_polling_interval: int
238
+
239
+ # CWL
240
+ cwl: bool
222
241
 
223
- # CWL
224
- self.cwl: bool = False
242
+ def __init__(self) -> None:
243
+ # only defaults for options that are not CLI options are defined here (thus CLI options stay centralized)
244
+ self.cwl = False # will probably remove later
245
+ self.workflowID = None
246
+ self.kill_polling_interval = 5
247
+
248
+ self.set_from_default_config()
249
+
250
+ def set_from_default_config(self) -> None:
251
+ # get defaults from a config file by simulating an argparse run
252
+ # as Config often expects defaults to already be instantiated
253
+ parser = ArgParser()
254
+ addOptions(parser, jobstore_as_flag=True, cwl=self.cwl)
255
+ # The parser already knows about the default config file
256
+ ns = parser.parse_args("")
257
+ self.setOptions(ns)
225
258
 
226
259
  def prepare_start(self) -> None:
227
260
  """
@@ -239,17 +272,11 @@ class Config:
239
272
  # auto-generated and point to a temp directory that could no longer
240
273
  # exist and that can't safely be re-made.
241
274
  self.write_messages = None
242
-
243
275
 
244
276
  def setOptions(self, options: Namespace) -> None:
245
277
  """Creates a config object from the options object."""
246
- OptionType = TypeVar("OptionType")
247
278
 
248
279
  def set_option(option_name: str,
249
- parsing_function: Optional[Callable[[Any], OptionType]] = None,
250
- check_function: Optional[Callable[[OptionType], Union[None, bool]]] = None,
251
- default: Optional[OptionType] = None,
252
- env: Optional[List[str]] = None,
253
280
  old_names: Optional[List[str]] = None) -> None:
254
281
  """
255
282
  Determine the correct value for the given option.
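
The dropped set_option() parameters above (parsing_function, check_function, default, env) reflect moving conversion and validation out of setOptions() and into the parser definitions themselves, which now live in the new toil/options/common.py. A simplified illustration of that pattern with a stand-in, human2bytes-style converter (the converter below is an assumption for the sketch, not Toil's implementation):

    import argparse

    def positive_bytes(value: str) -> int:
        # stand-in converter: accepts "2G", "512Mi", or plain integer byte counts
        suffixes = {"Ki": 2 ** 10, "Mi": 2 ** 20, "Gi": 2 ** 30, "K": 2 ** 10, "M": 2 ** 20, "G": 2 ** 30}
        for suffix, factor in suffixes.items():
            if value.endswith(suffix):
                number = int(float(value[: -len(suffix)]) * factor)
                break
        else:
            number = int(value)
        if number < 1:
            raise argparse.ArgumentTypeError(f"{value!r} must be at least 1 byte")
        return number

    parser = argparse.ArgumentParser()
    parser.add_argument("--defaultMemory", type=positive_bytes, default=2147483648)
    print(parser.parse_args(["--defaultMemory", "2G"]).defaultMemory)  # 2147483648
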
@@ -269,192 +296,134 @@ class Config:
269
296
  If the option gets a non-None value, sets it as an attribute in
270
297
  this Config.
271
298
  """
272
- option_value = getattr(options, option_name, default)
299
+ option_value = getattr(options, option_name, None)
273
300
 
274
301
  if old_names is not None:
275
302
  for old_name in old_names:
303
+ # If the option is already set with the new name and not the old name
304
+ # prioritize the new name over the old name and break
305
+ if option_value is not None and option_value != [] and option_value != {}:
306
+ break
276
307
  # Try all the old names in case user code is setting them
277
308
  # in an options object.
278
- if option_value != default:
279
- break
280
- if hasattr(options, old_name):
309
+ # This does assume that all deprecated options have a default value of None
310
+ if getattr(options, old_name, None) is not None:
281
311
  warnings.warn(f'Using deprecated option field {old_name} to '
282
312
  f'provide value for config field {option_name}',
283
313
  DeprecationWarning)
284
314
  option_value = getattr(options, old_name)
285
-
286
- if env is not None:
287
- for env_var in env:
288
- # Try all the environment variables
289
- if option_value != default:
290
- break
291
- option_value = os.environ.get(env_var, default)
292
-
293
315
  if option_value is not None or not hasattr(self, option_name):
294
- if parsing_function is not None:
295
- # Parse whatever it is (string, argparse-made list, etc.)
296
- option_value = parsing_function(option_value)
297
- if check_function is not None:
298
- try:
299
- check_function(option_value) # type: ignore
300
- except AssertionError:
301
- raise RuntimeError(f"The {option_name} option has an invalid value: {option_value}")
302
316
  setattr(self, option_name, option_value)
303
317
 
304
- # Function to parse integer from string expressed in different formats
305
- h2b = lambda x: human2bytes(str(x))
306
-
307
- def parse_jobstore(jobstore_uri: str) -> str:
308
- name, rest = Toil.parseLocator(jobstore_uri)
309
- if name == 'file':
310
- # We need to resolve relative paths early, on the leader, because the worker process
311
- # may have a different working directory than the leader, e.g. under Mesos.
312
- return Toil.buildLocator(name, os.path.abspath(rest))
313
- else:
314
- return jobstore_uri
315
-
316
- def parse_str_list(s: str) -> List[str]:
317
- return [str(x) for x in s.split(",")]
318
-
319
- def parse_int_list(s: str) -> List[int]:
320
- return [int(x) for x in s.split(",")]
321
-
322
318
  # Core options
323
- set_option("jobStore", parsing_function=parse_jobstore)
319
+ set_option("jobStore")
324
320
  # TODO: LOG LEVEL STRING
325
321
  set_option("workDir")
326
- if self.workDir is not None:
327
- self.workDir = os.path.abspath(self.workDir)
328
- if not os.path.exists(self.workDir):
329
- raise RuntimeError(f"The path provided to --workDir ({self.workDir}) does not exist.")
330
-
331
- if len(self.workDir) > 80:
332
- logger.warning(f'Length of workDir path "{self.workDir}" is {len(self.workDir)} characters. '
333
- f'Consider setting a shorter path with --workPath or setting TMPDIR to something '
334
- f'like "/tmp" to avoid overly long paths.')
335
322
  set_option("coordination_dir")
336
- if self.coordination_dir is not None:
337
- self.coordination_dir = os.path.abspath(self.coordination_dir)
338
- if not os.path.exists(self.coordination_dir):
339
- raise RuntimeError(f"The path provided to --coordinationDir ({self.coordination_dir}) does not exist.")
340
323
 
341
324
  set_option("noStdOutErr")
342
325
  set_option("stats")
343
326
  set_option("cleanWorkDir")
344
327
  set_option("clean")
345
- if self.stats:
346
- if self.clean != "never" and self.clean is not None:
347
- raise RuntimeError("Contradicting options passed: Clean flag is set to %s "
348
- "despite the stats flag requiring "
349
- "the jobStore to be intact at the end of the run. "
350
- "Set clean to \'never\'" % self.clean)
351
- self.clean = "never"
352
- elif self.clean is None:
353
- self.clean = "onSuccess"
354
328
  set_option('clusterStats')
355
329
  set_option("restart")
356
330
 
357
331
  # Batch system options
358
332
  set_option("batchSystem")
359
- set_batchsystem_options(self.batchSystem, cast("OptionSetter", set_option))
333
+ set_batchsystem_options(None, cast("OptionSetter",
334
+ set_option)) # None as that will make set_batchsystem_options iterate through all batch systems and set their corresponding values
360
335
 
361
336
  # File store options
362
- set_option("linkImports", bool, default=True)
363
- set_option("moveExports", bool, default=False)
364
- set_option("caching", bool, default=None)
337
+ set_option("symlinkImports", old_names=["linkImports"])
338
+ set_option("moveOutputs", old_names=["moveExports"])
339
+ set_option("caching", old_names=["enableCaching"])
365
340
 
366
341
  # Autoscaling options
367
342
  set_option("provisioner")
368
- set_option("nodeTypes", parse_node_types)
369
- set_option("minNodes", parse_int_list)
370
- set_option("maxNodes", parse_int_list)
371
- set_option("targetTime", int)
372
- if self.targetTime <= 0:
373
- raise RuntimeError(f'targetTime ({self.targetTime}) must be a positive integer!')
374
- set_option("betaInertia", float)
375
- if not 0.0 <= self.betaInertia <= 0.9:
376
- raise RuntimeError(f'betaInertia ({self.betaInertia}) must be between 0.0 and 0.9!')
377
- set_option("scaleInterval", float)
343
+ set_option("nodeTypes")
344
+ set_option("minNodes")
345
+ set_option("maxNodes")
346
+ set_option("targetTime")
347
+ set_option("betaInertia")
348
+ set_option("scaleInterval")
378
349
  set_option("metrics")
379
350
  set_option("assume_zero_overhead")
380
- set_option("preemptibleCompensation", float)
381
- if not 0.0 <= self.preemptibleCompensation <= 1.0:
382
- raise RuntimeError(f'preemptibleCompensation ({self.preemptibleCompensation}) must be between 0.0 and 1.0!')
383
- set_option("nodeStorage", int)
384
-
385
- def check_nodestoreage_overrides(overrides: List[str]) -> bool:
386
- for override in overrides:
387
- tokens = override.split(":")
388
- if len(tokens) != 2:
389
- raise ValueError("Each component of --nodeStorageOverrides must be of the form <instance type>:<storage in GiB>")
390
- if not any(tokens[0] in n[0] for n in self.nodeTypes):
391
- raise ValueError("Instance type in --nodeStorageOverrides must be in --nodeTypes")
392
- if not tokens[1].isdigit():
393
- raise ValueError("storage must be an integer in --nodeStorageOverrides")
394
- return True
395
- set_option("nodeStorageOverrides", parse_str_list, check_function=check_nodestoreage_overrides)
396
-
397
- # Parameters to limit service jobs / detect deadlocks
398
- set_option("maxServiceJobs", int)
399
- set_option("maxPreemptibleServiceJobs", int)
400
- set_option("deadlockWait", int)
401
- set_option("deadlockCheckInterval", int)
402
-
403
- # Resource requirements
404
- set_option("defaultMemory", h2b, iC(1))
405
- set_option("defaultCores", float, fC(1.0))
406
- set_option("defaultDisk", h2b, iC(1))
407
- set_option("defaultAccelerators", parse_accelerator_list)
408
- set_option("readGlobalFileMutableByDefault")
409
- set_option("maxCores", int, iC(1))
410
- set_option("maxMemory", h2b, iC(1))
411
- set_option("maxDisk", h2b, iC(1))
351
+ set_option("preemptibleCompensation")
352
+ set_option("nodeStorage")
353
+
354
+ set_option("nodeStorageOverrides")
355
+
356
+ if self.cwl is False:
357
+ # Parameters to limit service jobs / detect deadlocks
358
+ set_option("maxServiceJobs")
359
+ set_option("maxPreemptibleServiceJobs")
360
+ set_option("deadlockWait")
361
+ set_option("deadlockCheckInterval")
362
+
363
+ set_option("defaultMemory")
364
+ set_option("defaultCores")
365
+ set_option("defaultDisk")
366
+ set_option("defaultAccelerators")
367
+ set_option("maxCores")
368
+ set_option("maxMemory")
369
+ set_option("maxDisk")
412
370
  set_option("defaultPreemptible")
413
371
 
414
372
  # Retrying/rescuing jobs
415
- set_option("retryCount", int, iC(1))
373
+ set_option("retryCount")
416
374
  set_option("enableUnlimitedPreemptibleRetries")
417
375
  set_option("doubleMem")
418
- set_option("maxJobDuration", int, iC(1))
419
- set_option("rescueJobsFrequency", int, iC(1))
376
+ set_option("maxJobDuration")
377
+ set_option("rescueJobsFrequency")
420
378
 
421
379
  # Log management
422
- set_option("maxLogFileSize", h2b, iC(1))
380
+ set_option("maxLogFileSize")
423
381
  set_option("writeLogs")
424
382
  set_option("writeLogsGzip")
425
383
  set_option("writeLogsFromAllJobs")
426
- set_option("write_messages", os.path.abspath)
427
-
428
- if not self.write_messages:
429
- # The user hasn't specified a place for the message bus so we
430
- # should make one.
431
- self.write_messages = gen_message_bus_path()
432
-
433
- assert not (self.writeLogs and self.writeLogsGzip), \
434
- "Cannot use both --writeLogs and --writeLogsGzip at the same time."
435
- assert not self.writeLogsFromAllJobs or self.writeLogs or self.writeLogsGzip, \
436
- "To enable --writeLogsFromAllJobs, either --writeLogs or --writeLogsGzip must be set."
384
+ set_option("write_messages")
437
385
 
438
386
  # Misc
439
- set_option("environment", parseSetEnv)
387
+ set_option("environment")
388
+
440
389
  set_option("disableChaining")
441
390
  set_option("disableJobStoreChecksumVerification")
442
- set_option("statusWait", int)
391
+ set_option("statusWait")
443
392
  set_option("disableProgress")
444
393
 
445
- def check_sse_key(sse_key: str) -> None:
446
- with open(sse_key) as f:
447
- assert len(f.readline().rstrip()) == 32, 'SSE key appears to be invalid.'
448
-
449
- set_option("sseKey", check_function=check_sse_key)
450
- set_option("servicePollingInterval", float, fC(0.0))
394
+ set_option("sseKey")
395
+ set_option("servicePollingInterval")
451
396
  set_option("forceDockerAppliance")
452
397
 
453
398
  # Debug options
454
399
  set_option("debugWorker")
455
400
  set_option("disableWorkerOutputCapture")
456
- set_option("badWorker", float, fC(0.0, 1.0))
457
- set_option("badWorkerFailInterval", float, fC(0.0))
401
+ set_option("badWorker")
402
+ set_option("badWorkerFailInterval")
403
+ set_option("logLevel")
404
+
405
+ self.check_configuration_consistency()
406
+
407
+ logger.debug("Loaded configuration: %s", vars(options))
408
+
409
+ def check_configuration_consistency(self) -> None:
410
+ """Old checks that cannot be fit into an action class for argparse"""
411
+ if self.writeLogs and self.writeLogsGzip:
412
+ raise ValueError("Cannot use both --writeLogs and --writeLogsGzip at the same time.")
413
+ if self.writeLogsFromAllJobs and not self.writeLogs and not self.writeLogsGzip:
414
+ raise ValueError("To enable --writeLogsFromAllJobs, either --writeLogs or --writeLogsGzip must be set.")
415
+ for override in self.nodeStorageOverrides:
416
+ tokens = override.split(":")
417
+ if not any(tokens[0] in n[0] for n in self.nodeTypes):
418
+ raise ValueError("Instance type in --nodeStorageOverrides must be in --nodeTypes")
419
+
420
+ if self.stats:
421
+ if self.clean != "never" and self.clean is not None:
422
+ logger.warning("Contradicting options passed: Clean flag is set to %s "
423
+ "despite the stats flag requiring "
424
+ "the jobStore to be intact at the end of the run. "
425
+ "Setting clean to \'never\'." % self.clean)
426
+ self.clean = "never"
458
427
 
459
428
  def __eq__(self, other: object) -> bool:
460
429
  return self.__dict__ == other.__dict__
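
setOptions() above now resolves each field purely from the parsed options, with old_names providing a bridge for renamed flags (linkImports → symlinkImports, moveExports → moveOutputs, enableCaching → caching). A simplified standalone sketch of that precedence logic:

    import warnings
    from argparse import Namespace
    from typing import Any, List, Optional

    def resolve_option(options: Namespace, name: str, old_names: Optional[List[str]] = None) -> Any:
        value = getattr(options, name, None)
        if old_names:
            for old_name in old_names:
                if value is not None and value != [] and value != {}:
                    break  # the new name already carries a real value
                if getattr(options, old_name, None) is not None:
                    warnings.warn(f"Using deprecated option field {old_name} to "
                                  f"provide value for config field {name}", DeprecationWarning)
                    value = getattr(options, old_name)
        return value

    # e.g. --linkImports from an old script still feeds the new symlinkImports field
    print(resolve_option(Namespace(symlinkImports=None, linkImports=True), "symlinkImports", ["linkImports"]))
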
@@ -463,25 +432,169 @@ class Config:
463
432
  return self.__dict__.__hash__() # type: ignore
464
433
 
465
434
 
466
- JOBSTORE_HELP = ("The location of the job store for the workflow. "
467
- "A job store holds persistent information about the jobs, stats, and files in a "
468
- "workflow. If the workflow is run with a distributed batch system, the job "
469
- "store must be accessible by all worker nodes. Depending on the desired "
470
- "job store implementation, the location should be formatted according to "
471
- "one of the following schemes:\n\n"
472
- "file:<path> where <path> points to a directory on the file systen\n\n"
473
- "aws:<region>:<prefix> where <region> is the name of an AWS region like "
474
- "us-west-2 and <prefix> will be prepended to the names of any top-level "
475
- "AWS resources in use by job store, e.g. S3 buckets.\n\n "
476
- "google:<project_id>:<prefix> TODO: explain\n\n"
477
- "For backwards compatibility, you may also specify ./foo (equivalent to "
478
- "file:./foo or just file:foo) or /bar (equivalent to file:/bar).")
435
+ def check_and_create_toil_home_dir() -> None:
436
+ """
437
+ Ensure that TOIL_HOME_DIR exists.
438
+
439
+ Raises an error if it does not exist and cannot be created. Safe to run
440
+ simultaneously in multiple processes.
441
+ """
442
+
443
+ dir_path = try_path(TOIL_HOME_DIR)
444
+ if dir_path is None:
445
+ raise RuntimeError(f"Cannot create or access Toil configuration directory {TOIL_HOME_DIR}")
446
+
447
+
448
+ def check_and_create_default_config_file() -> None:
449
+ """
450
+ If the default config file does not exist, create it in the Toil home directory. Create the Toil home directory
451
+ if needed.
452
+
453
+ Raises an error if the default config file cannot be created.
454
+ Safe to run simultaneously in multiple processes. If this process runs
455
+ this function, it will always see the default config file existing with
456
+ parseable contents, even if other processes are racing to create it.
457
+
458
+ No process will see an empty or partially-written default config file.
459
+ """
460
+ check_and_create_toil_home_dir()
461
+ # The default config file did not appear to exist when we checked.
462
+ # It might exist now, though. Try creating it.
463
+ check_and_create_config_file(DEFAULT_CONFIG_FILE)
464
+
465
+
466
+ def check_and_create_config_file(filepath: str) -> None:
467
+ """
468
+ If the config file at the filepath does not exist, try creating it.
469
+ The parent directory should be created prior to calling this
470
+ :param filepath: path to config file
471
+ :return: None
472
+ """
473
+ if not os.path.exists(filepath):
474
+ generate_config(filepath)
475
+
476
+
477
+ def generate_config(filepath: str) -> None:
478
+ """
479
+ Write a Toil config file to the given path.
480
+
481
+ Safe to run simultaneously in multiple processes. No process will see an
482
+ empty or partially-written file at the given path.
483
+
484
+ Set include to "cwl" or "wdl" to include cwl options and wdl options respectively
485
+ """
486
+ # this is placed in common.py rather than toilConfig.py to prevent circular imports
487
+
488
+ # configargparse's write_config function does not write options with a None value
489
+ # Thus, certain CLI options that use None as their default won't be written to the config file.
490
+ # it also does not support printing config elements in nonalphabetical order
491
+
492
+ # Instead, mimic configargparse's write_config behavior and also make it output arguments with
493
+ # a default value of None
494
+
495
+ # To do this, iterate through the options
496
+ # Skip --help and --config as they should not be included in the config file
497
+ # Skip deprecated/redundant options
498
+ # Various log options are skipped as they are store_const arguments that are redundant to --logLevel
499
+ # linkImports, moveExports, disableCaching, are deprecated in favor of --symlinkImports, --moveOutputs,
500
+ # and --caching respectively
501
+ # Skip StoreTrue and StoreFalse options that have opposite defaults as including it in the config would
502
+ # override those defaults
503
+ deprecated_or_redundant_options = ("help", "config", "logCritical", "logDebug", "logError", "logInfo", "logOff",
504
+ "logWarning", "linkImports", "noLinkImports", "moveExports", "noMoveExports",
505
+ "enableCaching", "disableCaching", "version")
506
+
507
+ def create_config_dict_from_parser(parser: ArgumentParser) -> CommentedMap:
508
+ """
509
+ Creates a CommentedMap of the config file output from a given parser. This will put every parser action and its
510
+ default into the output
511
+
512
+ :param parser: parser to generate from
513
+ :return: CommentedMap of what to put into the config file
514
+ """
515
+ data = CommentedMap() # to preserve order
516
+ group_title_key: Dict[str, str] = dict()
517
+ for action in parser._actions:
518
+ if any(s.replace("-", "") in deprecated_or_redundant_options for s in action.option_strings):
519
+ continue
520
+ # if action is StoreFalse and default is True then don't include
521
+ if isinstance(action, _StoreFalseAction) and action.default is True:
522
+ continue
523
+ # if action is StoreTrue and default is False then don't include
524
+ if isinstance(action, _StoreTrueAction) and action.default is False:
525
+ continue
526
+
527
+ if len(action.option_strings) == 0:
528
+ continue
529
+
530
+ option_string = action.option_strings[0] if action.option_strings[0].find("--") != -1 else \
531
+ action.option_strings[1]
532
+ option = option_string[2:]
533
+
534
+ default = action.default
535
+
536
+ data[option] = default
537
+
538
+ # store where each argparse group starts
539
+ group_title = action.container.title # type: ignore[attr-defined]
540
+ group_title_key.setdefault(group_title, option)
541
+
542
+ # add comment for when each argparse group starts
543
+ for group_title, key in group_title_key.items():
544
+ data.yaml_set_comment_before_after_key(key, group_title)
545
+
546
+ return data
547
+
548
+ all_data = []
549
+
550
+ parser = ArgParser(YAMLConfigFileParser())
551
+ add_base_toil_options(parser, jobstore_as_flag=True, cwl=False)
552
+ toil_base_data = create_config_dict_from_parser(parser)
553
+
554
+ toil_base_data.yaml_set_start_comment("This is the configuration file for Toil. To set an option, uncomment an "
555
+ "existing option and set its value. The current values are the defaults. "
556
+ "If the default configuration file is outdated, it can be refreshed with "
557
+ "`toil config ~/.toil/default.yaml`.\n\nBASE TOIL OPTIONS\n")
558
+ all_data.append(toil_base_data)
559
+
560
+ parser = ArgParser(YAMLConfigFileParser())
561
+ add_cwl_options(parser)
562
+ toil_cwl_data = create_config_dict_from_parser(parser)
563
+ toil_cwl_data.yaml_set_start_comment("\nTOIL CWL RUNNER OPTIONS")
564
+ all_data.append(toil_cwl_data)
565
+
566
+ parser = ArgParser(YAMLConfigFileParser())
567
+ add_wdl_options(parser)
568
+ toil_wdl_data = create_config_dict_from_parser(parser)
569
+ toil_wdl_data.yaml_set_start_comment("\nTOIL WDL RUNNER OPTIONS")
570
+ all_data.append(toil_wdl_data)
571
+
572
+ # Now we need to put the config file in place at filepath.
573
+ # But someone else may have already created a file at that path, or may be
574
+ # about to open the file at that path and read it before we can finish
575
+ # writing the contents. So we write the config file at a temporary path and
576
+ # atomically move it over. There's still a race to see which process's
577
+ # config file actually is left at the name in the end, but nobody will ever
578
+ # see an empty or partially-written file at that name (if there wasn't one
579
+ # there to begin with).
580
+ with AtomicFileCreate(filepath) as temp_path:
581
+ with open(temp_path, "w") as f:
582
+ f.write("config_version: 1.0\n")
583
+ yaml = YAML(typ=['rt', 'string'])
584
+ for data in all_data:
585
+ if "config_version" in data:
586
+ del data["config_version"]
587
+ for line in yaml.dump_to_string(data).split("\n"): # type: ignore[attr-defined]
588
+ if line:
589
+ f.write("#")
590
+ f.write(line)
591
+ f.write("\n")
479
592
 
480
593
 
481
594
  def parser_with_common_options(
482
- provisioner_options: bool = False, jobstore_option: bool = True
483
- ) -> ArgumentParser:
484
- parser = ArgumentParser(prog="Toil", formatter_class=ArgumentDefaultsHelpFormatter)
595
+ provisioner_options: bool = False, jobstore_option: bool = True, prog: Optional[str] = None
596
+ ) -> ArgParser:
597
+ parser = ArgParser(prog=prog or "Toil", formatter_class=ArgumentDefaultsHelpFormatter)
485
598
 
486
599
  if provisioner_options:
487
600
  add_provisioner_options(parser)
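
generate_config() above leans on AtomicFileCreate (imported from toil.lib.io earlier in this diff) so that concurrent processes never observe a half-written default.yaml. A rough standalone equivalent of that write-to-a-temporary-path-then-rename pattern (the helper name and output path here are illustrative, not Toil's):

    import os
    import tempfile

    def write_atomically(filepath: str, text: str) -> None:
        fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(filepath) or ".", suffix=".tmp")
        try:
            with os.fdopen(fd, "w") as stream:
                stream.write(text)
            os.replace(temp_path, filepath)  # atomic within a single filesystem
        except BaseException:
            if os.path.exists(temp_path):
                os.unlink(temp_path)
            raise

    # Same shape generate_config() produces: a version marker plus commented-out defaults.
    write_atomically("default.yaml.example", "config_version: 1.0\n#retryCount: 1\n")
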
@@ -498,372 +611,100 @@ def parser_with_common_options(
498
611
  return parser
499
612
 
500
613
 
501
- def addOptions(parser: ArgumentParser, config: Optional[Config] = None, jobstore_as_flag: bool = False) -> None:
614
+ def addOptions(parser: ArgumentParser, jobstore_as_flag: bool = False, cwl: bool = False, wdl: bool = False) -> None:
502
615
  """
503
- Add Toil command line options to a parser.
616
+ Add all Toil command line options to a parser.
504
617
 
505
- :param config: If specified, take defaults from the given Config.
618
+ Support for config files if using configargparse. This will also check and set up the default config file.
506
619
 
507
620
  :param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
508
- """
509
621
 
510
- if config is None:
511
- config = Config()
512
- if not (isinstance(parser, ArgumentParser) or isinstance(parser, _ArgumentGroup)):
513
- raise ValueError(f"Unanticipated class: {parser.__class__}. Must be: argparse.ArgumentParser or ArgumentGroup.")
622
+ :param cwl: Whether CWL options are expected. If so, CWL options won't be suppressed.
514
623
 
515
- add_logging_options(parser)
516
- parser.register("type", "bool", parseBool) # Custom type for arg=True/False.
517
-
518
- # Core options
519
- core_options = parser.add_argument_group(
520
- title="Toil core options.",
521
- description="Options to specify the location of the Toil workflow and "
522
- "turn on stats collation about the performance of jobs."
523
- )
524
- if jobstore_as_flag:
525
- core_options.add_argument('--jobStore', '--jobstore', dest='jobStore', type=str, default=None, help=JOBSTORE_HELP)
624
+ :param wdl: Whether WDL options are expected. If so, WDL options won't be suppressed.
625
+ """
626
+ if cwl and wdl:
627
+ raise RuntimeError("CWL and WDL cannot both be true at the same time when adding options.")
628
+ if not (isinstance(parser, ArgumentParser) or isinstance(parser, _ArgumentGroup)):
629
+ raise ValueError(
630
+ f"Unanticipated class: {parser.__class__}. Must be: argparse.ArgumentParser or ArgumentGroup.")
631
+
632
+ if isinstance(parser, ArgParser):
633
+ # in case the user passes in their own configargparse instance instead of calling getDefaultArgumentParser()
634
+ # this forces configargparse to process the config file in YAML rather than in its own format
635
+ parser._config_file_parser = YAMLConfigFileParser() # type: ignore[misc]
636
+ parser._default_config_files = [DEFAULT_CONFIG_FILE] # type: ignore[misc]
526
637
  else:
527
- core_options.add_argument('jobStore', type=str, help=JOBSTORE_HELP)
528
- core_options.add_argument("--workDir", dest="workDir", default=None,
529
- help="Absolute path to directory where temporary files generated during the Toil "
530
- "run should be placed. Standard output and error from batch system jobs "
531
- "(unless --noStdOutErr is set) will be placed in this directory. A cache directory "
532
- "may be placed in this directory. Temp files and folders will be placed in a "
533
- "directory toil-<workflowID> within workDir. The workflowID is generated by "
534
- "Toil and will be reported in the workflow logs. Default is determined by the "
535
- "variables (TMPDIR, TEMP, TMP) via mkdtemp. This directory needs to exist on "
536
- "all machines running jobs; if capturing standard output and error from batch "
537
- "system jobs is desired, it will generally need to be on a shared file system. "
538
- "When sharing a cache between containers on a host, this directory must be "
539
- "shared between the containers.")
540
- core_options.add_argument("--coordinationDir", dest="coordination_dir", default=None,
541
- help="Absolute path to directory where Toil will keep state and lock files."
542
- "When sharing a cache between containers on a host, this directory must be "
543
- "shared between the containers.")
544
- core_options.add_argument("--noStdOutErr", dest="noStdOutErr", action="store_true", default=None,
545
- help="Do not capture standard output and error from batch system jobs.")
546
- core_options.add_argument("--stats", dest="stats", action="store_true", default=None,
547
- help="Records statistics about the toil workflow to be used by 'toil stats'.")
548
- clean_choices = ['always', 'onError', 'never', 'onSuccess']
549
- core_options.add_argument("--clean", dest="clean", choices=clean_choices, default=None,
550
- help=f"Determines the deletion of the jobStore upon completion of the program. "
551
- f"Choices: {clean_choices}. The --stats option requires information from the "
552
- f"jobStore upon completion so the jobStore will never be deleted with that flag. "
553
- f"If you wish to be able to restart the run, choose \'never\' or \'onSuccess\'. "
554
- f"Default is \'never\' if stats is enabled, and \'onSuccess\' otherwise.")
555
- core_options.add_argument("--cleanWorkDir", dest="cleanWorkDir", choices=clean_choices, default='always',
556
- help=f"Determines deletion of temporary worker directory upon completion of a job. "
557
- f"Choices: {clean_choices}. Default = always. WARNING: This option should be "
558
- f"changed for debugging only. Running a full pipeline with this option could "
559
- f"fill your disk with excessive intermediate data.")
560
- core_options.add_argument("--clusterStats", dest="clusterStats", nargs='?', action='store', default=None,
561
- const=os.getcwd(),
562
- help="If enabled, writes out JSON resource usage statistics to a file. "
563
- "The default location for this file is the current working directory, but an "
564
- "absolute path can also be passed to specify where this file should be written. "
565
- "This options only applies when using scalable batch systems.")
566
-
567
- # Restarting the workflow options
568
- restart_options = parser.add_argument_group(
569
- title="Toil options for restarting an existing workflow.",
570
- description="Allows the restart of an existing workflow"
571
- )
572
- restart_options.add_argument("--restart", dest="restart", default=None, action="store_true",
573
- help="If --restart is specified then will attempt to restart existing workflow "
574
- "at the location pointed to by the --jobStore option. Will raise an exception "
575
- "if the workflow does not exist")
576
-
577
- # Batch system options
578
- batchsystem_options = parser.add_argument_group(
579
- title="Toil options for specifying the batch system.",
580
- description="Allows the specification of the batch system."
581
- )
582
- add_all_batchsystem_options(batchsystem_options)
583
-
584
- # File store options
585
- file_store_options = parser.add_argument_group(
586
- title="Toil options for configuring storage.",
587
- description="Allows configuring Toil's data storage."
588
- )
589
- link_imports = file_store_options.add_mutually_exclusive_group()
590
- link_imports_help = ("When using a filesystem based job store, CWL input files are by default symlinked in. "
591
- "Specifying this option instead copies the files into the job store, which may protect "
592
- "them from being modified externally. When not specified and as long as caching is enabled, "
593
- "Toil will protect the file automatically by changing the permissions to read-only.")
594
- link_imports.add_argument("--linkImports", dest="linkImports", action='store_true', help=link_imports_help)
595
- link_imports.add_argument("--noLinkImports", dest="linkImports", action='store_false', help=link_imports_help)
596
- link_imports.set_defaults(linkImports=True)
597
-
598
- move_exports = file_store_options.add_mutually_exclusive_group()
599
- move_exports_help = ('When using a filesystem based job store, output files are by default moved to the '
600
- 'output directory, and a symlink to the moved exported file is created at the initial '
601
- 'location. Specifying this option instead copies the files into the output directory. '
602
- 'Applies to filesystem-based job stores only.')
603
- move_exports.add_argument("--moveExports", dest="moveExports", action='store_true', help=move_exports_help)
604
- move_exports.add_argument("--noMoveExports", dest="moveExports", action='store_false', help=move_exports_help)
605
- move_exports.set_defaults(moveExports=False)
606
-
607
- caching = file_store_options.add_mutually_exclusive_group()
608
- caching_help = ("Enable or disable caching for your workflow, specifying this overrides default from job store")
609
- caching.add_argument('--disableCaching', dest='caching', action='store_false', help=caching_help)
610
- caching.add_argument('--caching', dest='caching', type=lambda val: bool(strtobool(val)), help=caching_help)
611
- caching.set_defaults(caching=None)
612
-
613
- # Auto scaling options
614
- autoscaling_options = parser.add_argument_group(
615
- title="Toil options for autoscaling the cluster of worker nodes.",
616
- description="Allows the specification of the minimum and maximum number of nodes in an autoscaled cluster, "
617
- "as well as parameters to control the level of provisioning."
618
- )
619
- provisioner_choices = ['aws', 'gce', None]
620
- # TODO: Better consolidate this provisioner arg and the one in provisioners/__init__.py?
621
- autoscaling_options.add_argument('--provisioner', '-p', dest="provisioner", choices=provisioner_choices,
622
- help=f"The provisioner for cluster auto-scaling. This is the main Toil "
623
- f"'--provisioner' option, and defaults to None for running on single "
624
- f"machine and non-auto-scaling batch systems. The currently supported "
625
- f"choices are {provisioner_choices}. The default is {config.provisioner}.")
626
- autoscaling_options.add_argument('--nodeTypes', default=None,
627
- help="Specifies a list of comma-separated node types, each of which is "
628
- "composed of slash-separated instance types, and an optional spot "
629
- "bid set off by a colon, making the node type preemptible. Instance "
630
- "types may appear in multiple node types, and the same node type "
631
- "may appear as both preemptible and non-preemptible.\n"
632
- "Valid argument specifying two node types:\n"
633
- "\tc5.4xlarge/c5a.4xlarge:0.42,t2.large\n"
634
- "Node types:\n"
635
- "\tc5.4xlarge/c5a.4xlarge:0.42 and t2.large\n"
636
- "Instance types:\n"
637
- "\tc5.4xlarge, c5a.4xlarge, and t2.large\n"
638
- "Semantics:\n"
639
- "\tBid $0.42/hour for either c5.4xlarge or c5a.4xlarge instances,\n"
640
- "\ttreated interchangeably, while they are available at that price,\n"
641
- "\tand buy t2.large instances at full price")
642
- autoscaling_options.add_argument('--minNodes', default=None,
643
- help="Mininum number of nodes of each type in the cluster, if using "
644
- "auto-scaling. This should be provided as a comma-separated list of the "
645
- "same length as the list of node types. default=0")
646
- autoscaling_options.add_argument('--maxNodes', default=None,
647
- help=f"Maximum number of nodes of each type in the cluster, if using autoscaling, "
648
- f"provided as a comma-separated list. The first value is used as a default "
649
- f"if the list length is less than the number of nodeTypes. "
650
- f"default={config.maxNodes[0]}")
651
- autoscaling_options.add_argument("--targetTime", dest="targetTime", default=None,
652
- help=f"Sets how rapidly you aim to complete jobs in seconds. Shorter times mean "
653
- f"more aggressive parallelization. The autoscaler attempts to scale up/down "
654
- f"so that it expects all queued jobs will complete within targetTime "
655
- f"seconds. default={config.targetTime}")
656
- autoscaling_options.add_argument("--betaInertia", dest="betaInertia", default=None,
657
- help=f"A smoothing parameter to prevent unnecessary oscillations in the number "
658
- f"of provisioned nodes. This controls an exponentially weighted moving "
659
- f"average of the estimated number of nodes. A value of 0.0 disables any "
660
- f"smoothing, and a value of 0.9 will smooth so much that few changes will "
661
- f"ever be made. Must be between 0.0 and 0.9. default={config.betaInertia}")
662
- autoscaling_options.add_argument("--scaleInterval", dest="scaleInterval", default=None,
663
- help=f"The interval (seconds) between assessing if the scale of "
664
- f"the cluster needs to change. default={config.scaleInterval}")
665
- autoscaling_options.add_argument("--preemptibleCompensation", "--preemptableCompensation", dest="preemptibleCompensation", default=None,
- help=f"The preference of the autoscaler to replace preemptible nodes with "
- f"non-preemptible nodes, when preemptible nodes cannot be started for some "
- f"reason. Defaults to {config.preemptibleCompensation}. This value must be "
- f"between 0.0 and 1.0, inclusive. A value of 0.0 disables such "
- f"compensation, a value of 0.5 compensates two missing preemptible nodes "
- f"with a non-preemptible one. A value of 1.0 replaces every missing "
- f"pre-emptable node with a non-preemptible one.")
- autoscaling_options.add_argument("--nodeStorage", dest="nodeStorage", default=50,
- help="Specify the size of the root volume of worker nodes when they are launched "
- "in gigabytes. You may want to set this if your jobs require a lot of disk "
- "space. (default: %(default)s).")
- autoscaling_options.add_argument('--nodeStorageOverrides', default=None,
- help="Comma-separated list of nodeType:nodeStorage that are used to override "
- "the default value from --nodeStorage for the specified nodeType(s). "
- "This is useful for heterogeneous jobs where some tasks require much more "
- "disk than others.")
- autoscaling_options.add_argument("--metrics", dest="metrics", default=False, action="store_true",
- help="Enable the prometheus/grafana dashboard for monitoring CPU/RAM usage, "
- "queue size, and issued jobs.")
- autoscaling_options.add_argument("--assumeZeroOverhead", dest="assume_zero_overhead", default=False, action="store_true",
- help="Ignore scheduler and OS overhead and assume jobs can use every last byte "
- "of memory and disk on a node when autoscaling.")
-
- # Parameters to limit service jobs / detect service deadlocks
- if not config.cwl:
- service_options = parser.add_argument_group(
- title="Toil options for limiting the number of service jobs and detecting service deadlocks",
- description="Allows the specification of the maximum number of service jobs in a cluster. By keeping "
- "this limited we can avoid nodes occupied with services causing deadlocks."
- )
- service_options.add_argument("--maxServiceJobs", dest="maxServiceJobs", default=None, type=int,
- help=f"The maximum number of service jobs that can be run concurrently, "
- f"excluding service jobs running on preemptible nodes. "
- f"default={config.maxServiceJobs}")
- service_options.add_argument("--maxPreemptibleServiceJobs", dest="maxPreemptibleServiceJobs", default=None,
- type=int,
- help=f"The maximum number of service jobs that can run concurrently on "
- f"preemptible nodes. default={config.maxPreemptibleServiceJobs}")
- service_options.add_argument("--deadlockWait", dest="deadlockWait", default=None, type=int,
- help=f"Time, in seconds, to tolerate the workflow running only the same service "
- f"jobs, with no jobs to use them, before declaring the workflow to be "
- f"deadlocked and stopping. default={config.deadlockWait}")
- service_options.add_argument("--deadlockCheckInterval", dest="deadlockCheckInterval", default=None, type=int,
- help="Time, in seconds, to wait between checks to see if the workflow is stuck "
- "running only service jobs, with no jobs to use them. Should be shorter "
- "than --deadlockWait. May need to be increased if the batch system cannot "
- "enumerate running jobs quickly enough, or if polling for running jobs is "
- "placing an unacceptable load on a shared cluster. "
- "default={config.deadlockCheckInterval}")
-
- # Resource requirements
- resource_options = parser.add_argument_group(
- title="Toil options for cores/memory requirements.",
- description="The options to specify default cores/memory requirements (if not specified by the jobs "
- "themselves), and to limit the total amount of memory/cores requested from the batch system."
- )
- resource_help_msg = ('The {} amount of {} to request for a job. '
- 'Only applicable to jobs that do not specify an explicit value for this requirement. '
- '{}. '
- 'Default is {}.')
- cpu_note = 'Fractions of a core (for example 0.1) are supported on some batch systems [mesos, single_machine]'
- disk_mem_note = 'Standard suffixes like K, Ki, M, Mi, G or Gi are supported'
- accelerators_note = ('Each accelerator specification can have a type (gpu [default], nvidia, amd, cuda, rocm, opencl, '
- 'or a specific model like nvidia-tesla-k80), and a count [default: 1]. If both a type and a count '
- 'are used, they must be separated by a colon. If multiple types of accelerators are '
- 'used, the specifications are separated by commas')
- resource_options.add_argument('--defaultMemory', dest='defaultMemory', default=None, metavar='INT',
- help=resource_help_msg.format('default', 'memory', disk_mem_note, bytes2human(config.defaultMemory)))
- resource_options.add_argument('--defaultCores', dest='defaultCores', default=None, metavar='FLOAT',
- help=resource_help_msg.format('default', 'cpu', cpu_note, str(config.defaultCores)))
- resource_options.add_argument('--defaultDisk', dest='defaultDisk', default=None, metavar='INT',
- help=resource_help_msg.format('default', 'disk', disk_mem_note, bytes2human(config.defaultDisk)))
- resource_options.add_argument('--defaultAccelerators', dest='defaultAccelerators', default=None, metavar='ACCELERATOR[,ACCELERATOR...]',
- help=resource_help_msg.format('default', 'accelerators', accelerators_note, config.defaultAccelerators))
- resource_options.add_argument('--defaultPreemptible', '--defaultPreemptable', dest='defaultPreemptible', metavar='BOOL',
- type=bool, nargs='?', const=True, default=False,
- help='Make all jobs able to run on preemptible (spot) nodes by default.')
- resource_options.add_argument('--maxCores', dest='maxCores', default=None, metavar='INT',
- help=resource_help_msg.format('max', 'cpu', cpu_note, str(config.maxCores)))
- resource_options.add_argument('--maxMemory', dest='maxMemory', default=None, metavar='INT',
- help=resource_help_msg.format('max', 'memory', disk_mem_note, bytes2human(config.maxMemory)))
- resource_options.add_argument('--maxDisk', dest='maxDisk', default=None, metavar='INT',
- help=resource_help_msg.format('max', 'disk', disk_mem_note, bytes2human(config.maxDisk)))
+ # configargparse advertises itself as a drag and drop replacement, and running the normal argparse ArgumentParser
+ # through this code still seems to work (with the exception of --config and environmental variables)
+ warnings.warn(f'Using deprecated library argparse for options parsing.'
+ f'This will not parse config files or use environment variables.'
+ f'Use configargparse instead or call Job.Runner.getDefaultArgumentParser()',
+ DeprecationWarning)
+
+ check_and_create_default_config_file()
+ # Check on the config file to make sure it is sensible
+ config_status = os.stat(DEFAULT_CONFIG_FILE)
+ if config_status.st_size == 0:
+ # If we have an empty config file, someone has to manually delete
+ # it before we will work again.
+ raise RuntimeError(
+ f"Config file {DEFAULT_CONFIG_FILE} exists but is empty. Delete it! Stat says: {config_status}")
+ try:
+ with open(DEFAULT_CONFIG_FILE, "r") as f:
+ yaml = YAML(typ="safe")
+ s = yaml.load(f)
+ logger.debug("Initialized default configuration: %s", json.dumps(s))
+ except:
+ # Something went wrong reading the default config, so dump its
+ # contents to the log.
+ logger.info("Configuration file contents: %s", open(DEFAULT_CONFIG_FILE, 'r').read())
+ raise
+
+ # Add base toil options
+ add_base_toil_options(parser, jobstore_as_flag, cwl)
+ # Add CWL and WDL options
+ # This is done so the config file can hold all available options
+ add_cwl_options(parser, suppress=not cwl)
+ add_wdl_options(parser, suppress=not wdl)
+
+ def check_arguments(typ: str) -> None:
+ """
+ Check that the other opposing runner's options are not on the command line.
+ Ex: if the parser is supposed to be a CWL parser, ensure that WDL commands are not on the command line
+ :param typ: string of either "cwl" or "wdl" to specify which runner to check against
+ :return: None, raise parser error if option is found
+ """
+ check_parser = ArgParser()
+ if typ == "wdl":
+ add_cwl_options(check_parser)
+ if typ == "cwl":
+ add_wdl_options(check_parser)
+ for action in check_parser._actions:
+ action.default = SUPPRESS
+ other_options, _ = check_parser.parse_known_args(sys.argv[1:], ignore_help_args=True)
+ if len(vars(other_options)) != 0:
+ raise parser.error(f"{'WDL' if typ == 'cwl' else 'CWL'} options are not allowed on the command line.")
+
+ # if cwl is set, format the namespace for cwl and check that wdl options are not set on the command line
+ if cwl:
+ parser.add_argument("cwltool", type=str, help="CWL file to run.")
+ parser.add_argument("cwljob", nargs="*", help="Input file or CWL options. If CWL workflow takes an input, "
+ "the name of the input can be used as an option. "
+ "For example: \"%(prog)s workflow.cwl --file1 file\". "
+ "If an input has the same name as a Toil option, pass '--' before it.")
+ check_arguments(typ="cwl")
+
+ # if wdl is set, format the namespace for wdl and check that cwl options are not set on the command line
+ if wdl:
+ parser.add_argument("wdl_uri", type=str,
+ help="WDL document URI")
+ parser.add_argument("inputs_uri", type=str, nargs='?',
+ help="WDL input JSON URI")
+ parser.add_argument("--input", "-i", dest="inputs_uri", type=str,
+ help="WDL input JSON URI")
+ check_arguments(typ="wdl")
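To illustrate what the deprecation warning above is pointing at, here is a minimal sketch (not Toil code; the option and environment variable names are made up) of how configargparse layers config-file and environment-variable sources on top of plain argparse:

    import configargparse

    # ArgParser is configargparse's drop-in replacement for argparse.ArgumentParser.
    parser = configargparse.ArgParser(default_config_files=["example.conf"])
    # env_var is an extra value source that plain argparse does not offer.
    parser.add_argument("--retryCount", type=int, default=1, env_var="EXAMPLE_RETRY_COUNT")
    options = parser.parse_args()

Command-line values still take precedence over environment variables and the config file, which is why the plain-argparse fallback above only loses the extra value sources rather than breaking outright.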
 
- # Retrying/rescuing jobs
- job_options = parser.add_argument_group(
- title="Toil options for rescuing/killing/restarting jobs.",
- description="The options for jobs that either run too long/fail or get lost (some batch systems have issues!)."
- )
- job_options.add_argument("--retryCount", dest="retryCount", default=None,
- help=f"Number of times to retry a failing job before giving up and "
- f"labeling job failed. default={config.retryCount}")
- job_options.add_argument("--enableUnlimitedPreemptibleRetries", "--enableUnlimitedPreemptableRetries", dest="enableUnlimitedPreemptibleRetries",
- action='store_true', default=False,
- help="If set, preemptible failures (or any failure due to an instance getting "
- "unexpectedly terminated) will not count towards job failures and --retryCount.")
- job_options.add_argument("--doubleMem", dest="doubleMem", action='store_true', default=False,
- help="If set, batch jobs which die to reaching memory limit on batch schedulers "
- "will have their memory doubled and they will be retried. The remaining "
- "retry count will be reduced by 1. Currently supported by LSF.")
- job_options.add_argument("--maxJobDuration", dest="maxJobDuration", default=None,
- help=f"Maximum runtime of a job (in seconds) before we kill it (this is a lower bound, "
- f"and the actual time before killing the job may be longer). "
- f"default={config.maxJobDuration}")
- job_options.add_argument("--rescueJobsFrequency", dest="rescueJobsFrequency", default=None,
- help=f"Period of time to wait (in seconds) between checking for missing/overlong jobs, "
- f"that is jobs which get lost by the batch system. Expert parameter. "
- f"default={config.rescueJobsFrequency}")
-
- # Log management options
- log_options = parser.add_argument_group(
- title="Toil log management options.",
- description="Options for how Toil should manage its logs."
- )
- log_options.add_argument("--maxLogFileSize", dest="maxLogFileSize", default=None,
- help=f"The maximum size of a job log file to keep (in bytes), log files larger than "
- f"this will be truncated to the last X bytes. Setting this option to zero will "
- f"prevent any truncation. Setting this option to a negative value will truncate "
- f"from the beginning. Default={bytes2human(config.maxLogFileSize)}")
- log_options.add_argument("--writeLogs", dest="writeLogs", nargs='?', action='store', default=None,
- const=os.getcwd(),
- help="Write worker logs received by the leader into their own files at the specified "
- "path. Any non-empty standard output and error from failed batch system jobs will "
- "also be written into files at this path. The current working directory will be "
- "used if a path is not specified explicitly. Note: By default only the logs of "
- "failed jobs are returned to leader. Set log level to 'debug' or enable "
- "'--writeLogsFromAllJobs' to get logs back from successful jobs, and adjust "
- "'maxLogFileSize' to control the truncation limit for worker logs.")
- log_options.add_argument("--writeLogsGzip", dest="writeLogsGzip", nargs='?', action='store', default=None,
- const=os.getcwd(),
- help="Identical to --writeLogs except the logs files are gzipped on the leader.")
- log_options.add_argument("--writeLogsFromAllJobs", dest="writeLogsFromAllJobs", action='store_true',
- default=False,
- help="Whether to write logs from all jobs (including the successful ones) without "
- "necessarily setting the log level to 'debug'. Ensure that either --writeLogs "
- "or --writeLogsGzip is set if enabling this option.")
- log_options.add_argument("--writeMessages", dest="write_messages", default=None,
- help="File to send messages from the leader's message bus to.")
- log_options.add_argument("--realTimeLogging", dest="realTimeLogging", action="store_true", default=False,
- help="Enable real-time logging from workers to leader")
-
- # Misc options
- misc_options = parser.add_argument_group(
- title="Toil miscellaneous options.",
- description="Everything else."
- )
- misc_options.add_argument('--disableChaining', dest='disableChaining', action='store_true', default=False,
- help="Disables chaining of jobs (chaining uses one job's resource allocation "
- "for its successor job if possible).")
- misc_options.add_argument("--disableJobStoreChecksumVerification", dest="disableJobStoreChecksumVerification",
- default=False, action="store_true",
- help="Disables checksum verification for files transferred to/from the job store. "
- "Checksum verification is a safety check to ensure the data is not corrupted "
- "during transfer. Currently only supported for non-streaming AWS files.")
- misc_options.add_argument("--sseKey", dest="sseKey", default=None,
- help="Path to file containing 32 character key to be used for server-side encryption on "
- "awsJobStore or googleJobStore. SSE will not be used if this flag is not passed.")
- misc_options.add_argument("--setEnv", '-e', metavar='NAME=VALUE or NAME', dest="environment", default=[],
- action="append",
- help="Set an environment variable early on in the worker. If VALUE is omitted, it will "
- "be looked up in the current environment. Independently of this option, the worker "
- "will try to emulate the leader's environment before running a job, except for "
- "some variables known to vary across systems. Using this option, a variable can "
- "be injected into the worker process itself before it is started.")
- misc_options.add_argument("--servicePollingInterval", dest="servicePollingInterval", default=None,
- help=f"Interval of time service jobs wait between polling for the existence of the "
- f"keep-alive flag. Default: {config.servicePollingInterval}")
- misc_options.add_argument('--forceDockerAppliance', dest='forceDockerAppliance', action='store_true', default=False,
- help='Disables sanity checking the existence of the docker image specified by '
- 'TOIL_APPLIANCE_SELF, which Toil uses to provision mesos for autoscaling.')
- misc_options.add_argument('--statusWait', dest='statusWait', type=int, default=3600,
- help="Seconds to wait between reports of running jobs.")
- misc_options.add_argument('--disableProgress', dest='disableProgress', action='store_true', default=False,
- help="Disables the progress bar shown when standard error is a terminal.")
-
- # Debug options
- debug_options = parser.add_argument_group(
- title="Toil debug options.",
- description="Debug options for finding problems or helping with testing."
- )
- debug_options.add_argument("--debugWorker", default=False, action="store_true",
- help="Experimental no forking mode for local debugging. Specifically, workers "
- "are not forked and stderr/stdout are not redirected to the log.")
- debug_options.add_argument("--disableWorkerOutputCapture", default=False, action="store_true",
- help="Let worker output go to worker's standard out/error instead of per-job logs.")
- debug_options.add_argument("--badWorker", dest="badWorker", default=None,
- help=f"For testing purposes randomly kill --badWorker proportion of jobs using "
- f"SIGKILL. default={config.badWorker}")
- debug_options.add_argument("--badWorkerFailInterval", dest="badWorkerFailInterval", default=None,
- help=f"When killing the job pick uniformly within the interval from 0.0 to "
- f"--badWorkerFailInterval seconds after the worker starts. "
- f"default={config.badWorkerFailInterval}")
-
-
- def parseBool(val: str) -> bool:
- if val.lower() in ['true', 't', 'yes', 'y', 'on', '1']:
- return True
- elif val.lower() in ['false', 'f', 'no', 'n', 'off', '0']:
- return False
- else:
- raise RuntimeError("Could not interpret \"%s\" as a boolean value" % val)

 @lru_cache(maxsize=None)
 def getNodeID() -> str:
@@ -960,10 +801,12 @@ class Toil(ContextManager["Toil"]):
 set_logging_from_options(self.options)
 config = Config()
 config.setOptions(self.options)
+ if config.jobStore is None:
+ raise RuntimeError("No jobstore provided!")
 jobStore = self.getJobStore(config.jobStore)
 if config.caching is None:
 config.caching = jobStore.default_caching()
- #Set the caching option because it wasn't set originally, resuming jobstore rebuilds config from CLI options
+ # Set the caching option because it wasn't set originally, resuming jobstore rebuilds config from CLI options
 self.options.caching = config.caching

 if not config.restart:
@@ -986,10 +829,10 @@ class Toil(ContextManager["Toil"]):
 return self

 def __exit__(
- self,
- exc_type: Optional[Type[BaseException]],
- exc_val: Optional[BaseException],
- exc_tb: Optional[TracebackType],
+ self,
+ exc_type: Optional[Type[BaseException]],
+ exc_val: Optional[BaseException],
+ exc_tb: Optional[TracebackType],
 ) -> Literal[False]:
 """
 Clean up after a workflow invocation.
@@ -1174,13 +1017,13 @@ class Toil(ContextManager["Toil"]):
 maxMemory=config.maxMemory,
 maxDisk=config.maxDisk)

- from toil.batchSystems.registry import BATCH_SYSTEM_FACTORY_REGISTRY
+ from toil.batchSystems.registry import get_batch_system, get_batch_systems

 try:
- batch_system = BATCH_SYSTEM_FACTORY_REGISTRY[config.batchSystem]()
+ batch_system = get_batch_system(config.batchSystem)
 except KeyError:
 raise RuntimeError(f'Unrecognized batch system: {config.batchSystem} '
- f'(choose from: {BATCH_SYSTEM_FACTORY_REGISTRY.keys()})')
+ f'(choose from: {", ".join(get_batch_systems())})')

 if config.caching and not batch_system.supportsWorkerCleanup():
 raise RuntimeError(f'{config.batchSystem} currently does not support shared caching, because it '
@@ -1192,7 +1035,7 @@ class Toil(ContextManager["Toil"]):
 return batch_system(**kwargs)
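The hunk above replaces direct use of the BATCH_SYSTEM_FACTORY_REGISTRY dict with accessor functions. A rough sketch of the new lookup pattern, paraphrased from the diff (the kwargs stand in for whatever the chosen batch system's constructor expects):

    from toil.batchSystems.registry import get_batch_system, get_batch_systems

    def make_batch_system(name: str, **kwargs):
        try:
            # get_batch_system() raises KeyError for unknown names, like the dict lookup it replaces.
            batch_system_class = get_batch_system(name)
        except KeyError:
            raise RuntimeError(f"Unrecognized batch system: {name} "
                               f"(choose from: {', '.join(get_batch_systems())})")
        return batch_system_class(**kwargs)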
 
 def _setupAutoDeployment(
- self, userScript: Optional["ModuleDescriptor"] = None
+ self, userScript: Optional["ModuleDescriptor"] = None
 ) -> None:
 """
 Determine the user script, save it to the job store and inject a reference to the saved copy into the batch system.
@@ -1236,7 +1079,7 @@ class Toil(ContextManager["Toil"]):
 logger.debug('No user script to auto-deploy.')
 else:
 logger.debug('Saving user script %s as a resource', userScript)
- userScriptResource = userScript.saveAsResourceTo(self._jobStore) # type: ignore[misc]
+ userScriptResource = userScript.saveAsResourceTo(self._jobStore)
 logger.debug('Injecting user script %s into batch system.', userScriptResource)
 self._batchSystem.setUserScript(userScriptResource)

@@ -1247,13 +1090,15 @@ class Toil(ContextManager["Toil"]):
 def importFile(self,
 srcUrl: str,
 sharedFileName: str,
- symlink: bool = True) -> None: ...
+ symlink: bool = True) -> None:
+ ...

 @overload
 def importFile(self,
 srcUrl: str,
 sharedFileName: None = None,
- symlink: bool = True) -> FileID: ...
+ symlink: bool = True) -> FileID:
+ ...

 @deprecated(new_function_name='import_file')
 def importFile(self,
@@ -1267,14 +1112,16 @@ class Toil(ContextManager["Toil"]):
 src_uri: str,
 shared_file_name: str,
 symlink: bool = True,
- check_existence: bool = True) -> None: ...
+ check_existence: bool = True) -> None:
+ ...

 @overload
 def import_file(self,
 src_uri: str,
 shared_file_name: None = None,
 symlink: bool = True,
- check_existence: bool = True) -> FileID: ...
+ check_existence: bool = True) -> FileID:
+ ...

 def import_file(self,
 src_uri: str,
@@ -1340,7 +1187,7 @@ class Toil(ContextManager["Toil"]):
 a local file that does not exist.
 """
 if urlparse(uri).scheme == 'file':
- uri = urlparse(uri).path # this should strip off the local file scheme; it will be added back
+ uri = unquote(urlparse(uri).path) # this should strip off the local file scheme; it will be added back

 # account for the scheme-less case, which should be coerced to a local absolute path
 if urlparse(uri).scheme == '':
@@ -1350,7 +1197,7 @@ class Toil(ContextManager["Toil"]):
 f'Could not find local file "{abs_path}" when importing "{uri}".\n'
 f'Make sure paths are relative to "{os.getcwd()}" or use absolute paths.\n'
 f'If this is not a local file, please include the scheme (s3:/, gs:/, ftp://, etc.).')
- return f'file://{abs_path}'
+ return f'file://{quote(abs_path)}'
 return uri
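The quote()/unquote() changes above percent-encode local paths when building file:// URIs and decode them again when parsing, so paths containing spaces or other reserved characters survive the round trip. A standalone sketch using only the standard library (the path is hypothetical):

    from urllib.parse import quote, unquote, urlparse

    path = "/data/my results/reads 1.fastq"
    uri = f"file://{quote(path)}"  # file:///data/my%20results/reads%201.fastq
    assert unquote(urlparse(uri).path) == path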
 
 def _setBatchSystemEnvVars(self) -> None:
@@ -1392,7 +1239,8 @@ class Toil(ContextManager["Toil"]):
 :param configWorkDir: Value passed to the program using the --workDir flag
 :return: Path to the Toil work directory, constant across all machines
 """
- workDir = os.getenv('TOIL_WORKDIR_OVERRIDE') or configWorkDir or os.getenv('TOIL_WORKDIR') or tempfile.gettempdir()
+ workDir = os.getenv('TOIL_WORKDIR_OVERRIDE') or configWorkDir or os.getenv(
+ 'TOIL_WORKDIR') or tempfile.gettempdir()
 if not os.path.exists(workDir):
 raise RuntimeError(f'The directory specified by --workDir or TOIL_WORKDIR ({workDir}) does not exist.')
 return workDir
@@ -1416,31 +1264,33 @@ class Toil(ContextManager["Toil"]):
 if 'XDG_RUNTIME_DIR' in os.environ and not os.path.exists(os.environ['XDG_RUNTIME_DIR']):
 # Slurm has been observed providing this variable but not keeping
 # the directory live as long as we run for.
- logger.warning('XDG_RUNTIME_DIR is set to nonexistent directory %s; your environment may be out of spec!', os.environ['XDG_RUNTIME_DIR'])
+ logger.warning('XDG_RUNTIME_DIR is set to nonexistent directory %s; your environment may be out of spec!',
+ os.environ['XDG_RUNTIME_DIR'])

 # Go get a coordination directory, using a lot of short-circuiting of
 # or and the fact that and returns its second argument when it
 # succeeds.
 coordination_dir: Optional[str] = (
 # First try an override env var
- os.getenv('TOIL_COORDINATION_DIR_OVERRIDE') or
- # Then the value from the config
- config_coordination_dir or
- # Then a normal env var
- # TODO: why/how would this propagate when not using single machine?
- os.getenv('TOIL_COORDINATION_DIR') or
- # Then try a `toil` subdirectory of the XDG runtime directory
- # (often /var/run/users/<UID>). But only if we are actually in a
- # session that has the env var set. Otherwise it might belong to a
- # different set of sessions and get cleaned up out from under us
- # when that session ends.
- # We don't think Slurm XDG sessions are trustworthy, depending on
- # the cluster's PAM configuration, so don't use them.
- ('XDG_RUNTIME_DIR' in os.environ and 'SLURM_JOBID' not in os.environ and try_path(os.path.join(os.environ['XDG_RUNTIME_DIR'], 'toil'))) or
- # Try under /run/lock. It might be a temp dir style sticky directory.
- try_path('/run/lock') or
- # Finally, fall back on the work dir and hope it's a legit filesystem.
- cls.getToilWorkDir(config_work_dir)
+ os.getenv('TOIL_COORDINATION_DIR_OVERRIDE') or
+ # Then the value from the config
+ config_coordination_dir or
+ # Then a normal env var
+ # TODO: why/how would this propagate when not using single machine?
+ os.getenv('TOIL_COORDINATION_DIR') or
+ # Then try a `toil` subdirectory of the XDG runtime directory
+ # (often /var/run/users/<UID>). But only if we are actually in a
+ # session that has the env var set. Otherwise it might belong to a
+ # different set of sessions and get cleaned up out from under us
+ # when that session ends.
+ # We don't think Slurm XDG sessions are trustworthy, depending on
+ # the cluster's PAM configuration, so don't use them.
+ ('XDG_RUNTIME_DIR' in os.environ and 'SLURM_JOBID' not in os.environ and try_path(
+ os.path.join(os.environ['XDG_RUNTIME_DIR'], 'toil'))) or
+ # Try under /run/lock. It might be a temp dir style sticky directory.
+ try_path('/run/lock') or
+ # Finally, fall back on the work dir and hope it's a legit filesystem.
+ cls.getToilWorkDir(config_work_dir)
 )

 if coordination_dir is None:
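The chained or expression above simply takes the first candidate that yields a usable directory. A condensed sketch of the same pattern (this try_path is only a stand-in for Toil's helper of the same name and just accepts a path that already exists as a directory):

    import os
    from typing import Optional

    def try_path(path: str) -> Optional[str]:
        # Stand-in: return the path if it is an existing directory, else None.
        return path if os.path.isdir(path) else None

    coordination_dir = (
        os.getenv("TOIL_COORDINATION_DIR_OVERRIDE")
        or os.getenv("TOIL_COORDINATION_DIR")
        or try_path("/run/lock")
        or "/tmp"  # last-resort fallback for this sketch only
    )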
@@ -1462,7 +1312,7 @@ class Toil(ContextManager["Toil"]):

 @classmethod
 def getLocalWorkflowDir(
- cls, workflowID: str, configWorkDir: Optional[str] = None
+ cls, workflowID: str, configWorkDir: Optional[str] = None
 ) -> str:
 """
 Return the directory where worker directories and the cache will be located for this workflow on this machine.
@@ -1489,10 +1339,10 @@ class Toil(ContextManager["Toil"]):

 @classmethod
 def get_local_workflow_coordination_dir(
- cls,
- workflow_id: str,
- config_work_dir: Optional[str],
- config_coordination_dir: Optional[str]
+ cls,
+ workflow_id: str,
+ config_work_dir: Optional[str],
+ config_coordination_dir: Optional[str]
 ) -> str:
 """
 Return the directory where coordination files should be located for
@@ -1654,8 +1504,10 @@ class ToilMetrics:
 # The only way to make this inteligible to MyPy is to wrap the dict in
 # a function that can cast.
 MessageType = TypeVar('MessageType')
+
 def get_listener(message_type: Type[MessageType]) -> Callable[[MessageType], None]:
 return cast(Callable[[MessageType], None], TARGETS[message_type])
+
 # Then set up the listeners.
 self._listeners = [bus.subscribe(message_type, get_listener(message_type)) for message_type in TARGETS.keys()]

@@ -1720,12 +1572,12 @@ class ToilMetrics:
 # remaining intact

 def logClusterSize(
- self, m: ClusterSizeMessage
+ self, m: ClusterSizeMessage
 ) -> None:
 self.log("current_size '%s' %i" % (m.instance_type, m.current_size))

 def logClusterDesiredSize(
- self, m: ClusterDesiredSizeMessage
+ self, m: ClusterDesiredSizeMessage
 ) -> None:
 self.log("desired_size '%s' %i" % (m.instance_type, m.desired_size))

@@ -1756,76 +1608,6 @@ class ToilMetrics:
 self._listeners = []


- def parseSetEnv(l: List[str]) -> Dict[str, Optional[str]]:
- """
- Parse a list of strings of the form "NAME=VALUE" or just "NAME" into a dictionary.
-
- Strings of the latter from will result in dictionary entries whose value is None.
-
- >>> parseSetEnv([])
- {}
- >>> parseSetEnv(['a'])
- {'a': None}
- >>> parseSetEnv(['a='])
- {'a': ''}
- >>> parseSetEnv(['a=b'])
- {'a': 'b'}
- >>> parseSetEnv(['a=a', 'a=b'])
- {'a': 'b'}
- >>> parseSetEnv(['a=b', 'c=d'])
- {'a': 'b', 'c': 'd'}
- >>> parseSetEnv(['a=b=c'])
- {'a': 'b=c'}
- >>> parseSetEnv([''])
- Traceback (most recent call last):
- ...
- ValueError: Empty name
- >>> parseSetEnv(['=1'])
- Traceback (most recent call last):
- ...
- ValueError: Empty name
- """
- d = {}
- v: Optional[str] = None
- for i in l:
- try:
- k, v = i.split('=', 1)
- except ValueError:
- k, v = i, None
- if not k:
- raise ValueError('Empty name')
- d[k] = v
- return d
-
-
- def iC(minValue: int, maxValue: int = SYS_MAX_SIZE) -> Callable[[int], bool]:
- """Returns a function that checks if a given int is in the given half-open interval."""
- assert isinstance(minValue, int) and isinstance(maxValue, int)
- return lambda x: minValue <= x < maxValue
-
-
- def fC(minValue: float, maxValue: Optional[float] = None) -> Callable[[float], bool]:
- """Returns a function that checks if a given float is in the given half-open interval."""
- assert isinstance(minValue, float)
- if maxValue is None:
- return lambda x: minValue <= x
- assert isinstance(maxValue, float)
- return lambda x: minValue <= x < maxValue # type: ignore
-
- def parse_accelerator_list(specs: Optional[str]) -> List['AcceleratorRequirement']:
- """
- Parse a string description of one or more accelerator requirements.
- """
-
- if specs is None or len(specs) == 0:
- # Not specified, so the default default is to not need any.
- return []
- # Otherwise parse each requirement.
- from toil.job import parse_accelerator
-
- return [parse_accelerator(r) for r in specs.split(',')]
-
-
  def cacheDirName(workflowID: str) -> str:
 """
 :return: Name of the cache directory.
@@ -1844,10 +1626,7 @@ def getDirSizeRecursively(dirPath: str) -> int:
 internally, and a (possibly 0) lower bound on the size of the directory
 will be returned.

- The environment variable 'BLOCKSIZE'='512' is set instead of the much cleaner
- --block-size=1 because Apple can't handle it.
-
- :param str dirPath: A valid path to a directory or file.
+ :param dirPath: A valid path to a directory or file.
 :return: Total size, in bytes, of the file or directory at dirPath.
 """

@@ -1857,12 +1636,22 @@ def getDirSizeRecursively(dirPath: str) -> int:
 # allocated with the environment variable: BLOCKSIZE='512' set, and we
 # multiply this by 512 to return the filesize in bytes.

+ dirPath = os.path.abspath(dirPath)
 try:
 return int(subprocess.check_output(['du', '-s', dirPath],
 env=dict(os.environ, BLOCKSIZE='512')).decode('utf-8').split()[0]) * 512
- except subprocess.CalledProcessError:
- # Something was inaccessible or went away
- return 0
+ # The environment variable 'BLOCKSIZE'='512' is set instead of the much cleaner
+ # --block-size=1 because Apple can't handle it.
+ except (OSError, subprocess.CalledProcessError):
+ # Fallback to pure Python implementation, useful for when kernel limits
+ # to argument list size are hit, etc..
+ total_size: int = 0
+ if os.path.isfile(dirPath):
+ return os.lstat(dirPath).st_blocks * 512
+ for dir_path, dir_names, filenames in os.walk(dirPath):
+ for name in filenames:
+ total_size += os.lstat(os.path.join(dir_path, name)).st_blocks * 512
+ return total_size
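The new fallback above sums st_blocks, which POSIX counts in 512-byte units, so both branches report allocated size (what du shows) rather than apparent size. A small illustration of the difference (the file path is hypothetical; for sparse files the two numbers can differ widely):

    import os

    st = os.lstat("/tmp/example.dat")
    apparent_size = st.st_size           # bytes the file claims to contain
    allocated_size = st.st_blocks * 512  # bytes actually allocated on disk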
 

 def getFileSystemSize(dirPath: str) -> Tuple[int, int]: