toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +39 -13
- toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/lsf.py +7 -7
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +87 -16
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +44 -8
- toil/common.py +544 -753
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +595 -574
- toil/cwl/utils.py +55 -10
- toil/exceptions.py +1 -1
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +88 -14
- toil/fileStores/cachingFileStore.py +610 -549
- toil/fileStores/nonCachingFileStore.py +46 -22
- toil/job.py +182 -101
- toil/jobStores/abstractJobStore.py +161 -95
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +6 -6
- toil/jobStores/fileJobStore.py +116 -18
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +87 -56
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +26 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +49 -2
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +9 -2
- toil/lib/threading.py +101 -38
- toil/options/common.py +736 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +37 -0
- toil/provisioners/abstractProvisioner.py +9 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +41 -9
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +24 -8
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +438 -223
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +2 -3
- toil/test/jobStores/jobStoreTest.py +34 -21
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +12 -17
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
- toil/test/options/options.py +37 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +23 -11
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +23 -3
- toil/test/wdl/wdltoil_test.py +223 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +445 -305
- toil/utils/toilStatus.py +2 -5
- toil/version.py +10 -10
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +1257 -492
- toil/worker.py +55 -46
- toil-6.1.0.dist-info/METADATA +124 -0
- toil-6.1.0.dist-info/RECORD +241 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/METADATA +0 -118
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
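
New in this release are the `toil/options/` modules, the `toil config` utility (`toil/utils/toilConfig.py` above), and a generated per-user default configuration file. As a rough sketch of that file's shape — the layout follows `generate_config` in the `toil/common.py` diff below, but the option lines shown here are illustrative placeholders, not the actual 6.1.0 defaults:

```yaml
config_version: 1.0
# This is the configuration file for Toil. To set an option, uncomment an
# existing option and set its value. The current values are the defaults.
# If the default configuration file is outdated, it can be refreshed with
# `toil config ~/.toil/default.yaml`.

# BASE TOIL OPTIONS
#logLevel: INFO        # illustrative entry; real defaults come from the parser
#workDir:              # illustrative

# TOIL CWL RUNNER OPTIONS
# ...

# TOIL WDL RUNNER OPTIONS
# ...
```

Every option is written commented out, so an untouched `~/.toil/default.yaml` changes nothing; uncommenting a line overrides the built-in default for all runs.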
toil/common.py
CHANGED

```diff
@@ -23,11 +23,16 @@ import tempfile
 import time
 import uuid
 import warnings
-from argparse import (ArgumentDefaultsHelpFormatter,
+from io import StringIO
+
+from ruamel.yaml import YAML
+from ruamel.yaml.comments import CommentedMap
+from configargparse import ArgParser, YAMLConfigFileParser
+from argparse import (SUPPRESS,
+                      ArgumentDefaultsHelpFormatter,
                       ArgumentParser,
                       Namespace,
-                      _ArgumentGroup)
-from distutils.util import strtobool
+                      _ArgumentGroup, Action, _StoreFalseAction, _StoreTrueAction, _AppendAction)
 from functools import lru_cache
 from types import TracebackType
 from typing import (IO,
@@ -37,7 +42,6 @@ from typing import (IO,
                     ContextManager,
                     Dict,
                     List,
-                    MutableMapping,
                     Optional,
                     Set,
                     Tuple,
@@ -46,10 +50,14 @@ from typing import (IO,
                     Union,
                     cast,
                     overload)
-from urllib.parse import urlparse
+from urllib.parse import urlparse, unquote, quote
 
 import requests
 
+from toil.options.common import add_base_toil_options, JOBSTORE_HELP
+from toil.options.cwl import add_cwl_options
+from toil.options.wdl import add_wdl_options
+
 if sys.version_info >= (3, 8):
     from typing import Literal
 else:
@@ -57,7 +65,6 @@ else:
 
 from toil import logProcessContext, lookupEnvVar
 from toil.batchSystems.options import (add_all_batchsystem_options,
-                                       set_batchsystem_config_defaults,
                                        set_batchsystem_options)
 from toil.bus import (ClusterDesiredSizeMessage,
                       ClusterSizeMessage,
@@ -66,22 +73,19 @@ from toil.bus import (ClusterDesiredSizeMessage,
                       JobIssuedMessage,
                       JobMissingMessage,
                       MessageBus,
-                      QueueSizeMessage,
-                      gen_message_bus_path)
+                      QueueSizeMessage)
 from toil.fileStores import FileID
 from toil.lib.aws import zone_to_region, build_tag_dict_from_env
 from toil.lib.compatibility import deprecated
-from toil.lib.conversions import bytes2human, human2bytes
-from toil.lib.io import try_path
+from toil.lib.io import try_path, AtomicFileCreate
 from toil.lib.retry import retry
 from toil.provisioners import (add_provisioner_options,
                                cluster_factory,
                                parse_node_types)
 from toil.realtimeLogger import RealtimeLogger
 from toil.statsAndLogging import (add_logging_options,
-                                  root_logger,
                                   set_logging_from_options)
-from toil.version import dockerRegistry, dockerTag, version
+from toil.version import dockerRegistry, dockerTag, version, baseVersion
@@ -94,14 +98,14 @@ if TYPE_CHECKING:
     from toil.provisioners.abstractProvisioner import AbstractProvisioner
     from toil.resource import ModuleDescriptor
 
-# aim to pack autoscaling jobs within a 30 minute block before provisioning a new node
-defaultTargetTime = 1800
-SYS_MAX_SIZE = 9223372036854775807
-# sys.max_size on 64 bit systems is 9223372036854775807, so that 32-bit systems
-# use the same number
 UUID_LENGTH = 32
 logger = logging.getLogger(__name__)
 
+# TODO: should this use an XDG config directory or ~/.config to not clutter the
+# base home directory?
+TOIL_HOME_DIR: str = os.path.join(os.path.expanduser("~"), ".toil")
+DEFAULT_CONFIG_FILE: str = os.path.join(TOIL_HOME_DIR, "default.yaml")
+
 
 class Config:
     """Class to represent configuration operations for a toil workflow run."""
@@ -110,118 +114,148 @@ class Config:
     cleanWorkDir: str
     max_jobs: int
     max_local_jobs: int
+    manualMemArgs: bool
     run_local_jobs_on_workers: bool
+    coalesceStatusCalls: bool
+    mesos_endpoint: Optional[str]
+    mesos_framework_id: Optional[str]
+    mesos_role: Optional[str]
+    mesos_name: str
+    kubernetes_host_path: Optional[str]
+    kubernetes_owner: Optional[str]
+    kubernetes_service_account: Optional[str]
+    kubernetes_pod_timeout: float
     tes_endpoint: str
     tes_user: str
     tes_password: str
     tes_bearer_token: str
-
+    aws_batch_region: Optional[str]
+    aws_batch_queue: Optional[str]
+    aws_batch_job_role_arn: Optional[str]
+    scale: float
     batchSystem: str
-    batch_logs_dir: Optional[str]
+    batch_logs_dir: Optional[str]
     """The backing scheduler will be instructed, if possible, to save logs
     to this directory, where the leader can read them."""
-
+    statePollingWait: int
     disableAutoDeployment: bool
 
-    def __init__(self) -> None:
-        # Core options
-        self.workflowID: Optional[str] = None
-        """This attribute uniquely identifies the job store and therefore the workflow. It is
-        necessary in order to distinguish between two consecutive workflows for which
-        self.jobStore is the same, e.g. when a job store name is reused after a previous run has
-        finished successfully and its job store has been clean up."""
-        self.workflowAttemptNumber = None
-        self.jobStore = None
-        self.logLevel: str = logging.getLevelName(root_logger.getEffectiveLevel())
-        self.workDir: Optional[str] = None
-        self.coordination_dir: Optional[str] = None
-        self.noStdOutErr: bool = False
-        self.stats: bool = False
-
-        # Because the stats option needs the jobStore to persist past the end of the run,
-        # the clean default value depends the specified stats option and is determined in setOptions
-        self.clean: Optional[str] = None
-        self.clusterStats = None
-
-        # Restarting the workflow options
-        self.restart: bool = False
+    # Core options
+    workflowID: Optional[str]
+    """This attribute uniquely identifies the job store and therefore the workflow. It is
+    necessary in order to distinguish between two consecutive workflows for which
+    self.jobStore is the same, e.g. when a job store name is reused after a previous run has
+    finished successfully and its job store has been clean up."""
+    workflowAttemptNumber: int
+    jobStore: str
+    logLevel: str
+    workDir: Optional[str]
+    coordination_dir: Optional[str]
+    noStdOutErr: bool
+    stats: bool
 
-        # Batch system options
-        set_batchsystem_config_defaults(self)
+    # Because the stats option needs the jobStore to persist past the end of the run,
+    # the clean default value depends the specified stats option and is determined in setOptions
+    clean: Optional[str]
+    clusterStats: str
 
-        # File store options
-        self.caching: Optional[bool] = None
-        self.linkImports: bool = True
-        self.moveExports: bool = False
+    # Restarting the workflow options
+    restart: bool
 
-        # Autoscaling options
-        self.provisioner: Optional[str] = None
-        self.nodeTypes: List[Tuple[Set[str], Optional[float]]] = []
-        self.minNodes = None
-        self.maxNodes = [10]
-        self.targetTime: float = defaultTargetTime
-        self.betaInertia: float = 0.1
-        self.scaleInterval: int = 60
-        self.preemptibleCompensation: float = 0.0
-        self.nodeStorage: int = 50
-        self.nodeStorageOverrides: List[str] = []
-        self.metrics: bool = False
-        self.assume_zero_overhead: bool = False
-
-        # Parameters to limit service jobs, so preventing deadlock scheduling scenarios
-        self.maxPreemptibleServiceJobs: int = sys.maxsize
-        self.maxServiceJobs: int = sys.maxsize
-        self.deadlockWait: Union[float, int] = 60  # Number of seconds we must be stuck with all services before declaring a deadlock
-        self.deadlockCheckInterval: Union[float, int] = 30  # Minimum polling delay for deadlocks
-
-        # Resource requirements
-        self.defaultMemory: int = 2147483648
-        self.defaultCores: Union[float, int] = 1
-        self.defaultDisk: int = 2147483648
-        self.defaultPreemptible: bool = False
-        # TODO: These names are generated programmatically in
-        # Requirer._fetchRequirement so we can't use snake_case until we fix
-        # that (and add compatibility getters/setters?)
-        self.defaultAccelerators: List['AcceleratorRequirement'] = []
-        self.maxCores: int = SYS_MAX_SIZE
-        self.maxMemory: int = SYS_MAX_SIZE
-        self.maxDisk: int = SYS_MAX_SIZE
+    # Batch system options
 
-        # Retrying/rescuing jobs
-        self.retryCount: int = 1
-        self.enableUnlimitedPreemptibleRetries: bool = False
-        self.doubleMem: bool = False
-        self.maxJobDuration: int = sys.maxsize
-        self.rescueJobsFrequency: int = 60
+    # File store options
+    caching: Optional[bool]
+    symlinkImports: bool
+    moveOutputs: bool
+
+    # Autoscaling options
+    provisioner: Optional[str]
+    nodeTypes: List[Tuple[Set[str], Optional[float]]]
+    minNodes: List[int]
+    maxNodes: List[int]
+    targetTime: float
+    betaInertia: float
+    scaleInterval: int
+    preemptibleCompensation: float
+    nodeStorage: int
+    nodeStorageOverrides: List[str]
+    metrics: bool
+    assume_zero_overhead: bool
+
+    # Parameters to limit service jobs, so preventing deadlock scheduling scenarios
+    maxPreemptibleServiceJobs: int
+    maxServiceJobs: int
+    deadlockWait: Union[
+        float, int]
+    deadlockCheckInterval: Union[float, int]
 
-        # Log management
-        self.maxLogFileSize: int = 64000
-        self.writeLogs = None
-        self.writeLogsGzip = None
-        self.writeLogsFromAllJobs: bool = False
-        self.write_messages: Optional[str] = None
+    # Resource requirements
+    defaultMemory: int
+    defaultCores: Union[float, int]
+    defaultDisk: int
+    defaultPreemptible: bool
+    # TODO: These names are generated programmatically in
+    # Requirer._fetchRequirement so we can't use snake_case until we fix
+    # that (and add compatibility getters/setters?)
+    defaultAccelerators: List['AcceleratorRequirement']
+    maxCores: int
+    maxMemory: int
+    maxDisk: int
 
-        # Misc
-        self.environment: Dict[str, str] = {}
-        self.disableChaining: bool = False
-        self.disableJobStoreChecksumVerification: bool = False
-        self.sseKey: Optional[str] = None
-        self.servicePollingInterval: int = 60
-        self.useAsync: bool = True
-        self.forceDockerAppliance: bool = False
-        self.statusWait: int = 3600
-        self.disableProgress: bool = False
-        self.readGlobalFileMutableByDefault: bool = False
-        self.kill_polling_interval: int = 5
+    # Retrying/rescuing jobs
+    retryCount: int
+    enableUnlimitedPreemptibleRetries: bool
+    doubleMem: bool
+    maxJobDuration: int
+    rescueJobsFrequency: int
+
+    # Log management
+    maxLogFileSize: int
+    writeLogs: str
+    writeLogsGzip: str
+    writeLogsFromAllJobs: bool
+    write_messages: Optional[str]
+    realTimeLogging: bool
+
+    # Misc
+    environment: Dict[str, str]
+    disableChaining: bool
+    disableJobStoreChecksumVerification: bool
+    sseKey: Optional[str]
+    servicePollingInterval: int
+    useAsync: bool
+    forceDockerAppliance: bool
+    statusWait: int
+    disableProgress: bool
+    readGlobalFileMutableByDefault: bool
 
-        # Debug options
-        self.debugWorker: bool = False
-        self.disableWorkerOutputCapture: bool = False
-        self.badWorker = 0.0
-        self.badWorkerFailInterval = 0.01
+    # Debug options
+    debugWorker: bool
+    disableWorkerOutputCapture: bool
+    badWorker: float
+    badWorkerFailInterval: float
+    kill_polling_interval: int
+
+    # CWL
+    cwl: bool
 
-        # CWL
-        self.cwl: bool = False
+    def __init__(self) -> None:
+        # only default options that are not CLI options defined here (thus CLI options are centralized)
+        self.cwl = False  # will probably remove later
+        self.workflowID = None
+        self.kill_polling_interval = 5
+
+        self.set_from_default_config()
+
+    def set_from_default_config(self) -> None:
+        # get defaults from a config file by simulating an argparse run
+        # as Config often expects defaults to already be instantiated
+        parser = ArgParser()
+        addOptions(parser, jobstore_as_flag=True, cwl=self.cwl)
+        # The parser already knows about the default config file
+        ns = parser.parse_args("")
+        self.setOptions(ns)
 
     def prepare_start(self) -> None:
         """
@@ -239,17 +273,11 @@
         # auto-generated and point to a temp directory that could no longer
         # exist and that can't safely be re-made.
         self.write_messages = None
-
 
     def setOptions(self, options: Namespace) -> None:
         """Creates a config object from the options object."""
-        OptionType = TypeVar("OptionType")
 
         def set_option(option_name: str,
-                       parsing_function: Optional[Callable[[Any], OptionType]] = None,
-                       check_function: Optional[Callable[[OptionType], Union[None, bool]]] = None,
-                       default: Optional[OptionType] = None,
-                       env: Optional[List[str]] = None,
                        old_names: Optional[List[str]] = None) -> None:
             """
             Determine the correct value for the given option.
@@ -258,8 +286,6 @@
 
             1. options object under option_name
             2. options object under old_names
-            3. environment variables in env
-            4. provided default value
 
             Selected option value is run through parsing_funtion if it is set.
             Then the parsed value is run through check_function to check it for
@@ -269,192 +295,142 @@
             If the option gets a non-None value, sets it as an attribute in
             this Config.
             """
-            option_value = getattr(options, option_name, default)
+            option_value = getattr(options, option_name, None)
 
             if old_names is not None:
                 for old_name in old_names:
+                    # If the option is already set with the new name and not the old name
+                    # prioritize the new name over the old name and break
+                    if option_value is not None and option_value != [] and option_value != {}:
+                        break
                     # Try all the old names in case user code is setting them
                     # in an options object.
-
-
-                    if hasattr(options, old_name):
+                    # This does assume that all deprecated options have a default value of None
+                    if getattr(options, old_name, None) is not None:
                         warnings.warn(f'Using deprecated option field {old_name} to '
                                       f'provide value for config field {option_name}',
                                       DeprecationWarning)
                         option_value = getattr(options, old_name)
-
-            if env is not None:
-                for env_var in env:
-                    # Try all the environment variables
-                    if option_value != default:
-                        break
-                    option_value = os.environ.get(env_var, default)
-
             if option_value is not None or not hasattr(self, option_name):
-                if parsing_function is not None:
-                    # Parse whatever it is (string, argparse-made list, etc.)
-                    option_value = parsing_function(option_value)
-                if check_function is not None:
-                    try:
-                        check_function(option_value)  # type: ignore
-                    except AssertionError:
-                        raise RuntimeError(f"The {option_name} option has an invalid value: {option_value}")
                 setattr(self, option_name, option_value)
 
-        # Function to parse integer from string expressed in different formats
-        h2b = lambda x: human2bytes(str(x))
-
-        def parse_jobstore(jobstore_uri: str) -> str:
-            name, rest = Toil.parseLocator(jobstore_uri)
-            if name == 'file':
-                # We need to resolve relative paths early, on the leader, because the worker process
-                # may have a different working directory than the leader, e.g. under Mesos.
-                return Toil.buildLocator(name, os.path.abspath(rest))
-            else:
-                return jobstore_uri
-
-        def parse_str_list(s: str) -> List[str]:
-            return [str(x) for x in s.split(",")]
-
-        def parse_int_list(s: str) -> List[int]:
-            return [int(x) for x in s.split(",")]
-
         # Core options
-        set_option("jobStore", parse_jobstore)
+        set_option("jobStore")
         # TODO: LOG LEVEL STRING
         set_option("workDir")
-        if self.workDir is not None:
-            self.workDir = os.path.abspath(self.workDir)
-            if not os.path.exists(self.workDir):
-                raise RuntimeError(f"The path provided to --workDir ({self.workDir}) does not exist.")
-
-            if len(self.workDir) > 80:
-                logger.warning(f'Length of workDir path "{self.workDir}" is {len(self.workDir)} characters. '
-                               f'Consider setting a shorter path with --workPath or setting TMPDIR to something '
-                               f'like "/tmp" to avoid overly long paths.')
         set_option("coordination_dir")
-        if self.coordination_dir is not None:
-            self.coordination_dir = os.path.abspath(self.coordination_dir)
-            if not os.path.exists(self.coordination_dir):
-                raise RuntimeError(f"The path provided to --coordinationDir ({self.coordination_dir}) does not exist.")
 
         set_option("noStdOutErr")
         set_option("stats")
         set_option("cleanWorkDir")
         set_option("clean")
-        if self.stats:
-            if self.clean != "never" and self.clean is not None:
-                raise RuntimeError("Contradicting options passed: Clean flag is set to %s "
-                                   "despite the stats flag requiring "
-                                   "the jobStore to be intact at the end of the run. "
-                                   "Set clean to \'never\'" % self.clean)
-            self.clean = "never"
-        elif self.clean is None:
-            self.clean = "onSuccess"
         set_option('clusterStats')
         set_option("restart")
 
         # Batch system options
         set_option("batchSystem")
-        set_batchsystem_options(self.batchSystem, cast("OptionSetter", set_option))
+        set_batchsystem_options(None, cast("OptionSetter",
+                                           set_option))  # None as that will make set_batchsystem_options iterate through all batch systems and set their corresponding values
 
         # File store options
-        set_option("linkImports")
-        set_option("moveExports")
-        set_option("caching", bool)
+        set_option("symlinkImports", old_names=["linkImports"])
+        set_option("moveOutputs", old_names=["moveExports"])
+        set_option("caching", old_names=["enableCaching"])
 
         # Autoscaling options
         set_option("provisioner")
-        set_option("nodeTypes", parse_node_types)
-        set_option("minNodes", parse_int_list)
-        set_option("maxNodes", parse_int_list)
-        set_option("targetTime", int)
-        if self.targetTime <= 0:
-            raise RuntimeError(f'targetTime ({self.targetTime}) must be a positive integer!')
-        set_option("betaInertia", float)
-        if not 0.0 <= self.betaInertia <= 0.9:
-            raise RuntimeError(f'betaInertia ({self.betaInertia}) must be between 0.0 and 0.9!')
-        set_option("scaleInterval", float)
+        set_option("nodeTypes")
+        set_option("minNodes")
+        set_option("maxNodes")
+        set_option("targetTime")
+        set_option("betaInertia")
+        set_option("scaleInterval")
         set_option("metrics")
         set_option("assume_zero_overhead")
-        set_option("preemptibleCompensation", float)
-        if not 0.0 <= self.preemptibleCompensation <= 1.0:
-            raise RuntimeError(f'preemptibleCompensation ({self.preemptibleCompensation}) must be between 0.0 and 1.0!')
-        set_option("nodeStorage", int)
-        set_option("nodeStorageOverrides", parse_str_list)
-        for override in self.nodeStorageOverrides:
-            tokens = override.split(":")
-            if not any(tokens[0] in n[0] for n in self.nodeTypes):
-                raise RuntimeError("Instance type in --nodeStorageOverrides must be in --nodeTypes")
-
-        # Parameters to limit service jobs, so preventing deadlock scheduling scenarios
-        set_option("maxServiceJobs", int)
-        set_option("maxPreemptibleServiceJobs", int)
-        set_option("deadlockWait", int)
-        set_option("deadlockCheckInterval", int)
-
-        # Resource requirements
-        set_option("defaultMemory", h2b, iC(1))
-        set_option("defaultCores", float, fC(1.0))
-        set_option("defaultDisk", h2b, iC(1))
-        set_option("defaultAccelerators", parse_accelerator_list)
-        set_option("readGlobalFileMutableByDefault")
-        set_option("maxCores", int, iC(1))
-        set_option("maxMemory", h2b, iC(1))
-        set_option("maxDisk", h2b, iC(1))
+        set_option("preemptibleCompensation")
+        set_option("nodeStorage")
+
+        set_option("nodeStorageOverrides")
+
+        if self.cwl is False:
+            # Parameters to limit service jobs / detect deadlocks
+            set_option("maxServiceJobs")
+            set_option("maxPreemptibleServiceJobs")
+            set_option("deadlockWait")
+            set_option("deadlockCheckInterval")
+
+        set_option("defaultMemory")
+        set_option("defaultCores")
+        set_option("defaultDisk")
+        set_option("defaultAccelerators")
+        set_option("maxCores")
+        set_option("maxMemory")
+        set_option("maxDisk")
         set_option("defaultPreemptible")
 
         # Retrying/rescuing jobs
-        set_option("retryCount", int)
+        set_option("retryCount")
         set_option("enableUnlimitedPreemptibleRetries")
         set_option("doubleMem")
-        set_option("maxJobDuration", int)
-        set_option("rescueJobsFrequency", int)
+        set_option("maxJobDuration")
+        set_option("rescueJobsFrequency")
 
         # Log management
-        set_option("maxLogFileSize", h2b)
+        set_option("maxLogFileSize")
         set_option("writeLogs")
         set_option("writeLogsGzip")
         set_option("writeLogsFromAllJobs")
-        set_option("write_messages", os.path.abspath)
-
-        if not self.write_messages:
-            # The user hasn't specified a place for the message bus so we
-            # should make one.
-            self.write_messages = gen_message_bus_path()
-
-        assert not (self.writeLogs and self.writeLogsGzip), \
-            "Cannot use both --writeLogs and --writeLogsGzip at the same time."
-        assert not self.writeLogsFromAllJobs or self.writeLogs or self.writeLogsGzip, \
-            "To enable --writeLogsFromAllJobs, either --writeLogs or --writeLogsGzip must be set."
+        set_option("write_messages")
 
         # Misc
-        set_option("environment", parseSetEnv)
+        set_option("environment")
+
         set_option("disableChaining")
         set_option("disableJobStoreChecksumVerification")
-        set_option("statusWait", int)
+        set_option("statusWait")
         set_option("disableProgress")
 
-        def check_sse_key(sse_key: str) -> None:
-            with open(sse_key) as f:
-                assert len(f.readline().rstrip()) == 32, 'SSE key appears to be invalid.'
-
-        set_option("sseKey", check_function=check_sse_key)
-        set_option("servicePollingInterval", float, fC(0.0))
+        set_option("sseKey")
+        set_option("servicePollingInterval")
         set_option("forceDockerAppliance")
 
         # Debug options
         set_option("debugWorker")
         set_option("disableWorkerOutputCapture")
-        set_option("badWorker", float, fC(0.0))
-        set_option("badWorkerFailInterval", float, fC(0.0))
+        set_option("badWorker")
+        set_option("badWorkerFailInterval")
+        set_option("logLevel")
+
+        # Apply overrides as highest priority
+        # Override workDir with value of TOIL_WORKDIR_OVERRIDE if it exists
+        if os.getenv('TOIL_WORKDIR_OVERRIDE') is not None:
+            self.workDir = os.getenv('TOIL_WORKDIR_OVERRIDE')
+        # Override workDir with value of TOIL_WORKDIR_OVERRIDE if it exists
+        if os.getenv('TOIL_COORDINATION_DIR_OVERRIDE') is not None:
+            self.workDir = os.getenv('TOIL_COORDINATION_DIR_OVERRIDE')
+
+        self.check_configuration_consistency()
+
+        logger.debug("Loaded configuration: %s", vars(options))
+
+    def check_configuration_consistency(self) -> None:
+        """Old checks that cannot be fit into an action class for argparse"""
+        if self.writeLogs and self.writeLogsGzip:
+            raise ValueError("Cannot use both --writeLogs and --writeLogsGzip at the same time.")
+        if self.writeLogsFromAllJobs and not self.writeLogs and not self.writeLogsGzip:
+            raise ValueError("To enable --writeLogsFromAllJobs, either --writeLogs or --writeLogsGzip must be set.")
+        for override in self.nodeStorageOverrides:
+            tokens = override.split(":")
+            if not any(tokens[0] in n[0] for n in self.nodeTypes):
+                raise ValueError("Instance type in --nodeStorageOverrides must be in --nodeTypes")
+
+        if self.stats:
+            if self.clean != "never" and self.clean is not None:
+                logger.warning("Contradicting options passed: Clean flag is set to %s "
+                               "despite the stats flag requiring "
+                               "the jobStore to be intact at the end of the run. "
+                               "Setting clean to \'never\'." % self.clean)
+                self.clean = "never"
 
     def __eq__(self, other: object) -> bool:
         return self.__dict__ == other.__dict__
@@ -463,25 +439,170 @@
         return self.__dict__.__hash__()  # type: ignore
 
 
-[… 13 removed lines not preserved in this diff rendering …]
+def check_and_create_toil_home_dir() -> None:
+    """
+    Ensure that TOIL_HOME_DIR exists.
+
+    Raises an error if it does not exist and cannot be created. Safe to run
+    simultaneously in multiple processes.
+    """
+
+    dir_path = try_path(TOIL_HOME_DIR)
+    if dir_path is None:
+        raise RuntimeError(f"Cannot create or access Toil configuration directory {TOIL_HOME_DIR}")
+
+
+def check_and_create_default_config_file() -> None:
+    """
+    If the default config file does not exist, create it in the Toil home directory. Create the Toil home directory
+    if needed
+
+    Raises an error if the default config file cannot be created.
+    Safe to run simultaneously in multiple processes. If this process runs
+    this function, it will always see the default config file existing with
+    parseable contents, even if other processes are racing to create it.
+
+    No process will see an empty or partially-written default config file.
+    """
+    check_and_create_toil_home_dir()
+    # The default config file did not appear to exist when we checked.
+    # It might exist now, though. Try creating it.
+    check_and_create_config_file(DEFAULT_CONFIG_FILE)
+
+
+def check_and_create_config_file(filepath: str) -> None:
+    """
+    If the config file at the filepath does not exist, try creating it.
+    The parent directory should be created prior to calling this
+    :param filepath: path to config file
+    :return: None
+    """
+    if not os.path.exists(filepath):
+        generate_config(filepath)
+
+
+def generate_config(filepath: str) -> None:
+    """
+    Write a Toil config file to the given path.
+
+    Safe to run simultaneously in multiple processes. No process will see an
+    empty or partially-written file at the given path.
+
+    Set include to "cwl" or "wdl" to include cwl options and wdl options respectfully
+    """
+    # this is placed in common.py rather than toilConfig.py to prevent circular imports
+
+    # configargparse's write_config function does not write options with a None value
+    # Thus, certain CLI options that use None as their default won't be written to the config file.
+    # it also does not support printing config elements in nonalphabetical order
+
+    # Instead, mimic configargparser's write_config behavior and also make it output arguments with
+    # a default value of None
+
+    # To do this, iterate through the options
+    # Skip --help and --config as they should not be included in the config file
+    # Skip deprecated/redundant options
+    # Various log options are skipped as they are store_const arguments that are redundant to --logLevel
+    # linkImports, moveExports, disableCaching, are deprecated in favor of --symlinkImports, --moveOutputs,
+    # and --caching respectively
+    # Skip StoreTrue and StoreFalse options that have opposite defaults as including it in the config would
+    # override those defaults
+    deprecated_or_redundant_options = ("help", "config", "logCritical", "logDebug", "logError", "logInfo", "logOff",
+                                       "logWarning", "linkImports", "noLinkImports", "moveExports", "noMoveExports",
+                                       "enableCaching", "disableCaching", "version")
+
+    def create_config_dict_from_parser(parser: ArgumentParser) -> CommentedMap:
+        """
+        Creates a CommentedMap of the config file output from a given parser. This will put every parser action and it's
+        default into the output
+
+        :param parser: parser to generate from
+        :return: CommentedMap of what to put into the config file
+        """
+        data = CommentedMap()  # to preserve order
+        group_title_key: Dict[str, str] = dict()
+        for action in parser._actions:
+            if any(s.replace("-", "") in deprecated_or_redundant_options for s in action.option_strings):
+                continue
+            # if action is StoreFalse and default is True then don't include
+            if isinstance(action, _StoreFalseAction) and action.default is True:
+                continue
+            # if action is StoreTrue and default is False then don't include
+            if isinstance(action, _StoreTrueAction) and action.default is False:
+                continue
+
+            if len(action.option_strings) == 0:
+                continue
+
+            option_string = action.option_strings[0] if action.option_strings[0].find("--") != -1 else \
+                action.option_strings[1]
+            option = option_string[2:]
+
+            default = action.default
+
+            data[option] = default
+
+            # store where each argparse group starts
+            group_title = action.container.title  # type: ignore[attr-defined]
+            group_title_key.setdefault(group_title, option)
+
+        # add comment for when each argparse group starts
+        for group_title, key in group_title_key.items():
+            data.yaml_set_comment_before_after_key(key, group_title)
+
+        return data
+
+    all_data = []
+
+    parser = ArgParser(YAMLConfigFileParser())
+    add_base_toil_options(parser, jobstore_as_flag=True, cwl=False)
+    toil_base_data = create_config_dict_from_parser(parser)
+
+    toil_base_data.yaml_set_start_comment("This is the configuration file for Toil. To set an option, uncomment an "
+                                          "existing option and set its value. The current values are the defaults. "
+                                          "If the default configuration file is outdated, it can be refreshed with "
+                                          "`toil config ~/.toil/default.yaml`.\n\nBASE TOIL OPTIONS\n")
+    all_data.append(toil_base_data)
+
+    parser = ArgParser(YAMLConfigFileParser())
+    add_cwl_options(parser)
+    toil_cwl_data = create_config_dict_from_parser(parser)
+    toil_cwl_data.yaml_set_start_comment("\nTOIL CWL RUNNER OPTIONS")
+    all_data.append(toil_cwl_data)
+
+    parser = ArgParser(YAMLConfigFileParser())
+    add_wdl_options(parser)
+    toil_wdl_data = create_config_dict_from_parser(parser)
+    toil_wdl_data.yaml_set_start_comment("\nTOIL WDL RUNNER OPTIONS")
+    all_data.append(toil_wdl_data)
+
+    # Now we need to put the config file in place at filepath.
+    # But someone else may have already created a file at that path, or may be
+    # about to open the file at that path and read it before we can finish
+    # writing the contents. So we write the config file at a temporary path and
+    # atomically move it over. There's still a race to see which process's
+    # config file actually is left at the name in the end, but nobody will ever
+    # see an empty or partially-written file at that name (if there wasn't one
+    # there to begin with).
+    with AtomicFileCreate(filepath) as temp_path:
+        with open(temp_path, "w") as f:
+            f.write("config_version: 1.0\n")
+            yaml = YAML(typ='rt')
+            for data in all_data:
+                if "config_version" in data:
+                    del data["config_version"]
+                with StringIO() as data_string:
+                    yaml.dump(data, data_string)
+                    for line in data_string.readline():
+                        if line:
+                            f.write("#")
+                        f.write(f"{line}\n")
 
 
 def parser_with_common_options(
-        provisioner_options: bool = False, jobstore_option: bool = True
-) -> ArgumentParser:
-    parser = ArgumentParser(prog="Toil", formatter_class=ArgumentDefaultsHelpFormatter)
+        provisioner_options: bool = False, jobstore_option: bool = True, prog: Optional[str] = None
+) -> ArgParser:
+    parser = ArgParser(prog=prog or "Toil", formatter_class=ArgumentDefaultsHelpFormatter)
 
     if provisioner_options:
         add_provisioner_options(parser)
@@ -498,372 +619,100 @@ def parser_with_common_options(
     return parser
 
 
-def addOptions(parser: ArgumentParser, jobstore_as_flag: bool = False) -> None:
+def addOptions(parser: ArgumentParser, jobstore_as_flag: bool = False, cwl: bool = False, wdl: bool = False) -> None:
     """
-    Add Toil command line options to a parser.
+    Add all Toil command line options to a parser.
 
-
+    Support for config files if using configargparse. This will also check and set up the default config file.
 
     :param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
-    """
 
-
-    config = Config()
-    if not (isinstance(parser, ArgumentParser) or isinstance(parser, _ArgumentGroup)):
-        raise ValueError(f"Unanticipated class: {parser.__class__}. Must be: argparse.ArgumentParser or ArgumentGroup.")
-
-    add_logging_options(parser)
-    parser.register("type", "bool", parseBool)  # Custom type for arg=True/False.
+    :param cwl: Whether CWL options are expected. If so, CWL options won't be suppressed.
 
-    [… 8 removed lines not preserved in this diff rendering …]
+    :param wdl: Whether WDL options are expected. If so, WDL options won't be suppressed.
+    """
+    if cwl and wdl:
+        raise RuntimeError("CWL and WDL cannot both be true at the same time when adding options.")
+    if not (isinstance(parser, ArgumentParser) or isinstance(parser, _ArgumentGroup)):
+        raise ValueError(
+            f"Unanticipated class: {parser.__class__}. Must be: argparse.ArgumentParser or ArgumentGroup.")
+
+    if isinstance(parser, ArgParser):
+        # in case the user passes in their own configargparse instance instead of calling getDefaultArgumentParser()
+        # this forces configargparser to process the config file in YAML rather than in it's own format
+        parser._config_file_parser = YAMLConfigFileParser()  # type: ignore[misc]
+        parser._default_config_files = [DEFAULT_CONFIG_FILE]  # type: ignore[misc]
     else:
-    [… 69 removed lines (the old hand-rolled option groups) not preserved in this diff rendering …]
-    link_imports.set_defaults(linkImports=True)
-
-    move_exports = file_store_options.add_mutually_exclusive_group()
-    move_exports_help = ('When using a filesystem based job store, output files are by default moved to the '
-                         'output directory, and a symlink to the moved exported file is created at the initial '
-                         'location. Specifying this option instead copies the files into the output directory. '
-                         'Applies to filesystem-based job stores only.')
-    move_exports.add_argument("--moveExports", dest="moveExports", action='store_true', help=move_exports_help)
-    move_exports.add_argument("--noMoveExports", dest="moveExports", action='store_false', help=move_exports_help)
-    move_exports.set_defaults(moveExports=False)
-
-    caching = file_store_options.add_mutually_exclusive_group()
-    caching_help = ("Enable or disable caching for your workflow, specifying this overrides default from job store")
-    caching.add_argument('--disableCaching', dest='caching', action='store_false', help=caching_help)
-    caching.add_argument('--caching', dest='caching', type=lambda val: bool(strtobool(val)), help=caching_help)
-    caching.set_defaults(caching=None)
-
-    # Auto scaling options
-    autoscaling_options = parser.add_argument_group(
-        title="Toil options for autoscaling the cluster of worker nodes.",
-        description="Allows the specification of the minimum and maximum number of nodes in an autoscaled cluster, "
-                    "as well as parameters to control the level of provisioning."
-    )
-    provisioner_choices = ['aws', 'gce', None]
-    # TODO: Better consolidate this provisioner arg and the one in provisioners/__init__.py?
-    autoscaling_options.add_argument('--provisioner', '-p', dest="provisioner", choices=provisioner_choices,
-                                     help=f"The provisioner for cluster auto-scaling. This is the main Toil "
-                                          f"'--provisioner' option, and defaults to None for running on single "
-                                          f"machine and non-auto-scaling batch systems. The currently supported "
-                                          f"choices are {provisioner_choices}. The default is {config.provisioner}.")
-    autoscaling_options.add_argument('--nodeTypes', default=None,
-                                     help="Specifies a list of comma-separated node types, each of which is "
-                                          "composed of slash-separated instance types, and an optional spot "
-                                          "bid set off by a colon, making the node type preemptible. Instance "
-                                          "types may appear in multiple node types, and the same node type "
-                                          "may appear as both preemptible and non-preemptible.\n"
-                                          "Valid argument specifying two node types:\n"
-                                          "\tc5.4xlarge/c5a.4xlarge:0.42,t2.large\n"
-                                          "Node types:\n"
-                                          "\tc5.4xlarge/c5a.4xlarge:0.42 and t2.large\n"
-                                          "Instance types:\n"
-                                          "\tc5.4xlarge, c5a.4xlarge, and t2.large\n"
-                                          "Semantics:\n"
-                                          "\tBid $0.42/hour for either c5.4xlarge or c5a.4xlarge instances,\n"
-                                          "\ttreated interchangeably, while they are available at that price,\n"
-                                          "\tand buy t2.large instances at full price")
-    autoscaling_options.add_argument('--minNodes', default=None,
-                                     help="Mininum number of nodes of each type in the cluster, if using "
-                                          "auto-scaling. This should be provided as a comma-separated list of the "
-                                          "same length as the list of node types. default=0")
-    autoscaling_options.add_argument('--maxNodes', default=None,
-                                     help=f"Maximum number of nodes of each type in the cluster, if using autoscaling, "
-                                          f"provided as a comma-separated list. The first value is used as a default "
-                                          f"if the list length is less than the number of nodeTypes. "
-                                          f"default={config.maxNodes[0]}")
-    autoscaling_options.add_argument("--targetTime", dest="targetTime", default=None,
-                                     help=f"Sets how rapidly you aim to complete jobs in seconds. Shorter times mean "
-                                          f"more aggressive parallelization. The autoscaler attempts to scale up/down "
-                                          f"so that it expects all queued jobs will complete within targetTime "
-                                          f"seconds. default={config.targetTime}")
-    autoscaling_options.add_argument("--betaInertia", dest="betaInertia", default=None,
-                                     help=f"A smoothing parameter to prevent unnecessary oscillations in the number "
-                                          f"of provisioned nodes. This controls an exponentially weighted moving "
-                                          f"average of the estimated number of nodes. A value of 0.0 disables any "
-                                          f"smoothing, and a value of 0.9 will smooth so much that few changes will "
-                                          f"ever be made. Must be between 0.0 and 0.9. default={config.betaInertia}")
-    autoscaling_options.add_argument("--scaleInterval", dest="scaleInterval", default=None,
-                                     help=f"The interval (seconds) between assessing if the scale of "
-                                          f"the cluster needs to change. default={config.scaleInterval}")
-    autoscaling_options.add_argument("--preemptibleCompensation", "--preemptableCompensation", dest="preemptibleCompensation", default=None,
-                                     help=f"The preference of the autoscaler to replace preemptible nodes with "
-                                          f"non-preemptible nodes, when preemptible nodes cannot be started for some "
-                                          f"reason. Defaults to {config.preemptibleCompensation}. This value must be "
-                                          f"between 0.0 and 1.0, inclusive. A value of 0.0 disables such "
-                                          f"compensation, a value of 0.5 compensates two missing preemptible nodes "
-                                          f"with a non-preemptible one. A value of 1.0 replaces every missing "
-                                          f"pre-emptable node with a non-preemptible one.")
-    autoscaling_options.add_argument("--nodeStorage", dest="nodeStorage", default=50,
-                                     help="Specify the size of the root volume of worker nodes when they are launched "
-                                          "in gigabytes. You may want to set this if your jobs require a lot of disk "
-                                          "space. (default: %(default)s).")
-    autoscaling_options.add_argument('--nodeStorageOverrides', default=None,
-                                     help="Comma-separated list of nodeType:nodeStorage that are used to override "
-                                          "the default value from --nodeStorage for the specified nodeType(s). "
-                                          "This is useful for heterogeneous jobs where some tasks require much more "
-                                          "disk than others.")
-    autoscaling_options.add_argument("--metrics", dest="metrics", default=False, action="store_true",
-                                     help="Enable the prometheus/grafana dashboard for monitoring CPU/RAM usage, "
-                                          "queue size, and issued jobs.")
-    autoscaling_options.add_argument("--assumeZeroOverhead", dest="assume_zero_overhead", default=False, action="store_true",
-                                     help="Ignore scheduler and OS overhead and assume jobs can use every last byte "
-                                          "of memory and disk on a node when autoscaling.")
-
-    # Parameters to limit service jobs / detect service deadlocks
-    if not config.cwl:
-        service_options = parser.add_argument_group(
-            title="Toil options for limiting the number of service jobs and detecting service deadlocks",
-            description="Allows the specification of the maximum number of service jobs in a cluster. By keeping "
-                        "this limited we can avoid nodes occupied with services causing deadlocks."
-        )
-        service_options.add_argument("--maxServiceJobs", dest="maxServiceJobs", default=None, type=int,
-                                     help=f"The maximum number of service jobs that can be run concurrently, "
-                                          f"excluding service jobs running on preemptible nodes. "
-                                          f"default={config.maxServiceJobs}")
-        service_options.add_argument("--maxPreemptibleServiceJobs", dest="maxPreemptibleServiceJobs", default=None,
-                                     type=int,
-                                     help=f"The maximum number of service jobs that can run concurrently on "
-                                          f"preemptible nodes. default={config.maxPreemptibleServiceJobs}")
-        service_options.add_argument("--deadlockWait", dest="deadlockWait", default=None, type=int,
-                                     help=f"Time, in seconds, to tolerate the workflow running only the same service "
-                                          f"jobs, with no jobs to use them, before declaring the workflow to be "
-                                          f"deadlocked and stopping. default={config.deadlockWait}")
-        service_options.add_argument("--deadlockCheckInterval", dest="deadlockCheckInterval", default=None, type=int,
-                                     help="Time, in seconds, to wait between checks to see if the workflow is stuck "
-                                          "running only service jobs, with no jobs to use them. Should be shorter "
-                                          "than --deadlockWait. May need to be increased if the batch system cannot "
-                                          "enumerate running jobs quickly enough, or if polling for running jobs is "
-                                          "placing an unacceptable load on a shared cluster. "
-                                          "default={config.deadlockCheckInterval}")
-
-    # Resource requirements
-    resource_options = parser.add_argument_group(
-        title="Toil options for cores/memory requirements.",
-        description="The options to specify default cores/memory requirements (if not specified by the jobs "
-                    "themselves), and to limit the total amount of memory/cores requested from the batch system."
-    )
-    resource_help_msg = ('The {} amount of {} to request for a job. '
-                         'Only applicable to jobs that do not specify an explicit value for this requirement. '
-                         '{}. '
-                         'Default is {}.')
-    cpu_note = 'Fractions of a core (for example 0.1) are supported on some batch systems [mesos, single_machine]'
-    disk_mem_note = 'Standard suffixes like K, Ki, M, Mi, G or Gi are supported'
-    accelerators_note = ('Each accelerator specification can have a type (gpu [default], nvidia, amd, cuda, rocm, opencl, '
-                         'or a specific model like nvidia-tesla-k80), and a count [default: 1]. If both a type and a count '
-                         'are used, they must be separated by a colon. If multiple types of accelerators are '
-                         'used, the specifications are separated by commas')
-    resource_options.add_argument('--defaultMemory', dest='defaultMemory', default=None, metavar='INT',
-                                  help=resource_help_msg.format('default', 'memory', disk_mem_note, bytes2human(config.defaultMemory)))
-    resource_options.add_argument('--defaultCores', dest='defaultCores', default=None, metavar='FLOAT',
-                                  help=resource_help_msg.format('default', 'cpu', cpu_note, str(config.defaultCores)))
-    resource_options.add_argument('--defaultDisk', dest='defaultDisk', default=None, metavar='INT',
-                                  help=resource_help_msg.format('default', 'disk', disk_mem_note, bytes2human(config.defaultDisk)))
-    resource_options.add_argument('--defaultAccelerators', dest='defaultAccelerators', default=None, metavar='ACCELERATOR[,ACCELERATOR...]',
-                                  help=resource_help_msg.format('default', 'accelerators', accelerators_note, config.defaultAccelerators))
-    resource_options.add_argument('--defaultPreemptible', '--defaultPreemptable', dest='defaultPreemptible', metavar='BOOL',
-                                  type=bool, nargs='?', const=True, default=False,
-                                  help='Make all jobs able to run on preemptible (spot) nodes by default.')
-    resource_options.add_argument('--maxCores', dest='maxCores', default=None, metavar='INT',
-                                  help=resource_help_msg.format('max', 'cpu', cpu_note, str(config.maxCores)))
-    resource_options.add_argument('--maxMemory', dest='maxMemory', default=None, metavar='INT',
-                                  help=resource_help_msg.format('max', 'memory', disk_mem_note, bytes2human(config.maxMemory)))
-    resource_options.add_argument('--maxDisk', dest='maxDisk', default=None, metavar='INT',
-                                  help=resource_help_msg.format('max', 'disk', disk_mem_note, bytes2human(config.maxDisk)))
-
-    # Retrying/rescuing jobs
-    job_options = parser.add_argument_group(
-        title="Toil options for rescuing/killing/restarting jobs.",
-        description="The options for jobs that either run too long/fail or get lost (some batch systems have issues!)."
-    )
-    job_options.add_argument("--retryCount", dest="retryCount", default=None,
-                             help=f"Number of times to retry a failing job before giving up and "
-                                  f"labeling job failed. default={config.retryCount}")
-    job_options.add_argument("--enableUnlimitedPreemptibleRetries", "--enableUnlimitedPreemptableRetries", dest="enableUnlimitedPreemptibleRetries",
-                             action='store_true', default=False,
-                             help="If set, preemptible failures (or any failure due to an instance getting "
-                                  "unexpectedly terminated) will not count towards job failures and --retryCount.")
-    job_options.add_argument("--doubleMem", dest="doubleMem", action='store_true', default=False,
-                             help="If set, batch jobs which die to reaching memory limit on batch schedulers "
-                                  "will have their memory doubled and they will be retried. The remaining "
-                                  "retry count will be reduced by 1. Currently supported by LSF.")
-    job_options.add_argument("--maxJobDuration", dest="maxJobDuration", default=None,
-                             help=f"Maximum runtime of a job (in seconds) before we kill it (this is a lower bound, "
-                                  f"and the actual time before killing the job may be longer). "
-                                  f"default={config.maxJobDuration}")
-    job_options.add_argument("--rescueJobsFrequency", dest="rescueJobsFrequency", default=None,
-                             help=f"Period of time to wait (in seconds) between checking for missing/overlong jobs, "
-                                  f"that is jobs which get lost by the batch system. Expert parameter. "
-                                  f"default={config.rescueJobsFrequency}")
-
-    # Log management options
-    log_options = parser.add_argument_group(
-        title="Toil log management options.",
-        description="Options for how Toil should manage its logs."
-    )
-    log_options.add_argument("--maxLogFileSize", dest="maxLogFileSize", default=None,
-                             help=f"The maximum size of a job log file to keep (in bytes), log files larger than "
-                                  f"this will be truncated to the last X bytes. Setting this option to zero will "
-                                  f"prevent any truncation. Setting this option to a negative value will truncate "
-                                  f"from the beginning. Default={bytes2human(config.maxLogFileSize)}")
-    log_options.add_argument("--writeLogs", dest="writeLogs", nargs='?', action='store', default=None,
-                             const=os.getcwd(),
-                             help="Write worker logs received by the leader into their own files at the specified "
-                                  "path. Any non-empty standard output and error from failed batch system jobs will "
-                                  "also be written into files at this path. The current working directory will be "
-                                  "used if a path is not specified explicitly. Note: By default only the logs of "
-                                  "failed jobs are returned to leader. Set log level to 'debug' or enable "
-                                  "'--writeLogsFromAllJobs' to get logs back from successful jobs, and adjust "
-                                  "'maxLogFileSize' to control the truncation limit for worker logs.")
-    log_options.add_argument("--writeLogsGzip", dest="writeLogsGzip", nargs='?', action='store', default=None,
[… diff truncated here in the original rendering …]
```
|
|
795
|
-
const=os.getcwd(),
|
|
796
|
-
help="Identical to --writeLogs except the logs files are gzipped on the leader.")
|
|
797
|
-
log_options.add_argument("--writeLogsFromAllJobs", dest="writeLogsFromAllJobs", action='store_true',
|
|
798
|
-
default=False,
|
|
799
|
-
help="Whether to write logs from all jobs (including the successful ones) without "
|
|
800
|
-
"necessarily setting the log level to 'debug'. Ensure that either --writeLogs "
|
|
801
|
-
"or --writeLogsGzip is set if enabling this option.")
|
|
802
|
-
log_options.add_argument("--writeMessages", dest="write_messages", default=None,
|
|
803
|
-
help="File to send messages from the leader's message bus to.")
|
|
804
|
-
log_options.add_argument("--realTimeLogging", dest="realTimeLogging", action="store_true", default=False,
|
|
805
|
-
help="Enable real-time logging from workers to leader")
|
|
806
|
-
|
|
807
|
-
# Misc options
|
|
808
|
-
misc_options = parser.add_argument_group(
|
|
809
|
-
title="Toil miscellaneous options.",
|
|
810
|
-
description="Everything else."
|
|
811
|
-
)
|
|
812
|
-
misc_options.add_argument('--disableChaining', dest='disableChaining', action='store_true', default=False,
|
|
813
|
-
help="Disables chaining of jobs (chaining uses one job's resource allocation "
|
|
814
|
-
"for its successor job if possible).")
|
|
815
|
-
misc_options.add_argument("--disableJobStoreChecksumVerification", dest="disableJobStoreChecksumVerification",
|
|
816
|
-
default=False, action="store_true",
|
|
817
|
-
help="Disables checksum verification for files transferred to/from the job store. "
|
|
818
|
-
"Checksum verification is a safety check to ensure the data is not corrupted "
|
|
819
|
-
"during transfer. Currently only supported for non-streaming AWS files.")
|
|
820
|
-
misc_options.add_argument("--sseKey", dest="sseKey", default=None,
|
|
821
|
-
help="Path to file containing 32 character key to be used for server-side encryption on "
|
|
822
|
-
"awsJobStore or googleJobStore. SSE will not be used if this flag is not passed.")
|
|
823
|
-
misc_options.add_argument("--setEnv", '-e', metavar='NAME=VALUE or NAME', dest="environment", default=[],
|
|
824
|
-
action="append",
|
|
825
|
-
help="Set an environment variable early on in the worker. If VALUE is omitted, it will "
|
|
826
|
-
"be looked up in the current environment. Independently of this option, the worker "
|
|
827
|
-
"will try to emulate the leader's environment before running a job, except for "
|
|
828
|
-
"some variables known to vary across systems. Using this option, a variable can "
|
|
829
|
-
"be injected into the worker process itself before it is started.")
|
|
830
|
-
misc_options.add_argument("--servicePollingInterval", dest="servicePollingInterval", default=None,
|
|
831
|
-
help=f"Interval of time service jobs wait between polling for the existence of the "
|
|
832
|
-
f"keep-alive flag. Default: {config.servicePollingInterval}")
|
|
833
|
-
misc_options.add_argument('--forceDockerAppliance', dest='forceDockerAppliance', action='store_true', default=False,
|
|
834
|
-
help='Disables sanity checking the existence of the docker image specified by '
|
|
835
|
-
'TOIL_APPLIANCE_SELF, which Toil uses to provision mesos for autoscaling.')
|
|
836
|
-
misc_options.add_argument('--statusWait', dest='statusWait', type=int, default=3600,
|
|
837
|
-
help="Seconds to wait between reports of running jobs.")
|
|
838
|
-
misc_options.add_argument('--disableProgress', dest='disableProgress', action='store_true', default=False,
|
|
839
|
-
help="Disables the progress bar shown when standard error is a terminal.")
|
|
646
|
+
# configargparse advertises itself as a drag and drop replacement, and running the normal argparse ArgumentParser
|
|
647
|
+
# through this code still seems to work (with the exception of --config and environmental variables)
|
|
648
|
+
warnings.warn(f'Using deprecated library argparse for options parsing.'
|
|
649
|
+
f'This will not parse config files or use environment variables.'
|
|
650
|
+
f'Use configargparse instead or call Job.Runner.getDefaultArgumentParser()',
|
|
651
|
+
DeprecationWarning)
|
|
652
|
+
|
|
653
|
+
check_and_create_default_config_file()
|
|
654
|
+
# Check on the config file to make sure it is sensible
|
|
655
|
+
config_status = os.stat(DEFAULT_CONFIG_FILE)
|
|
656
|
+
if config_status.st_size == 0:
|
|
657
|
+
# If we have an empty config file, someone has to manually delete
|
|
658
|
+
# it before we will work again.
|
|
659
|
+
raise RuntimeError(
|
|
660
|
+
f"Config file {DEFAULT_CONFIG_FILE} exists but is empty. Delete it! Stat says: {config_status}")
|
|
661
|
+
try:
|
|
662
|
+
with open(DEFAULT_CONFIG_FILE, "r") as f:
|
|
663
|
+
yaml = YAML(typ="safe")
|
|
664
|
+
s = yaml.load(f)
|
|
665
|
+
logger.debug("Initialized default configuration: %s", json.dumps(s))
|
|
666
|
+
except:
|
|
667
|
+
# Something went wrong reading the default config, so dump its
|
|
668
|
+
# contents to the log.
|
|
669
|
+
logger.info("Configuration file contents: %s", open(DEFAULT_CONFIG_FILE, 'r').read())
|
|
670
|
+
raise
|
|
671
|
+
|
|
672
|
+
# Add base toil options
|
|
673
|
+
add_base_toil_options(parser, jobstore_as_flag, cwl)
|
|
674
|
+
# Add CWL and WDL options
|
|
675
|
+
# This is done so the config file can hold all available options
|
|
676
|
+
add_cwl_options(parser, suppress=not cwl)
|
|
677
|
+
add_wdl_options(parser, suppress=not wdl)
|
|
678
|
+
|
|
679
|
+
def check_arguments(typ: str) -> None:
|
|
680
|
+
"""
|
|
681
|
+
Check that the other opposing runner's options are not on the command line.
|
|
682
|
+
Ex: if the parser is supposed to be a CWL parser, ensure that WDL commands are not on the command line
|
|
683
|
+
:param typ: string of either "cwl" or "wdl" to specify which runner to check against
|
|
684
|
+
:return: None, raise parser error if option is found
|
|
685
|
+
"""
|
|
686
|
+
check_parser = ArgParser()
|
|
687
|
+
if typ == "wdl":
|
|
688
|
+
add_cwl_options(check_parser)
|
|
689
|
+
if typ == "cwl":
|
|
690
|
+
add_wdl_options(check_parser)
|
|
691
|
+
for action in check_parser._actions:
|
|
692
|
+
action.default = SUPPRESS
|
|
693
|
+
other_options, _ = check_parser.parse_known_args(sys.argv[1:], ignore_help_args=True)
|
|
694
|
+
if len(vars(other_options)) != 0:
|
|
695
|
+
raise parser.error(f"{'WDL' if typ == 'cwl' else 'CWL'} options are not allowed on the command line.")
|
|
696
|
+
|
|
697
|
+
# if cwl is set, format the namespace for cwl and check that wdl options are not set on the command line
|
|
698
|
+
if cwl:
|
|
699
|
+
parser.add_argument("cwltool", type=str, help="CWL file to run.")
|
|
700
|
+
parser.add_argument("cwljob", nargs="*", help="Input file or CWL options. If CWL workflow takes an input, "
|
|
701
|
+
"the name of the input can be used as an option. "
|
|
702
|
+
"For example: \"%(prog)s workflow.cwl --file1 file\". "
|
|
703
|
+
"If an input has the same name as a Toil option, pass '--' before it.")
|
|
704
|
+
check_arguments(typ="cwl")
|
|
705
|
+
|
|
706
|
+
# if wdl is set, format the namespace for wdl and check that cwl options are not set on the command line
|
|
707
|
+
if wdl:
|
|
708
|
+
parser.add_argument("wdl_uri", type=str,
|
|
709
|
+
help="WDL document URI")
|
|
710
|
+
parser.add_argument("inputs_uri", type=str, nargs='?',
|
|
711
|
+
help="WDL input JSON URI")
|
|
712
|
+
parser.add_argument("--input", "-i", dest="inputs_uri", type=str,
|
|
713
|
+
help="WDL input JSON URI")
|
|
714
|
+
check_arguments(typ="wdl")
|
|
840
715
|
|
|
841
|
-
# Debug options
|
|
842
|
-
debug_options = parser.add_argument_group(
|
|
843
|
-
title="Toil debug options.",
|
|
844
|
-
description="Debug options for finding problems or helping with testing."
|
|
845
|
-
)
|
|
846
|
-
debug_options.add_argument("--debugWorker", default=False, action="store_true",
|
|
847
|
-
help="Experimental no forking mode for local debugging. Specifically, workers "
|
|
848
|
-
"are not forked and stderr/stdout are not redirected to the log.")
|
|
849
|
-
debug_options.add_argument("--disableWorkerOutputCapture", default=False, action="store_true",
|
|
850
|
-
help="Let worker output go to worker's standard out/error instead of per-job logs.")
|
|
851
|
-
debug_options.add_argument("--badWorker", dest="badWorker", default=None,
|
|
852
|
-
help=f"For testing purposes randomly kill --badWorker proportion of jobs using "
|
|
853
|
-
f"SIGKILL. default={config.badWorker}")
|
|
854
|
-
debug_options.add_argument("--badWorkerFailInterval", dest="badWorkerFailInterval", default=None,
|
|
855
|
-
help=f"When killing the job pick uniformly within the interval from 0.0 to "
|
|
856
|
-
f"--badWorkerFailInterval seconds after the worker starts. "
|
|
857
|
-
f"default={config.badWorkerFailInterval}")
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
def parseBool(val: str) -> bool:
|
|
861
|
-
if val.lower() in ['true', 't', 'yes', 'y', 'on', '1']:
|
|
862
|
-
return True
|
|
863
|
-
elif val.lower() in ['false', 'f', 'no', 'n', 'off', '0']:
|
|
864
|
-
return False
|
|
865
|
-
else:
|
|
866
|
-
raise RuntimeError("Could not interpret \"%s\" as a boolean value" % val)
|
|
867
716
|
|
|
868
717
|
@lru_cache(maxsize=None)
|
|
869
718
|
def getNodeID() -> str:
|
|
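The new argument-handling block above replaces the hand-rolled option groups with a config-file-aware flow plus a cross-runner guard. Below is a minimal sketch, assuming plain `argparse` and hypothetical flag names, of the SUPPRESS trick that `check_arguments` uses to detect forbidden options (the real code goes through configargparse's `ArgParser` and the real CWL/WDL option sets):

```python
# When every action's default is argparse.SUPPRESS, the parsed namespace only
# gains an attribute for options that were explicitly passed, so a non-empty
# vars() means a forbidden flag appeared. Flag names here are made up.
import argparse

forbidden = argparse.ArgumentParser(add_help=False)
forbidden.add_argument("--wdl-only-flag")  # hypothetical WDL-side option
for action in forbidden._actions:
    action.default = argparse.SUPPRESS

opts, _ = forbidden.parse_known_args(["--some-cwl-input", "x"])
assert vars(opts) == {}  # nothing forbidden on this command line

opts, _ = forbidden.parse_known_args(["--wdl-only-flag", "y"])
assert vars(opts) == {"wdl_only_flag": "y"}  # forbidden option detected
```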
@@ -960,10 +809,12 @@ class Toil(ContextManager["Toil"]):
         set_logging_from_options(self.options)
         config = Config()
         config.setOptions(self.options)
+        if config.jobStore is None:
+            raise RuntimeError("No jobstore provided!")
         jobStore = self.getJobStore(config.jobStore)
         if config.caching is None:
             config.caching = jobStore.default_caching()
-            #Set the caching option because it wasn't set originally, resuming jobstore rebuilds config from CLI options
+            # Set the caching option because it wasn't set originally, resuming jobstore rebuilds config from CLI options
         self.options.caching = config.caching
 
         if not config.restart:
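The added `jobStore` check is the standard fail-fast Optional-narrowing pattern; a small sketch under assumed names (`Config` here is a stand-in, not Toil's class):

```python
from typing import Optional

class Config:  # stand-in class for illustration only
    jobStore: Optional[str] = None

def open_job_store(config: Config) -> str:
    if config.jobStore is None:
        # Fail early with a clear message instead of a confusing downstream error.
        raise RuntimeError("No jobstore provided!")
    # After the guard, type checkers narrow config.jobStore from Optional[str] to str.
    return config.jobStore
```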
@@ -986,10 +837,10 @@ class Toil(ContextManager["Toil"]):
         return self
 
     def __exit__(
-            self,
-            exc_type: Optional[Type[BaseException]],
-            exc_val: Optional[BaseException],
-            exc_tb: Optional[TracebackType],
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
     ) -> Literal[False]:
         """
         Clean up after a workflow invocation.
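The re-indented `__exit__` signature follows the fully annotated context-manager protocol; a self-contained sketch of the same shape (the `Workflow` class is illustrative, not Toil's):

```python
from types import TracebackType
from typing import Literal, Optional, Type

class Workflow:
    def __enter__(self) -> "Workflow":
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> Literal[False]:
        # Clean-up would happen here; returning False (never True) tells both
        # the runtime and the type checker that exceptions always propagate.
        return False
```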
@@ -1174,13 +1025,13 @@ class Toil(ContextManager["Toil"]):
             maxMemory=config.maxMemory,
             maxDisk=config.maxDisk)
 
-        from toil.batchSystems.registry import
+        from toil.batchSystems.registry import get_batch_system, get_batch_systems
 
         try:
-            batch_system =
+            batch_system = get_batch_system(config.batchSystem)
         except KeyError:
             raise RuntimeError(f'Unrecognized batch system: {config.batchSystem} '
-                               f'(choose from: {
+                               f'(choose from: {", ".join(get_batch_systems())})')
 
         if config.caching and not batch_system.supportsWorkerCleanup():
             raise RuntimeError(f'{config.batchSystem} currently does not support shared caching, because it '
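The hunk above switches to named registry accessors. A rough sketch of the lookup-plus-enumeration pattern, with a placeholder registry (the real `get_batch_system`/`get_batch_systems` live in `toil.batchSystems.registry`):

```python
from typing import Callable, Dict, List

_REGISTRY: Dict[str, Callable[[], object]] = {
    "single_machine": object,  # placeholder factories for illustration
    "kubernetes": object,
}

def get_batch_system(name: str) -> Callable[[], object]:
    return _REGISTRY[name]  # raises KeyError for unknown names

def get_batch_systems() -> List[str]:
    return sorted(_REGISTRY)

try:
    factory = get_batch_system("slurm")  # not registered in this sketch
except KeyError:
    # Turn the KeyError into an actionable message listing the valid choices.
    print(f"Unrecognized batch system: slurm "
          f"(choose from: {', '.join(get_batch_systems())})")
```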
@@ -1192,7 +1043,7 @@ class Toil(ContextManager["Toil"]):
         return batch_system(**kwargs)
 
     def _setupAutoDeployment(
-            self, userScript: Optional["ModuleDescriptor"] = None
+        self, userScript: Optional["ModuleDescriptor"] = None
     ) -> None:
         """
         Determine the user script, save it to the job store and inject a reference to the saved copy into the batch system.
@@ -1236,7 +1087,7 @@ class Toil(ContextManager["Toil"]):
             logger.debug('No user script to auto-deploy.')
         else:
             logger.debug('Saving user script %s as a resource', userScript)
-            userScriptResource = userScript.saveAsResourceTo(self._jobStore)
+            userScriptResource = userScript.saveAsResourceTo(self._jobStore)
             logger.debug('Injecting user script %s into batch system.', userScriptResource)
             self._batchSystem.setUserScript(userScriptResource)
 
@@ -1247,13 +1098,15 @@ class Toil(ContextManager["Toil"]):
     def importFile(self,
                    srcUrl: str,
                    sharedFileName: str,
-                   symlink: bool = True) -> None:
+                   symlink: bool = True) -> None:
+        ...
 
     @overload
     def importFile(self,
                    srcUrl: str,
                    sharedFileName: None = None,
-                   symlink: bool = True) -> FileID:
+                   symlink: bool = True) -> FileID:
+        ...
 
     @deprecated(new_function_name='import_file')
     def importFile(self,
@@ -1267,14 +1120,16 @@ class Toil(ContextManager["Toil"]):
                     src_uri: str,
                     shared_file_name: str,
                     symlink: bool = True,
-                    check_existence: bool = True) -> None:
+                    check_existence: bool = True) -> None:
+        ...
 
     @overload
     def import_file(self,
                     src_uri: str,
                     shared_file_name: None = None,
                     symlink: bool = True,
-                    check_existence: bool = True) -> FileID:
+                    check_existence: bool = True) -> FileID:
+        ...
 
     def import_file(self,
                     src_uri: str,
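The `...` bodies added in these two hunks are what `typing.overload` requires: decorated definitions are type-checker-only stubs, and the undecorated definition underneath does the real work. A condensed sketch with a hypothetical free function, simpler than Toil's method:

```python
from typing import Optional, overload

@overload
def import_file(src_uri: str, shared_file_name: str) -> None: ...
@overload
def import_file(src_uri: str, shared_file_name: None = None) -> str: ...

def import_file(src_uri: str, shared_file_name: Optional[str] = None) -> Optional[str]:
    # Real implementation: a named import stores under that name and returns
    # nothing; an anonymous import returns a generated file ID.
    return None if shared_file_name is not None else f"id-for-{src_uri}"
```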
@@ -1340,7 +1195,7 @@ class Toil(ContextManager["Toil"]):
         a local file that does not exist.
         """
         if urlparse(uri).scheme == 'file':
-            uri = urlparse(uri).path  # this should strip off the local file scheme; it will be added back
+            uri = unquote(urlparse(uri).path)  # this should strip off the local file scheme; it will be added back
 
         # account for the scheme-less case, which should be coerced to a local absolute path
         if urlparse(uri).scheme == '':
@@ -1350,7 +1205,7 @@ class Toil(ContextManager["Toil"]):
                 f'Could not find local file "{abs_path}" when importing "{uri}".\n'
                 f'Make sure paths are relative to "{os.getcwd()}" or use absolute paths.\n'
                 f'If this is not a local file, please include the scheme (s3:/, gs:/, ftp://, etc.).')
-            return f'file://{abs_path}'
+            return f'file://{quote(abs_path)}'
         return uri
 
     def _setBatchSystemEnvVars(self) -> None:
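The `quote()`/`unquote()` changes make local paths with special characters survive the round trip through a `file://` URI; a short sketch:

```python
from urllib.parse import quote, unquote, urlparse

abs_path = "/data/my workflow/input file.txt"  # hypothetical local path with spaces
uri = f"file://{quote(abs_path)}"              # file:///data/my%20workflow/input%20file.txt
assert urlparse(uri).scheme == "file"
assert unquote(urlparse(uri).path) == abs_path  # decoding restores the original path
```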
@@ -1392,7 +1247,8 @@ class Toil(ContextManager["Toil"]):
         :param configWorkDir: Value passed to the program using the --workDir flag
         :return: Path to the Toil work directory, constant across all machines
         """
-        workDir = os.getenv('TOIL_WORKDIR_OVERRIDE') or configWorkDir or os.getenv('TOIL_WORKDIR') or tempfile.gettempdir()
+        workDir = os.getenv('TOIL_WORKDIR_OVERRIDE') or configWorkDir or os.getenv(
+            'TOIL_WORKDIR') or tempfile.gettempdir()
         if not os.path.exists(workDir):
             raise RuntimeError(f'The directory specified by --workDir or TOIL_WORKDIR ({workDir}) does not exist.')
         return workDir
@@ -1413,34 +1269,30 @@ class Toil(ContextManager["Toil"]):
         deleted.
         """
 
-        if 'XDG_RUNTIME_DIR' in os.environ and not os.path.exists(os.environ['XDG_RUNTIME_DIR']):
-            # Slurm has been observed providing this variable but not keeping
-            # the directory live as long as we run for.
-            logger.warning('XDG_RUNTIME_DIR is set to nonexistent directory %s; your environment may be out of spec!', os.environ['XDG_RUNTIME_DIR'])
-
         # Go get a coordination directory, using a lot of short-circuiting of
         # or and the fact that and returns its second argument when it
         # succeeds.
         coordination_dir: Optional[str] = (
             # First try an override env var
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            os.getenv('TOIL_COORDINATION_DIR_OVERRIDE') or
+            # Then the value from the config
+            config_coordination_dir or
+            # Then a normal env var
+            # TODO: why/how would this propagate when not using single machine?
+            os.getenv('TOIL_COORDINATION_DIR') or
+            # Then try a `toil` subdirectory of the XDG runtime directory
+            # (often /var/run/users/<UID>). But only if we are actually in a
+            # session that has the env var set. Otherwise it might belong to a
+            # different set of sessions and get cleaned up out from under us
+            # when that session ends.
+            # We don't think Slurm XDG sessions are trustworthy, depending on
+            # the cluster's PAM configuration, so don't use them.
+            ('XDG_RUNTIME_DIR' in os.environ and 'SLURM_JOBID' not in os.environ and try_path(
+                os.path.join(os.environ['XDG_RUNTIME_DIR'], 'toil'))) or
+            # Try under /run/lock. It might be a temp dir style sticky directory.
+            try_path('/run/lock') or
+            # Finally, fall back on the work dir and hope it's a legit filesystem.
+            cls.getToilWorkDir(config_work_dir)
         )
 
         if coordination_dir is None:
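The rewritten block is one long `or` chain: each candidate either yields a usable path or something falsy, so evaluation falls through left to right. A condensed sketch with a simplified stand-in for Toil's `try_path` helper:

```python
import os
from typing import Optional

def try_path(path: str) -> Optional[str]:
    # Stand-in: accept the path only if it already exists as a directory.
    return path if os.path.isdir(path) else None

def pick_coordination_dir(config_dir: Optional[str]) -> Optional[str]:
    return (
        os.getenv("TOIL_COORDINATION_DIR_OVERRIDE")
        or config_dir
        or os.getenv("TOIL_COORDINATION_DIR")
        # Only trust XDG_RUNTIME_DIR outside Slurm jobs, mirroring the diff;
        # `and` returns its second operand when the checks pass.
        or ("XDG_RUNTIME_DIR" in os.environ and "SLURM_JOBID" not in os.environ
            and try_path(os.path.join(os.environ["XDG_RUNTIME_DIR"], "toil")))
        or try_path("/run/lock")
        or None
    )
```

If the `and` chain short-circuits it yields `False`, which the next `or` skips; the trailing `or None` keeps the failure value a clean `None`.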
@@ -1449,7 +1301,7 @@ class Toil(ContextManager["Toil"]):
         return coordination_dir
 
     @staticmethod
-    def
+    def get_workflow_path_component(workflow_id: str) -> str:
         """
         Get a safe filesystem path component for a workflow.
 
@@ -1458,11 +1310,11 @@ class Toil(ContextManager["Toil"]):
 
         :param workflow_id: The ID of the current Toil workflow.
         """
-        return str(uuid.uuid5(uuid.UUID(getNodeID()), workflow_id)).replace('-', '')
+        return "toilwf-" + str(uuid.uuid5(uuid.UUID(getNodeID()), workflow_id)).replace('-', '')
 
     @classmethod
     def getLocalWorkflowDir(
-            cls, workflowID: str, configWorkDir: Optional[str] = None
+        cls, workflowID: str, configWorkDir: Optional[str] = None
    ) -> str:
         """
         Return the directory where worker directories and the cache will be located for this workflow on this machine.
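The renamed helper and its new `"toilwf-"` prefix produce a deterministic, recognizable directory name per (node, workflow) pair; a sketch with a made-up node UUID:

```python
import uuid

node_id = "1b4e28ba-2fa1-11d2-883f-0016d3cca427"  # hypothetical getNodeID() result
workflow_id = "my-workflow-123"

# uuid5 is a stable hash: same namespace UUID + same name => same output.
component = "toilwf-" + str(uuid.uuid5(uuid.UUID(node_id), workflow_id)).replace("-", "")
print(component)  # toilwf-<32 hex digits>, identical on every call on this node
```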
@@ -1475,7 +1327,7 @@ class Toil(ContextManager["Toil"]):
 
         # Create a directory unique to each host in case workDir is on a shared FS.
         # This prevents workers on different nodes from erasing each other's directories.
-        workflowDir: str = os.path.join(base, cls.
+        workflowDir: str = os.path.join(base, cls.get_workflow_path_component(workflowID))
         try:
             # Directory creation is atomic
             os.mkdir(workflowDir)
@@ -1489,10 +1341,10 @@ class Toil(ContextManager["Toil"]):
 
     @classmethod
     def get_local_workflow_coordination_dir(
-            cls,
-            workflow_id: str,
-            config_work_dir: Optional[str],
-            config_coordination_dir: Optional[str]
+        cls,
+        workflow_id: str,
+        config_work_dir: Optional[str],
+        config_coordination_dir: Optional[str]
     ) -> str:
         """
         Return the directory where coordination files should be located for
@@ -1517,7 +1369,7 @@ class Toil(ContextManager["Toil"]):
         base = cls.get_toil_coordination_dir(config_work_dir, config_coordination_dir)
 
         # Make a per-workflow and node subdirectory
-        subdir = os.path.join(base, cls.
+        subdir = os.path.join(base, cls.get_workflow_path_component(workflow_id))
         # Make it exist
         os.makedirs(subdir, exist_ok=True)
         # TODO: May interfere with workflow directory creation logging if it's the same directory.
@@ -1654,8 +1506,10 @@ class ToilMetrics:
         # The only way to make this inteligible to MyPy is to wrap the dict in
         # a function that can cast.
         MessageType = TypeVar('MessageType')
+
         def get_listener(message_type: Type[MessageType]) -> Callable[[MessageType], None]:
             return cast(Callable[[MessageType], None], TARGETS[message_type])
+
         # Then set up the listeners.
         self._listeners = [bus.subscribe(message_type, get_listener(message_type)) for message_type in TARGETS.keys()]
 
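The blank lines added around `get_listener` are cosmetic, but the function itself is a useful trick: a TypeVar-parameterized getter that `cast()`s handlers out of a heterogeneous dict so each `subscribe` call type-checks. A self-contained sketch with stand-in message types:

```python
from typing import Callable, Dict, Type, TypeVar, cast

class JobMissingMessage: ...
class ClusterSizeMessage: ...

# Values take different parameter types, so the dict can only be loosely typed.
TARGETS: Dict[type, Callable] = {
    JobMissingMessage: lambda m: print("missing job:", m),
    ClusterSizeMessage: lambda m: print("cluster size:", m),
}

MessageType = TypeVar("MessageType")

def get_listener(message_type: Type[MessageType]) -> Callable[[MessageType], None]:
    # cast() narrows the loosely typed dict value to the precise callable type.
    return cast(Callable[[MessageType], None], TARGETS[message_type])

get_listener(ClusterSizeMessage)(ClusterSizeMessage())  # prints "cluster size: ..."
```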
@@ -1720,12 +1574,12 @@ class ToilMetrics:
         # remaining intact
 
     def logClusterSize(
-            self, m: ClusterSizeMessage
+        self, m: ClusterSizeMessage
     ) -> None:
         self.log("current_size '%s' %i" % (m.instance_type, m.current_size))
 
     def logClusterDesiredSize(
-            self, m: ClusterDesiredSizeMessage
+        self, m: ClusterDesiredSizeMessage
     ) -> None:
         self.log("desired_size '%s' %i" % (m.instance_type, m.desired_size))
 
@@ -1756,76 +1610,6 @@ class ToilMetrics:
         self._listeners = []
 
 
-def parseSetEnv(l: List[str]) -> Dict[str, Optional[str]]:
-    """
-    Parse a list of strings of the form "NAME=VALUE" or just "NAME" into a dictionary.
-
-    Strings of the latter from will result in dictionary entries whose value is None.
-
-    >>> parseSetEnv([])
-    {}
-    >>> parseSetEnv(['a'])
-    {'a': None}
-    >>> parseSetEnv(['a='])
-    {'a': ''}
-    >>> parseSetEnv(['a=b'])
-    {'a': 'b'}
-    >>> parseSetEnv(['a=a', 'a=b'])
-    {'a': 'b'}
-    >>> parseSetEnv(['a=b', 'c=d'])
-    {'a': 'b', 'c': 'd'}
-    >>> parseSetEnv(['a=b=c'])
-    {'a': 'b=c'}
-    >>> parseSetEnv([''])
-    Traceback (most recent call last):
-    ...
-    ValueError: Empty name
-    >>> parseSetEnv(['=1'])
-    Traceback (most recent call last):
-    ...
-    ValueError: Empty name
-    """
-    d = {}
-    v: Optional[str] = None
-    for i in l:
-        try:
-            k, v = i.split('=', 1)
-        except ValueError:
-            k, v = i, None
-        if not k:
-            raise ValueError('Empty name')
-        d[k] = v
-    return d
-
-
-def iC(minValue: int, maxValue: int = SYS_MAX_SIZE) -> Callable[[int], bool]:
-    """Returns a function that checks if a given int is in the given half-open interval."""
-    assert isinstance(minValue, int) and isinstance(maxValue, int)
-    return lambda x: minValue <= x < maxValue
-
-
-def fC(minValue: float, maxValue: Optional[float] = None) -> Callable[[float], bool]:
-    """Returns a function that checks if a given float is in the given half-open interval."""
-    assert isinstance(minValue, float)
-    if maxValue is None:
-        return lambda x: minValue <= x
-    assert isinstance(maxValue, float)
-    return lambda x: minValue <= x < maxValue  # type: ignore
-
-def parse_accelerator_list(specs: Optional[str]) -> List['AcceleratorRequirement']:
-    """
-    Parse a string description of one or more accelerator requirements.
-    """
-
-    if specs is None or len(specs) == 0:
-        # Not specified, so the default default is to not need any.
-        return []
-    # Otherwise parse each requirement.
-    from toil.job import parse_accelerator
-
-    return [parse_accelerator(r) for r in specs.split(',')]
-
-
 def cacheDirName(workflowID: str) -> str:
     """
     :return: Name of the cache directory.
@@ -1844,10 +1628,7 @@ def getDirSizeRecursively(dirPath: str) -> int:
     internally, and a (possibly 0) lower bound on the size of the directory
     will be returned.
 
-    The environment variable 'BLOCKSIZE'='512' is set instead of the much cleaner
-    --block-size=1 because Apple can't handle it.
-
-    :param str dirPath: A valid path to a directory or file.
+    :param dirPath: A valid path to a directory or file.
     :return: Total size, in bytes, of the file or directory at dirPath.
     """
@@ -1857,12 +1638,22 @@ def getDirSizeRecursively(dirPath: str) -> int:
     # allocated with the environment variable: BLOCKSIZE='512' set, and we
     # multiply this by 512 to return the filesize in bytes.
 
+    dirPath = os.path.abspath(dirPath)
     try:
         return int(subprocess.check_output(['du', '-s', dirPath],
                                            env=dict(os.environ, BLOCKSIZE='512')).decode('utf-8').split()[0]) * 512
-
-    #
-
+    # The environment variable 'BLOCKSIZE'='512' is set instead of the much cleaner
+    # --block-size=1 because Apple can't handle it.
+    except (OSError, subprocess.CalledProcessError):
+        # Fallback to pure Python implementation, useful for when kernel limits
+        # to argument list size are hit, etc..
+        total_size: int = 0
+        if os.path.isfile(dirPath):
+            return os.lstat(dirPath).st_blocks * 512
+        for dir_path, dir_names, filenames in os.walk(dirPath):
+            for name in filenames:
+                total_size += os.lstat(os.path.join(dir_path, name)).st_blocks * 512
+        return total_size
 
 
 def getFileSystemSize(dirPath: str) -> Tuple[int, int]:
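The new fallback makes the directory-size probe robust: `du` stays the fast path, and a pure-Python walk that sums `st_blocks * 512` (allocated bytes, matching `du`'s accounting) takes over when `du` is missing or fails. A standalone sketch of the same logic (Unix-only, since `st_blocks` is unavailable on Windows):

```python
import os
import subprocess

def dir_size_bytes(path: str) -> int:
    path = os.path.abspath(path)
    try:
        # BLOCKSIZE=512 instead of --block-size=1 for macOS compatibility.
        out = subprocess.check_output(["du", "-s", path],
                                      env=dict(os.environ, BLOCKSIZE="512"))
        return int(out.decode("utf-8").split()[0]) * 512
    except (OSError, subprocess.CalledProcessError):
        if os.path.isfile(path):
            return os.lstat(path).st_blocks * 512
        total = 0
        for dirpath, _dirnames, filenames in os.walk(path):
            for name in filenames:
                # lstat avoids following symlinks, like the diff's version.
                total += os.lstat(os.path.join(dirpath, name)).st_blocks * 512
        return total
```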