toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +124 -86
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +39 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +651 -155
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +784 -397
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1137 -534
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +1031 -349
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +772 -412
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +204 -58
- toil/lib/aws/utils.py +290 -213
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/dockstore.py +379 -0
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -105
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/history.py +1271 -0
- toil/lib/history_submission.py +681 -0
- toil/lib/humanize.py +6 -2
- toil/lib/io.py +121 -12
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +83 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +125 -87
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/lib/trs.py +390 -0
- toil/lib/web.py +38 -0
- toil/options/common.py +850 -402
- toil/options/cwl.py +185 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +283 -180
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +84 -55
- toil/server/utils.py +56 -31
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +183 -65
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +265 -49
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +375 -72
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_history.py +212 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/lib/test_trs.py +161 -0
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +6 -6
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3528 -1053
- toil/worker.py +370 -149
- toil-8.1.0b1.dist-info/METADATA +178 -0
- toil-8.1.0b1.dist-info/RECORD +259 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/lib/accelerators.py
CHANGED
|
@@ -15,8 +15,9 @@
|
|
|
15
15
|
"""Accelerator (i.e. GPU) utilities for Toil"""
|
|
16
16
|
|
|
17
17
|
import os
|
|
18
|
+
import string
|
|
18
19
|
import subprocess
|
|
19
|
-
from typing import
|
|
20
|
+
from typing import Union, cast
|
|
20
21
|
from xml.dom import minidom
|
|
21
22
|
|
|
22
23
|
from toil.job import AcceleratorRequirement
|
|
@@ -33,13 +34,20 @@ def have_working_nvidia_smi() -> bool:
|
|
|
33
34
|
it can fulfill a CUDARequirement.
|
|
34
35
|
"""
|
|
35
36
|
try:
|
|
36
|
-
subprocess.check_call([
|
|
37
|
-
except (
|
|
37
|
+
subprocess.check_call(["nvidia-smi"])
|
|
38
|
+
except (
|
|
39
|
+
FileNotFoundError,
|
|
40
|
+
PermissionError,
|
|
41
|
+
subprocess.CalledProcessError,
|
|
42
|
+
OSError,
|
|
43
|
+
UnicodeDecodeError,
|
|
44
|
+
):
|
|
38
45
|
return False
|
|
39
46
|
return True
|
|
40
47
|
|
|
48
|
+
|
|
41
49
|
@memoize
|
|
42
|
-
def get_host_accelerator_numbers() ->
|
|
50
|
+
def get_host_accelerator_numbers() -> list[int]:
|
|
43
51
|
"""
|
|
44
52
|
Work out what accelerator is what.
|
|
45
53
|
|
|
@@ -51,7 +59,12 @@ def get_host_accelerator_numbers() -> List[int]:
|
|
|
51
59
|
right GPUs as seen from a Docker daemon.
|
|
52
60
|
"""
|
|
53
61
|
|
|
54
|
-
for number_list_var in [
|
|
62
|
+
for number_list_var in [
|
|
63
|
+
"SLURM_STEP_GPUS",
|
|
64
|
+
"SLURM_JOB_GPUS",
|
|
65
|
+
"CUDA_VISIBLE_DEVICES",
|
|
66
|
+
"NVIDIA_VISIBLE_DEVICES",
|
|
67
|
+
]:
|
|
55
68
|
# Any of these can have a list of GPU numbers, but the CUDA/NVIDIA ones
|
|
56
69
|
# also support a system of GPU GUIDs that we don't support.
|
|
57
70
|
# TODO: If Slurm confinement is set we ignore any attempt to further
|
|
@@ -61,7 +74,9 @@ def get_host_accelerator_numbers() -> List[int]:
|
|
|
61
74
|
if number_list_var in os.environ:
|
|
62
75
|
device_string = os.environ[number_list_var]
|
|
63
76
|
# Parse all the numbers we have
|
|
64
|
-
device_numbers = [
|
|
77
|
+
device_numbers = [
|
|
78
|
+
int(part) for part in device_string.split(",") if part.isnumeric()
|
|
79
|
+
]
|
|
65
80
|
if len(device_numbers) > 0:
|
|
66
81
|
# We found some numbers, so use those
|
|
67
82
|
return device_numbers
|
|
@@ -69,6 +84,7 @@ def get_host_accelerator_numbers() -> List[int]:
|
|
|
69
84
|
# If we don't see a set of limits we understand, say we have all nvidia GPUs
|
|
70
85
|
return list(range(count_nvidia_gpus()))
|
|
71
86
|
|
|
87
|
+
|
|
72
88
|
@memoize
|
|
73
89
|
def have_working_nvidia_docker_runtime() -> bool:
|
|
74
90
|
"""
|
|
@@ -76,11 +92,30 @@ def have_working_nvidia_docker_runtime() -> bool:
|
|
|
76
92
|
"""
|
|
77
93
|
try:
|
|
78
94
|
# The runtime injects nvidia-smi; it doesn't seem to have to be in the image we use here
|
|
79
|
-
subprocess.check_call(
|
|
80
|
-
|
|
95
|
+
subprocess.check_call(
|
|
96
|
+
[
|
|
97
|
+
"docker",
|
|
98
|
+
"run",
|
|
99
|
+
"--rm",
|
|
100
|
+
"--runtime",
|
|
101
|
+
"nvidia",
|
|
102
|
+
"--gpus",
|
|
103
|
+
"all",
|
|
104
|
+
"ubuntu:20.04",
|
|
105
|
+
"nvidia-smi",
|
|
106
|
+
]
|
|
107
|
+
)
|
|
108
|
+
except (
|
|
109
|
+
FileNotFoundError,
|
|
110
|
+
PermissionError,
|
|
111
|
+
subprocess.CalledProcessError,
|
|
112
|
+
OSError,
|
|
113
|
+
UnicodeDecodeError,
|
|
114
|
+
):
|
|
81
115
|
return False
|
|
82
116
|
return True
|
|
83
117
|
|
|
118
|
+
|
|
84
119
|
@memoize
|
|
85
120
|
def count_nvidia_gpus() -> int:
|
|
86
121
|
"""
|
|
@@ -100,7 +135,7 @@ def count_nvidia_gpus() -> int:
|
|
|
100
135
|
.firstChild,
|
|
101
136
|
).data
|
|
102
137
|
)
|
|
103
|
-
except:
|
|
138
|
+
except:
|
|
104
139
|
return 0
|
|
105
140
|
|
|
106
141
|
# TODO: Parse each gpu > product_name > text content and convert to some
|
|
@@ -108,7 +143,56 @@ def count_nvidia_gpus() -> int:
|
|
|
108
143
|
|
|
109
144
|
|
|
110
145
|
@memoize
|
|
111
|
-
def
|
|
146
|
+
def count_amd_gpus() -> int:
|
|
147
|
+
"""
|
|
148
|
+
Return the number of AMD GPUs seen by amd-smi (or by rocm-smi as a fallback), or 0 if neither is working.
|
|
149
|
+
:return:
|
|
150
|
+
"""
|
|
151
|
+
try:
|
|
152
|
+
# see if the amd-smi CLI tool is installed
|
|
153
|
+
# we believe this is the expected output for amd-smi, but we don't actually have an AMD GPU to test against
|
|
154
|
+
# so we assume the output from the amd-smi documentation:
|
|
155
|
+
# https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/using-AMD-SMI-CLI-tool.html
|
|
156
|
+
out = subprocess.check_output(["amd-smi", "static"])
|
|
157
|
+
gpu_count = len(
|
|
158
|
+
[line for line in out.decode("utf-8").split("\n") if line.startswith("gpu")]
|
|
159
|
+
)
|
|
160
|
+
return gpu_count
|
|
161
|
+
except (
|
|
162
|
+
FileNotFoundError,
|
|
163
|
+
PermissionError,
|
|
164
|
+
subprocess.SubprocessError,
|
|
165
|
+
OSError,
|
|
166
|
+
UnicodeDecodeError,
|
|
167
|
+
):
|
|
168
|
+
# if the amd-smi command fails, try rocm-smi
|
|
169
|
+
# if a different exception is raised, something other than the subprocess call is wrong
|
|
170
|
+
pass
|
|
171
|
+
try:
|
|
172
|
+
# similarly, since we don't have an AMD gpu to test against, assume the output from the rocm-smi documentation:
|
|
173
|
+
# https://rocm.blogs.amd.com/software-tools-optimization/affinity/part-2/README.html#gpu-numa-configuration-rocm-smi-showtoponuma
|
|
174
|
+
out = subprocess.check_output(["rocm-smi"])
|
|
175
|
+
gpu_count = len(
|
|
176
|
+
[
|
|
177
|
+
line
|
|
178
|
+
for line in out.decode("utf-8").split("\n")
|
|
179
|
+
if len(line) > 0 and line[0] in string.digits
|
|
180
|
+
]
|
|
181
|
+
)
|
|
182
|
+
return gpu_count
|
|
183
|
+
except (
|
|
184
|
+
FileNotFoundError,
|
|
185
|
+
PermissionError,
|
|
186
|
+
subprocess.SubprocessError,
|
|
187
|
+
OSError,
|
|
188
|
+
UnicodeDecodeError,
|
|
189
|
+
):
|
|
190
|
+
pass
|
|
191
|
+
return 0
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@memoize
|
|
195
|
+
def get_individual_local_accelerators() -> list[AcceleratorRequirement]:
|
|
112
196
|
"""
|
|
113
197
|
Determine all the local accelerators available. Report each with count 1,
|
|
114
198
|
in the order of the number that can be used to assign them.
|
|
@@ -117,10 +201,22 @@ def get_individual_local_accelerators() -> List[AcceleratorRequirement]:
|
|
|
117
201
|
accelerator assignment API.
|
|
118
202
|
"""
|
|
119
203
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
204
|
+
gpus: list[AcceleratorRequirement] = [
|
|
205
|
+
{"kind": "gpu", "brand": "nvidia", "api": "cuda", "count": 1}
|
|
206
|
+
for _ in range(count_nvidia_gpus())
|
|
207
|
+
]
|
|
208
|
+
gpus.extend(
|
|
209
|
+
[
|
|
210
|
+
{"kind": "gpu", "brand": "amd", "api": "rocm", "count": 1}
|
|
211
|
+
for _ in range(count_amd_gpus())
|
|
212
|
+
]
|
|
213
|
+
)
|
|
214
|
+
return gpus
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def get_restrictive_environment_for_local_accelerators(
|
|
218
|
+
accelerator_numbers: Union[set[int], list[int]]
|
|
219
|
+
) -> dict[str, str]:
|
|
124
220
|
"""
|
|
125
221
|
Get environment variables which can be applied to a process to restrict it
|
|
126
222
|
to using only the given accelerator numbers.
|
|
@@ -131,11 +227,12 @@ def get_restrictive_environment_for_local_accelerators(accelerator_numbers : Uni
|
|
|
131
227
|
|
|
132
228
|
# Since we only know about nvidia GPUs right now, we can just say our
|
|
133
229
|
# accelerator numbering space is the same as nvidia's GPU numbering space.
|
|
134
|
-
gpu_list =
|
|
230
|
+
gpu_list = ",".join(str(i) for i in accelerator_numbers)
|
|
135
231
|
|
|
136
232
|
# Put this in several places: CUDA_VISIBLE_DEVICES for controlling
|
|
137
233
|
# processes right here, and SINGULARITYENV_CUDA_VISIBLE_DEVICES for
|
|
138
234
|
# propagating to Singularity containers.
|
|
139
|
-
return {
|
|
140
|
-
|
|
141
|
-
|
|
235
|
+
return {
|
|
236
|
+
"CUDA_VISIBLE_DEVICES": gpu_list,
|
|
237
|
+
"SINGULARITYENV_CUDA_VISIBLE_DEVICES": gpu_list,
|
|
238
|
+
}
|
toil/lib/aws/__init__.py
CHANGED
|
@@ -16,23 +16,25 @@ import logging
|
|
|
16
16
|
import os
|
|
17
17
|
import re
|
|
18
18
|
import socket
|
|
19
|
-
import
|
|
19
|
+
from collections.abc import MutableMapping
|
|
20
20
|
from http.client import HTTPException
|
|
21
|
-
from typing import
|
|
21
|
+
from typing import TYPE_CHECKING, Literal, Optional, Union
|
|
22
22
|
from urllib.error import URLError
|
|
23
23
|
from urllib.request import urlopen
|
|
24
24
|
|
|
25
25
|
from botocore.exceptions import ClientError
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
import toil.lib.retry
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from mypy_boto3_s3.literals import BucketLocationConstraintType
|
|
28
31
|
|
|
29
|
-
AWSRegionName = Union[BucketLocationConstraintType, Literal["us-east-1"]]
|
|
32
|
+
AWSRegionName = Union["BucketLocationConstraintType", Literal["us-east-1"]]
|
|
30
33
|
|
|
31
34
|
# These are errors where we think something randomly
|
|
32
35
|
# went wrong on the AWS side and we ought to retry.
|
|
33
36
|
AWSServerErrors = toil.lib.retry.ErrorCondition(
|
|
34
|
-
error=ClientError,
|
|
35
|
-
error_codes=[404, 500, 502, 503, 504]
|
|
37
|
+
error=ClientError, error_codes=[404, 500, 502, 503, 504]
|
|
36
38
|
)
|
|
37
39
|
|
|
38
40
|
logger = logging.getLogger(__name__)
|
|
@@ -40,6 +42,7 @@ logger = logging.getLogger(__name__)
|
|
|
40
42
|
# This file isn't allowed to import anything that depends on Boto or Boto3,
|
|
41
43
|
# which may not be installed, because it has to be importable everywhere.
|
|
42
44
|
|
|
45
|
+
|
|
43
46
|
def get_current_aws_region() -> Optional[str]:
|
|
44
47
|
"""
|
|
45
48
|
Return the AWS region that the currently configured AWS zone (see
|
|
@@ -49,11 +52,13 @@ def get_current_aws_region() -> Optional[str]:
|
|
|
49
52
|
aws_zone = get_current_aws_zone()
|
|
50
53
|
return zone_to_region(aws_zone) if aws_zone else None
|
|
51
54
|
|
|
55
|
+
|
|
52
56
|
def get_aws_zone_from_environment() -> Optional[str]:
|
|
53
57
|
"""
|
|
54
58
|
Get the AWS zone from TOIL_AWS_ZONE if set.
|
|
55
59
|
"""
|
|
56
|
-
return os.environ.get(
|
|
60
|
+
return os.environ.get("TOIL_AWS_ZONE", None)
|
|
61
|
+
|
|
57
62
|
|
|
58
63
|
def get_aws_zone_from_metadata() -> Optional[str]:
|
|
59
64
|
"""
|
|
@@ -68,11 +73,15 @@ def get_aws_zone_from_metadata() -> Optional[str]:
|
|
|
68
73
|
# Use the ECS metadata service
|
|
69
74
|
logger.debug("Fetch AZ from ECS metadata")
|
|
70
75
|
try:
|
|
71
|
-
resp = json.load(
|
|
76
|
+
resp = json.load(
|
|
77
|
+
urlopen(
|
|
78
|
+
os.environ["ECS_CONTAINER_METADATA_URI_V4"] + "/task", timeout=1
|
|
79
|
+
)
|
|
80
|
+
)
|
|
72
81
|
logger.debug("ECS metadata: %s", resp)
|
|
73
82
|
if isinstance(resp, dict):
|
|
74
83
|
# We found something. Go with that.
|
|
75
|
-
return resp.get(
|
|
84
|
+
return resp.get("AvailabilityZone")
|
|
76
85
|
except (json.decoder.JSONDecodeError, KeyError, URLError) as e:
|
|
77
86
|
# We're on ECS but can't get the metadata. That's odd.
|
|
78
87
|
logger.warning("Skipping ECS metadata due to error: %s", e)
|
|
@@ -93,6 +102,7 @@ def get_aws_zone_from_metadata() -> Optional[str]:
|
|
|
93
102
|
logger.warning("Skipping EC2 metadata due to error: %s", e)
|
|
94
103
|
return None
|
|
95
104
|
|
|
105
|
+
|
|
96
106
|
def get_aws_zone_from_boto() -> Optional[str]:
|
|
97
107
|
"""
|
|
98
108
|
Get the AWS zone from the Boto3 config file or from AWS_DEFAULT_REGION, if it is configured and the
|
|
@@ -100,28 +110,30 @@ def get_aws_zone_from_boto() -> Optional[str]:
|
|
|
100
110
|
"""
|
|
101
111
|
try:
|
|
102
112
|
import boto3
|
|
103
|
-
|
|
113
|
+
|
|
104
114
|
boto3_session = boto3.session.Session()
|
|
105
115
|
# this should check AWS_DEFAULT_REGION and ~/.aws/config
|
|
106
116
|
zone = boto3_session.region_name
|
|
107
117
|
if zone is not None:
|
|
108
|
-
zone +=
|
|
118
|
+
zone += "a" # derive an availability zone in the region
|
|
109
119
|
return zone
|
|
110
120
|
except ImportError:
|
|
111
121
|
pass
|
|
112
122
|
return None
|
|
113
123
|
|
|
124
|
+
|
|
114
125
|
def get_aws_zone_from_environment_region() -> Optional[str]:
|
|
115
126
|
"""
|
|
116
127
|
Pick an AWS zone in the region defined by TOIL_AWS_REGION, if it is set.
|
|
117
128
|
"""
|
|
118
|
-
aws_region = os.environ.get(
|
|
129
|
+
aws_region = os.environ.get("TOIL_AWS_REGION")
|
|
119
130
|
if aws_region is not None:
|
|
120
131
|
# If a region is specified, use the first zone in the region.
|
|
121
|
-
return aws_region +
|
|
132
|
+
return aws_region + "a"
|
|
122
133
|
# Otherwise, don't pick a region and let us fall back on the next method.
|
|
123
134
|
return None
|
|
124
135
|
|
|
136
|
+
|
|
125
137
|
def get_current_aws_zone() -> Optional[str]:
|
|
126
138
|
"""
|
|
127
139
|
Get the currently configured or occupied AWS zone to use.
|
|
@@ -137,63 +149,78 @@ def get_current_aws_zone() -> Optional[str]:
|
|
|
137
149
|
Finally, if we have Boto 3, and a default region is configured in Boto 3,
|
|
138
150
|
chooses a zone in that region.
|
|
139
151
|
|
|
140
|
-
Returns
|
|
152
|
+
Returns 'us-east-1a' if no method can produce a zone to use.
|
|
141
153
|
"""
|
|
142
|
-
return
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
154
|
+
return (
|
|
155
|
+
get_aws_zone_from_environment()
|
|
156
|
+
or get_aws_zone_from_metadata()
|
|
157
|
+
or get_aws_zone_from_environment_region()
|
|
158
|
+
or get_aws_zone_from_boto()
|
|
159
|
+
or "us-east-1a"
|
|
160
|
+
) # AWS's native default
|
|
161
|
+
|
|
146
162
|
|
|
147
163
|
def zone_to_region(zone: str) -> AWSRegionName:
|
|
148
164
|
"""Get a region (e.g. us-west-2) from a zone (e.g. us-west-1c)."""
|
|
149
165
|
# re.compile() caches the regex internally so we don't have to
|
|
150
|
-
availability_zone = re.compile(r
|
|
166
|
+
availability_zone = re.compile(r"^([a-z]{2}-[a-z]+-[1-9][0-9]*)([a-z])$")
|
|
151
167
|
m = availability_zone.match(zone)
|
|
152
168
|
if not m:
|
|
153
169
|
raise ValueError(f"Can't extract region from availability zone '{zone}'")
|
|
154
170
|
return m.group(1)
|
|
155
171
|
|
|
172
|
+
|
|
156
173
|
def running_on_ec2() -> bool:
|
|
157
174
|
"""
|
|
158
175
|
Return True if we are currently running on EC2, and false otherwise.
|
|
159
176
|
"""
|
|
177
|
+
|
|
160
178
|
# TODO: Move this to toil.lib.ec2 and make toil.lib.ec2 importable without boto?
|
|
161
179
|
def file_begins_with(path, prefix):
|
|
162
180
|
with open(path) as f:
|
|
163
181
|
return f.read(len(prefix)) == prefix
|
|
164
182
|
|
|
165
|
-
hv_uuid_path =
|
|
166
|
-
if os.path.exists(hv_uuid_path) and file_begins_with(hv_uuid_path,
|
|
183
|
+
hv_uuid_path = "/sys/hypervisor/uuid"
|
|
184
|
+
if os.path.exists(hv_uuid_path) and file_begins_with(hv_uuid_path, "ec2"):
|
|
167
185
|
return True
|
|
168
186
|
# Some instances do not have the /sys/hypervisor/uuid file, so check the identity document instead.
|
|
169
187
|
# See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-identity-documents.html
|
|
170
188
|
try:
|
|
171
|
-
urlopen(
|
|
189
|
+
urlopen(
|
|
190
|
+
"http://169.254.169.254/latest/dynamic/instance-identity/document",
|
|
191
|
+
timeout=1,
|
|
192
|
+
)
|
|
172
193
|
return True
|
|
173
194
|
except (URLError, socket.timeout, HTTPException):
|
|
174
195
|
return False
|
|
175
196
|
|
|
197
|
+
|
|
176
198
|
def running_on_ecs() -> bool:
|
|
177
199
|
"""
|
|
178
200
|
Return True if we are currently running on Amazon ECS, and false otherwise.
|
|
179
201
|
"""
|
|
180
202
|
# We only care about relatively current ECS
|
|
181
|
-
return
|
|
203
|
+
return "ECS_CONTAINER_METADATA_URI_V4" in os.environ
|
|
204
|
+
|
|
182
205
|
|
|
183
|
-
def build_tag_dict_from_env(
|
|
206
|
+
def build_tag_dict_from_env(
|
|
207
|
+
environment: MutableMapping[str, str] = os.environ
|
|
208
|
+
) -> dict[str, str]:
|
|
184
209
|
tags = dict()
|
|
185
|
-
owner_tag = environment.get(
|
|
210
|
+
owner_tag = environment.get("TOIL_OWNER_TAG")
|
|
186
211
|
if owner_tag:
|
|
187
|
-
tags.update({
|
|
212
|
+
tags.update({"Owner": owner_tag})
|
|
188
213
|
|
|
189
|
-
user_tags = environment.get(
|
|
214
|
+
user_tags = environment.get("TOIL_AWS_TAGS")
|
|
190
215
|
if user_tags:
|
|
191
216
|
try:
|
|
192
217
|
json_user_tags = json.loads(user_tags)
|
|
193
218
|
if isinstance(json_user_tags, dict):
|
|
194
219
|
tags.update(json.loads(user_tags))
|
|
195
220
|
else:
|
|
196
|
-
logger.error(
|
|
221
|
+
logger.error(
|
|
222
|
+
'TOIL_AWS_TAGS must be in JSON format: {"key" : "value", ...}'
|
|
223
|
+
)
|
|
197
224
|
exit(1)
|
|
198
225
|
except json.decoder.JSONDecodeError:
|
|
199
226
|
logger.error('TOIL_AWS_TAGS must be in JSON format: {"key" : "value", ...}')
|