toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/lib/accelerators.py
CHANGED
|
@@ -15,8 +15,9 @@
|
|
|
15
15
|
"""Accelerator (i.e. GPU) utilities for Toil"""
|
|
16
16
|
|
|
17
17
|
import os
|
|
18
|
+
import string
|
|
18
19
|
import subprocess
|
|
19
|
-
from typing import
|
|
20
|
+
from typing import Union, cast
|
|
20
21
|
from xml.dom import minidom
|
|
21
22
|
|
|
22
23
|
from toil.job import AcceleratorRequirement
|
|
@@ -33,13 +34,20 @@ def have_working_nvidia_smi() -> bool:
|
|
|
33
34
|
it can fulfill a CUDARequirement.
|
|
34
35
|
"""
|
|
35
36
|
try:
|
|
36
|
-
subprocess.check_call([
|
|
37
|
-
except (
|
|
37
|
+
subprocess.check_call(["nvidia-smi"])
|
|
38
|
+
except (
|
|
39
|
+
FileNotFoundError,
|
|
40
|
+
PermissionError,
|
|
41
|
+
subprocess.CalledProcessError,
|
|
42
|
+
OSError,
|
|
43
|
+
UnicodeDecodeError,
|
|
44
|
+
):
|
|
38
45
|
return False
|
|
39
46
|
return True
|
|
40
47
|
|
|
48
|
+
|
|
41
49
|
@memoize
|
|
42
|
-
def get_host_accelerator_numbers() ->
|
|
50
|
+
def get_host_accelerator_numbers() -> list[int]:
|
|
43
51
|
"""
|
|
44
52
|
Work out what accelerator is what.
|
|
45
53
|
|
|
@@ -51,7 +59,12 @@ def get_host_accelerator_numbers() -> List[int]:
|
|
|
51
59
|
right GPUs as seen from a Docker daemon.
|
|
52
60
|
"""
|
|
53
61
|
|
|
54
|
-
for number_list_var in [
|
|
62
|
+
for number_list_var in [
|
|
63
|
+
"SLURM_STEP_GPUS",
|
|
64
|
+
"SLURM_JOB_GPUS",
|
|
65
|
+
"CUDA_VISIBLE_DEVICES",
|
|
66
|
+
"NVIDIA_VISIBLE_DEVICES",
|
|
67
|
+
]:
|
|
55
68
|
# Any of these can have a list of GPU numbers, but the CUDA/NVIDIA ones
|
|
56
69
|
# also support a system of GPU GUIDs that we don't support.
|
|
57
70
|
# TODO: If Slurm confinement is set we ignore any attempt to further
|
|
@@ -61,7 +74,9 @@ def get_host_accelerator_numbers() -> List[int]:
|
|
|
61
74
|
if number_list_var in os.environ:
|
|
62
75
|
device_string = os.environ[number_list_var]
|
|
63
76
|
# Parse all the numbers we have
|
|
64
|
-
device_numbers = [
|
|
77
|
+
device_numbers = [
|
|
78
|
+
int(part) for part in device_string.split(",") if part.isnumeric()
|
|
79
|
+
]
|
|
65
80
|
if len(device_numbers) > 0:
|
|
66
81
|
# We found some numbers, so use those
|
|
67
82
|
return device_numbers
|
|
@@ -69,6 +84,7 @@ def get_host_accelerator_numbers() -> List[int]:
|
|
|
69
84
|
# If we don't see a set of limits we understand, say we have all nvidia GPUs
|
|
70
85
|
return list(range(count_nvidia_gpus()))
|
|
71
86
|
|
|
87
|
+
|
|
72
88
|
@memoize
|
|
73
89
|
def have_working_nvidia_docker_runtime() -> bool:
|
|
74
90
|
"""
|
|
@@ -76,11 +92,30 @@ def have_working_nvidia_docker_runtime() -> bool:
|
|
|
76
92
|
"""
|
|
77
93
|
try:
|
|
78
94
|
# The runtime injects nvidia-smi; it doesn't seem to have to be in the image we use here
|
|
79
|
-
subprocess.check_call(
|
|
80
|
-
|
|
95
|
+
subprocess.check_call(
|
|
96
|
+
[
|
|
97
|
+
"docker",
|
|
98
|
+
"run",
|
|
99
|
+
"--rm",
|
|
100
|
+
"--runtime",
|
|
101
|
+
"nvidia",
|
|
102
|
+
"--gpus",
|
|
103
|
+
"all",
|
|
104
|
+
"ubuntu:20.04",
|
|
105
|
+
"nvidia-smi",
|
|
106
|
+
]
|
|
107
|
+
)
|
|
108
|
+
except (
|
|
109
|
+
FileNotFoundError,
|
|
110
|
+
PermissionError,
|
|
111
|
+
subprocess.CalledProcessError,
|
|
112
|
+
OSError,
|
|
113
|
+
UnicodeDecodeError,
|
|
114
|
+
):
|
|
81
115
|
return False
|
|
82
116
|
return True
|
|
83
117
|
|
|
118
|
+
|
|
84
119
|
@memoize
|
|
85
120
|
def count_nvidia_gpus() -> int:
|
|
86
121
|
"""
|
|
@@ -100,7 +135,7 @@ def count_nvidia_gpus() -> int:
|
|
|
100
135
|
.firstChild,
|
|
101
136
|
).data
|
|
102
137
|
)
|
|
103
|
-
except:
|
|
138
|
+
except:
|
|
104
139
|
return 0
|
|
105
140
|
|
|
106
141
|
# TODO: Parse each gpu > product_name > text content and convert to some
|
|
@@ -108,7 +143,56 @@ def count_nvidia_gpus() -> int:
|
|
|
108
143
|
|
|
109
144
|
|
|
110
145
|
@memoize
|
|
111
|
-
def
|
|
146
|
+
def count_amd_gpus() -> int:
|
|
147
|
+
"""
|
|
148
|
+
Return the number of AMD GPUs seen by amd-smi or rocm-smi, or 0 if neither is working.
|
|
149
|
+
:return:
|
|
150
|
+
"""
|
|
151
|
+
try:
|
|
152
|
+
# see if the amd-smi CLI tool is installed
|
|
153
|
+
# we believe this is the expected output for amd-smi, but we don't actually have an AMD GPU to test against
|
|
154
|
+
# so we assume the output from the amd-smi documentation:
|
|
155
|
+
# https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/using-AMD-SMI-CLI-tool.html
|
|
156
|
+
out = subprocess.check_output(["amd-smi", "static"])
|
|
157
|
+
gpu_count = len(
|
|
158
|
+
[line for line in out.decode("utf-8").split("\n") if line.startswith("gpu")]
|
|
159
|
+
)
|
|
160
|
+
return gpu_count
|
|
161
|
+
except (
|
|
162
|
+
FileNotFoundError,
|
|
163
|
+
PermissionError,
|
|
164
|
+
subprocess.SubprocessError,
|
|
165
|
+
OSError,
|
|
166
|
+
UnicodeDecodeError,
|
|
167
|
+
):
|
|
168
|
+
# if the amd-smi command fails, try rocm-smi
|
|
169
|
+
# if a different exception is raised, something other than the subprocess call is wrong
|
|
170
|
+
pass
|
|
171
|
+
try:
|
|
172
|
+
# similarly, since we don't have an AMD gpu to test against, assume the output from the rocm-smi documentation:
|
|
173
|
+
# https://rocm.blogs.amd.com/software-tools-optimization/affinity/part-2/README.html#gpu-numa-configuration-rocm-smi-showtoponuma
|
|
174
|
+
out = subprocess.check_output(["rocm-smi"])
|
|
175
|
+
gpu_count = len(
|
|
176
|
+
[
|
|
177
|
+
line
|
|
178
|
+
for line in out.decode("utf-8").split("\n")
|
|
179
|
+
if len(line) > 0 and line[0] in string.digits
|
|
180
|
+
]
|
|
181
|
+
)
|
|
182
|
+
return gpu_count
|
|
183
|
+
except (
|
|
184
|
+
FileNotFoundError,
|
|
185
|
+
PermissionError,
|
|
186
|
+
subprocess.SubprocessError,
|
|
187
|
+
OSError,
|
|
188
|
+
UnicodeDecodeError,
|
|
189
|
+
):
|
|
190
|
+
pass
|
|
191
|
+
return 0
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@memoize
|
|
195
|
+
def get_individual_local_accelerators() -> list[AcceleratorRequirement]:
|
|
112
196
|
"""
|
|
113
197
|
Determine all the local accelerators available. Report each with count 1,
|
|
114
198
|
in the order of the number that can be used to assign them.
|
|
@@ -117,10 +201,22 @@ def get_individual_local_accelerators() -> List[AcceleratorRequirement]:
|
|
|
117
201
|
accelerator assignment API.
|
|
118
202
|
"""
|
|
119
203
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
204
|
+
gpus: list[AcceleratorRequirement] = [
|
|
205
|
+
{"kind": "gpu", "brand": "nvidia", "api": "cuda", "count": 1}
|
|
206
|
+
for _ in range(count_nvidia_gpus())
|
|
207
|
+
]
|
|
208
|
+
gpus.extend(
|
|
209
|
+
[
|
|
210
|
+
{"kind": "gpu", "brand": "amd", "api": "rocm", "count": 1}
|
|
211
|
+
for _ in range(count_amd_gpus())
|
|
212
|
+
]
|
|
213
|
+
)
|
|
214
|
+
return gpus
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def get_restrictive_environment_for_local_accelerators(
|
|
218
|
+
accelerator_numbers: Union[set[int], list[int]]
|
|
219
|
+
) -> dict[str, str]:
|
|
124
220
|
"""
|
|
125
221
|
Get environment variables which can be applied to a process to restrict it
|
|
126
222
|
to using only the given accelerator numbers.
|
|
@@ -131,11 +227,12 @@ def get_restrictive_environment_for_local_accelerators(accelerator_numbers : Uni
|
|
|
131
227
|
|
|
132
228
|
# Since we only know about nvidia GPUs right now, we can just say our
|
|
133
229
|
# accelerator numbering space is the same as nvidia's GPU numbering space.
|
|
134
|
-
gpu_list =
|
|
230
|
+
gpu_list = ",".join(str(i) for i in accelerator_numbers)
|
|
135
231
|
|
|
136
232
|
# Put this in several places: CUDA_VISIBLE_DEVICES for controlling
|
|
137
233
|
# processes right here, and SINGULARITYENV_CUDA_VISIBLE_DEVICES for
|
|
138
234
|
# propagating to Singularity containers.
|
|
139
|
-
return {
|
|
140
|
-
|
|
141
|
-
|
|
235
|
+
return {
|
|
236
|
+
"CUDA_VISIBLE_DEVICES": gpu_list,
|
|
237
|
+
"SINGULARITYENV_CUDA_VISIBLE_DEVICES": gpu_list,
|
|
238
|
+
}
|
toil/lib/aws/__init__.py
CHANGED
|
@@ -16,16 +16,33 @@ import logging
|
|
|
16
16
|
import os
|
|
17
17
|
import re
|
|
18
18
|
import socket
|
|
19
|
+
from collections.abc import MutableMapping
|
|
19
20
|
from http.client import HTTPException
|
|
20
|
-
from typing import
|
|
21
|
+
from typing import TYPE_CHECKING, Literal, Optional, Union
|
|
21
22
|
from urllib.error import URLError
|
|
22
23
|
from urllib.request import urlopen
|
|
23
24
|
|
|
25
|
+
from botocore.exceptions import ClientError
|
|
26
|
+
|
|
27
|
+
import toil.lib.retry
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from mypy_boto3_s3.literals import BucketLocationConstraintType
|
|
31
|
+
|
|
32
|
+
AWSRegionName = Union["BucketLocationConstraintType", Literal["us-east-1"]]
|
|
33
|
+
|
|
34
|
+
# These are errors where we think something randomly
|
|
35
|
+
# went wrong on the AWS side and we ought to retry.
|
|
36
|
+
AWSServerErrors = toil.lib.retry.ErrorCondition(
|
|
37
|
+
error=ClientError, error_codes=[404, 500, 502, 503, 504]
|
|
38
|
+
)
|
|
39
|
+
|
|
24
40
|
logger = logging.getLogger(__name__)
|
|
25
41
|
|
|
26
42
|
# This file isn't allowed to import anything that depends on Boto or Boto3,
|
|
27
43
|
# which may not be installed, because it has to be importable everywhere.
|
|
28
44
|
|
|
45
|
+
|
|
29
46
|
def get_current_aws_region() -> Optional[str]:
|
|
30
47
|
"""
|
|
31
48
|
Return the AWS region that the currently configured AWS zone (see
|
|
@@ -35,11 +52,13 @@ def get_current_aws_region() -> Optional[str]:
|
|
|
35
52
|
aws_zone = get_current_aws_zone()
|
|
36
53
|
return zone_to_region(aws_zone) if aws_zone else None
|
|
37
54
|
|
|
55
|
+
|
|
38
56
|
def get_aws_zone_from_environment() -> Optional[str]:
|
|
39
57
|
"""
|
|
40
58
|
Get the AWS zone from TOIL_AWS_ZONE if set.
|
|
41
59
|
"""
|
|
42
|
-
return os.environ.get(
|
|
60
|
+
return os.environ.get("TOIL_AWS_ZONE", None)
|
|
61
|
+
|
|
43
62
|
|
|
44
63
|
def get_aws_zone_from_metadata() -> Optional[str]:
|
|
45
64
|
"""
|
|
@@ -54,11 +73,15 @@ def get_aws_zone_from_metadata() -> Optional[str]:
|
|
|
54
73
|
# Use the ECS metadata service
|
|
55
74
|
logger.debug("Fetch AZ from ECS metadata")
|
|
56
75
|
try:
|
|
57
|
-
resp = json.load(
|
|
76
|
+
resp = json.load(
|
|
77
|
+
urlopen(
|
|
78
|
+
os.environ["ECS_CONTAINER_METADATA_URI_V4"] + "/task", timeout=1
|
|
79
|
+
)
|
|
80
|
+
)
|
|
58
81
|
logger.debug("ECS metadata: %s", resp)
|
|
59
82
|
if isinstance(resp, dict):
|
|
60
83
|
# We found something. Go with that.
|
|
61
|
-
return resp.get(
|
|
84
|
+
return resp.get("AvailabilityZone")
|
|
62
85
|
except (json.decoder.JSONDecodeError, KeyError, URLError) as e:
|
|
63
86
|
# We're on ECS but can't get the metadata. That's odd.
|
|
64
87
|
logger.warning("Skipping ECS metadata due to error: %s", e)
|
|
@@ -67,11 +90,10 @@ def get_aws_zone_from_metadata() -> Optional[str]:
|
|
|
67
90
|
# metadata.
|
|
68
91
|
try:
|
|
69
92
|
# Use the EC2 metadata service
|
|
70
|
-
import
|
|
71
|
-
|
|
72
|
-
from boto.utils import get_instance_metadata
|
|
93
|
+
from ec2_metadata import ec2_metadata
|
|
94
|
+
|
|
73
95
|
logger.debug("Fetch AZ from EC2 metadata")
|
|
74
|
-
return
|
|
96
|
+
return ec2_metadata.availability_zone
|
|
75
97
|
except ImportError:
|
|
76
98
|
# This is expected to happen a lot
|
|
77
99
|
logger.debug("No boto to fetch ECS metadata")
|
|
@@ -80,32 +102,38 @@ def get_aws_zone_from_metadata() -> Optional[str]:
|
|
|
80
102
|
logger.warning("Skipping EC2 metadata due to error: %s", e)
|
|
81
103
|
return None
|
|
82
104
|
|
|
105
|
+
|
|
83
106
|
def get_aws_zone_from_boto() -> Optional[str]:
|
|
84
107
|
"""
|
|
85
|
-
Get the AWS zone from the
|
|
86
|
-
|
|
108
|
+
Get the AWS zone from the Boto3 config file or from AWS_DEFAULT_REGION, if it is configured and the
|
|
109
|
+
boto3 module is available.
|
|
87
110
|
"""
|
|
88
111
|
try:
|
|
89
|
-
import
|
|
90
|
-
|
|
112
|
+
import boto3
|
|
113
|
+
|
|
114
|
+
boto3_session = boto3.session.Session()
|
|
115
|
+
# this should check AWS_DEFAULT_REGION and ~/.aws/config
|
|
116
|
+
zone = boto3_session.region_name
|
|
91
117
|
if zone is not None:
|
|
92
|
-
zone +=
|
|
118
|
+
zone += "a" # derive an availability zone in the region
|
|
93
119
|
return zone
|
|
94
120
|
except ImportError:
|
|
95
121
|
pass
|
|
96
122
|
return None
|
|
97
123
|
|
|
124
|
+
|
|
98
125
|
def get_aws_zone_from_environment_region() -> Optional[str]:
|
|
99
126
|
"""
|
|
100
127
|
Pick an AWS zone in the region defined by TOIL_AWS_REGION, if it is set.
|
|
101
128
|
"""
|
|
102
|
-
aws_region = os.environ.get(
|
|
129
|
+
aws_region = os.environ.get("TOIL_AWS_REGION")
|
|
103
130
|
if aws_region is not None:
|
|
104
131
|
# If a region is specified, use the first zone in the region.
|
|
105
|
-
return aws_region +
|
|
132
|
+
return aws_region + "a"
|
|
106
133
|
# Otherwise, don't pick a region and let us fall back on the next method.
|
|
107
134
|
return None
|
|
108
135
|
|
|
136
|
+
|
|
109
137
|
def get_current_aws_zone() -> Optional[str]:
|
|
110
138
|
"""
|
|
111
139
|
Get the currently configured or occupied AWS zone to use.
|
|
@@ -121,63 +149,78 @@ def get_current_aws_zone() -> Optional[str]:
|
|
|
121
149
|
Finally, if we have boto3, and a default region is configured for Boto3,
|
|
122
150
|
chooses a zone in that region.
|
|
123
151
|
|
|
124
|
-
Returns
|
|
152
|
+
Returns 'us-east-1a' if no method can produce a zone to use.
|
|
125
153
|
"""
|
|
126
|
-
return
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
154
|
+
return (
|
|
155
|
+
get_aws_zone_from_environment()
|
|
156
|
+
or get_aws_zone_from_metadata()
|
|
157
|
+
or get_aws_zone_from_environment_region()
|
|
158
|
+
or get_aws_zone_from_boto()
|
|
159
|
+
or "us-east-1a"
|
|
160
|
+
) # AWS's native default
|
|
161
|
+
|
|
130
162
|
|
|
131
|
-
def zone_to_region(zone: str) ->
|
|
163
|
+
def zone_to_region(zone: str) -> AWSRegionName:
|
|
132
164
|
"""Get a region (e.g. us-west-2) from a zone (e.g. us-west-2c)."""
|
|
133
165
|
# re.compile() caches the regex internally so we don't have to
|
|
134
|
-
availability_zone = re.compile(r
|
|
166
|
+
availability_zone = re.compile(r"^([a-z]{2}-[a-z]+-[1-9][0-9]*)([a-z])$")
|
|
135
167
|
m = availability_zone.match(zone)
|
|
136
168
|
if not m:
|
|
137
169
|
raise ValueError(f"Can't extract region from availability zone '{zone}'")
|
|
138
170
|
return m.group(1)
|
|
139
171
|
|
|
172
|
+
|
|
140
173
|
def running_on_ec2() -> bool:
|
|
141
174
|
"""
|
|
142
175
|
Return True if we are currently running on EC2, and false otherwise.
|
|
143
176
|
"""
|
|
177
|
+
|
|
144
178
|
# TODO: Move this to toil.lib.ec2 and make toil.lib.ec2 importable without boto?
|
|
145
179
|
def file_begins_with(path, prefix):
|
|
146
180
|
with open(path) as f:
|
|
147
181
|
return f.read(len(prefix)) == prefix
|
|
148
182
|
|
|
149
|
-
hv_uuid_path =
|
|
150
|
-
if os.path.exists(hv_uuid_path) and file_begins_with(hv_uuid_path,
|
|
183
|
+
hv_uuid_path = "/sys/hypervisor/uuid"
|
|
184
|
+
if os.path.exists(hv_uuid_path) and file_begins_with(hv_uuid_path, "ec2"):
|
|
151
185
|
return True
|
|
152
186
|
# Some instances do not have the /sys/hypervisor/uuid file, so check the identity document instead.
|
|
153
187
|
# See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-identity-documents.html
|
|
154
188
|
try:
|
|
155
|
-
urlopen(
|
|
189
|
+
urlopen(
|
|
190
|
+
"http://169.254.169.254/latest/dynamic/instance-identity/document",
|
|
191
|
+
timeout=1,
|
|
192
|
+
)
|
|
156
193
|
return True
|
|
157
194
|
except (URLError, socket.timeout, HTTPException):
|
|
158
195
|
return False
|
|
159
196
|
|
|
197
|
+
|
|
160
198
|
def running_on_ecs() -> bool:
|
|
161
199
|
"""
|
|
162
200
|
Return True if we are currently running on Amazon ECS, and false otherwise.
|
|
163
201
|
"""
|
|
164
202
|
# We only care about relatively current ECS
|
|
165
|
-
return
|
|
203
|
+
return "ECS_CONTAINER_METADATA_URI_V4" in os.environ
|
|
204
|
+
|
|
166
205
|
|
|
167
|
-
def build_tag_dict_from_env(
|
|
206
|
+
def build_tag_dict_from_env(
|
|
207
|
+
environment: MutableMapping[str, str] = os.environ
|
|
208
|
+
) -> dict[str, str]:
|
|
168
209
|
tags = dict()
|
|
169
|
-
owner_tag = environment.get(
|
|
210
|
+
owner_tag = environment.get("TOIL_OWNER_TAG")
|
|
170
211
|
if owner_tag:
|
|
171
|
-
tags.update({
|
|
212
|
+
tags.update({"Owner": owner_tag})
|
|
172
213
|
|
|
173
|
-
user_tags = environment.get(
|
|
214
|
+
user_tags = environment.get("TOIL_AWS_TAGS")
|
|
174
215
|
if user_tags:
|
|
175
216
|
try:
|
|
176
217
|
json_user_tags = json.loads(user_tags)
|
|
177
218
|
if isinstance(json_user_tags, dict):
|
|
178
219
|
tags.update(json.loads(user_tags))
|
|
179
220
|
else:
|
|
180
|
-
logger.error(
|
|
221
|
+
logger.error(
|
|
222
|
+
'TOIL_AWS_TAGS must be in JSON format: {"key" : "value", ...}'
|
|
223
|
+
)
|
|
181
224
|
exit(1)
|
|
182
225
|
except json.decoder.JSONDecodeError:
|
|
183
226
|
logger.error('TOIL_AWS_TAGS must be in JSON format: {"key" : "value", ...}')
|