toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/lib/ec2.py
CHANGED
|
@@ -1,22 +1,29 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import time
|
|
3
3
|
from base64 import b64encode
|
|
4
|
-
from
|
|
5
|
-
from typing import
|
|
6
|
-
|
|
7
|
-
from boto3.resources.base import ServiceResource
|
|
8
|
-
from boto.ec2.instance import Instance as Boto2Instance
|
|
9
|
-
from boto.ec2.spotinstancerequest import SpotInstanceRequest
|
|
10
|
-
from botocore.client import BaseClient
|
|
4
|
+
from collections.abc import Generator, Iterable, Mapping
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, Union
|
|
11
6
|
|
|
12
7
|
from toil.lib.aws.session import establish_boto3_session
|
|
13
8
|
from toil.lib.aws.utils import flatten_tags
|
|
14
9
|
from toil.lib.exceptions import panic
|
|
15
|
-
from toil.lib.retry import (
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
10
|
+
from toil.lib.retry import (
|
|
11
|
+
ErrorCondition,
|
|
12
|
+
get_error_code,
|
|
13
|
+
get_error_message,
|
|
14
|
+
old_retry,
|
|
15
|
+
retry,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from mypy_boto3_autoscaling.client import AutoScalingClient
|
|
20
|
+
from mypy_boto3_ec2.client import EC2Client
|
|
21
|
+
from mypy_boto3_ec2.service_resource import EC2ServiceResource, Instance
|
|
22
|
+
from mypy_boto3_ec2.type_defs import (
|
|
23
|
+
DescribeInstancesResultTypeDef,
|
|
24
|
+
InstanceTypeDef,
|
|
25
|
+
SpotInstanceRequestTypeDef,
|
|
26
|
+
)
|
|
20
27
|
|
|
21
28
|
a_short_time = 5
|
|
22
29
|
a_long_time = 60 * 60
|
|
@@ -27,44 +34,55 @@ class UserError(RuntimeError):
|
|
|
27
34
|
def __init__(self, message=None, cause=None):
|
|
28
35
|
if (message is None) == (cause is None):
|
|
29
36
|
raise RuntimeError("Must pass either message or cause.")
|
|
30
|
-
super().__init__(
|
|
31
|
-
message if cause is None else cause.message)
|
|
37
|
+
super().__init__(message if cause is None else cause.message)
|
|
32
38
|
|
|
33
39
|
|
|
34
40
|
def not_found(e):
|
|
35
41
|
try:
|
|
36
|
-
return get_error_code(e).endswith(
|
|
42
|
+
return get_error_code(e).endswith(".NotFound")
|
|
37
43
|
except ValueError:
|
|
38
44
|
# Not the right kind of error
|
|
39
45
|
return False
|
|
40
46
|
|
|
47
|
+
|
|
41
48
|
def inconsistencies_detected(e):
|
|
42
|
-
if get_error_code(e) ==
|
|
49
|
+
if get_error_code(e) == "InvalidGroup.NotFound":
|
|
43
50
|
return True
|
|
44
51
|
m = get_error_message(e).lower()
|
|
45
|
-
matches = (
|
|
52
|
+
matches = ("invalid iam instance profile" in m) or ("no associated iam roles" in m)
|
|
46
53
|
return matches
|
|
47
54
|
|
|
55
|
+
|
|
48
56
|
# We also define these error categories for the new retry decorator
|
|
49
|
-
INCONSISTENCY_ERRORS = [
|
|
50
|
-
|
|
51
|
-
|
|
57
|
+
INCONSISTENCY_ERRORS = [
|
|
58
|
+
ErrorCondition(boto_error_codes=["InvalidGroup.NotFound"]),
|
|
59
|
+
ErrorCondition(error_message_must_include="Invalid IAM Instance Profile"),
|
|
60
|
+
ErrorCondition(error_message_must_include="no associated IAM Roles"),
|
|
61
|
+
]
|
|
52
62
|
|
|
53
63
|
|
|
54
64
|
def retry_ec2(t=a_short_time, retry_for=10 * a_short_time, retry_while=not_found):
|
|
55
|
-
return old_retry(
|
|
56
|
-
|
|
57
|
-
|
|
65
|
+
return old_retry(
|
|
66
|
+
delays=(t, t, t * 2, t * 4), timeout=retry_for, predicate=retry_while
|
|
67
|
+
)
|
|
58
68
|
|
|
59
69
|
|
|
60
70
|
class UnexpectedResourceState(Exception):
|
|
61
71
|
def __init__(self, resource, to_state, state):
|
|
62
72
|
super().__init__(
|
|
63
|
-
"Expected state of %s to be '%s' but got '%s'" %
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
73
|
+
"Expected state of %s to be '%s' but got '%s'" % (resource, to_state, state)
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def wait_transition(
|
|
78
|
+
boto3_ec2: "EC2Client",
|
|
79
|
+
resource: "InstanceTypeDef",
|
|
80
|
+
from_states: Iterable[str],
|
|
81
|
+
to_state: str,
|
|
82
|
+
state_getter: Callable[["InstanceTypeDef"], str] = lambda x: x.get("State").get(
|
|
83
|
+
"Name"
|
|
84
|
+
),
|
|
85
|
+
):
|
|
68
86
|
"""
|
|
69
87
|
Wait until the specified EC2 resource (instance, image, volume, ...) transitions from any
|
|
70
88
|
of the given 'from' states to the specified 'to' state. If the instance is found in a state
|
|
@@ -76,59 +94,84 @@ def wait_transition(resource, from_states, to_state,
|
|
|
76
94
|
:param to_state: the state of the resource when this method returns
|
|
77
95
|
"""
|
|
78
96
|
state = state_getter(resource)
|
|
97
|
+
instance_id = resource["InstanceId"]
|
|
79
98
|
while state in from_states:
|
|
80
99
|
time.sleep(a_short_time)
|
|
81
100
|
for attempt in retry_ec2():
|
|
82
101
|
with attempt:
|
|
83
|
-
|
|
102
|
+
described = boto3_ec2.describe_instances(InstanceIds=[instance_id])
|
|
103
|
+
resource = described["Reservations"][0]["Instances"][
|
|
104
|
+
0
|
|
105
|
+
] # there should only be one requested
|
|
84
106
|
state = state_getter(resource)
|
|
85
107
|
if state != to_state:
|
|
86
108
|
raise UnexpectedResourceState(resource, to_state, state)
|
|
87
109
|
|
|
88
110
|
|
|
89
|
-
def wait_instances_running(
|
|
111
|
+
def wait_instances_running(
|
|
112
|
+
boto3_ec2: "EC2Client", instances: Iterable["InstanceTypeDef"]
|
|
113
|
+
) -> Generator["InstanceTypeDef", None, None]:
|
|
90
114
|
"""
|
|
91
115
|
Wait until no instance in the given iterable is 'pending'. Yield every instance that
|
|
92
116
|
entered the running state as soon as it does.
|
|
93
117
|
|
|
94
|
-
:param
|
|
95
|
-
:param
|
|
96
|
-
:rtype: Iterable[Boto2Instance]
|
|
118
|
+
:param boto3_ec2: the EC2 connection to use for making requests
|
|
119
|
+
:param instances: the instances to wait on
|
|
97
120
|
"""
|
|
98
121
|
running_ids = set()
|
|
99
122
|
other_ids = set()
|
|
100
123
|
while True:
|
|
101
124
|
pending_ids = set()
|
|
102
125
|
for i in instances:
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
126
|
+
i: "InstanceTypeDef"
|
|
127
|
+
if i["State"]["Name"] == "pending":
|
|
128
|
+
pending_ids.add(i["InstanceId"])
|
|
129
|
+
elif i["State"]["Name"] == "running":
|
|
130
|
+
if i["InstanceId"] in running_ids:
|
|
131
|
+
raise RuntimeError(
|
|
132
|
+
"An instance was already added to the list of running instance IDs. Maybe there is a duplicate."
|
|
133
|
+
)
|
|
134
|
+
running_ids.add(i["InstanceId"])
|
|
109
135
|
yield i
|
|
110
136
|
else:
|
|
111
|
-
if i
|
|
112
|
-
raise RuntimeError(
|
|
113
|
-
|
|
137
|
+
if i["InstanceId"] in other_ids:
|
|
138
|
+
raise RuntimeError(
|
|
139
|
+
"An instance was already added to the list of other instances. Maybe there is a duplicate."
|
|
140
|
+
)
|
|
141
|
+
other_ids.add(i["InstanceId"])
|
|
114
142
|
yield i
|
|
115
|
-
logger.info(
|
|
116
|
-
|
|
143
|
+
logger.info(
|
|
144
|
+
"%i instance(s) pending, %i running, %i other.",
|
|
145
|
+
*list(map(len, (pending_ids, running_ids, other_ids))),
|
|
146
|
+
)
|
|
117
147
|
if not pending_ids:
|
|
118
148
|
break
|
|
119
149
|
seconds = max(a_short_time, min(len(pending_ids), 10 * a_short_time))
|
|
120
|
-
logger.info(
|
|
150
|
+
logger.info("Sleeping for %is", seconds)
|
|
121
151
|
time.sleep(seconds)
|
|
122
152
|
for attempt in retry_ec2():
|
|
123
153
|
with attempt:
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
154
|
+
described_instances = boto3_ec2.describe_instances(
|
|
155
|
+
InstanceIds=list(pending_ids)
|
|
156
|
+
)
|
|
157
|
+
instances = [
|
|
158
|
+
instance
|
|
159
|
+
for reservation in described_instances["Reservations"]
|
|
160
|
+
for instance in reservation["Instances"]
|
|
161
|
+
]
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def wait_spot_requests_active(
|
|
165
|
+
boto3_ec2: "EC2Client",
|
|
166
|
+
requests: Iterable["SpotInstanceRequestTypeDef"],
|
|
167
|
+
timeout: float = None,
|
|
168
|
+
tentative: bool = False,
|
|
169
|
+
) -> Iterable[list["SpotInstanceRequestTypeDef"]]:
|
|
128
170
|
"""
|
|
129
171
|
Wait until no spot request in the given iterator is in the 'open' state or, optionally,
|
|
130
172
|
a timeout occurs. Yield spot requests as soon as they leave the 'open' state.
|
|
131
173
|
|
|
174
|
+
:param boto3_ec2: ec2 client
|
|
132
175
|
:param requests: The requests to wait on.
|
|
133
176
|
|
|
134
177
|
:param timeout: Maximum time in seconds to spend waiting or None to wait forever. If a
|
|
@@ -145,55 +188,68 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
|
|
|
145
188
|
other_ids = set()
|
|
146
189
|
open_ids = None
|
|
147
190
|
|
|
148
|
-
def cancel():
|
|
149
|
-
logger.warning(
|
|
150
|
-
|
|
191
|
+
def cancel() -> None:
|
|
192
|
+
logger.warning("Cancelling remaining %i spot requests.", len(open_ids))
|
|
193
|
+
boto3_ec2.cancel_spot_instance_requests(SpotInstanceRequestIds=list(open_ids))
|
|
151
194
|
|
|
152
|
-
def spot_request_not_found(e):
|
|
153
|
-
return get_error_code(e) ==
|
|
195
|
+
def spot_request_not_found(e: Exception) -> bool:
|
|
196
|
+
return get_error_code(e) == "InvalidSpotInstanceRequestID.NotFound"
|
|
154
197
|
|
|
155
198
|
try:
|
|
156
199
|
while True:
|
|
157
200
|
open_ids, eval_ids, fulfill_ids = set(), set(), set()
|
|
158
201
|
batch = []
|
|
159
202
|
for r in requests:
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
203
|
+
r: "SpotInstanceRequestTypeDef" # pycharm thinks it is a string
|
|
204
|
+
if r["State"] == "open":
|
|
205
|
+
open_ids.add(r["InstanceId"])
|
|
206
|
+
if r["Status"] == "pending-evaluation":
|
|
207
|
+
eval_ids.add(r["InstanceId"])
|
|
208
|
+
elif r["Status"] == "pending-fulfillment":
|
|
209
|
+
fulfill_ids.add(r["InstanceId"])
|
|
166
210
|
else:
|
|
167
211
|
logger.info(
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
212
|
+
"Request %s entered status %s indicating that it will not be "
|
|
213
|
+
"fulfilled anytime soon.",
|
|
214
|
+
r["InstanceId"],
|
|
215
|
+
r["Status"],
|
|
216
|
+
)
|
|
217
|
+
elif r["State"] == "active":
|
|
218
|
+
if r["InstanceId"] in active_ids:
|
|
219
|
+
raise RuntimeError(
|
|
220
|
+
"A request was already added to the list of active requests. Maybe there are duplicate requests."
|
|
221
|
+
)
|
|
222
|
+
active_ids.add(r["InstanceId"])
|
|
174
223
|
batch.append(r)
|
|
175
224
|
else:
|
|
176
|
-
if r
|
|
177
|
-
raise RuntimeError(
|
|
178
|
-
|
|
225
|
+
if r["InstanceId"] in other_ids:
|
|
226
|
+
raise RuntimeError(
|
|
227
|
+
"A request was already added to the list of other IDs. Maybe there are duplicate requests."
|
|
228
|
+
)
|
|
229
|
+
other_ids.add(r["InstanceId"])
|
|
179
230
|
batch.append(r)
|
|
180
231
|
if batch:
|
|
181
232
|
yield batch
|
|
182
|
-
logger.info(
|
|
183
|
-
|
|
184
|
-
|
|
233
|
+
logger.info(
|
|
234
|
+
"%i spot requests(s) are open (%i of which are pending evaluation and %i "
|
|
235
|
+
"are pending fulfillment), %i are active and %i are in another state.",
|
|
236
|
+
*list(
|
|
237
|
+
map(len, (open_ids, eval_ids, fulfill_ids, active_ids, other_ids))
|
|
238
|
+
),
|
|
239
|
+
)
|
|
185
240
|
if not open_ids or tentative and not eval_ids and not fulfill_ids:
|
|
186
241
|
break
|
|
187
242
|
sleep_time = 2 * a_short_time
|
|
188
243
|
if timeout is not None and time.time() + sleep_time >= timeout:
|
|
189
|
-
logger.warning(
|
|
244
|
+
logger.warning("Timed out waiting for spot requests.")
|
|
190
245
|
break
|
|
191
|
-
logger.info(
|
|
246
|
+
logger.info("Sleeping for %is", sleep_time)
|
|
192
247
|
time.sleep(sleep_time)
|
|
193
248
|
for attempt in retry_ec2(retry_while=spot_request_not_found):
|
|
194
249
|
with attempt:
|
|
195
|
-
requests =
|
|
196
|
-
list(open_ids)
|
|
250
|
+
requests = boto3_ec2.describe_spot_instance_requests(
|
|
251
|
+
SpotInstanceRequestIds=list(open_ids)
|
|
252
|
+
)
|
|
197
253
|
except BaseException:
|
|
198
254
|
if open_ids:
|
|
199
255
|
with panic(logger):
|
|
@@ -204,73 +260,125 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
|
|
|
204
260
|
cancel()
|
|
205
261
|
|
|
206
262
|
|
|
207
|
-
def create_spot_instances(
|
|
263
|
+
def create_spot_instances(
|
|
264
|
+
boto3_ec2: "EC2Client",
|
|
265
|
+
price,
|
|
266
|
+
image_id,
|
|
267
|
+
spec,
|
|
268
|
+
num_instances=1,
|
|
269
|
+
timeout=None,
|
|
270
|
+
tentative=False,
|
|
271
|
+
tags=None,
|
|
272
|
+
) -> Generator["DescribeInstancesResultTypeDef", None, None]:
|
|
208
273
|
"""
|
|
209
274
|
Create instances on the spot market.
|
|
210
275
|
"""
|
|
211
|
-
def spotRequestNotFound(e):
|
|
212
|
-
return getattr(e, 'error_code', None) == "InvalidSpotInstanceRequestID.NotFound"
|
|
213
276
|
|
|
214
|
-
|
|
215
|
-
|
|
277
|
+
def spotRequestNotFound(e):
|
|
278
|
+
return getattr(e, "error_code", None) == "InvalidSpotInstanceRequestID.NotFound"
|
|
279
|
+
|
|
280
|
+
spec["LaunchSpecification"].update(
|
|
281
|
+
{"ImageId": image_id}
|
|
282
|
+
) # boto3 image id is in the launch specification
|
|
283
|
+
for attempt in retry_ec2(
|
|
284
|
+
retry_for=a_long_time, retry_while=inconsistencies_detected
|
|
285
|
+
):
|
|
216
286
|
with attempt:
|
|
217
|
-
|
|
218
|
-
price,
|
|
287
|
+
requests_dict = boto3_ec2.request_spot_instances(
|
|
288
|
+
SpotPrice=price, InstanceCount=num_instances, **spec
|
|
289
|
+
)
|
|
290
|
+
requests = requests_dict["SpotInstanceRequests"]
|
|
219
291
|
|
|
220
292
|
if tags is not None:
|
|
221
|
-
for requestID in (request
|
|
293
|
+
for requestID in (request["SpotInstanceRequestId"] for request in requests):
|
|
222
294
|
for attempt in retry_ec2(retry_while=spotRequestNotFound):
|
|
223
295
|
with attempt:
|
|
224
|
-
|
|
296
|
+
boto3_ec2.create_tags(Resources=[requestID], Tags=tags)
|
|
225
297
|
|
|
226
298
|
num_active, num_other = 0, 0
|
|
227
299
|
# noinspection PyUnboundLocalVariable,PyTypeChecker
|
|
228
300
|
# request_spot_instances's type annotation is wrong
|
|
229
|
-
for batch in wait_spot_requests_active(
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
tentative=tentative):
|
|
301
|
+
for batch in wait_spot_requests_active(
|
|
302
|
+
boto3_ec2, requests, timeout=timeout, tentative=tentative
|
|
303
|
+
):
|
|
233
304
|
instance_ids = []
|
|
234
305
|
for request in batch:
|
|
235
|
-
|
|
236
|
-
|
|
306
|
+
request: "SpotInstanceRequestTypeDef"
|
|
307
|
+
if request["State"] == "active":
|
|
308
|
+
instance_ids.append(request["InstanceId"])
|
|
237
309
|
num_active += 1
|
|
238
310
|
else:
|
|
239
311
|
logger.info(
|
|
240
|
-
|
|
241
|
-
request
|
|
242
|
-
request
|
|
312
|
+
"Request %s in unexpected state %s.",
|
|
313
|
+
request["InstanceId"],
|
|
314
|
+
request["State"],
|
|
315
|
+
)
|
|
243
316
|
num_other += 1
|
|
244
317
|
if instance_ids:
|
|
245
318
|
# This next line is the reason we batch. It's so we can get multiple instances in
|
|
246
319
|
# a single request.
|
|
247
|
-
|
|
320
|
+
for instance_id in instance_ids:
|
|
321
|
+
for attempt in retry_ec2():
|
|
322
|
+
with attempt:
|
|
323
|
+
# Increase hop limit from 1 to use Instance Metadata V2
|
|
324
|
+
boto3_ec2.modify_instance_metadata_options(
|
|
325
|
+
InstanceId=instance_id, HttpPutResponseHopLimit=3
|
|
326
|
+
)
|
|
327
|
+
yield boto3_ec2.describe_instances(InstanceIds=instance_ids)
|
|
248
328
|
if not num_active:
|
|
249
|
-
message =
|
|
329
|
+
message = "None of the spot requests entered the active state"
|
|
250
330
|
if tentative:
|
|
251
|
-
logger.warning(message +
|
|
331
|
+
logger.warning(message + ".")
|
|
252
332
|
else:
|
|
253
333
|
raise RuntimeError(message)
|
|
254
334
|
if num_other:
|
|
255
|
-
logger.warning(
|
|
335
|
+
logger.warning("%i request(s) entered a state other than active.", num_other)
|
|
256
336
|
|
|
257
337
|
|
|
258
|
-
def create_ondemand_instances(
|
|
338
|
+
def create_ondemand_instances(
|
|
339
|
+
boto3_ec2: "EC2Client",
|
|
340
|
+
image_id: str,
|
|
341
|
+
spec: Mapping[str, Any],
|
|
342
|
+
num_instances: int = 1,
|
|
343
|
+
) -> list["InstanceTypeDef"]:
|
|
259
344
|
"""
|
|
260
345
|
Requests the RunInstances EC2 API call but accounts for the race between recently created
|
|
261
346
|
instance profiles, IAM roles and an instance creation that refers to them.
|
|
262
|
-
|
|
263
|
-
:rtype: List[Boto2Instance]
|
|
264
347
|
"""
|
|
265
|
-
instance_type = spec[
|
|
266
|
-
logger.info(
|
|
267
|
-
|
|
268
|
-
|
|
348
|
+
instance_type = spec["InstanceType"]
|
|
349
|
+
logger.info("Creating %s instance(s) ... ", instance_type)
|
|
350
|
+
boto_instance_list = []
|
|
351
|
+
for attempt in retry_ec2(
|
|
352
|
+
retry_for=a_long_time, retry_while=inconsistencies_detected
|
|
353
|
+
):
|
|
269
354
|
with attempt:
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
355
|
+
boto_instance_list: list["InstanceTypeDef"] = boto3_ec2.run_instances(
|
|
356
|
+
ImageId=image_id, MinCount=num_instances, MaxCount=num_instances, **spec
|
|
357
|
+
)["Instances"]
|
|
358
|
+
|
|
359
|
+
return boto_instance_list
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def increase_instance_hop_limit(
|
|
363
|
+
boto3_ec2: "EC2Client", boto_instance_list: list["InstanceTypeDef"]
|
|
364
|
+
) -> None:
|
|
365
|
+
"""
|
|
366
|
+
Increase the default HTTP hop limit, as we are running Toil and Kubernetes inside a Docker container, so the default
|
|
367
|
+
hop limit of 1 will not be enough when grabbing metadata information with ec2_metadata
|
|
368
|
+
|
|
369
|
+
Must be called after the instances are guaranteed to be running.
|
|
370
|
+
|
|
371
|
+
:param boto_instance_list: List of boto instances to modify
|
|
372
|
+
:return:
|
|
373
|
+
"""
|
|
374
|
+
for boto_instance in boto_instance_list:
|
|
375
|
+
instance_id = boto_instance["InstanceId"]
|
|
376
|
+
for attempt in retry_ec2():
|
|
377
|
+
with attempt:
|
|
378
|
+
# Increase hop limit from 1 to use Instance Metadata V2
|
|
379
|
+
boto3_ec2.modify_instance_metadata_options(
|
|
380
|
+
InstanceId=instance_id, HttpPutResponseHopLimit=3
|
|
381
|
+
)
|
|
274
382
|
|
|
275
383
|
|
|
276
384
|
def prune(bushy: dict) -> dict:
|
|
@@ -287,32 +395,37 @@ def prune(bushy: dict) -> dict:
|
|
|
287
395
|
|
|
288
396
|
# We need a module-level client to get the dynamically-generated error types to
|
|
289
397
|
# catch, and to wait on IAM items.
|
|
290
|
-
iam_client = establish_boto3_session().client(
|
|
398
|
+
iam_client = establish_boto3_session().client("iam")
|
|
399
|
+
|
|
291
400
|
|
|
292
401
|
# exception is generated by a factory so we weirdly need a client instance to reference it
|
|
293
|
-
@retry(
|
|
294
|
-
|
|
402
|
+
@retry(
|
|
403
|
+
errors=[iam_client.exceptions.NoSuchEntityException],
|
|
404
|
+
intervals=[1, 1, 2, 4, 8, 16, 32, 64],
|
|
405
|
+
)
|
|
295
406
|
def wait_until_instance_profile_arn_exists(instance_profile_arn: str):
|
|
296
407
|
# TODO: We have no guarantee that the ARN contains the name.
|
|
297
|
-
instance_profile_name = instance_profile_arn.split(
|
|
408
|
+
instance_profile_name = instance_profile_arn.split(":instance-profile/")[-1]
|
|
298
409
|
logger.debug("Checking for instance profile %s...", instance_profile_name)
|
|
299
410
|
iam_client.get_instance_profile(InstanceProfileName=instance_profile_name)
|
|
300
411
|
logger.debug("Instance profile found")
|
|
301
412
|
|
|
302
413
|
|
|
303
414
|
@retry(intervals=[5, 5, 10, 20, 20, 20, 20], errors=INCONSISTENCY_ERRORS)
|
|
304
|
-
def create_instances(
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
415
|
+
def create_instances(
|
|
416
|
+
ec2_resource: "EC2ServiceResource",
|
|
417
|
+
image_id: str,
|
|
418
|
+
key_name: str,
|
|
419
|
+
instance_type: str,
|
|
420
|
+
num_instances: int = 1,
|
|
421
|
+
security_group_ids: Optional[list] = None,
|
|
422
|
+
user_data: Optional[Union[str, bytes]] = None,
|
|
423
|
+
block_device_map: Optional[list[dict]] = None,
|
|
424
|
+
instance_profile_arn: Optional[str] = None,
|
|
425
|
+
placement_az: Optional[str] = None,
|
|
426
|
+
subnet_id: str = None,
|
|
427
|
+
tags: Optional[dict[str, str]] = None,
|
|
428
|
+
) -> list["Instance"]:
|
|
316
429
|
"""
|
|
317
430
|
Replaces create_ondemand_instances. Uses boto3 and returns a list of Boto3 instance dicts.
|
|
318
431
|
|
|
@@ -323,20 +436,25 @@ def create_instances(ec2_resource: ServiceResource,
|
|
|
323
436
|
|
|
324
437
|
Tags, if given, are applied to the instances, and all volumes.
|
|
325
438
|
"""
|
|
326
|
-
logger.info(
|
|
439
|
+
logger.info("Creating %s instance(s) ... ", instance_type)
|
|
327
440
|
|
|
328
441
|
if isinstance(user_data, str):
|
|
329
|
-
user_data = user_data.encode(
|
|
330
|
-
|
|
331
|
-
request = {
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
442
|
+
user_data = user_data.encode("utf-8")
|
|
443
|
+
|
|
444
|
+
request = {
|
|
445
|
+
"ImageId": image_id,
|
|
446
|
+
"MinCount": num_instances,
|
|
447
|
+
"MaxCount": num_instances,
|
|
448
|
+
"KeyName": key_name,
|
|
449
|
+
"SecurityGroupIds": security_group_ids,
|
|
450
|
+
"InstanceType": instance_type,
|
|
451
|
+
"UserData": user_data,
|
|
452
|
+
"BlockDeviceMappings": block_device_map,
|
|
453
|
+
"SubnetId": subnet_id,
|
|
454
|
+
# Metadata V2 defaults hops to 1, which is an issue when running inside a docker container
|
|
455
|
+
# https://github.com/adamchainz/ec2-metadata?tab=readme-ov-file#instance-metadata-service-version-2
|
|
456
|
+
"MetadataOptions": {"HttpPutResponseHopLimit": 3},
|
|
457
|
+
}
|
|
340
458
|
|
|
341
459
|
if instance_profile_arn:
|
|
342
460
|
# We could just retry when we get an error because the ARN doesn't
|
|
@@ -344,32 +462,37 @@ def create_instances(ec2_resource: ServiceResource,
|
|
|
344
462
|
wait_until_instance_profile_arn_exists(instance_profile_arn)
|
|
345
463
|
|
|
346
464
|
# Add it to the request
|
|
347
|
-
request[
|
|
465
|
+
request["IamInstanceProfile"] = {"Arn": instance_profile_arn}
|
|
348
466
|
|
|
349
467
|
if placement_az:
|
|
350
|
-
request[
|
|
468
|
+
request["Placement"] = {"AvailabilityZone": placement_az}
|
|
351
469
|
|
|
352
470
|
if tags:
|
|
353
471
|
# Tag everything when we make it.
|
|
354
472
|
flat_tags = flatten_tags(tags)
|
|
355
|
-
request[
|
|
356
|
-
|
|
473
|
+
request["TagSpecifications"] = [
|
|
474
|
+
{"ResourceType": "instance", "Tags": flat_tags},
|
|
475
|
+
{"ResourceType": "volume", "Tags": flat_tags},
|
|
476
|
+
]
|
|
357
477
|
|
|
358
478
|
return ec2_resource.create_instances(**prune(request))
|
|
359
479
|
|
|
480
|
+
|
|
360
481
|
@retry(intervals=[5, 5, 10, 20, 20, 20, 20], errors=INCONSISTENCY_ERRORS)
def create_launch_template(
    ec2_client: "EC2Client",
    template_name: str,
    image_id: str,
    key_name: str,
    instance_type: str,
    security_group_ids: Optional[list] = None,
    user_data: Optional[Union[str, bytes]] = None,
    block_device_map: Optional[list[dict]] = None,
    instance_profile_arn: Optional[str] = None,
    placement_az: Optional[str] = None,
    subnet_id: Optional[str] = None,
    tags: Optional[dict[str, str]] = None,
) -> str:
    """
    Creates a launch template with the given name for launching instances with the given parameters.

    :param ec2_client: Boto 3 EC2 client to create the template with.
    :param template_name: Name for the new launch template.
    :param image_id: AMI ID for instances launched from the template.
    :param key_name: SSH key pair name to install on instances.
    :param instance_type: EC2 instance type the template launches.
    :param security_group_ids: Security group IDs to attach, if any.
    :param user_data: Instance user data, as text or bytes; it is
        base64-encoded before being sent to AWS. May be omitted.
    :param block_device_map: BlockDeviceMappings entries, if any.
    :param instance_profile_arn: IAM instance profile ARN to attach, if any.
    :param placement_az: Availability zone to pin instances to, if any.
    :param subnet_id: Subnet to launch instances into, if any.
    :param tags: Tags to apply to the template and to the instances and
        volumes it creates.

    :return: The ID of the created launch template.
    """
    logger.info("Creating launch template for %s instances ... ", instance_type)

    if isinstance(user_data, str):
        # Make sure we have bytes
        user_data = user_data.encode("utf-8")

    if user_data is not None:
        # Then base64 and decode back to str. Guard against None: user data
        # is optional, and b64encode(None) would raise TypeError; a None
        # entry is dropped from the template by prune() below instead.
        user_data = b64encode(user_data).decode("utf-8")

    template = {
        "ImageId": image_id,
        "KeyName": key_name,
        "SecurityGroupIds": security_group_ids,
        "InstanceType": instance_type,
        "UserData": user_data,
        "BlockDeviceMappings": block_device_map,
        "SubnetId": subnet_id,
        # Increase hop limit from 1 to use Instance Metadata V2
        "MetadataOptions": {"HttpPutResponseHopLimit": 3},
    }

    if instance_profile_arn:
        # We could just retry when we get an error because the ARN doesn't
        # exist yet (IAM propagation is eventually consistent), but waiting
        # for it explicitly is more predictable.
        wait_until_instance_profile_arn_exists(instance_profile_arn)

        # Add it to the request
        template["IamInstanceProfile"] = {"Arn": instance_profile_arn}

    if placement_az:
        template["Placement"] = {"AvailabilityZone": placement_az}

    flat_tags = []
    if tags:
        # Tag everything when we make it.
        flat_tags = flatten_tags(tags)
        template["TagSpecifications"] = [
            {"ResourceType": "instance", "Tags": flat_tags},
            {"ResourceType": "volume", "Tags": flat_tags},
        ]

    request = {
        "LaunchTemplateData": prune(template),
        "LaunchTemplateName": template_name,
    }

    if tags:
        request["TagSpecifications"] = [
            {"ResourceType": "launch-template", "Tags": flat_tags}
        ]

    return ec2_client.create_launch_template(**request)["LaunchTemplate"][
        "LaunchTemplateId"
    ]
|
|
429
565
|
|
|
430
566
|
|
|
431
567
|
@retry(intervals=[5, 5, 10, 20, 20, 20, 20], errors=INCONSISTENCY_ERRORS)
|
|
432
|
-
def create_auto_scaling_group(
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
568
|
+
def create_auto_scaling_group(
|
|
569
|
+
autoscaling_client: "AutoScalingClient",
|
|
570
|
+
asg_name: str,
|
|
571
|
+
launch_template_ids: dict[str, str],
|
|
572
|
+
vpc_subnets: list[str],
|
|
573
|
+
min_size: int,
|
|
574
|
+
max_size: int,
|
|
575
|
+
instance_types: Optional[Iterable[str]] = None,
|
|
576
|
+
spot_bid: Optional[float] = None,
|
|
577
|
+
spot_cheapest: bool = False,
|
|
578
|
+
tags: Optional[dict[str, str]] = None,
|
|
579
|
+
) -> None:
|
|
443
580
|
"""
|
|
444
581
|
Create a new Auto Scaling Group with the given name (which is also its
|
|
445
582
|
unique identifier).
|
|
@@ -472,19 +609,26 @@ def create_auto_scaling_group(autoscaling_client: BaseClient,
|
|
|
472
609
|
"""
|
|
473
610
|
|
|
474
611
|
if instance_types is None:
|
|
475
|
-
instance_types = []
|
|
612
|
+
instance_types: list[str] = []
|
|
476
613
|
|
|
477
614
|
if instance_types is not None and len(instance_types) > 20:
|
|
478
|
-
raise RuntimeError(
|
|
615
|
+
raise RuntimeError(
|
|
616
|
+
f"Too many instance types ({len(instance_types)}) in group; AWS supports only 20."
|
|
617
|
+
)
|
|
479
618
|
|
|
480
619
|
if len(vpc_subnets) == 0:
|
|
481
|
-
raise RuntimeError(
|
|
620
|
+
raise RuntimeError(
|
|
621
|
+
"No VPC subnets specified to launch into; not clear where to put instances"
|
|
622
|
+
)
|
|
482
623
|
|
|
483
624
|
def get_launch_template_spec(instance_type):
|
|
484
625
|
"""
|
|
485
626
|
Get a LaunchTemplateSpecification for the given instance type.
|
|
486
627
|
"""
|
|
487
|
-
return {
|
|
628
|
+
return {
|
|
629
|
+
"LaunchTemplateId": launch_template_ids[instance_type],
|
|
630
|
+
"Version": "$Default",
|
|
631
|
+
}
|
|
488
632
|
|
|
489
633
|
# We always write the ASG with a MixedInstancesPolicy even when we have only one type.
|
|
490
634
|
# And we use a separate launch template for every instance type, and apply it as an override.
|
|
@@ -493,24 +637,42 @@ def create_auto_scaling_group(autoscaling_client: BaseClient,
|
|
|
493
637
|
# We need to use a launch template per instance type so that different
|
|
494
638
|
# instance types with specified EBS storage size overrides will get their
|
|
495
639
|
# storage.
|
|
496
|
-
mip = {
|
|
497
|
-
|
|
640
|
+
mip = {
|
|
641
|
+
"LaunchTemplate": {
|
|
642
|
+
"LaunchTemplateSpecification": get_launch_template_spec(
|
|
643
|
+
next(iter(instance_types))
|
|
644
|
+
), # noqa
|
|
645
|
+
"Overrides": [
|
|
646
|
+
{
|
|
647
|
+
"InstanceType": t,
|
|
648
|
+
"LaunchTemplateSpecification": get_launch_template_spec(t),
|
|
649
|
+
}
|
|
650
|
+
for t in instance_types
|
|
651
|
+
],
|
|
652
|
+
}
|
|
653
|
+
} # noqa
|
|
498
654
|
|
|
499
655
|
if spot_bid is not None:
|
|
500
656
|
# Ask for spot instances by saying everything above base capacity of 0 should be spot.
|
|
501
|
-
mip[
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
657
|
+
mip["InstancesDistribution"] = {
|
|
658
|
+
"OnDemandPercentageAboveBaseCapacity": 0,
|
|
659
|
+
"SpotAllocationStrategy": (
|
|
660
|
+
"capacity-optimized" if not spot_cheapest else "lowest-price"
|
|
661
|
+
),
|
|
662
|
+
"SpotMaxPrice": str(spot_bid),
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
asg = {
|
|
666
|
+
"AutoScalingGroupName": asg_name,
|
|
667
|
+
"MixedInstancesPolicy": prune(mip),
|
|
668
|
+
"MinSize": min_size,
|
|
669
|
+
"MaxSize": max_size,
|
|
670
|
+
"VPCZoneIdentifier": ",".join(vpc_subnets),
|
|
671
|
+
}
|
|
510
672
|
|
|
511
673
|
if tags:
|
|
512
674
|
# Tag the ASG itself.
|
|
513
|
-
asg[
|
|
675
|
+
asg["Tags"] = flatten_tags(tags)
|
|
514
676
|
|
|
515
677
|
logger.debug("Creating Autoscaling Group across subnets: %s", vpc_subnets)
|
|
516
678
|
|