toil 6.1.0a1__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +1 -232
- toil/batchSystems/abstractBatchSystem.py +41 -17
- toil/batchSystems/abstractGridEngineBatchSystem.py +79 -65
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/contained_executor.py +4 -5
- toil/batchSystems/gridengine.py +1 -1
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +25 -11
- toil/batchSystems/local_support.py +3 -3
- toil/batchSystems/lsf.py +9 -9
- toil/batchSystems/mesos/batchSystem.py +4 -4
- toil/batchSystems/mesos/executor.py +3 -2
- toil/batchSystems/options.py +9 -0
- toil/batchSystems/singleMachine.py +11 -10
- toil/batchSystems/slurm.py +129 -16
- toil/batchSystems/torque.py +1 -1
- toil/bus.py +45 -3
- toil/common.py +56 -31
- toil/cwl/cwltoil.py +442 -371
- toil/deferred.py +1 -1
- toil/exceptions.py +1 -1
- toil/fileStores/abstractFileStore.py +69 -20
- toil/fileStores/cachingFileStore.py +6 -22
- toil/fileStores/nonCachingFileStore.py +6 -15
- toil/job.py +270 -86
- toil/jobStores/abstractJobStore.py +37 -31
- toil/jobStores/aws/jobStore.py +280 -218
- toil/jobStores/aws/utils.py +60 -31
- toil/jobStores/conftest.py +2 -2
- toil/jobStores/fileJobStore.py +3 -3
- toil/jobStores/googleJobStore.py +3 -4
- toil/leader.py +89 -38
- toil/lib/aws/__init__.py +26 -10
- toil/lib/aws/iam.py +2 -2
- toil/lib/aws/session.py +62 -22
- toil/lib/aws/utils.py +73 -37
- toil/lib/conversions.py +24 -1
- toil/lib/ec2.py +118 -69
- toil/lib/expando.py +1 -1
- toil/lib/generatedEC2Lists.py +8 -8
- toil/lib/io.py +42 -4
- toil/lib/misc.py +1 -3
- toil/lib/resources.py +57 -16
- toil/lib/retry.py +12 -5
- toil/lib/threading.py +29 -14
- toil/lib/throttle.py +1 -1
- toil/options/common.py +31 -30
- toil/options/wdl.py +5 -0
- toil/provisioners/__init__.py +9 -3
- toil/provisioners/abstractProvisioner.py +12 -2
- toil/provisioners/aws/__init__.py +20 -15
- toil/provisioners/aws/awsProvisioner.py +406 -329
- toil/provisioners/gceProvisioner.py +2 -2
- toil/provisioners/node.py +13 -5
- toil/server/app.py +1 -1
- toil/statsAndLogging.py +93 -23
- toil/test/__init__.py +27 -12
- toil/test/batchSystems/batchSystemTest.py +40 -33
- toil/test/batchSystems/batch_system_plugin_test.py +79 -0
- toil/test/batchSystems/test_slurm.py +22 -7
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +245 -236
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +11 -14
- toil/test/jobStores/jobStoreTest.py +40 -54
- toil/test/lib/aws/test_iam.py +2 -2
- toil/test/lib/test_ec2.py +1 -1
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +37 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
- toil/test/provisioners/clusterTest.py +99 -16
- toil/test/server/serverTest.py +2 -2
- toil/test/src/autoDeploymentTest.py +1 -1
- toil/test/src/dockerCheckTest.py +2 -1
- toil/test/src/environmentTest.py +125 -0
- toil/test/src/fileStoreTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +18 -8
- toil/test/src/jobTest.py +1 -1
- toil/test/src/realtimeLoggerTest.py +4 -0
- toil/test/src/workerTest.py +52 -19
- toil/test/utils/toilDebugTest.py +62 -4
- toil/test/utils/utilsTest.py +23 -21
- toil/test/wdl/wdltoil_test.py +49 -21
- toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
- toil/toilState.py +68 -9
- toil/utils/toilDebugFile.py +1 -1
- toil/utils/toilDebugJob.py +153 -26
- toil/utils/toilLaunchCluster.py +12 -2
- toil/utils/toilRsyncCluster.py +7 -2
- toil/utils/toilSshCluster.py +7 -3
- toil/utils/toilStats.py +310 -266
- toil/utils/toilStatus.py +98 -52
- toil/version.py +11 -11
- toil/wdl/wdltoil.py +644 -225
- toil/worker.py +125 -83
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
- toil-7.0.0.dist-info/METADATA +158 -0
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/RECORD +103 -96
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/lib/ec2.py
CHANGED
|
@@ -1,13 +1,11 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import time
|
|
3
3
|
from base64 import b64encode
|
|
4
|
-
from operator import
|
|
5
|
-
from typing import Dict, Iterable, List, Optional, Union
|
|
4
|
+
from operator import itemgetter
|
|
5
|
+
from typing import Dict, Iterable, List, Optional, Union, TYPE_CHECKING, Generator, Callable, Mapping, Any
|
|
6
6
|
|
|
7
|
+
import botocore.client
|
|
7
8
|
from boto3.resources.base import ServiceResource
|
|
8
|
-
from boto.ec2.instance import Instance as Boto2Instance
|
|
9
|
-
from boto.ec2.spotinstancerequest import SpotInstanceRequest
|
|
10
|
-
from botocore.client import BaseClient
|
|
11
9
|
|
|
12
10
|
from toil.lib.aws.session import establish_boto3_session
|
|
13
11
|
from toil.lib.aws.utils import flatten_tags
|
|
@@ -18,6 +16,11 @@ from toil.lib.retry import (ErrorCondition,
|
|
|
18
16
|
old_retry,
|
|
19
17
|
retry)
|
|
20
18
|
|
|
19
|
+
from mypy_boto3_ec2.client import EC2Client
|
|
20
|
+
from mypy_boto3_autoscaling.client import AutoScalingClient
|
|
21
|
+
from mypy_boto3_ec2.type_defs import SpotInstanceRequestTypeDef, DescribeInstancesResultTypeDef, InstanceTypeDef
|
|
22
|
+
from mypy_boto3_ec2.service_resource import EC2ServiceResource, Instance
|
|
23
|
+
|
|
21
24
|
a_short_time = 5
|
|
22
25
|
a_long_time = 60 * 60
|
|
23
26
|
logger = logging.getLogger(__name__)
|
|
@@ -38,6 +41,7 @@ def not_found(e):
|
|
|
38
41
|
# Not the right kind of error
|
|
39
42
|
return False
|
|
40
43
|
|
|
44
|
+
|
|
41
45
|
def inconsistencies_detected(e):
|
|
42
46
|
if get_error_code(e) == 'InvalidGroup.NotFound':
|
|
43
47
|
return True
|
|
@@ -45,6 +49,7 @@ def inconsistencies_detected(e):
|
|
|
45
49
|
matches = ('invalid iam instance profile' in m) or ('no associated iam roles' in m)
|
|
46
50
|
return matches
|
|
47
51
|
|
|
52
|
+
|
|
48
53
|
# We also define these error categories for the new retry decorator
|
|
49
54
|
INCONSISTENCY_ERRORS = [ErrorCondition(boto_error_codes=['InvalidGroup.NotFound']),
|
|
50
55
|
ErrorCondition(error_message_must_include='Invalid IAM Instance Profile'),
|
|
@@ -62,9 +67,10 @@ class UnexpectedResourceState(Exception):
|
|
|
62
67
|
super().__init__(
|
|
63
68
|
"Expected state of %s to be '%s' but got '%s'" %
|
|
64
69
|
(resource, to_state, state))
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def wait_transition(boto3_ec2: EC2Client, resource: InstanceTypeDef, from_states: Iterable[str], to_state: str,
|
|
73
|
+
state_getter: Callable[[InstanceTypeDef], str]=lambda x: x.get('State').get('Name')):
|
|
68
74
|
"""
|
|
69
75
|
Wait until the specified EC2 resource (instance, image, volume, ...) transitions from any
|
|
70
76
|
of the given 'from' states to the specified 'to' state. If the instance is found in a state
|
|
@@ -76,41 +82,44 @@ def wait_transition(resource, from_states, to_state,
|
|
|
76
82
|
:param to_state: the state of the resource when this method returns
|
|
77
83
|
"""
|
|
78
84
|
state = state_getter(resource)
|
|
85
|
+
instance_id = resource["InstanceId"]
|
|
79
86
|
while state in from_states:
|
|
80
87
|
time.sleep(a_short_time)
|
|
81
88
|
for attempt in retry_ec2():
|
|
82
89
|
with attempt:
|
|
83
|
-
|
|
90
|
+
described = boto3_ec2.describe_instances(InstanceIds=[instance_id])
|
|
91
|
+
resource = described["Reservations"][0]["Instances"][0] # there should only be one requested
|
|
84
92
|
state = state_getter(resource)
|
|
85
93
|
if state != to_state:
|
|
86
94
|
raise UnexpectedResourceState(resource, to_state, state)
|
|
87
95
|
|
|
88
96
|
|
|
89
|
-
def wait_instances_running(
|
|
97
|
+
def wait_instances_running(boto3_ec2: EC2Client, instances: Iterable[InstanceTypeDef]) -> Generator[InstanceTypeDef, None, None]:
|
|
90
98
|
"""
|
|
91
99
|
Wait until no instance in the given iterable is 'pending'. Yield every instance that
|
|
92
100
|
entered the running state as soon as it does.
|
|
93
101
|
|
|
94
|
-
:param
|
|
95
|
-
:param Iterable[
|
|
96
|
-
:rtype: Iterable[
|
|
102
|
+
:param EC2Client boto3_ec2: the EC2 connection to use for making requests
|
|
103
|
+
:param Iterable[InstanceTypeDef] instances: the instances to wait on
|
|
104
|
+
:rtype: Iterable[InstanceTypeDef]
|
|
97
105
|
"""
|
|
98
106
|
running_ids = set()
|
|
99
107
|
other_ids = set()
|
|
100
108
|
while True:
|
|
101
109
|
pending_ids = set()
|
|
102
110
|
for i in instances:
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
111
|
+
i: InstanceTypeDef
|
|
112
|
+
if i['State']['Name'] == 'pending':
|
|
113
|
+
pending_ids.add(i['InstanceId'])
|
|
114
|
+
elif i['State']['Name'] == 'running':
|
|
115
|
+
if i['InstanceId'] in running_ids:
|
|
107
116
|
raise RuntimeError("An instance was already added to the list of running instance IDs. Maybe there is a duplicate.")
|
|
108
|
-
running_ids.add(i
|
|
117
|
+
running_ids.add(i['InstanceId'])
|
|
109
118
|
yield i
|
|
110
119
|
else:
|
|
111
|
-
if i
|
|
120
|
+
if i['InstanceId'] in other_ids:
|
|
112
121
|
raise RuntimeError("An instance was already added to the list of other instances. Maybe there is a duplicate.")
|
|
113
|
-
other_ids.add(i
|
|
122
|
+
other_ids.add(i['InstanceId'])
|
|
114
123
|
yield i
|
|
115
124
|
logger.info('%i instance(s) pending, %i running, %i other.',
|
|
116
125
|
*list(map(len, (pending_ids, running_ids, other_ids))))
|
|
@@ -121,14 +130,16 @@ def wait_instances_running(ec2, instances: Iterable[Boto2Instance]) -> Iterable[
|
|
|
121
130
|
time.sleep(seconds)
|
|
122
131
|
for attempt in retry_ec2():
|
|
123
132
|
with attempt:
|
|
124
|
-
|
|
133
|
+
described_instances = boto3_ec2.describe_instances(InstanceIds=list(pending_ids))
|
|
134
|
+
instances = [instance for reservation in described_instances["Reservations"] for instance in reservation["Instances"]]
|
|
125
135
|
|
|
126
136
|
|
|
127
|
-
def wait_spot_requests_active(
|
|
137
|
+
def wait_spot_requests_active(boto3_ec2: EC2Client, requests: Iterable[SpotInstanceRequestTypeDef], timeout: float = None, tentative: bool = False) -> Iterable[List[SpotInstanceRequestTypeDef]]:
|
|
128
138
|
"""
|
|
129
139
|
Wait until no spot request in the given iterator is in the 'open' state or, optionally,
|
|
130
140
|
a timeout occurs. Yield spot requests as soon as they leave the 'open' state.
|
|
131
141
|
|
|
142
|
+
:param boto3_ec2: ec2 client
|
|
132
143
|
:param requests: The requests to wait on.
|
|
133
144
|
|
|
134
145
|
:param timeout: Maximum time in seconds to spend waiting or None to wait forever. If a
|
|
@@ -145,11 +156,11 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
|
|
|
145
156
|
other_ids = set()
|
|
146
157
|
open_ids = None
|
|
147
158
|
|
|
148
|
-
def cancel():
|
|
159
|
+
def cancel() -> None:
|
|
149
160
|
logger.warning('Cancelling remaining %i spot requests.', len(open_ids))
|
|
150
|
-
|
|
161
|
+
boto3_ec2.cancel_spot_instance_requests(SpotInstanceRequestIds=list(open_ids))
|
|
151
162
|
|
|
152
|
-
def spot_request_not_found(e):
|
|
163
|
+
def spot_request_not_found(e: Exception) -> bool:
|
|
153
164
|
return get_error_code(e) == 'InvalidSpotInstanceRequestID.NotFound'
|
|
154
165
|
|
|
155
166
|
try:
|
|
@@ -157,30 +168,31 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
|
|
|
157
168
|
open_ids, eval_ids, fulfill_ids = set(), set(), set()
|
|
158
169
|
batch = []
|
|
159
170
|
for r in requests:
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
171
|
+
r: SpotInstanceRequestTypeDef # pycharm thinks it is a string
|
|
172
|
+
if r['State'] == 'open':
|
|
173
|
+
open_ids.add(r['InstanceId'])
|
|
174
|
+
if r['Status'] == 'pending-evaluation':
|
|
175
|
+
eval_ids.add(r['InstanceId'])
|
|
176
|
+
elif r['Status'] == 'pending-fulfillment':
|
|
177
|
+
fulfill_ids.add(r['InstanceId'])
|
|
166
178
|
else:
|
|
167
179
|
logger.info(
|
|
168
180
|
'Request %s entered status %s indicating that it will not be '
|
|
169
|
-
'fulfilled anytime soon.', r
|
|
170
|
-
elif r
|
|
171
|
-
if r
|
|
181
|
+
'fulfilled anytime soon.', r['InstanceId'], r['Status'])
|
|
182
|
+
elif r['State'] == 'active':
|
|
183
|
+
if r['InstanceId'] in active_ids:
|
|
172
184
|
raise RuntimeError("A request was already added to the list of active requests. Maybe there are duplicate requests.")
|
|
173
|
-
active_ids.add(r
|
|
185
|
+
active_ids.add(r['InstanceId'])
|
|
174
186
|
batch.append(r)
|
|
175
187
|
else:
|
|
176
|
-
if r
|
|
188
|
+
if r['InstanceId'] in other_ids:
|
|
177
189
|
raise RuntimeError("A request was already added to the list of other IDs. Maybe there are duplicate requests.")
|
|
178
|
-
other_ids.add(r
|
|
190
|
+
other_ids.add(r['InstanceId'])
|
|
179
191
|
batch.append(r)
|
|
180
192
|
if batch:
|
|
181
193
|
yield batch
|
|
182
194
|
logger.info('%i spot requests(s) are open (%i of which are pending evaluation and %i '
|
|
183
|
-
|
|
195
|
+
'are pending fulfillment), %i are active and %i are in another state.',
|
|
184
196
|
*list(map(len, (open_ids, eval_ids, fulfill_ids, active_ids, other_ids))))
|
|
185
197
|
if not open_ids or tentative and not eval_ids and not fulfill_ids:
|
|
186
198
|
break
|
|
@@ -192,8 +204,7 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
|
|
|
192
204
|
time.sleep(sleep_time)
|
|
193
205
|
for attempt in retry_ec2(retry_while=spot_request_not_found):
|
|
194
206
|
with attempt:
|
|
195
|
-
requests =
|
|
196
|
-
list(open_ids))
|
|
207
|
+
requests = boto3_ec2.describe_spot_instance_requests(SpotInstanceRequestIds=list(open_ids))
|
|
197
208
|
except BaseException:
|
|
198
209
|
if open_ids:
|
|
199
210
|
with panic(logger):
|
|
@@ -204,47 +215,56 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
|
|
|
204
215
|
cancel()
|
|
205
216
|
|
|
206
217
|
|
|
207
|
-
def create_spot_instances(
|
|
218
|
+
def create_spot_instances(boto3_ec2: EC2Client, price, image_id, spec, num_instances=1, timeout=None, tentative=False, tags=None) -> Generator[DescribeInstancesResultTypeDef, None, None]:
|
|
208
219
|
"""
|
|
209
220
|
Create instances on the spot market.
|
|
210
221
|
"""
|
|
222
|
+
|
|
211
223
|
def spotRequestNotFound(e):
|
|
212
224
|
return getattr(e, 'error_code', None) == "InvalidSpotInstanceRequestID.NotFound"
|
|
213
225
|
|
|
226
|
+
spec['LaunchSpecification'].update({'ImageId': image_id}) # boto3 image id is in the launch specification
|
|
214
227
|
for attempt in retry_ec2(retry_for=a_long_time,
|
|
215
228
|
retry_while=inconsistencies_detected):
|
|
216
229
|
with attempt:
|
|
217
|
-
|
|
218
|
-
price,
|
|
230
|
+
requests_dict = boto3_ec2.request_spot_instances(
|
|
231
|
+
SpotPrice=price, InstanceCount=num_instances, **spec)
|
|
232
|
+
requests = requests_dict['SpotInstanceRequests']
|
|
219
233
|
|
|
220
234
|
if tags is not None:
|
|
221
|
-
for requestID in (request
|
|
235
|
+
for requestID in (request['SpotInstanceRequestId'] for request in requests):
|
|
222
236
|
for attempt in retry_ec2(retry_while=spotRequestNotFound):
|
|
223
237
|
with attempt:
|
|
224
|
-
|
|
238
|
+
boto3_ec2.create_tags(Resources=[requestID], Tags=tags)
|
|
225
239
|
|
|
226
240
|
num_active, num_other = 0, 0
|
|
227
241
|
# noinspection PyUnboundLocalVariable,PyTypeChecker
|
|
228
242
|
# request_spot_instances's type annotation is wrong
|
|
229
|
-
for batch in wait_spot_requests_active(
|
|
243
|
+
for batch in wait_spot_requests_active(boto3_ec2,
|
|
230
244
|
requests,
|
|
231
245
|
timeout=timeout,
|
|
232
246
|
tentative=tentative):
|
|
233
247
|
instance_ids = []
|
|
234
248
|
for request in batch:
|
|
235
|
-
|
|
236
|
-
|
|
249
|
+
request: SpotInstanceRequestTypeDef
|
|
250
|
+
if request["State"] == 'active':
|
|
251
|
+
instance_ids.append(request["InstanceId"])
|
|
237
252
|
num_active += 1
|
|
238
253
|
else:
|
|
239
254
|
logger.info(
|
|
240
255
|
'Request %s in unexpected state %s.',
|
|
241
|
-
request
|
|
242
|
-
request
|
|
256
|
+
request["InstanceId"],
|
|
257
|
+
request["State"])
|
|
243
258
|
num_other += 1
|
|
244
259
|
if instance_ids:
|
|
245
260
|
# This next line is the reason we batch. It's so we can get multiple instances in
|
|
246
261
|
# a single request.
|
|
247
|
-
|
|
262
|
+
for instance_id in instance_ids:
|
|
263
|
+
for attempt in retry_ec2():
|
|
264
|
+
with attempt:
|
|
265
|
+
# Increase hop limit from 1 to use Instance Metadata V2
|
|
266
|
+
boto3_ec2.modify_instance_metadata_options(InstanceId=instance_id, HttpPutResponseHopLimit=3)
|
|
267
|
+
yield boto3_ec2.describe_instances(InstanceIds=instance_ids)
|
|
248
268
|
if not num_active:
|
|
249
269
|
message = 'None of the spot requests entered the active state'
|
|
250
270
|
if tentative:
|
|
@@ -255,22 +275,43 @@ def create_spot_instances(ec2, price, image_id, spec, num_instances=1, timeout=N
|
|
|
255
275
|
logger.warning('%i request(s) entered a state other than active.', num_other)
|
|
256
276
|
|
|
257
277
|
|
|
258
|
-
def create_ondemand_instances(
|
|
278
|
+
def create_ondemand_instances(boto3_ec2: EC2Client, image_id: str, spec: Mapping[str, Any], num_instances: int=1) -> List[InstanceTypeDef]:
|
|
259
279
|
"""
|
|
260
280
|
Requests the RunInstances EC2 API call but accounts for the race between recently created
|
|
261
281
|
instance profiles, IAM roles and an instance creation that refers to them.
|
|
262
282
|
|
|
263
|
-
:rtype: List[
|
|
283
|
+
:rtype: List[InstanceTypeDef]
|
|
264
284
|
"""
|
|
265
|
-
instance_type = spec['
|
|
285
|
+
instance_type = spec['InstanceType']
|
|
266
286
|
logger.info('Creating %s instance(s) ... ', instance_type)
|
|
287
|
+
boto_instance_list = []
|
|
267
288
|
for attempt in retry_ec2(retry_for=a_long_time,
|
|
268
289
|
retry_while=inconsistencies_detected):
|
|
269
290
|
with attempt:
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
291
|
+
boto_instance_list: List[InstanceTypeDef] = boto3_ec2.run_instances(ImageId=image_id,
|
|
292
|
+
MinCount=num_instances,
|
|
293
|
+
MaxCount=num_instances,
|
|
294
|
+
**spec)['Instances']
|
|
295
|
+
|
|
296
|
+
return boto_instance_list
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def increase_instance_hop_limit(boto3_ec2: EC2Client, boto_instance_list: List[InstanceTypeDef]) -> None:
|
|
300
|
+
"""
|
|
301
|
+
Increase the default HTTP hop limit, as we are running Toil and Kubernetes inside a Docker container, so the default
|
|
302
|
+
hop limit of 1 will not be enough when grabbing metadata information with ec2_metadata
|
|
303
|
+
|
|
304
|
+
Must be called after the instances are guaranteed to be running.
|
|
305
|
+
|
|
306
|
+
:param boto_instance_list: List of boto instances to modify
|
|
307
|
+
:return:
|
|
308
|
+
"""
|
|
309
|
+
for boto_instance in boto_instance_list:
|
|
310
|
+
instance_id = boto_instance['InstanceId']
|
|
311
|
+
for attempt in retry_ec2():
|
|
312
|
+
with attempt:
|
|
313
|
+
# Increase hop limit from 1 to use Instance Metadata V2
|
|
314
|
+
boto3_ec2.modify_instance_metadata_options(InstanceId=instance_id, HttpPutResponseHopLimit=3)
|
|
274
315
|
|
|
275
316
|
|
|
276
317
|
def prune(bushy: dict) -> dict:
|
|
@@ -289,6 +330,7 @@ def prune(bushy: dict) -> dict:
|
|
|
289
330
|
# catch, and to wait on IAM items.
|
|
290
331
|
iam_client = establish_boto3_session().client('iam')
|
|
291
332
|
|
|
333
|
+
|
|
292
334
|
# exception is generated by a factory so we weirdly need a client instance to reference it
|
|
293
335
|
@retry(errors=[iam_client.exceptions.NoSuchEntityException],
|
|
294
336
|
intervals=[1, 1, 2, 4, 8, 16, 32, 64])
|
|
@@ -301,7 +343,7 @@ def wait_until_instance_profile_arn_exists(instance_profile_arn: str):
|
|
|
301
343
|
|
|
302
344
|
|
|
303
345
|
@retry(intervals=[5, 5, 10, 20, 20, 20, 20], errors=INCONSISTENCY_ERRORS)
|
|
304
|
-
def create_instances(ec2_resource:
|
|
346
|
+
def create_instances(ec2_resource: EC2ServiceResource,
|
|
305
347
|
image_id: str,
|
|
306
348
|
key_name: str,
|
|
307
349
|
instance_type: str,
|
|
@@ -312,7 +354,7 @@ def create_instances(ec2_resource: ServiceResource,
|
|
|
312
354
|
instance_profile_arn: Optional[str] = None,
|
|
313
355
|
placement_az: Optional[str] = None,
|
|
314
356
|
subnet_id: str = None,
|
|
315
|
-
tags: Optional[Dict[str, str]] = None) -> List[
|
|
357
|
+
tags: Optional[Dict[str, str]] = None) -> List[Instance]:
|
|
316
358
|
"""
|
|
317
359
|
Replaces create_ondemand_instances. Uses boto3 and returns a list of Boto3 instance dicts.
|
|
318
360
|
|
|
@@ -336,7 +378,10 @@ def create_instances(ec2_resource: ServiceResource,
|
|
|
336
378
|
'InstanceType': instance_type,
|
|
337
379
|
'UserData': user_data,
|
|
338
380
|
'BlockDeviceMappings': block_device_map,
|
|
339
|
-
'SubnetId': subnet_id
|
|
381
|
+
'SubnetId': subnet_id,
|
|
382
|
+
# Metadata V2 defaults hops to 1, which is an issue when running inside a docker container
|
|
383
|
+
# https://github.com/adamchainz/ec2-metadata?tab=readme-ov-file#instance-metadata-service-version-2
|
|
384
|
+
'MetadataOptions': {'HttpPutResponseHopLimit': 3}}
|
|
340
385
|
|
|
341
386
|
if instance_profile_arn:
|
|
342
387
|
# We could just retry when we get an error because the ARN doesn't
|
|
@@ -357,8 +402,9 @@ def create_instances(ec2_resource: ServiceResource,
|
|
|
357
402
|
|
|
358
403
|
return ec2_resource.create_instances(**prune(request))
|
|
359
404
|
|
|
405
|
+
|
|
360
406
|
@retry(intervals=[5, 5, 10, 20, 20, 20, 20], errors=INCONSISTENCY_ERRORS)
|
|
361
|
-
def create_launch_template(ec2_client:
|
|
407
|
+
def create_launch_template(ec2_client: EC2Client,
|
|
362
408
|
template_name: str,
|
|
363
409
|
image_id: str,
|
|
364
410
|
key_name: str,
|
|
@@ -400,7 +446,10 @@ def create_launch_template(ec2_client: BaseClient,
|
|
|
400
446
|
'InstanceType': instance_type,
|
|
401
447
|
'UserData': user_data,
|
|
402
448
|
'BlockDeviceMappings': block_device_map,
|
|
403
|
-
'SubnetId': subnet_id
|
|
449
|
+
'SubnetId': subnet_id,
|
|
450
|
+
# Increase hop limit from 1 to use Instance Metadata V2
|
|
451
|
+
'MetadataOptions': {'HttpPutResponseHopLimit': 3}
|
|
452
|
+
}
|
|
404
453
|
|
|
405
454
|
if instance_profile_arn:
|
|
406
455
|
# We could just retry when we get an error because the ARN doesn't
|
|
@@ -413,6 +462,7 @@ def create_launch_template(ec2_client: BaseClient,
|
|
|
413
462
|
if placement_az:
|
|
414
463
|
template['Placement'] = {'AvailabilityZone': placement_az}
|
|
415
464
|
|
|
465
|
+
flat_tags = []
|
|
416
466
|
if tags:
|
|
417
467
|
# Tag everything when we make it.
|
|
418
468
|
flat_tags = flatten_tags(tags)
|
|
@@ -429,17 +479,16 @@ def create_launch_template(ec2_client: BaseClient,
|
|
|
429
479
|
|
|
430
480
|
|
|
431
481
|
@retry(intervals=[5, 5, 10, 20, 20, 20, 20], errors=INCONSISTENCY_ERRORS)
|
|
432
|
-
def create_auto_scaling_group(autoscaling_client:
|
|
482
|
+
def create_auto_scaling_group(autoscaling_client: AutoScalingClient,
|
|
433
483
|
asg_name: str,
|
|
434
484
|
launch_template_ids: Dict[str, str],
|
|
435
485
|
vpc_subnets: List[str],
|
|
436
486
|
min_size: int,
|
|
437
487
|
max_size: int,
|
|
438
|
-
instance_types: Optional[
|
|
488
|
+
instance_types: Optional[Iterable[str]] = None,
|
|
439
489
|
spot_bid: Optional[float] = None,
|
|
440
490
|
spot_cheapest: bool = False,
|
|
441
491
|
tags: Optional[Dict[str, str]] = None) -> None:
|
|
442
|
-
|
|
443
492
|
"""
|
|
444
493
|
Create a new Auto Scaling Group with the given name (which is also its
|
|
445
494
|
unique identifier).
|
|
@@ -472,7 +521,7 @@ def create_auto_scaling_group(autoscaling_client: BaseClient,
|
|
|
472
521
|
"""
|
|
473
522
|
|
|
474
523
|
if instance_types is None:
|
|
475
|
-
instance_types = []
|
|
524
|
+
instance_types: List[str] = []
|
|
476
525
|
|
|
477
526
|
if instance_types is not None and len(instance_types) > 20:
|
|
478
527
|
raise RuntimeError(f"Too many instance types ({len(instance_types)}) in group; AWS supports only 20.")
|
|
@@ -493,8 +542,8 @@ def create_auto_scaling_group(autoscaling_client: BaseClient,
|
|
|
493
542
|
# We need to use a launch template per instance type so that different
|
|
494
543
|
# instance types with specified EBS storage size overrides will get their
|
|
495
544
|
# storage.
|
|
496
|
-
mip = {'LaunchTemplate': {'LaunchTemplateSpecification': get_launch_template_spec(next(iter(instance_types))),
|
|
497
|
-
'Overrides': [{'InstanceType': t, 'LaunchTemplateSpecification': get_launch_template_spec(t)} for t in instance_types]}}
|
|
545
|
+
mip = {'LaunchTemplate': {'LaunchTemplateSpecification': get_launch_template_spec(next(iter(instance_types))), # noqa
|
|
546
|
+
'Overrides': [{'InstanceType': t, 'LaunchTemplateSpecification': get_launch_template_spec(t)} for t in instance_types]}} # noqa
|
|
498
547
|
|
|
499
548
|
if spot_bid is not None:
|
|
500
549
|
# Ask for spot instances by saying everything above base capacity of 0 should be spot.
|