toil 6.1.0a1__py3-none-any.whl → 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. toil/__init__.py +1 -232
  2. toil/batchSystems/abstractBatchSystem.py +41 -17
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +79 -65
  4. toil/batchSystems/awsBatch.py +8 -8
  5. toil/batchSystems/cleanup_support.py +7 -3
  6. toil/batchSystems/contained_executor.py +4 -5
  7. toil/batchSystems/gridengine.py +1 -1
  8. toil/batchSystems/htcondor.py +5 -5
  9. toil/batchSystems/kubernetes.py +25 -11
  10. toil/batchSystems/local_support.py +3 -3
  11. toil/batchSystems/lsf.py +9 -9
  12. toil/batchSystems/mesos/batchSystem.py +4 -4
  13. toil/batchSystems/mesos/executor.py +3 -2
  14. toil/batchSystems/options.py +9 -0
  15. toil/batchSystems/singleMachine.py +11 -10
  16. toil/batchSystems/slurm.py +129 -16
  17. toil/batchSystems/torque.py +1 -1
  18. toil/bus.py +45 -3
  19. toil/common.py +56 -31
  20. toil/cwl/cwltoil.py +442 -371
  21. toil/deferred.py +1 -1
  22. toil/exceptions.py +1 -1
  23. toil/fileStores/abstractFileStore.py +69 -20
  24. toil/fileStores/cachingFileStore.py +6 -22
  25. toil/fileStores/nonCachingFileStore.py +6 -15
  26. toil/job.py +270 -86
  27. toil/jobStores/abstractJobStore.py +37 -31
  28. toil/jobStores/aws/jobStore.py +280 -218
  29. toil/jobStores/aws/utils.py +60 -31
  30. toil/jobStores/conftest.py +2 -2
  31. toil/jobStores/fileJobStore.py +3 -3
  32. toil/jobStores/googleJobStore.py +3 -4
  33. toil/leader.py +89 -38
  34. toil/lib/aws/__init__.py +26 -10
  35. toil/lib/aws/iam.py +2 -2
  36. toil/lib/aws/session.py +62 -22
  37. toil/lib/aws/utils.py +73 -37
  38. toil/lib/conversions.py +24 -1
  39. toil/lib/ec2.py +118 -69
  40. toil/lib/expando.py +1 -1
  41. toil/lib/generatedEC2Lists.py +8 -8
  42. toil/lib/io.py +42 -4
  43. toil/lib/misc.py +1 -3
  44. toil/lib/resources.py +57 -16
  45. toil/lib/retry.py +12 -5
  46. toil/lib/threading.py +29 -14
  47. toil/lib/throttle.py +1 -1
  48. toil/options/common.py +31 -30
  49. toil/options/wdl.py +5 -0
  50. toil/provisioners/__init__.py +9 -3
  51. toil/provisioners/abstractProvisioner.py +12 -2
  52. toil/provisioners/aws/__init__.py +20 -15
  53. toil/provisioners/aws/awsProvisioner.py +406 -329
  54. toil/provisioners/gceProvisioner.py +2 -2
  55. toil/provisioners/node.py +13 -5
  56. toil/server/app.py +1 -1
  57. toil/statsAndLogging.py +93 -23
  58. toil/test/__init__.py +27 -12
  59. toil/test/batchSystems/batchSystemTest.py +40 -33
  60. toil/test/batchSystems/batch_system_plugin_test.py +79 -0
  61. toil/test/batchSystems/test_slurm.py +22 -7
  62. toil/test/cactus/__init__.py +0 -0
  63. toil/test/cactus/test_cactus_integration.py +58 -0
  64. toil/test/cwl/cwlTest.py +245 -236
  65. toil/test/cwl/seqtk_seq.cwl +1 -1
  66. toil/test/docs/scriptsTest.py +11 -14
  67. toil/test/jobStores/jobStoreTest.py +40 -54
  68. toil/test/lib/aws/test_iam.py +2 -2
  69. toil/test/lib/test_ec2.py +1 -1
  70. toil/test/options/__init__.py +13 -0
  71. toil/test/options/options.py +37 -0
  72. toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
  73. toil/test/provisioners/clusterTest.py +99 -16
  74. toil/test/server/serverTest.py +2 -2
  75. toil/test/src/autoDeploymentTest.py +1 -1
  76. toil/test/src/dockerCheckTest.py +2 -1
  77. toil/test/src/environmentTest.py +125 -0
  78. toil/test/src/fileStoreTest.py +1 -1
  79. toil/test/src/jobDescriptionTest.py +18 -8
  80. toil/test/src/jobTest.py +1 -1
  81. toil/test/src/realtimeLoggerTest.py +4 -0
  82. toil/test/src/workerTest.py +52 -19
  83. toil/test/utils/toilDebugTest.py +62 -4
  84. toil/test/utils/utilsTest.py +23 -21
  85. toil/test/wdl/wdltoil_test.py +49 -21
  86. toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
  87. toil/toilState.py +68 -9
  88. toil/utils/toilDebugFile.py +1 -1
  89. toil/utils/toilDebugJob.py +153 -26
  90. toil/utils/toilLaunchCluster.py +12 -2
  91. toil/utils/toilRsyncCluster.py +7 -2
  92. toil/utils/toilSshCluster.py +7 -3
  93. toil/utils/toilStats.py +310 -266
  94. toil/utils/toilStatus.py +98 -52
  95. toil/version.py +11 -11
  96. toil/wdl/wdltoil.py +644 -225
  97. toil/worker.py +125 -83
  98. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
  99. toil-7.0.0.dist-info/METADATA +158 -0
  100. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/RECORD +103 -96
  101. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
  102. toil-6.1.0a1.dist-info/METADATA +0 -125
  103. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
  104. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,8 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+ from __future__ import annotations
15
+
14
16
  import json
15
17
  import logging
16
18
  import os
@@ -29,34 +31,34 @@ from typing import (Any,
29
31
  Iterable,
30
32
  List,
31
33
  Optional,
32
- Set)
34
+ Set,
35
+ Union,
36
+ Literal,
37
+ cast,
38
+ TypeVar)
33
39
  from urllib.parse import unquote
34
40
 
35
41
  # We need these to exist as attributes we can get off of the boto object
36
- import boto.ec2
37
- import boto.iam
38
- import boto.vpc
39
- from boto.ec2.blockdevicemapping import \
40
- BlockDeviceMapping as Boto2BlockDeviceMapping
41
- from boto.ec2.blockdevicemapping import BlockDeviceType as Boto2BlockDeviceType
42
- from boto.ec2.instance import Instance as Boto2Instance
43
- from boto.exception import BotoServerError, EC2ResponseError
44
- from boto.utils import get_instance_metadata
45
42
  from botocore.exceptions import ClientError
43
+ from mypy_boto3_autoscaling.client import AutoScalingClient
44
+ from mypy_boto3_ec2.service_resource import Instance
45
+ from mypy_boto3_iam.type_defs import InstanceProfileTypeDef, RoleTypeDef, ListRolePoliciesResponseTypeDef
46
+ from mypy_extensions import VarArg, KwArg
46
47
 
47
- from toil.lib.aws import zone_to_region
48
+ from toil.lib.aws import zone_to_region, AWSRegionName, AWSServerErrors
48
49
  from toil.lib.aws.ami import get_flatcar_ami
49
50
  from toil.lib.aws.iam import (CLUSTER_LAUNCHING_PERMISSIONS,
50
51
  get_policy_permissions,
51
52
  policy_permissions_allow)
52
53
  from toil.lib.aws.session import AWSConnectionManager
53
- from toil.lib.aws.utils import create_s3_bucket
54
+ from toil.lib.aws.utils import create_s3_bucket, flatten_tags, boto3_pager
54
55
  from toil.lib.conversions import human2bytes
55
56
  from toil.lib.ec2 import (a_short_time,
56
57
  create_auto_scaling_group,
57
58
  create_instances,
58
59
  create_launch_template,
59
60
  create_ondemand_instances,
61
+ increase_instance_hop_limit,
60
62
  create_spot_instances,
61
63
  wait_instances_running,
62
64
  wait_transition,
@@ -73,12 +75,20 @@ from toil.lib.retry import (ErrorCondition,
73
75
  old_retry,
74
76
  retry)
75
77
  from toil.provisioners import (ClusterCombinationNotSupportedException,
76
- NoSuchClusterException)
78
+ NoSuchClusterException,
79
+ NoSuchZoneException)
77
80
  from toil.provisioners.abstractProvisioner import (AbstractProvisioner,
78
81
  ManagedNodesNotSupportedException,
79
82
  Shape)
80
83
  from toil.provisioners.aws import get_best_aws_zone
81
84
  from toil.provisioners.node import Node
85
+ from toil.lib.aws.session import client as get_client
86
+
87
+ from mypy_boto3_ec2.client import EC2Client
88
+ from mypy_boto3_iam.client import IAMClient
89
+ from mypy_boto3_ec2.type_defs import DescribeInstancesResultTypeDef, InstanceTypeDef, TagTypeDef, BlockDeviceMappingTypeDef, EbsBlockDeviceTypeDef, FilterTypeDef, SpotInstanceRequestTypeDef, TagDescriptionTypeDef, SecurityGroupTypeDef, \
90
+ CreateSecurityGroupResultTypeDef, IpPermissionTypeDef, ReservationTypeDef
91
+ from mypy_boto3_s3.literals import BucketLocationConstraintType
82
92
 
83
93
  logger = logging.getLogger(__name__)
84
94
  logging.getLogger("boto").setLevel(logging.CRITICAL)
@@ -98,14 +108,8 @@ _S3_BUCKET_MAX_NAME_LEN = 63
98
108
  # The suffix of the S3 bucket associated with the cluster
99
109
  _S3_BUCKET_INTERNAL_SUFFIX = '--internal'
100
110
 
101
- # prevent removal of these imports
102
- str(boto.ec2)
103
- str(boto.iam)
104
- str(boto.vpc)
105
-
106
111
 
107
-
108
- def awsRetryPredicate(e):
112
+ def awsRetryPredicate(e: Exception) -> bool:
109
113
  if isinstance(e, socket.gaierror):
110
114
  # Could be a DNS outage:
111
115
  # socket.gaierror: [Errno -2] Name or service not known
@@ -135,33 +139,38 @@ def expectedShutdownErrors(e: Exception) -> bool:
135
139
  return get_error_status(e) == 400 and 'dependent object' in get_error_body(e)
136
140
 
137
141
 
138
- def awsRetry(f):
142
+ F = TypeVar("F") # so mypy understands passed through types
143
+
144
+
145
+ def awsRetry(f: Callable[..., F]) -> Callable[..., F]:
139
146
  """
140
- This decorator retries the wrapped function if aws throws unexpected errors
141
- errors.
147
+ This decorator retries the wrapped function if aws throws unexpected errors.
148
+
142
149
  It should wrap any function that makes use of boto
143
150
  """
151
+
144
152
  @wraps(f)
145
- def wrapper(*args, **kwargs):
153
+ def wrapper(*args: Any, **kwargs: Any) -> Any:
146
154
  for attempt in old_retry(delays=truncExpBackoff(),
147
155
  timeout=300,
148
156
  predicate=awsRetryPredicate):
149
157
  with attempt:
150
158
  return f(*args, **kwargs)
159
+
151
160
  return wrapper
152
161
 
153
162
 
154
- def awsFilterImpairedNodes(nodes, ec2):
163
+ def awsFilterImpairedNodes(nodes: List[InstanceTypeDef], boto3_ec2: EC2Client) -> List[InstanceTypeDef]:
155
164
  # if TOIL_AWS_NODE_DEBUG is set don't terminate nodes with
156
165
  # failing status checks so they can be debugged
157
166
  nodeDebug = os.environ.get('TOIL_AWS_NODE_DEBUG') in ('True', 'TRUE', 'true', True)
158
167
  if not nodeDebug:
159
168
  return nodes
160
- nodeIDs = [node.id for node in nodes]
161
- statuses = ec2.get_all_instance_status(instance_ids=nodeIDs)
162
- statusMap = {status.id: status.instance_status for status in statuses}
163
- healthyNodes = [node for node in nodes if statusMap.get(node.id, None) != 'impaired']
164
- impairedNodes = [node.id for node in nodes if statusMap.get(node.id, None) == 'impaired']
169
+ nodeIDs = [node["InstanceId"] for node in nodes]
170
+ statuses = boto3_ec2.describe_instance_status(InstanceIds=nodeIDs)
171
+ statusMap = {status["InstanceId"]: status["InstanceStatus"]["Status"] for status in statuses["InstanceStatuses"]}
172
+ healthyNodes = [node for node in nodes if statusMap.get(node["InstanceId"], None) != 'impaired']
173
+ impairedNodes = [node["InstanceId"] for node in nodes if statusMap.get(node["InstanceId"], None) == 'impaired']
165
174
  logger.warning('TOIL_AWS_NODE_DEBUG is set and nodes %s have failed EC2 status checks so '
166
175
  'will not be terminated.', ' '.join(impairedNodes))
167
176
  return healthyNodes
@@ -170,8 +179,24 @@ def awsFilterImpairedNodes(nodes, ec2):
170
179
  class InvalidClusterStateException(Exception):
171
180
  pass
172
181
 
182
+
183
+ def collapse_tags(instance_tags: List[TagTypeDef]) -> Dict[str, str]:
184
+ """
185
+ Collapse tags from boto3 format to node format
186
+ :param instance_tags: tags as list of TagTypeDef
187
+ :return: Dict of tags
188
+ """
189
+ collapsed_tags: Dict[str, str] = dict()
190
+ for tag in instance_tags:
191
+ if tag.get("Key") is not None:
192
+ collapsed_tags[tag["Key"]] = tag["Value"]
193
+ return collapsed_tags
194
+
195
+
173
196
  class AWSProvisioner(AbstractProvisioner):
174
- def __init__(self, clusterName, clusterType, zone, nodeStorage, nodeStorageOverrides, sseKey):
197
+ def __init__(self, clusterName: Optional[str], clusterType: Optional[str], zone: Optional[str],
198
+ nodeStorage: int, nodeStorageOverrides: Optional[List[str]], sseKey: Optional[str],
199
+ enable_fuse: bool):
175
200
  self.cloud = 'aws'
176
201
  self._sseKey = sseKey
177
202
  # self._zone will be filled in by base class constructor
@@ -186,7 +211,7 @@ class AWSProvisioner(AbstractProvisioner):
186
211
 
187
212
  # Determine our region to work in, before readClusterSettings() which
188
213
  # might need it. TODO: support multiple regions in one cluster
189
- self._region = zone_to_region(zone)
214
+ self._region: AWSRegionName = zone_to_region(zone)
190
215
 
191
216
  # Set up our connections to AWS
192
217
  self.aws = AWSConnectionManager()
@@ -197,16 +222,22 @@ class AWSProvisioner(AbstractProvisioner):
197
222
 
198
223
  # Call base class constructor, which will call createClusterSettings()
199
224
  # or readClusterSettings()
200
- super().__init__(clusterName, clusterType, zone, nodeStorage, nodeStorageOverrides)
225
+ super().__init__(clusterName, clusterType, zone, nodeStorage, nodeStorageOverrides, enable_fuse)
226
+
227
+ if self._zone is None:
228
+ logger.warning("Leader zone was never initialized before creating AWS provisioner. Defaulting to cluster zone.")
229
+
230
+ self._leader_subnet: str = self._get_default_subnet(self._zone or zone)
231
+ self._tags: Dict[str, Any] = {}
201
232
 
202
233
  # After self.clusterName is set, generate a valid name for the S3 bucket associated with this cluster
203
234
  suffix = _S3_BUCKET_INTERNAL_SUFFIX
204
235
  self.s3_bucket_name = self.clusterName[:_S3_BUCKET_MAX_NAME_LEN - len(suffix)] + suffix
205
236
 
206
- def supportedClusterTypes(self):
237
+ def supportedClusterTypes(self) -> Set[str]:
207
238
  return {'mesos', 'kubernetes'}
208
239
 
209
- def createClusterSettings(self):
240
+ def createClusterSettings(self) -> None:
210
241
  """
211
242
  Create a new set of cluster settings for a cluster to be deployed into
212
243
  AWS.
@@ -216,41 +247,51 @@ class AWSProvisioner(AbstractProvisioner):
216
247
  # constructor.
217
248
  assert self._zone is not None
218
249
 
219
- def readClusterSettings(self):
250
+ def readClusterSettings(self) -> None:
220
251
  """
221
252
  Reads the cluster settings from the instance metadata, which assumes
222
253
  the instance is the leader.
223
254
  """
224
- instanceMetaData = get_instance_metadata()
225
- ec2 = self.aws.boto2(self._region, 'ec2')
226
- instance = ec2.get_all_instances(instance_ids=[instanceMetaData["instance-id"]])[0].instances[0]
255
+ from ec2_metadata import ec2_metadata
256
+ boto3_ec2 = self.aws.client(self._region, 'ec2')
257
+ instance: InstanceTypeDef = boto3_ec2.describe_instances(InstanceIds=[ec2_metadata.instance_id])["Reservations"][0]["Instances"][0]
227
258
  # The cluster name is the same as the name of the leader.
228
- self.clusterName = str(instance.tags["Name"])
259
+ self.clusterName: str = "default-toil-cluster-name"
260
+ for tag in instance["Tags"]:
261
+ if tag.get("Key") == "Name":
262
+ self.clusterName = tag["Value"]
229
263
  # Determine what subnet we, the leader, are in
230
- self._leader_subnet = instance.subnet_id
264
+ self._leader_subnet = instance["SubnetId"]
231
265
  # Determine where to deploy workers.
232
266
  self._worker_subnets_by_zone = self._get_good_subnets_like(self._leader_subnet)
233
267
 
234
- self._leaderPrivateIP = instanceMetaData['local-ipv4'] # this is PRIVATE IP
235
- self._keyName = list(instanceMetaData['public-keys'].keys())[0]
236
- self._tags = {k: v for k, v in self.getLeader().tags.items() if k != _TAG_KEY_TOIL_NODE_TYPE}
268
+ self._leaderPrivateIP = ec2_metadata.private_ipv4 # this is PRIVATE IP
269
+ self._tags = {k: v for k, v in (self.getLeader().tags or {}).items() if k != _TAG_KEY_TOIL_NODE_TYPE}
237
270
  # Grab the ARN name of the instance profile (a str) to apply to workers
238
- self._leaderProfileArn = instanceMetaData['iam']['info']['InstanceProfileArn']
271
+ leader_info = None
272
+ for attempt in old_retry(timeout=300, predicate=lambda e: True):
273
+ with attempt:
274
+ leader_info = ec2_metadata.iam_info
275
+ if leader_info is None:
276
+ raise RuntimeError("Could not get EC2 metadata IAM info")
277
+ if leader_info is None:
278
+ # This is more for mypy as it is unable to see that the retry will guarantee this is not None
279
+ # and that this is not reachable
280
+ raise RuntimeError(f"Leader IAM metadata is unreachable.")
281
+ self._leaderProfileArn = leader_info["InstanceProfileArn"]
282
+
239
283
  # The existing metadata API returns a single string if there is one security group, but
240
284
  # a list when there are multiple: change the format to always be a list.
241
- rawSecurityGroups = instanceMetaData['security-groups']
242
- self._leaderSecurityGroupNames = {rawSecurityGroups} if not isinstance(rawSecurityGroups, list) else set(rawSecurityGroups)
285
+ rawSecurityGroups = ec2_metadata.security_groups
286
+ self._leaderSecurityGroupNames: Set[str] = set(rawSecurityGroups)
243
287
  # Since we have access to the names, we don't also need to use any IDs
244
- self._leaderSecurityGroupIDs = set()
288
+ self._leaderSecurityGroupIDs: Set[str] = set()
245
289
 
246
290
  # Let the base provisioner work out how to deploy duly authorized
247
291
  # workers for this leader.
248
292
  self._setLeaderWorkerAuthentication()
249
293
 
250
- @retry(errors=[ErrorCondition(
251
- error=ClientError,
252
- error_codes=[404, 500, 502, 503, 504]
253
- )])
294
+ @retry(errors=[AWSServerErrors])
254
295
  def _write_file_to_cloud(self, key: str, contents: bytes) -> str:
255
296
  bucket_name = self.s3_bucket_name
256
297
 
@@ -289,7 +330,7 @@ class AWSProvisioner(AbstractProvisioner):
289
330
  obj = self.aws.resource(self._region, 's3').Object(bucket_name, key)
290
331
 
291
332
  try:
292
- return obj.get().get('Body').read()
333
+ return obj.get()['Body'].read()
293
334
  except ClientError as e:
294
335
  if get_error_status(e) == 404:
295
336
  logger.warning(f'Trying to read non-existent file "{key}" from {bucket_name}.')
@@ -305,11 +346,11 @@ class AWSProvisioner(AbstractProvisioner):
305
346
  owner: str,
306
347
  keyName: str,
307
348
  botoPath: str,
308
- userTags: Optional[dict],
349
+ userTags: Optional[Dict[str, str]],
309
350
  vpcSubnet: Optional[str],
310
351
  awsEc2ProfileArn: Optional[str],
311
- awsEc2ExtraSecurityGroupIds: Optional[list],
312
- **kwargs):
352
+ awsEc2ExtraSecurityGroupIds: Optional[List[str]],
353
+ **kwargs: Dict[str, Any]) -> None:
313
354
  """
314
355
  Starts a single leader node and populates this class with the leader's metadata.
315
356
 
@@ -352,9 +393,6 @@ class AWSProvisioner(AbstractProvisioner):
352
393
  if vpcSubnet:
353
394
  # This is where we put the leader
354
395
  self._leader_subnet = vpcSubnet
355
- else:
356
- # Find the default subnet for the zone
357
- self._leader_subnet = self._get_default_subnet(self._zone)
358
396
 
359
397
  profileArn = awsEc2ProfileArn or self._createProfileArn()
360
398
 
@@ -370,7 +408,7 @@ class AWSProvisioner(AbstractProvisioner):
370
408
  if userTags is not None:
371
409
  self._tags.update(userTags)
372
410
 
373
- #All user specified tags have been set
411
+ # All user specified tags have been set
374
412
  userData = self._getIgnitionUserData('leader', architecture=self._architecture)
375
413
 
376
414
  if self.clusterType == 'kubernetes':
@@ -383,18 +421,18 @@ class AWSProvisioner(AbstractProvisioner):
383
421
  leader_tags[_TAG_KEY_TOIL_NODE_TYPE] = 'leader'
384
422
  logger.debug('Launching leader with tags: %s', leader_tags)
385
423
 
386
- instances = create_instances(self.aws.resource(self._region, 'ec2'),
387
- image_id=self._discoverAMI(),
388
- num_instances=1,
389
- key_name=self._keyName,
390
- security_group_ids=createdSGs + (awsEc2ExtraSecurityGroupIds or []),
391
- instance_type=leader_type.name,
392
- user_data=userData,
393
- block_device_map=bdms,
394
- instance_profile_arn=profileArn,
395
- placement_az=self._zone,
396
- subnet_id=self._leader_subnet,
397
- tags=leader_tags)
424
+ instances: List[Instance] = create_instances(self.aws.resource(self._region, 'ec2'),
425
+ image_id=self._discoverAMI(),
426
+ num_instances=1,
427
+ key_name=self._keyName,
428
+ security_group_ids=createdSGs + (awsEc2ExtraSecurityGroupIds or []),
429
+ instance_type=leader_type.name,
430
+ user_data=userData,
431
+ block_device_map=bdms,
432
+ instance_profile_arn=profileArn,
433
+ placement_az=self._zone,
434
+ subnet_id=self._leader_subnet,
435
+ tags=leader_tags)
398
436
 
399
437
  # wait for the leader to exist at all
400
438
  leader = instances[0]
@@ -425,7 +463,7 @@ class AWSProvisioner(AbstractProvisioner):
425
463
  leaderNode = Node(publicIP=leader.public_ip_address, privateIP=leader.private_ip_address,
426
464
  name=leader.id, launchTime=leader.launch_time,
427
465
  nodeType=leader_type.name, preemptible=False,
428
- tags=leader.tags)
466
+ tags=collapse_tags(leader.tags))
429
467
  leaderNode.waitForNode('toil_leader')
430
468
 
431
469
  # Download credentials
@@ -483,7 +521,7 @@ class AWSProvisioner(AbstractProvisioner):
483
521
  acls = set(self._get_subnet_acls(base_subnet_id))
484
522
 
485
523
  # Compose a filter that selects the subnets we might want
486
- filters = [{
524
+ filters: List[FilterTypeDef] = [{
487
525
  'Name': 'vpc-id',
488
526
  'Values': [vpc_id]
489
527
  }, {
@@ -495,7 +533,7 @@ class AWSProvisioner(AbstractProvisioner):
495
533
  }]
496
534
 
497
535
  # Fill in this collection
498
- by_az = {}
536
+ by_az: Dict[str, List[str]] = {}
499
537
 
500
538
  # Go get all the subnets. There's no way to page manually here so it
501
539
  # must page automatically.
@@ -534,7 +572,7 @@ class AWSProvisioner(AbstractProvisioner):
534
572
  }]
535
573
 
536
574
  # TODO: Can't we use the resource's network_acls.filter(Filters=)?
537
- return [item['NetworkAclId'] for item in self._pager(ec2.describe_network_acls,
575
+ return [item['NetworkAclId'] for item in boto3_pager(ec2.describe_network_acls,
538
576
  'NetworkAcls',
539
577
  Filters=filters)]
540
578
 
@@ -546,7 +584,7 @@ class AWSProvisioner(AbstractProvisioner):
546
584
  """
547
585
 
548
586
  # Compose a filter that selects the default subnet in the AZ
549
- filters = [{
587
+ filters: List[FilterTypeDef] = [{
550
588
  'Name': 'default-for-az',
551
589
  'Values': ['true']
552
590
  }, {
@@ -586,7 +624,7 @@ class AWSProvisioner(AbstractProvisioner):
586
624
 
587
625
  return 'aws'
588
626
 
589
- def getNodeShape(self, instance_type: str, preemptible=False) -> Shape:
627
+ def getNodeShape(self, instance_type: str, preemptible: bool=False) -> Shape:
590
628
  """
591
629
  Get the Shape for the given instance type (e.g. 't2.medium').
592
630
  """
@@ -603,13 +641,13 @@ class AWSProvisioner(AbstractProvisioner):
603
641
  # mesos about whether a job can run on a particular node type
604
642
  memory = (type_info.memory - 0.1) * 2 ** 30
605
643
  return Shape(wallTime=60 * 60,
606
- memory=memory,
644
+ memory=int(memory),
607
645
  cores=type_info.cores,
608
- disk=disk,
646
+ disk=int(disk),
609
647
  preemptible=preemptible)
610
648
 
611
649
  @staticmethod
612
- def retryPredicate(e):
650
+ def retryPredicate(e: Exception) -> bool:
613
651
  return awsRetryPredicate(e)
614
652
 
615
653
  def destroyCluster(self) -> None:
@@ -619,8 +657,8 @@ class AWSProvisioner(AbstractProvisioner):
619
657
  # The leader may create more instances while we're terminating the workers.
620
658
  vpcId = None
621
659
  try:
622
- leader = self._getLeaderInstance()
623
- vpcId = leader.vpc_id
660
+ leader = self._getLeaderInstanceBoto3()
661
+ vpcId = leader.get("VpcId")
624
662
  logger.info('Terminating the leader first ...')
625
663
  self._terminateInstances([leader])
626
664
  except (NoSuchClusterException, InvalidClusterStateException):
@@ -651,14 +689,16 @@ class AWSProvisioner(AbstractProvisioner):
651
689
  # Do the workers after the ASGs because some may belong to ASGs
652
690
  logger.info('Terminating any remaining workers ...')
653
691
  removed = False
654
- instances = self._get_nodes_in_cluster(include_stopped_nodes=True)
692
+ instances = self._get_nodes_in_cluster_boto3(include_stopped_nodes=True)
655
693
  spotIDs = self._getSpotRequestIDs()
694
+ boto3_ec2: EC2Client = self.aws.client(region=self._region, service_name="ec2")
656
695
  if spotIDs:
657
- self.aws.boto2(self._region, 'ec2').cancel_spot_instance_requests(request_ids=spotIDs)
696
+ boto3_ec2.cancel_spot_instance_requests(SpotInstanceRequestIds=spotIDs)
697
+ # self.aws.boto2(self._region, 'ec2').cancel_spot_instance_requests(request_ids=spotIDs)
658
698
  removed = True
659
- instancesToTerminate = awsFilterImpairedNodes(instances, self.aws.boto2(self._region, 'ec2'))
699
+ instancesToTerminate = awsFilterImpairedNodes(instances, self.aws.client(self._region, 'ec2'))
660
700
  if instancesToTerminate:
661
- vpcId = vpcId or instancesToTerminate[0].vpc_id
701
+ vpcId = vpcId or instancesToTerminate[0].get("VpcId")
662
702
  self._terminateInstances(instancesToTerminate)
663
703
  removed = True
664
704
  if removed:
@@ -672,7 +712,7 @@ class AWSProvisioner(AbstractProvisioner):
672
712
  # for some LaunchTemplate.
673
713
  mistake = False
674
714
  for ltID in self._get_launch_template_ids():
675
- response = self.aws.client(self._region, 'ec2').delete_launch_template(LaunchTemplateId=ltID)
715
+ response = boto3_ec2.delete_launch_template(LaunchTemplateId=ltID)
676
716
  if 'LaunchTemplate' not in response:
677
717
  mistake = True
678
718
  else:
@@ -694,16 +734,17 @@ class AWSProvisioner(AbstractProvisioner):
694
734
  removed = False
695
735
  for attempt in old_retry(timeout=300, predicate=expectedShutdownErrors):
696
736
  with attempt:
697
- for sg in self.aws.boto2(self._region, 'ec2').get_all_security_groups():
737
+ security_groups: List[SecurityGroupTypeDef] = boto3_ec2.describe_security_groups()["SecurityGroups"]
738
+ for security_group in security_groups:
698
739
  # TODO: If we terminate the leader and the workers but
699
740
  # miss the security group, we won't find it now because
700
741
  # we won't have vpcId set.
701
- if sg.name == self.clusterName and vpcId and sg.vpc_id == vpcId:
742
+ if security_group.get("GroupName") == self.clusterName and vpcId and security_group.get("VpcId") == vpcId:
702
743
  try:
703
- self.aws.boto2(self._region, 'ec2').delete_security_group(group_id=sg.id)
744
+ boto3_ec2.delete_security_group(GroupId=security_group["GroupId"])
704
745
  removed = True
705
- except BotoServerError as e:
706
- if e.error_code == 'InvalidGroup.NotFound':
746
+ except ClientError as e:
747
+ if get_error_code(e) == 'InvalidGroup.NotFound':
707
748
  pass
708
749
  else:
709
750
  raise
@@ -777,10 +818,9 @@ class AWSProvisioner(AbstractProvisioner):
777
818
 
778
819
  return spot_bid
779
820
 
780
- def addNodes(self, nodeTypes: Set[str], numNodes, preemptible, spotBid=None) -> int:
821
+ def addNodes(self, nodeTypes: Set[str], numNodes: int, preemptible: bool, spotBid: Optional[float]=None) -> int:
781
822
  # Grab the AWS connection we need
782
- ec2 = self.aws.boto2(self._region, 'ec2')
783
-
823
+ boto3_ec2 = get_client(service_name='ec2', region_name=self._region)
784
824
  assert self._leaderPrivateIP
785
825
 
786
826
  if preemptible:
@@ -792,7 +832,7 @@ class AWSProvisioner(AbstractProvisioner):
792
832
  node_type = next(iter(nodeTypes))
793
833
  type_info = E2Instances[node_type]
794
834
  root_vol_size = self._nodeStorageOverrides.get(node_type, self._nodeStorage)
795
- bdm = self._getBoto2BlockDeviceMapping(type_info,
835
+ bdm = self._getBoto3BlockDeviceMapping(type_info,
796
836
  rootVolSize=root_vol_size)
797
837
 
798
838
  # Pick a zone and subnet_id to launch into
@@ -803,7 +843,7 @@ class AWSProvisioner(AbstractProvisioner):
803
843
  # We're allowed to pick from any of these zones.
804
844
  zone_options = list(self._worker_subnets_by_zone.keys())
805
845
 
806
- zone = get_best_aws_zone(spotBid, type_info.name, ec2, zone_options)
846
+ zone = get_best_aws_zone(spotBid, type_info.name, boto3_ec2, zone_options)
807
847
  else:
808
848
  # We don't need to ever do any balancing across zones for on-demand
809
849
  # instances. Just pick a zone.
@@ -814,6 +854,9 @@ class AWSProvisioner(AbstractProvisioner):
814
854
  # The workers aren't allowed in the leader's zone.
815
855
  # Pick an arbitrary zone we can use.
816
856
  zone = next(iter(self._worker_subnets_by_zone.keys()))
857
+ if zone is None:
858
+ logger.exception("Could not find a valid zone. Make sure TOIL_AWS_ZONE is set or spot bids are not too low.")
859
+ raise NoSuchZoneException()
817
860
  if self._leader_subnet in self._worker_subnets_by_zone.get(zone, []):
818
861
  # The leader's subnet is an option for this zone, so use it.
819
862
  subnet_id = self._leader_subnet
@@ -822,21 +865,40 @@ class AWSProvisioner(AbstractProvisioner):
822
865
  subnet_id = next(iter(self._worker_subnets_by_zone[zone]))
823
866
 
824
867
  keyPath = self._sseKey if self._sseKey else None
825
- userData = self._getIgnitionUserData('worker', keyPath, preemptible, self._architecture)
868
+ userData: str = self._getIgnitionUserData('worker', keyPath, preemptible, self._architecture)
869
+ userDataBytes: bytes = b""
826
870
  if isinstance(userData, str):
827
871
  # Spot-market provisioning requires bytes for user data.
828
- userData = userData.encode('utf-8')
829
-
830
- kwargs = {'key_name': self._keyName,
831
- 'security_group_ids': self._getSecurityGroupIDs(),
832
- 'instance_type': type_info.name,
833
- 'user_data': userData,
834
- 'block_device_map': bdm,
835
- 'instance_profile_arn': self._leaderProfileArn,
836
- 'placement': zone,
837
- 'subnet_id': subnet_id}
838
-
839
- instancesLaunched = []
872
+ userDataBytes = userData.encode('utf-8')
873
+
874
+ spot_kwargs = {'KeyName': self._keyName,
875
+ 'LaunchSpecification': {
876
+ 'SecurityGroupIds': self._getSecurityGroupIDs(),
877
+ 'InstanceType': type_info.name,
878
+ 'UserData': userDataBytes,
879
+ 'BlockDeviceMappings': bdm,
880
+ 'IamInstanceProfile': {
881
+ 'Arn': self._leaderProfileArn
882
+ },
883
+ 'Placement': {
884
+ 'AvailabilityZone': zone
885
+ },
886
+ 'SubnetId': subnet_id}
887
+ }
888
+ on_demand_kwargs = {'KeyName': self._keyName,
889
+ 'SecurityGroupIds': self._getSecurityGroupIDs(),
890
+ 'InstanceType': type_info.name,
891
+ 'UserData': userDataBytes,
892
+ 'BlockDeviceMappings': bdm,
893
+ 'IamInstanceProfile': {
894
+ 'Arn': self._leaderProfileArn
895
+ },
896
+ 'Placement': {
897
+ 'AvailabilityZone': zone
898
+ },
899
+ 'SubnetId': subnet_id}
900
+
901
+ instancesLaunched: List[InstanceTypeDef] = []
840
902
 
841
903
  for attempt in old_retry(predicate=awsRetryPredicate):
842
904
  with attempt:
@@ -845,41 +907,45 @@ class AWSProvisioner(AbstractProvisioner):
845
907
  # every request in this method
846
908
  if not preemptible:
847
909
  logger.debug('Launching %s non-preemptible nodes', numNodes)
848
- instancesLaunched = create_ondemand_instances(ec2,
910
+ instancesLaunched = create_ondemand_instances(boto3_ec2=boto3_ec2,
849
911
  image_id=self._discoverAMI(),
850
- spec=kwargs, num_instances=numNodes)
912
+ spec=on_demand_kwargs, num_instances=numNodes)
851
913
  else:
852
914
  logger.debug('Launching %s preemptible nodes', numNodes)
853
915
  # force generator to evaluate
854
- instancesLaunched = list(create_spot_instances(ec2=ec2,
855
- price=spotBid,
856
- image_id=self._discoverAMI(),
857
- tags={_TAG_KEY_TOIL_CLUSTER_NAME: self.clusterName},
858
- spec=kwargs,
859
- num_instances=numNodes,
860
- tentative=True)
861
- )
916
+ generatedInstancesLaunched: List[DescribeInstancesResultTypeDef] = list(create_spot_instances(boto3_ec2=boto3_ec2,
917
+ price=spotBid,
918
+ image_id=self._discoverAMI(),
919
+ tags={_TAG_KEY_TOIL_CLUSTER_NAME: self.clusterName},
920
+ spec=spot_kwargs,
921
+ num_instances=numNodes,
922
+ tentative=True)
923
+ )
862
924
  # flatten the list
863
- instancesLaunched = [item for sublist in instancesLaunched for item in sublist]
925
+ flatten_reservations: List[ReservationTypeDef] = [reservation for subdict in generatedInstancesLaunched for reservation in subdict["Reservations"] for key, value in subdict.items()]
926
+ # get a flattened list of all requested instances, as before instancesLaunched is a dict of reservations which is a dict of instance requests
927
+ instancesLaunched = [instance for instances in flatten_reservations for instance in instances['Instances']]
864
928
 
865
929
  for attempt in old_retry(predicate=awsRetryPredicate):
866
930
  with attempt:
867
- wait_instances_running(ec2, instancesLaunched)
931
+ list(wait_instances_running(boto3_ec2, instancesLaunched)) # ensure all instances are running
932
+
933
+ increase_instance_hop_limit(boto3_ec2, instancesLaunched)
868
934
 
869
935
  self._tags[_TAG_KEY_TOIL_NODE_TYPE] = 'worker'
870
- AWSProvisioner._addTags(instancesLaunched, self._tags)
936
+ AWSProvisioner._addTags(boto3_ec2, instancesLaunched, self._tags)
871
937
  if self._sseKey:
872
938
  for i in instancesLaunched:
873
939
  self._waitForIP(i)
874
- node = Node(publicIP=i.ip_address, privateIP=i.private_ip_address, name=i.id,
875
- launchTime=i.launch_time, nodeType=i.instance_type, preemptible=preemptible,
876
- tags=i.tags)
940
+ node = Node(publicIP=i['PublicIpAddress'], privateIP=i['PrivateIpAddress'], name=i['InstanceId'],
941
+ launchTime=i['LaunchTime'], nodeType=i['InstanceType'], preemptible=preemptible,
942
+ tags=collapse_tags(i['Tags']))
877
943
  node.waitForNode('toil_worker')
878
944
  node.coreRsync([self._sseKey, ':' + self._sseKey], applianceName='toil_worker')
879
945
  logger.debug('Launched %s new instance(s)', numNodes)
880
946
  return len(instancesLaunched)
881
947
 
882
- def addManagedNodes(self, nodeTypes: Set[str], minNodes, maxNodes, preemptible, spotBid=None) -> None:
948
+ def addManagedNodes(self, nodeTypes: Set[str], minNodes: int, maxNodes: int, preemptible: bool, spotBid: Optional[float] = None) -> None:
883
949
 
884
950
  if self.clusterType != 'kubernetes':
885
951
  raise ManagedNodesNotSupportedException("Managed nodes only supported for Kubernetes clusters")
@@ -901,17 +967,17 @@ class AWSProvisioner(AbstractProvisioner):
901
967
 
902
968
  def getProvisionedWorkers(self, instance_type: Optional[str] = None, preemptible: Optional[bool] = None) -> List[Node]:
903
969
  assert self._leaderPrivateIP
904
- entireCluster = self._get_nodes_in_cluster(instance_type=instance_type)
970
+ entireCluster = self._get_nodes_in_cluster_boto3(instance_type=instance_type)
905
971
  logger.debug('All nodes in cluster: %s', entireCluster)
906
- workerInstances = [i for i in entireCluster if i.private_ip_address != self._leaderPrivateIP]
972
+ workerInstances: List[InstanceTypeDef] = [i for i in entireCluster if i["PrivateIpAddress"] != self._leaderPrivateIP]
907
973
  logger.debug('All workers found in cluster: %s', workerInstances)
908
974
  if preemptible is not None:
909
- workerInstances = [i for i in workerInstances if preemptible == (i.spot_instance_request_id is not None)]
975
+ workerInstances = [i for i in workerInstances if preemptible == (i["SpotInstanceRequestId"] is not None)]
910
976
  logger.debug('%spreemptible workers found in cluster: %s', 'non-' if not preemptible else '', workerInstances)
911
- workerInstances = awsFilterImpairedNodes(workerInstances, self.aws.boto2(self._region, 'ec2'))
912
- return [Node(publicIP=i.ip_address, privateIP=i.private_ip_address,
913
- name=i.id, launchTime=i.launch_time, nodeType=i.instance_type,
914
- preemptible=i.spot_instance_request_id is not None, tags=i.tags)
977
+ workerInstances = awsFilterImpairedNodes(workerInstances, self.aws.client(self._region, 'ec2'))
978
+ return [Node(publicIP=i["PublicIpAddress"], privateIP=i["PrivateIpAddress"],
979
+ name=i["InstanceId"], launchTime=i["LaunchTime"], nodeType=i["InstanceType"],
980
+ preemptible=i["SpotInstanceRequestId"] is not None, tags=collapse_tags(i["Tags"]))
915
981
  for i in workerInstances]
916
982
 
917
983
  @memoize
@@ -952,37 +1018,65 @@ class AWSProvisioner(AbstractProvisioner):
952
1018
  denamespaced = '/' + '_'.join(s.replace('_', '/') for s in namespaced_name.split('__'))
953
1019
  return denamespaced.startswith(self._toNameSpace())
954
1020
 
1021
+ def _getLeaderInstanceBoto3(self) -> InstanceTypeDef:
1022
+ """
1023
+ Get the Boto 3 instance for the cluster's leader.
1024
+ :return: InstanceTypeDef
1025
+ """
1026
+ # Tags are stored differently in Boto 3
1027
+ instances: List[InstanceTypeDef] = self._get_nodes_in_cluster_boto3(include_stopped_nodes=True)
1028
+ instances.sort(key=lambda x: x["LaunchTime"])
1029
+ try:
1030
+ leader = instances[0] # assume leader was launched first
1031
+ except IndexError:
1032
+ raise NoSuchClusterException(self.clusterName)
1033
+ if leader.get("Tags") is not None:
1034
+ tag_value = next(item["Value"] for item in leader["Tags"] if item["Key"] == _TAG_KEY_TOIL_NODE_TYPE)
1035
+ else:
1036
+ tag_value = None
1037
+ if (tag_value or 'leader') != 'leader':
1038
+ raise InvalidClusterStateException(
1039
+ 'Invalid cluster state! The first launched instance appears not to be the leader '
1040
+ 'as it is missing the "leader" tag. The safest recovery is to destroy the cluster '
1041
+ 'and restart the job. Incorrect Leader ID: %s' % leader["InstanceId"]
1042
+ )
1043
+ return leader
955
1044
 
956
- def _getLeaderInstance(self) -> Boto2Instance:
1045
+ def _getLeaderInstance(self) -> InstanceTypeDef:
957
1046
  """
958
1047
  Get the Boto 2 instance for the cluster's leader.
959
1048
  """
960
- instances = self._get_nodes_in_cluster(include_stopped_nodes=True)
961
- instances.sort(key=lambda x: x.launch_time)
1049
+ instances = self._get_nodes_in_cluster_boto3(include_stopped_nodes=True)
1050
+ instances.sort(key=lambda x: x["LaunchTime"])
962
1051
  try:
963
- leader = instances[0] # assume leader was launched first
1052
+ leader: InstanceTypeDef = instances[0] # assume leader was launched first
964
1053
  except IndexError:
965
1054
  raise NoSuchClusterException(self.clusterName)
966
- if (leader.tags.get(_TAG_KEY_TOIL_NODE_TYPE) or 'leader') != 'leader':
1055
+ tagged_node_type: str = 'leader'
1056
+ for tag in leader["Tags"]:
1057
+ # If a tag specifying node type exists,
1058
+ if tag.get("Key") is not None and tag["Key"] == _TAG_KEY_TOIL_NODE_TYPE:
1059
+ tagged_node_type = tag["Value"]
1060
+ if tagged_node_type != 'leader':
967
1061
  raise InvalidClusterStateException(
968
1062
  'Invalid cluster state! The first launched instance appears not to be the leader '
969
1063
  'as it is missing the "leader" tag. The safest recovery is to destroy the cluster '
970
- 'and restart the job. Incorrect Leader ID: %s' % leader.id
1064
+ 'and restart the job. Incorrect Leader ID: %s' % leader["InstanceId"]
971
1065
  )
972
1066
  return leader
973
1067
 
974
- def getLeader(self, wait=False) -> Node:
1068
+ def getLeader(self, wait: bool = False) -> Node:
975
1069
  """
976
1070
  Get the leader for the cluster as a Toil Node object.
977
1071
  """
978
- leader = self._getLeaderInstance()
1072
+ leader: InstanceTypeDef = self._getLeaderInstanceBoto3()
979
1073
 
980
- leaderNode = Node(publicIP=leader.ip_address, privateIP=leader.private_ip_address,
981
- name=leader.id, launchTime=leader.launch_time, nodeType=None,
982
- preemptible=False, tags=leader.tags)
1074
+ leaderNode = Node(publicIP=leader["PublicIpAddress"], privateIP=leader["PrivateIpAddress"],
1075
+ name=leader["InstanceId"], launchTime=leader["LaunchTime"], nodeType=None,
1076
+ preemptible=False, tags=collapse_tags(leader["Tags"]))
983
1077
  if wait:
984
1078
  logger.debug("Waiting for toil_leader to enter 'running' state...")
985
- wait_instances_running(self.aws.boto2(self._region, 'ec2'), [leader])
1079
+ wait_instances_running(self.aws.client(self._region, 'ec2'), [leader])
986
1080
  logger.debug('... toil_leader is running')
987
1081
  self._waitForIP(leader)
988
1082
  leaderNode.waitForNode('toil_leader')
@@ -991,17 +1085,20 @@ class AWSProvisioner(AbstractProvisioner):
991
1085
 
992
1086
  @classmethod
993
1087
  @awsRetry
994
- def _addTag(cls, instance: Boto2Instance, key: str, value: str):
995
- instance.add_tag(key, value)
1088
+ def _addTag(cls, boto3_ec2: EC2Client, instance: InstanceTypeDef, key: str, value: str) -> None:
1089
+ if instance.get('Tags') is None:
1090
+ instance['Tags'] = []
1091
+ new_tag: TagTypeDef = {"Key": key, "Value": value}
1092
+ boto3_ec2.create_tags(Resources=[instance["InstanceId"]], Tags=[new_tag])
996
1093
 
997
1094
  @classmethod
998
- def _addTags(cls, instances: List[Boto2Instance], tags: Dict[str, str]):
1095
+ def _addTags(cls, boto3_ec2: EC2Client, instances: List[InstanceTypeDef], tags: Dict[str, str]) -> None:
999
1096
  for instance in instances:
1000
1097
  for key, value in tags.items():
1001
- cls._addTag(instance, key, value)
1098
+ cls._addTag(boto3_ec2, instance, key, value)
1002
1099
 
1003
1100
  @classmethod
1004
- def _waitForIP(cls, instance: Boto2Instance):
1101
+ def _waitForIP(cls, instance: InstanceTypeDef) -> None:
1005
1102
  """
1006
1103
  Wait until the instances has a public IP address assigned to it.
1007
1104
 
@@ -1010,32 +1107,32 @@ class AWSProvisioner(AbstractProvisioner):
1010
1107
  logger.debug('Waiting for ip...')
1011
1108
  while True:
1012
1109
  time.sleep(a_short_time)
1013
- instance.update()
1014
- if instance.ip_address or instance.public_dns_name or instance.private_ip_address:
1110
+ if instance.get("PublicIpAddress") or instance.get("PublicDnsName") or instance.get("PrivateIpAddress"):
1015
1111
  logger.debug('...got ip')
1016
1112
  break
1017
1113
 
1018
- def _terminateInstances(self, instances: List[Boto2Instance]):
1019
- instanceIDs = [x.id for x in instances]
1114
+ def _terminateInstances(self, instances: List[InstanceTypeDef]) -> None:
1115
+ instanceIDs = [x["InstanceId"] for x in instances]
1020
1116
  self._terminateIDs(instanceIDs)
1021
1117
  logger.info('... Waiting for instance(s) to shut down...')
1022
1118
  for instance in instances:
1023
- wait_transition(instance, {'pending', 'running', 'shutting-down', 'stopping', 'stopped'}, 'terminated')
1119
+ wait_transition(self.aws.client(region=self._region, service_name="ec2"), instance, {'pending', 'running', 'shutting-down', 'stopping', 'stopped'}, 'terminated')
1024
1120
  logger.info('Instance(s) terminated.')
1025
1121
 
1026
1122
  @awsRetry
1027
- def _terminateIDs(self, instanceIDs: List[str]):
1123
+ def _terminateIDs(self, instanceIDs: List[str]) -> None:
1028
1124
  logger.info('Terminating instance(s): %s', instanceIDs)
1029
- self.aws.boto2(self._region, 'ec2').terminate_instances(instance_ids=instanceIDs)
1125
+ boto3_ec2 = self.aws.client(region=self._region, service_name="ec2")
1126
+ boto3_ec2.terminate_instances(InstanceIds=instanceIDs)
1030
1127
  logger.info('Instance(s) terminated.')
1031
1128
 
1032
1129
  @awsRetry
1033
- def _deleteRoles(self, names: List[str]):
1130
+ def _deleteRoles(self, names: List[str]) -> None:
1034
1131
  """
1035
1132
  Delete all the given named IAM roles.
1036
1133
  Detatches but does not delete associated instance profiles.
1037
1134
  """
1038
-
1135
+ boto3_iam = self.aws.client(region=self._region, service_name="iam")
1039
1136
  for role_name in names:
1040
1137
  for profile_name in self._getRoleInstanceProfileNames(role_name):
1041
1138
  # We can't delete either the role or the profile while they
@@ -1043,60 +1140,64 @@ class AWSProvisioner(AbstractProvisioner):
1043
1140
 
1044
1141
  for attempt in old_retry(timeout=300, predicate=expectedShutdownErrors):
1045
1142
  with attempt:
1046
- self.aws.client(self._region, 'iam').remove_role_from_instance_profile(InstanceProfileName=profile_name,
1047
- RoleName=role_name)
1143
+ boto3_iam.remove_role_from_instance_profile(InstanceProfileName=profile_name,
1144
+ RoleName=role_name)
1048
1145
  # We also need to drop all inline policies
1049
1146
  for policy_name in self._getRoleInlinePolicyNames(role_name):
1050
1147
  for attempt in old_retry(timeout=300, predicate=expectedShutdownErrors):
1051
1148
  with attempt:
1052
- self.aws.client(self._region, 'iam').delete_role_policy(PolicyName=policy_name,
1053
- RoleName=role_name)
1149
+ boto3_iam.delete_role_policy(PolicyName=policy_name,
1150
+ RoleName=role_name)
1054
1151
 
1055
1152
  for attempt in old_retry(timeout=300, predicate=expectedShutdownErrors):
1056
1153
  with attempt:
1057
- self.aws.client(self._region, 'iam').delete_role(RoleName=role_name)
1154
+ boto3_iam.delete_role(RoleName=role_name)
1058
1155
  logger.debug('... Successfully deleted IAM role %s', role_name)
1059
1156
 
1060
-
1061
1157
  @awsRetry
1062
- def _deleteInstanceProfiles(self, names: List[str]):
1158
+ def _deleteInstanceProfiles(self, names: List[str]) -> None:
1063
1159
  """
1064
1160
  Delete all the given named IAM instance profiles.
1065
1161
  All roles must already be detached.
1066
1162
  """
1067
-
1163
+ boto3_iam = self.aws.client(region=self._region, service_name="iam")
1068
1164
  for profile_name in names:
1069
1165
  for attempt in old_retry(timeout=300, predicate=expectedShutdownErrors):
1070
1166
  with attempt:
1071
- self.aws.client(self._region, 'iam').delete_instance_profile(InstanceProfileName=profile_name)
1167
+ boto3_iam.delete_instance_profile(InstanceProfileName=profile_name)
1072
1168
  logger.debug('... Succesfully deleted instance profile %s', profile_name)
1073
1169
 
1074
1170
  @classmethod
1075
- def _getBoto2BlockDeviceMapping(cls, type_info: InstanceType, rootVolSize: int = 50) -> Boto2BlockDeviceMapping:
1171
+ def _getBoto3BlockDeviceMapping(cls, type_info: InstanceType, rootVolSize: int = 50) -> List[BlockDeviceMappingTypeDef]:
1076
1172
  # determine number of ephemeral drives via cgcloud-lib (actually this is moved into toil's lib
1077
1173
  bdtKeys = [''] + [f'/dev/xvd{c}' for c in string.ascii_lowercase[1:]]
1078
- bdm = Boto2BlockDeviceMapping()
1174
+ bdm_list: List[BlockDeviceMappingTypeDef] = []
1079
1175
  # Change root volume size to allow for bigger Docker instances
1080
- root_vol = Boto2BlockDeviceType(delete_on_termination=True)
1081
- root_vol.size = rootVolSize
1082
- bdm["/dev/xvda"] = root_vol
1176
+ root_vol: EbsBlockDeviceTypeDef = {"DeleteOnTermination": True,
1177
+ "VolumeSize": rootVolSize}
1178
+ bdm: BlockDeviceMappingTypeDef = {"DeviceName": "/dev/xvda", "Ebs": root_vol}
1179
+ bdm_list.append(bdm)
1083
1180
  # The first disk is already attached for us so start with 2nd.
1084
1181
  # Disk count is weirdly a float in our instance database, so make it an int here.
1085
1182
  for disk in range(1, int(type_info.disks) + 1):
1086
- bdm[bdtKeys[disk]] = Boto2BlockDeviceType(
1087
- ephemeral_name=f'ephemeral{disk - 1}') # ephemeral counts start at 0
1183
+ bdm = {}
1184
+ bdm["DeviceName"] = bdtKeys[disk]
1185
+ bdm["VirtualName"] = f"ephemeral{disk - 1}" # ephemeral counts start at 0
1186
+ bdm["Ebs"] = root_vol # default
1187
+ # bdm["Ebs"] = root_vol.update({"VirtualName": f"ephemeral{disk - 1}"})
1188
+ bdm_list.append(bdm)
1088
1189
 
1089
- logger.debug('Device mapping: %s', bdm)
1090
- return bdm
1190
+ logger.debug('Device mapping: %s', bdm_list)
1191
+ return bdm_list
1091
1192
 
1092
1193
  @classmethod
1093
- def _getBoto3BlockDeviceMappings(cls, type_info: InstanceType, rootVolSize: int = 50) -> List[dict]:
1194
+ def _getBoto3BlockDeviceMappings(cls, type_info: InstanceType, rootVolSize: int = 50) -> List[BlockDeviceMappingTypeDef]:
1094
1195
  """
1095
1196
  Get block device mappings for the root volume for a worker.
1096
1197
  """
1097
1198
 
1098
1199
  # Start with the root
1099
- bdms = [{
1200
+ bdms: List[BlockDeviceMappingTypeDef] = [{
1100
1201
  'DeviceName': '/dev/xvda',
1101
1202
  'Ebs': {
1102
1203
  'DeleteOnTermination': True,
@@ -1121,96 +1222,98 @@ class AWSProvisioner(AbstractProvisioner):
1121
1222
  return bdms
1122
1223
 
1123
1224
  @awsRetry
1124
- def _get_nodes_in_cluster(self, instance_type: Optional[str] = None, include_stopped_nodes=False) -> List[Boto2Instance]:
1225
+ def _get_nodes_in_cluster_boto3(self, instance_type: Optional[str] = None, include_stopped_nodes: bool = False) -> List[InstanceTypeDef]:
1125
1226
  """
1126
- Get Boto2 instance objects for all nodes in the cluster.
1227
+ Get Boto3 instance objects for all nodes in the cluster.
1127
1228
  """
1229
+ boto3_ec2: EC2Client = self.aws.client(region=self._region, service_name='ec2')
1230
+ instance_filter: FilterTypeDef = {'Name': 'instance.group-name', 'Values': [self.clusterName]}
1231
+ describe_response: DescribeInstancesResultTypeDef = boto3_ec2.describe_instances(Filters=[instance_filter])
1232
+ all_instances: List[InstanceTypeDef] = []
1233
+ for reservation in describe_response['Reservations']:
1234
+ instances = reservation['Instances']
1235
+ all_instances.extend(instances)
1128
1236
 
1129
- all_instances = self.aws.boto2(self._region, 'ec2').get_only_instances(filters={'instance.group-name': self.clusterName})
1237
+ # all_instances = self.aws.boto2(self._region, 'ec2').get_only_instances(filters={'instance.group-name': self.clusterName})
1130
1238
 
1131
- def instanceFilter(i):
1239
+ def instanceFilter(i: InstanceTypeDef) -> bool:
1132
1240
  # filter by type only if nodeType is true
1133
- rightType = not instance_type or i.instance_type == instance_type
1134
- rightState = i.state == 'running' or i.state == 'pending'
1241
+ rightType = not instance_type or i['InstanceType'] == instance_type
1242
+ rightState = i['State']['Name'] == 'running' or i['State']['Name'] == 'pending'
1135
1243
  if include_stopped_nodes:
1136
- rightState = rightState or i.state == 'stopping' or i.state == 'stopped'
1244
+ rightState = rightState or i['State']['Name'] == 'stopping' or i['State']['Name'] == 'stopped'
1137
1245
  return rightType and rightState
1138
1246
 
1139
1247
  return [i for i in all_instances if instanceFilter(i)]
1140
1248
 
1141
- def _filter_nodes_in_cluster(self, instance_type: Optional[str] = None, preemptible: bool = False) -> List[Boto2Instance]:
1142
- """
1143
- Get Boto2 instance objects for the nodes in the cluster filtered by preemptability.
1144
- """
1145
-
1146
- instances = self._get_nodes_in_cluster(instance_type, include_stopped_nodes=False)
1147
-
1148
- if preemptible:
1149
- return [i for i in instances if i.spot_instance_request_id is not None]
1150
-
1151
- return [i for i in instances if i.spot_instance_request_id is None]
1152
-
1153
1249
  def _getSpotRequestIDs(self) -> List[str]:
1154
1250
  """
1155
1251
  Get the IDs of all spot requests associated with the cluster.
1156
1252
  """
1157
1253
 
1158
1254
  # Grab the connection we need to use for this operation.
1159
- ec2 = self.aws.boto2(self._region, 'ec2')
1255
+ ec2: EC2Client = self.aws.client(self._region, 'ec2')
1160
1256
 
1161
- requests = ec2.get_all_spot_instance_requests()
1162
- tags = ec2.get_all_tags({'tag:': {_TAG_KEY_TOIL_CLUSTER_NAME: self.clusterName}})
1163
- idsToCancel = [tag.id for tag in tags]
1164
- return [request for request in requests if request.id in idsToCancel]
1257
+ requests: List[SpotInstanceRequestTypeDef] = ec2.describe_spot_instance_requests()["SpotInstanceRequests"]
1258
+ tag_filter: FilterTypeDef = {"Name": "tag:" + _TAG_KEY_TOIL_CLUSTER_NAME, "Values": [self.clusterName]}
1259
+ tags: List[TagDescriptionTypeDef] = ec2.describe_tags(Filters=[tag_filter])["Tags"]
1260
+ idsToCancel = [tag["ResourceId"] for tag in tags]
1261
+ return [request["SpotInstanceRequestId"] for request in requests if request["InstanceId"] in idsToCancel]
1165
1262
 
1166
1263
  def _createSecurityGroups(self) -> List[str]:
1167
1264
  """
1168
1265
  Create security groups for the cluster. Returns a list of their IDs.
1169
1266
  """
1267
+ def group_not_found(e: ClientError) -> bool:
1268
+ retry = (get_error_status(e) == 400 and 'does not exist in default VPC' in get_error_body(e))
1269
+ return retry
1170
1270
 
1171
1271
  # Grab the connection we need to use for this operation.
1172
1272
  # The VPC connection can do anything the EC2 one can do, but also look at subnets.
1173
- vpc = self.aws.boto2(self._region, 'vpc')
1273
+ boto3_ec2: EC2Client = self.aws.client(region=self._region, service_name="ec2")
1174
1274
 
1175
- def groupNotFound(e):
1176
- retry = (e.status == 400 and 'does not exist in default VPC' in e.body)
1177
- return retry
1178
- # Security groups need to belong to the same VPC as the leader. If we
1179
- # put the leader in a particular non-default subnet, it may be in a
1180
- # particular non-default VPC, which we need to know about.
1181
- vpcId = None
1275
+ vpc_id = None
1182
1276
  if self._leader_subnet:
1183
- subnets = vpc.get_all_subnets(subnet_ids=[self._leader_subnet])
1277
+ subnets = boto3_ec2.describe_subnets(SubnetIds=[self._leader_subnet])["Subnets"]
1184
1278
  if len(subnets) > 0:
1185
- vpcId = subnets[0].vpc_id
1186
- # security group create/get. ssh + all ports open within the group
1279
+ vpc_id = subnets[0]["VpcId"]
1187
1280
  try:
1188
- web = vpc.create_security_group(self.clusterName,
1189
- 'Toil appliance security group', vpc_id=vpcId)
1190
- except EC2ResponseError as e:
1191
- if e.status == 400 and 'already exists' in e.body:
1192
- pass # group exists- nothing to do
1281
+ # Security groups need to belong to the same VPC as the leader. If we
1282
+ # put the leader in a particular non-default subnet, it may be in a
1283
+ # particular non-default VPC, which we need to know about.
1284
+ other = {"GroupName": self.clusterName, "Description": "Toil appliance security group"}
1285
+ if vpc_id is not None:
1286
+ other["VpcId"] = vpc_id
1287
+ # mypy stubs don't explicitly state kwargs even though documentation allows it, and mypy gets confused
1288
+ web_response: CreateSecurityGroupResultTypeDef = boto3_ec2.create_security_group(**other) # type: ignore[arg-type]
1289
+ except ClientError as e:
1290
+ if get_error_status(e) == 400 and 'already exists' in get_error_body(e):
1291
+ pass
1193
1292
  else:
1194
1293
  raise
1195
1294
  else:
1196
- for attempt in old_retry(predicate=groupNotFound, timeout=300):
1197
- with attempt:
1198
- # open port 22 for ssh-ing
1199
- web.authorize(ip_protocol='tcp', from_port=22, to_port=22, cidr_ip='0.0.0.0/0')
1200
- # TODO: boto2 doesn't support IPv6 here but we need to.
1201
- for attempt in old_retry(predicate=groupNotFound, timeout=300):
1202
- with attempt:
1203
- # the following authorizes all TCP access within the web security group
1204
- web.authorize(ip_protocol='tcp', from_port=0, to_port=65535, src_group=web)
1205
- for attempt in old_retry(predicate=groupNotFound, timeout=300):
1295
+ for attempt in old_retry(predicate=group_not_found, timeout=300):
1206
1296
  with attempt:
1207
- # We also want to open up UDP, both for user code and for the RealtimeLogger
1208
- web.authorize(ip_protocol='udp', from_port=0, to_port=65535, src_group=web)
1297
+ ip_permissions: List[IpPermissionTypeDef] = [{"IpProtocol": "tcp",
1298
+ "FromPort": 22,
1299
+ "ToPort": 22,
1300
+ "IpRanges": [
1301
+ {"CidrIp": "0.0.0.0/0"}
1302
+ ],
1303
+ "Ipv6Ranges": [{"CidrIpv6": "::/0"}]}]
1304
+ for protocol in ("tcp", "udp"):
1305
+ ip_permissions.append({"IpProtocol": protocol,
1306
+ "FromPort": 0,
1307
+ "ToPort": 65535,
1308
+ "UserIdGroupPairs":
1309
+ [{"GroupId": web_response["GroupId"],
1310
+ "GroupName": self.clusterName}]})
1311
+ boto3_ec2.authorize_security_group_ingress(IpPermissions=ip_permissions, GroupName=self.clusterName, GroupId=web_response["GroupId"])
1209
1312
  out = []
1210
- for sg in vpc.get_all_security_groups():
1211
- if sg.name == self.clusterName and (vpcId is None or sg.vpc_id == vpcId):
1212
- out.append(sg)
1213
- return [sg.id for sg in out]
1313
+ for sg in boto3_ec2.describe_security_groups()["SecurityGroups"]:
1314
+ if sg["GroupName"] == self.clusterName and (vpc_id is None or sg["VpcId"] == vpc_id):
1315
+ out.append(sg["GroupId"])
1316
+ return out
1214
1317
 
1215
1318
  @awsRetry
1216
1319
  def _getSecurityGroupIDs(self) -> List[str]:
@@ -1222,13 +1325,13 @@ class AWSProvisioner(AbstractProvisioner):
1222
1325
 
1223
1326
  # Depending on if we enumerated them on the leader or locally, we might
1224
1327
  # know the required security groups by name, ID, or both.
1225
- sgs = [sg for sg in self.aws.boto2(self._region, 'ec2').get_all_security_groups()
1226
- if (sg.name in self._leaderSecurityGroupNames or
1227
- sg.id in self._leaderSecurityGroupIDs)]
1228
- return [sg.id for sg in sgs]
1328
+ boto3_ec2 = self.aws.client(region=self._region, service_name='ec2')
1329
+ return [sg["GroupId"] for sg in boto3_ec2.describe_security_groups()["SecurityGroups"]
1330
+ if (sg["GroupName"] in self._leaderSecurityGroupNames or
1331
+ sg["GroupId"] in self._leaderSecurityGroupIDs)]
1229
1332
 
1230
1333
  @awsRetry
1231
- def _get_launch_template_ids(self, filters: Optional[List[Dict[str, List[str]]]] = None) -> List[str]:
1334
+ def _get_launch_template_ids(self, filters: Optional[List[FilterTypeDef]] = None) -> List[str]:
1232
1335
  """
1233
1336
  Find all launch templates associated with the cluster.
1234
1337
 
@@ -1236,10 +1339,10 @@ class AWSProvisioner(AbstractProvisioner):
1236
1339
  """
1237
1340
 
1238
1341
  # Grab the connection we need to use for this operation.
1239
- ec2 = self.aws.client(self._region, 'ec2')
1342
+ ec2: EC2Client = self.aws.client(self._region, 'ec2')
1240
1343
 
1241
1344
  # How do we match the right templates?
1242
- combined_filters = [{'Name': 'tag:' + _TAG_KEY_TOIL_CLUSTER_NAME, 'Values': [self.clusterName]}]
1345
+ combined_filters: List[FilterTypeDef] = [{'Name': 'tag:' + _TAG_KEY_TOIL_CLUSTER_NAME, 'Values': [self.clusterName]}]
1243
1346
 
1244
1347
  if filters:
1245
1348
  # Add any user-specified filters
@@ -1254,7 +1357,7 @@ class AWSProvisioner(AbstractProvisioner):
1254
1357
  allTemplateIDs += [item['LaunchTemplateId'] for item in response.get('LaunchTemplates', [])]
1255
1358
  if 'NextToken' in response:
1256
1359
  # There are more pages. Get the next one, supplying the token.
1257
- response = ec2.describe_launch_templates(Filters=filters,
1360
+ response = ec2.describe_launch_templates(Filters=filters or [],
1258
1361
  NextToken=response['NextToken'],
1259
1362
  MaxResults=200)
1260
1363
  else:
@@ -1286,10 +1389,10 @@ class AWSProvisioner(AbstractProvisioner):
1286
1389
  lt_name = self._name_worker_launch_template(instance_type, preemptible=preemptible)
1287
1390
 
1288
1391
  # How do we match the right templates?
1289
- filters = [{'Name': 'launch-template-name', 'Values': [lt_name]}]
1392
+ filters: List[FilterTypeDef] = [{'Name': 'launch-template-name', 'Values': [lt_name]}]
1290
1393
 
1291
1394
  # Get the templates
1292
- templates = self._get_launch_template_ids(filters=filters)
1395
+ templates: List[str] = self._get_launch_template_ids(filters=filters)
1293
1396
 
1294
1397
  if len(templates) > 1:
1295
1398
  # There shouldn't ever be multiple templates with our reserved name
@@ -1305,7 +1408,7 @@ class AWSProvisioner(AbstractProvisioner):
1305
1408
  # writes). Recurse to try again, because now it exists.
1306
1409
  logger.info('Waiting %f seconds for template %s to be available', backoff, lt_name)
1307
1410
  time.sleep(backoff)
1308
- return self._get_worker_launch_template(instance_type, preemptible=preemptible, backoff=backoff*2)
1411
+ return self._get_worker_launch_template(instance_type, preemptible=preemptible, backoff=backoff * 2)
1309
1412
  else:
1310
1413
  raise
1311
1414
  else:
@@ -1345,7 +1448,7 @@ class AWSProvisioner(AbstractProvisioner):
1345
1448
 
1346
1449
  assert self._leaderPrivateIP
1347
1450
  type_info = E2Instances[instance_type]
1348
- rootVolSize=self._nodeStorageOverrides.get(instance_type, self._nodeStorage)
1451
+ rootVolSize = self._nodeStorageOverrides.get(instance_type, self._nodeStorage)
1349
1452
  bdms = self._getBoto3BlockDeviceMappings(type_info, rootVolSize=rootVolSize)
1350
1453
 
1351
1454
  keyPath = self._sseKey if self._sseKey else None
@@ -1377,16 +1480,16 @@ class AWSProvisioner(AbstractProvisioner):
1377
1480
  """
1378
1481
 
1379
1482
  # Grab the connection we need to use for this operation.
1380
- autoscaling = self.aws.client(self._region, 'autoscaling')
1483
+ autoscaling: AutoScalingClient = self.aws.client(self._region, 'autoscaling')
1381
1484
 
1382
1485
  # AWS won't filter ASGs server-side for us in describe_auto_scaling_groups.
1383
1486
  # So we search instances of applied tags for the ASGs they are on.
1384
1487
  # The ASGs tagged with our cluster are our ASGs.
1385
1488
  # The filtering is on different fields of the tag object itself.
1386
- filters = [{'Name': 'key',
1387
- 'Values': [_TAG_KEY_TOIL_CLUSTER_NAME]},
1388
- {'Name': 'value',
1389
- 'Values': [self.clusterName]}]
1489
+ filters: List[FilterTypeDef] = [{'Name': 'key',
1490
+ 'Values': [_TAG_KEY_TOIL_CLUSTER_NAME]},
1491
+ {'Name': 'value',
1492
+ 'Values': [self.clusterName]}]
1390
1493
 
1391
1494
  matchedASGs = []
1392
1495
  # Get the first page with no NextToken
@@ -1461,7 +1564,7 @@ class AWSProvisioner(AbstractProvisioner):
1461
1564
  if self.clusterType == 'kubernetes':
1462
1565
  # We also need to tag it with Kubernetes autoscaler info (empty tags)
1463
1566
  tags['k8s.io/cluster-autoscaler/' + self.clusterName] = ''
1464
- assert(self.clusterName != 'enabled')
1567
+ assert (self.clusterName != 'enabled')
1465
1568
  tags['k8s.io/cluster-autoscaler/enabled'] = ''
1466
1569
  tags['k8s.io/cluster-autoscaler/node-template/resources/ephemeral-storage'] = f'{min_gigs}G'
1467
1570
 
@@ -1481,7 +1584,7 @@ class AWSProvisioner(AbstractProvisioner):
1481
1584
 
1482
1585
  return asg_name
1483
1586
 
1484
- def _boto2_pager(self, requestor_callable: Callable, result_attribute_name: str) -> Iterable[Dict[str, Any]]:
1587
+ def _boto2_pager(self, requestor_callable: Callable[[...], Any], result_attribute_name: str) -> Iterable[Dict[str, Any]]: # type: ignore[misc]
1485
1588
  """
1486
1589
  Yield all the results from calling the given Boto 2 method and paging
1487
1590
  through all the results using the "marker" field. Results are to be
@@ -1489,33 +1592,13 @@ class AWSProvisioner(AbstractProvisioner):
1489
1592
  """
1490
1593
  marker = None
1491
1594
  while True:
1492
- result = requestor_callable(marker=marker)
1595
+ result = requestor_callable(marker=marker) # type: ignore[call-arg]
1493
1596
  yield from getattr(result, result_attribute_name)
1494
1597
  if result.is_truncated == 'true':
1495
1598
  marker = result.marker
1496
1599
  else:
1497
1600
  break
1498
1601
 
1499
- def _pager(self, requestor_callable: Callable, result_attribute_name: str, **kwargs) -> Iterable[Dict[str, Any]]:
1500
- """
1501
- Yield all the results from calling the given Boto 3 method with the
1502
- given keyword arguments, paging through the results using the Marker or
1503
- NextToken, and fetching out and looping over the list in the response
1504
- with the given attribute name.
1505
- """
1506
-
1507
- # Recover the Boto3 client, and the name of the operation
1508
- client = requestor_callable.__self__
1509
- op_name = requestor_callable.__name__
1510
-
1511
- # grab a Boto 3 built-in paginator. See
1512
- # <https://boto3.amazonaws.com/v1/documentation/api/latest/guide/paginators.html>
1513
- paginator = client.get_paginator(op_name)
1514
-
1515
- for page in paginator.paginate(**kwargs):
1516
- # Invoke it and go through the pages, yielding from them
1517
- yield from page.get(result_attribute_name, [])
1518
-
1519
1602
  @awsRetry
1520
1603
  def _getRoleNames(self) -> List[str]:
1521
1604
  """
@@ -1523,10 +1606,12 @@ class AWSProvisioner(AbstractProvisioner):
1523
1606
  """
1524
1607
 
1525
1608
  results = []
1526
- for result in self._boto2_pager(self.aws.boto2(self._region, 'iam').list_roles, 'roles'):
1609
+ boto3_iam = self.aws.client(self._region, 'iam')
1610
+ for result in boto3_pager(boto3_iam.list_roles, 'Roles'):
1527
1611
  # For each Boto2 role object
1528
1612
  # Grab out the name
1529
- name = result['role_name']
1613
+ cast(RoleTypeDef, result)
1614
+ name = result['RoleName']
1530
1615
  if self._is_our_namespaced_name(name):
1531
1616
  # If it looks like ours, it is ours.
1532
1617
  results.append(name)
@@ -1539,11 +1624,12 @@ class AWSProvisioner(AbstractProvisioner):
1539
1624
  """
1540
1625
 
1541
1626
  results = []
1542
- for result in self._boto2_pager(self.aws.boto2(self._region, 'iam').list_instance_profiles,
1543
- 'instance_profiles'):
1544
- # For each Boto2 role object
1627
+ boto3_iam = self.aws.client(self._region, 'iam')
1628
+ for result in boto3_pager(boto3_iam.list_instance_profiles,
1629
+ 'InstanceProfiles'):
1545
1630
  # Grab out the name
1546
- name = result['instance_profile_name']
1631
+ cast(InstanceProfileTypeDef, result)
1632
+ name = result['InstanceProfileName']
1547
1633
  if self._is_our_namespaced_name(name):
1548
1634
  # If it looks like ours, it is ours.
1549
1635
  results.append(name)
@@ -1558,9 +1644,9 @@ class AWSProvisioner(AbstractProvisioner):
1558
1644
  """
1559
1645
 
1560
1646
  # Grab the connection we need to use for this operation.
1561
- iam = self.aws.client(self._region, 'iam')
1647
+ boto3_iam: IAMClient = self.aws.client(self._region, 'iam')
1562
1648
 
1563
- return [item['InstanceProfileName'] for item in self._pager(iam.list_instance_profiles_for_role,
1649
+ return [item['InstanceProfileName'] for item in boto3_pager(boto3_iam.list_instance_profiles_for_role,
1564
1650
  'InstanceProfiles',
1565
1651
  RoleName=role_name)]
1566
1652
 
@@ -1575,11 +1661,11 @@ class AWSProvisioner(AbstractProvisioner):
1575
1661
  """
1576
1662
 
1577
1663
  # Grab the connection we need to use for this operation.
1578
- iam = self.aws.client(self._region, 'iam')
1664
+ boto3_iam: IAMClient = self.aws.client(self._region, 'iam')
1579
1665
 
1580
1666
  # TODO: we don't currently use attached policies.
1581
1667
 
1582
- return [item['PolicyArn'] for item in self._pager(iam.list_attached_role_policies,
1668
+ return [item['PolicyArn'] for item in boto3_pager(boto3_iam.list_attached_role_policies,
1583
1669
  'AttachedPolicies',
1584
1670
  RoleName=role_name)]
1585
1671
 
@@ -1591,20 +1677,18 @@ class AWSProvisioner(AbstractProvisioner):
1591
1677
  """
1592
1678
 
1593
1679
  # Grab the connection we need to use for this operation.
1594
- iam = self.aws.client(self._region, 'iam')
1680
+ boto3_iam: IAMClient = self.aws.client(self._region, 'iam')
1595
1681
 
1596
- return list(self._pager(iam.list_role_policies,
1597
- 'PolicyNames',
1598
- RoleName=role_name))
1682
+ return list(boto3_pager(boto3_iam.list_role_policies, 'PolicyNames', RoleName=role_name))
1599
1683
 
1600
- def full_policy(self, resource: str) -> dict:
1684
+ def full_policy(self, resource: str) -> Dict[str, Any]:
1601
1685
  """
1602
1686
  Produce a dict describing the JSON form of a full-access-granting AWS
1603
1687
  IAM policy for the service with the given name (e.g. 's3').
1604
1688
  """
1605
1689
  return dict(Version="2012-10-17", Statement=[dict(Effect="Allow", Resource="*", Action=f"{resource}:*")])
1606
1690
 
1607
- def kubernetes_policy(self) -> dict:
1691
+ def kubernetes_policy(self) -> Dict[str, Any]:
1608
1692
  """
1609
1693
  Get the Kubernetes policy grants not provided by the full grants on EC2
1610
1694
  and IAM. See
@@ -1671,45 +1755,42 @@ class AWSProvisioner(AbstractProvisioner):
1671
1755
  """
1672
1756
 
1673
1757
  # Grab the connection we need to use for this operation.
1674
- iam = self.aws.boto2(self._region, 'iam')
1758
+ boto3_iam: IAMClient = self.aws.client(self._region, 'iam')
1675
1759
 
1676
1760
  # Make sure we can tell our roles apart from roles for other clusters
1677
1761
  aws_role_name = self._namespace_name(local_role_name)
1678
1762
  try:
1679
1763
  # Make the role
1680
1764
  logger.debug('Creating IAM role %s...', aws_role_name)
1681
- iam.create_role(aws_role_name, assume_role_policy_document=json.dumps({
1765
+ assume_role_policy_document = json.dumps({
1682
1766
  "Version": "2012-10-17",
1683
1767
  "Statement": [{
1684
1768
  "Effect": "Allow",
1685
1769
  "Principal": {"Service": ["ec2.amazonaws.com"]},
1686
1770
  "Action": ["sts:AssumeRole"]}
1687
- ]}))
1771
+ ]})
1772
+ boto3_iam.create_role(RoleName=aws_role_name, AssumeRolePolicyDocument=assume_role_policy_document)
1688
1773
  logger.debug('Created new IAM role')
1689
- except BotoServerError as e:
1690
- if e.status == 409 and e.error_code == 'EntityAlreadyExists':
1774
+ except ClientError as e:
1775
+ if get_error_status(e) == 409 and get_error_code(e) == 'EntityAlreadyExists':
1691
1776
  logger.debug('IAM role already exists. Reusing.')
1692
1777
  else:
1693
1778
  raise
1694
1779
 
1695
1780
  # Delete superfluous policies
1696
- policy_names = set(iam.list_role_policies(aws_role_name).policy_names)
1781
+ policy_names = set(boto3_iam.list_role_policies(RoleName=aws_role_name)["PolicyNames"])
1697
1782
  for policy_name in policy_names.difference(set(list(policies.keys()))):
1698
- iam.delete_role_policy(aws_role_name, policy_name)
1783
+ boto3_iam.delete_role_policy(RoleName=aws_role_name, PolicyName=policy_name)
1699
1784
 
1700
1785
  # Create expected policies
1701
1786
  for policy_name, policy in policies.items():
1702
1787
  current_policy = None
1703
1788
  try:
1704
- current_policy = json.loads(unquote(
1705
- iam.get_role_policy(aws_role_name, policy_name).policy_document))
1706
- except BotoServerError as e:
1707
- if e.status == 404 and e.error_code == 'NoSuchEntity':
1708
- pass
1709
- else:
1710
- raise
1789
+ current_policy = boto3_iam.get_role_policy(RoleName=aws_role_name, PolicyName=policy_name)["PolicyDocument"]
1790
+ except boto3_iam.exceptions.NoSuchEntityException:
1791
+ pass
1711
1792
  if current_policy != policy:
1712
- iam.put_role_policy(aws_role_name, policy_name, json.dumps(policy))
1793
+ boto3_iam.put_role_policy(RoleName=aws_role_name, PolicyName=policy_name, PolicyDocument=json.dumps(policy))
1713
1794
 
1714
1795
  # Now the role has the right policies so it is ready.
1715
1796
  return aws_role_name
@@ -1724,7 +1805,7 @@ class AWSProvisioner(AbstractProvisioner):
1724
1805
  """
1725
1806
 
1726
1807
  # Grab the connection we need to use for this operation.
1727
- iam = self.aws.boto2(self._region, 'iam')
1808
+ boto3_iam: IAMClient = self.aws.client(self._region, 'iam')
1728
1809
 
1729
1810
  policy = dict(iam_full=self.full_policy('iam'), ec2_full=self.full_policy('ec2'),
1730
1811
  s3_full=self.full_policy('s3'), sbd_full=self.full_policy('sdb'))
@@ -1735,45 +1816,41 @@ class AWSProvisioner(AbstractProvisioner):
1735
1816
  iamRoleName = self._setup_iam_ec2_role(_INSTANCE_PROFILE_ROLE_NAME, policy)
1736
1817
 
1737
1818
  try:
1738
- profile = iam.get_instance_profile(iamRoleName)
1739
- logger.debug("Have preexisting instance profile: %s", profile.get_instance_profile_response.get_instance_profile_result.instance_profile)
1740
- except BotoServerError as e:
1741
- if e.status == 404:
1742
- profile = iam.create_instance_profile(iamRoleName)
1743
- profile = profile.create_instance_profile_response.create_instance_profile_result
1744
- logger.debug("Created new instance profile: %s", profile.instance_profile)
1745
- else:
1746
- raise
1819
+ profile_result = boto3_iam.get_instance_profile(InstanceProfileName=iamRoleName)
1820
+ profile: InstanceProfileTypeDef = profile_result["InstanceProfile"]
1821
+ logger.debug("Have preexisting instance profile: %s", profile)
1822
+ except boto3_iam.exceptions.NoSuchEntityException:
1823
+ profile_result = boto3_iam.create_instance_profile(InstanceProfileName=iamRoleName)
1824
+ profile = profile_result["InstanceProfile"]
1825
+ logger.debug("Created new instance profile: %s", profile)
1747
1826
  else:
1748
- profile = profile.get_instance_profile_response.get_instance_profile_result
1749
- profile = profile.instance_profile
1827
+ profile = profile_result["InstanceProfile"]
1750
1828
 
1751
- profile_arn = profile.arn
1829
+ profile_arn: str = profile["Arn"]
1752
1830
 
1753
1831
  # Now we have the profile ARN, but we want to make sure it really is
1754
1832
  # visible by name in a different session.
1755
1833
  wait_until_instance_profile_arn_exists(profile_arn)
1756
1834
 
1757
- if len(profile.roles) > 1:
1835
+ if len(profile["Roles"]) > 1:
1758
1836
  # This is too many roles. We probably grabbed something we should
1759
1837
  # not have by mistake, and this is some important profile for
1760
1838
  # something else.
1761
1839
  raise RuntimeError(f'Did not expect instance profile {profile_arn} to contain '
1762
1840
  f'more than one role; is it really a Toil-managed profile?')
1763
- elif len(profile.roles) == 1:
1764
- # this should be profile.roles[0].role_name
1765
- if profile.roles.member.role_name == iamRoleName:
1841
+ elif len(profile["Roles"]) == 1:
1842
+ if profile["Roles"][0]["RoleName"] == iamRoleName:
1766
1843
  return profile_arn
1767
1844
  else:
1768
1845
  # Drop this wrong role and use the fallback code for 0 roles
1769
- iam.remove_role_from_instance_profile(iamRoleName,
1770
- profile.roles.member.role_name)
1846
+ boto3_iam.remove_role_from_instance_profile(InstanceProfileName=iamRoleName,
1847
+ RoleName=profile["Roles"][0]["RoleName"])
1771
1848
 
1772
1849
  # If we get here, we had 0 roles on the profile, or we had 1 but we removed it.
1773
- for attempt in old_retry(predicate=lambda err: err.status == 404):
1850
+ for attempt in old_retry(predicate=lambda err: get_error_status(err) == 404):
1774
1851
  with attempt:
1775
1852
  # Put the IAM role on the profile
1776
- iam.add_role_to_instance_profile(profile.instance_profile_name, iamRoleName)
1853
+ boto3_iam.add_role_to_instance_profile(InstanceProfileName=profile["InstanceProfileName"], RoleName=iamRoleName)
1777
1854
  logger.debug("Associated role %s with profile", iamRoleName)
1778
1855
 
1779
1856
  return profile_arn