toil 6.1.0__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff shows the differences between the two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
- toil/__init__.py +1 -232
- toil/batchSystems/abstractBatchSystem.py +22 -13
- toil/batchSystems/abstractGridEngineBatchSystem.py +59 -45
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/contained_executor.py +4 -5
- toil/batchSystems/gridengine.py +1 -1
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +25 -11
- toil/batchSystems/local_support.py +3 -3
- toil/batchSystems/lsf.py +2 -2
- toil/batchSystems/mesos/batchSystem.py +4 -4
- toil/batchSystems/mesos/executor.py +3 -2
- toil/batchSystems/options.py +9 -0
- toil/batchSystems/singleMachine.py +11 -10
- toil/batchSystems/slurm.py +64 -22
- toil/batchSystems/torque.py +1 -1
- toil/bus.py +7 -3
- toil/common.py +36 -13
- toil/cwl/cwltoil.py +365 -312
- toil/deferred.py +1 -1
- toil/fileStores/abstractFileStore.py +17 -17
- toil/fileStores/cachingFileStore.py +2 -2
- toil/fileStores/nonCachingFileStore.py +1 -1
- toil/job.py +228 -60
- toil/jobStores/abstractJobStore.py +18 -10
- toil/jobStores/aws/jobStore.py +280 -218
- toil/jobStores/aws/utils.py +57 -29
- toil/jobStores/conftest.py +2 -2
- toil/jobStores/fileJobStore.py +2 -2
- toil/jobStores/googleJobStore.py +3 -4
- toil/leader.py +72 -24
- toil/lib/aws/__init__.py +26 -10
- toil/lib/aws/iam.py +2 -2
- toil/lib/aws/session.py +62 -22
- toil/lib/aws/utils.py +73 -37
- toil/lib/conversions.py +5 -1
- toil/lib/ec2.py +118 -69
- toil/lib/expando.py +1 -1
- toil/lib/io.py +14 -2
- toil/lib/misc.py +1 -3
- toil/lib/resources.py +55 -21
- toil/lib/retry.py +12 -5
- toil/lib/threading.py +2 -2
- toil/lib/throttle.py +1 -1
- toil/options/common.py +27 -24
- toil/provisioners/__init__.py +9 -3
- toil/provisioners/abstractProvisioner.py +9 -7
- toil/provisioners/aws/__init__.py +20 -15
- toil/provisioners/aws/awsProvisioner.py +406 -329
- toil/provisioners/gceProvisioner.py +2 -2
- toil/provisioners/node.py +13 -5
- toil/server/app.py +1 -1
- toil/statsAndLogging.py +58 -16
- toil/test/__init__.py +27 -12
- toil/test/batchSystems/batchSystemTest.py +40 -33
- toil/test/batchSystems/batch_system_plugin_test.py +79 -0
- toil/test/batchSystems/test_slurm.py +1 -1
- toil/test/cwl/cwlTest.py +8 -91
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +10 -13
- toil/test/jobStores/jobStoreTest.py +33 -49
- toil/test/lib/aws/test_iam.py +2 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
- toil/test/provisioners/clusterTest.py +90 -8
- toil/test/server/serverTest.py +2 -2
- toil/test/src/autoDeploymentTest.py +1 -1
- toil/test/src/dockerCheckTest.py +2 -1
- toil/test/src/environmentTest.py +125 -0
- toil/test/src/fileStoreTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +18 -8
- toil/test/src/jobTest.py +1 -1
- toil/test/src/realtimeLoggerTest.py +4 -0
- toil/test/src/workerTest.py +52 -19
- toil/test/utils/toilDebugTest.py +61 -3
- toil/test/utils/utilsTest.py +20 -18
- toil/test/wdl/wdltoil_test.py +24 -71
- toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
- toil/toilState.py +68 -9
- toil/utils/toilDebugJob.py +153 -26
- toil/utils/toilLaunchCluster.py +12 -2
- toil/utils/toilRsyncCluster.py +7 -2
- toil/utils/toilSshCluster.py +7 -3
- toil/utils/toilStats.py +2 -1
- toil/utils/toilStatus.py +97 -51
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +318 -51
- toil/worker.py +96 -69
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/METADATA +55 -21
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/RECORD +93 -90
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/lib/ec2.py
CHANGED
@@ -1,13 +1,11 @@
 import logging
 import time
 from base64 import b64encode
-from operator import
-from typing import Dict, Iterable, List, Optional, Union
+from operator import itemgetter
+from typing import Dict, Iterable, List, Optional, Union, TYPE_CHECKING, Generator, Callable, Mapping, Any

+import botocore.client
 from boto3.resources.base import ServiceResource
-from boto.ec2.instance import Instance as Boto2Instance
-from boto.ec2.spotinstancerequest import SpotInstanceRequest
-from botocore.client import BaseClient

 from toil.lib.aws.session import establish_boto3_session
 from toil.lib.aws.utils import flatten_tags

@@ -18,6 +16,11 @@ from toil.lib.retry import (ErrorCondition,
                             old_retry,
                             retry)

+from mypy_boto3_ec2.client import EC2Client
+from mypy_boto3_autoscaling.client import AutoScalingClient
+from mypy_boto3_ec2.type_defs import SpotInstanceRequestTypeDef, DescribeInstancesResultTypeDef, InstanceTypeDef
+from mypy_boto3_ec2.service_resource import EC2ServiceResource, Instance
+
 a_short_time = 5
 a_long_time = 60 * 60
 logger = logging.getLogger(__name__)

@@ -38,6 +41,7 @@ def not_found(e):
     # Not the right kind of error
     return False

+
 def inconsistencies_detected(e):
     if get_error_code(e) == 'InvalidGroup.NotFound':
         return True

@@ -45,6 +49,7 @@ def inconsistencies_detected(e):
     matches = ('invalid iam instance profile' in m) or ('no associated iam roles' in m)
     return matches

+
 # We also define these error categories for the new retry decorator
 INCONSISTENCY_ERRORS = [ErrorCondition(boto_error_codes=['InvalidGroup.NotFound']),
                         ErrorCondition(error_message_must_include='Invalid IAM Instance Profile'),

@@ -62,9 +67,10 @@ class UnexpectedResourceState(Exception):
         super().__init__(
             "Expected state of %s to be '%s' but got '%s'" %
             (resource, to_state, state))
-
-
-
+
+
+def wait_transition(boto3_ec2: EC2Client, resource: InstanceTypeDef, from_states: Iterable[str], to_state: str,
+                    state_getter: Callable[[InstanceTypeDef], str]=lambda x: x.get('State').get('Name')):
     """
     Wait until the specified EC2 resource (instance, image, volume, ...) transitions from any
     of the given 'from' states to the specified 'to' state. If the instance is found in a state

@@ -76,41 +82,44 @@ def wait_transition(resource, from_states, to_state,
     :param to_state: the state of the resource when this method returns
     """
     state = state_getter(resource)
+    instance_id = resource["InstanceId"]
     while state in from_states:
         time.sleep(a_short_time)
         for attempt in retry_ec2():
             with attempt:
-
+                described = boto3_ec2.describe_instances(InstanceIds=[instance_id])
+                resource = described["Reservations"][0]["Instances"][0]  # there should only be one requested
         state = state_getter(resource)
     if state != to_state:
         raise UnexpectedResourceState(resource, to_state, state)


-def wait_instances_running(
+def wait_instances_running(boto3_ec2: EC2Client, instances: Iterable[InstanceTypeDef]) -> Generator[InstanceTypeDef, None, None]:
     """
     Wait until no instance in the given iterable is 'pending'. Yield every instance that
     entered the running state as soon as it does.

-    :param
-    :param Iterable[
-    :rtype: Iterable[
+    :param EC2Client boto3_ec2: the EC2 connection to use for making requests
+    :param Iterable[InstanceTypeDef] instances: the instances to wait on
+    :rtype: Iterable[InstanceTypeDef]
     """
     running_ids = set()
     other_ids = set()
     while True:
         pending_ids = set()
         for i in instances:
-
-
-
-
+            i: InstanceTypeDef
+            if i['State']['Name'] == 'pending':
+                pending_ids.add(i['InstanceId'])
+            elif i['State']['Name'] == 'running':
+                if i['InstanceId'] in running_ids:
                     raise RuntimeError("An instance was already added to the list of running instance IDs. Maybe there is a duplicate.")
-                running_ids.add(i
+                running_ids.add(i['InstanceId'])
                 yield i
             else:
-                if i
+                if i['InstanceId'] in other_ids:
                     raise RuntimeError("An instance was already added to the list of other instances. Maybe there is a duplicate.")
-                other_ids.add(i
+                other_ids.add(i['InstanceId'])
                 yield i
         logger.info('%i instance(s) pending, %i running, %i other.',
                     *list(map(len, (pending_ids, running_ids, other_ids))))

@@ -121,14 +130,16 @@ def wait_instances_running(ec2, instances: Iterable[Boto2Instance]) -> Iterable[
             time.sleep(seconds)
             for attempt in retry_ec2():
                 with attempt:
-
+                    described_instances = boto3_ec2.describe_instances(InstanceIds=list(pending_ids))
+                    instances = [instance for reservation in described_instances["Reservations"] for instance in reservation["Instances"]]


-def wait_spot_requests_active(
+def wait_spot_requests_active(boto3_ec2: EC2Client, requests: Iterable[SpotInstanceRequestTypeDef], timeout: float = None, tentative: bool = False) -> Iterable[List[SpotInstanceRequestTypeDef]]:
     """
     Wait until no spot request in the given iterator is in the 'open' state or, optionally,
     a timeout occurs. Yield spot requests as soon as they leave the 'open' state.

+    :param boto3_ec2: ec2 client
     :param requests: The requests to wait on.

     :param timeout: Maximum time in seconds to spend waiting or None to wait forever. If a

@@ -145,11 +156,11 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
     other_ids = set()
     open_ids = None

-    def cancel():
+    def cancel() -> None:
         logger.warning('Cancelling remaining %i spot requests.', len(open_ids))
-
+        boto3_ec2.cancel_spot_instance_requests(SpotInstanceRequestIds=list(open_ids))

-    def spot_request_not_found(e):
+    def spot_request_not_found(e: Exception) -> bool:
         return get_error_code(e) == 'InvalidSpotInstanceRequestID.NotFound'

     try:

@@ -157,30 +168,31 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
             open_ids, eval_ids, fulfill_ids = set(), set(), set()
             batch = []
             for r in requests:
-
-
-
-
-
-
+                r: SpotInstanceRequestTypeDef  # pycharm thinks it is a string
+                if r['State'] == 'open':
+                    open_ids.add(r['InstanceId'])
+                    if r['Status'] == 'pending-evaluation':
+                        eval_ids.add(r['InstanceId'])
+                    elif r['Status'] == 'pending-fulfillment':
+                        fulfill_ids.add(r['InstanceId'])
                 else:
                     logger.info(
                         'Request %s entered status %s indicating that it will not be '
-                        'fulfilled anytime soon.', r
-            elif r
-                if r
+                        'fulfilled anytime soon.', r['InstanceId'], r['Status'])
+                elif r['State'] == 'active':
+                    if r['InstanceId'] in active_ids:
                         raise RuntimeError("A request was already added to the list of active requests. Maybe there are duplicate requests.")
-                    active_ids.add(r
+                    active_ids.add(r['InstanceId'])
                     batch.append(r)
                 else:
-                    if r
+                    if r['InstanceId'] in other_ids:
                         raise RuntimeError("A request was already added to the list of other IDs. Maybe there are duplicate requests.")
-                    other_ids.add(r
+                    other_ids.add(r['InstanceId'])
                     batch.append(r)
             if batch:
                 yield batch
             logger.info('%i spot requests(s) are open (%i of which are pending evaluation and %i '
-
+                        'are pending fulfillment), %i are active and %i are in another state.',
                         *list(map(len, (open_ids, eval_ids, fulfill_ids, active_ids, other_ids))))
             if not open_ids or tentative and not eval_ids and not fulfill_ids:
                 break

@@ -192,8 +204,7 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
             time.sleep(sleep_time)
             for attempt in retry_ec2(retry_while=spot_request_not_found):
                 with attempt:
-                    requests =
-                        list(open_ids))
+                    requests = boto3_ec2.describe_spot_instance_requests(SpotInstanceRequestIds=list(open_ids))
     except BaseException:
         if open_ids:
             with panic(logger):
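For orientation, a minimal sketch of how a caller might drive the reworked wait helpers after this boto2-to-boto3 migration. The helper names and signatures come from the hunks above; the tag filter and the no-argument session call are illustrative assumptions, not values taken from the diff.

# Sketch only: instances are now plain boto3 dicts (InstanceTypeDef), keyed like
# i['InstanceId'] and i['State']['Name'], rather than boto2 Instance objects.
from toil.lib.aws.session import establish_boto3_session
from toil.lib.ec2 import wait_instances_running

boto3_ec2 = establish_boto3_session().client("ec2")

# The filter is a placeholder; any describe_instances() result works here.
described = boto3_ec2.describe_instances(
    Filters=[{"Name": "tag:Owner", "Values": ["toil"]}]
)
instances = [i for r in described["Reservations"] for i in r["Instances"]]

for instance in wait_instances_running(boto3_ec2, instances):
    # Each yielded instance is a dict; fields are read by key, not attribute.
    print(instance["InstanceId"], instance["State"]["Name"])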
@@ -204,47 +215,56 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
                 cancel()


-def create_spot_instances(
+def create_spot_instances(boto3_ec2: EC2Client, price, image_id, spec, num_instances=1, timeout=None, tentative=False, tags=None) -> Generator[DescribeInstancesResultTypeDef, None, None]:
     """
     Create instances on the spot market.
     """
+
     def spotRequestNotFound(e):
         return getattr(e, 'error_code', None) == "InvalidSpotInstanceRequestID.NotFound"

+    spec['LaunchSpecification'].update({'ImageId': image_id})  # boto3 image id is in the launch specification
     for attempt in retry_ec2(retry_for=a_long_time,
                              retry_while=inconsistencies_detected):
         with attempt:
-
-            price,
+            requests_dict = boto3_ec2.request_spot_instances(
+                SpotPrice=price, InstanceCount=num_instances, **spec)
+            requests = requests_dict['SpotInstanceRequests']

     if tags is not None:
-        for requestID in (request
+        for requestID in (request['SpotInstanceRequestId'] for request in requests):
             for attempt in retry_ec2(retry_while=spotRequestNotFound):
                 with attempt:
-
+                    boto3_ec2.create_tags(Resources=[requestID], Tags=tags)

     num_active, num_other = 0, 0
     # noinspection PyUnboundLocalVariable,PyTypeChecker
     # request_spot_instances's type annotation is wrong
-    for batch in wait_spot_requests_active(
+    for batch in wait_spot_requests_active(boto3_ec2,
                                            requests,
                                            timeout=timeout,
                                            tentative=tentative):
         instance_ids = []
         for request in batch:
-
-
+            request: SpotInstanceRequestTypeDef
+            if request["State"] == 'active':
+                instance_ids.append(request["InstanceId"])
                 num_active += 1
             else:
                 logger.info(
                     'Request %s in unexpected state %s.',
-                    request
-                    request
+                    request["InstanceId"],
+                    request["State"])
                 num_other += 1
         if instance_ids:
             # This next line is the reason we batch. It's so we can get multiple instances in
             # a single request.
-
+            for instance_id in instance_ids:
+                for attempt in retry_ec2():
+                    with attempt:
+                        # Increase hop limit from 1 to use Instance Metadata V2
+                        boto3_ec2.modify_instance_metadata_options(InstanceId=instance_id, HttpPutResponseHopLimit=3)
+            yield boto3_ec2.describe_instances(InstanceIds=instance_ids)
     if not num_active:
         message = 'None of the spot requests entered the active state'
         if tentative:

@@ -255,22 +275,43 @@ def create_spot_instances(ec2, price, image_id, spec, num_instances=1, timeout=N
         logger.warning('%i request(s) entered a state other than active.', num_other)


-def create_ondemand_instances(
+def create_ondemand_instances(boto3_ec2: EC2Client, image_id: str, spec: Mapping[str, Any], num_instances: int=1) -> List[InstanceTypeDef]:
     """
     Requests the RunInstances EC2 API call but accounts for the race between recently created
     instance profiles, IAM roles and an instance creation that refers to them.

-    :rtype: List[
+    :rtype: List[InstanceTypeDef]
     """
-    instance_type = spec['
+    instance_type = spec['InstanceType']
     logger.info('Creating %s instance(s) ... ', instance_type)
+    boto_instance_list = []
     for attempt in retry_ec2(retry_for=a_long_time,
                              retry_while=inconsistencies_detected):
         with attempt:
-
-
-
-
+            boto_instance_list: List[InstanceTypeDef] = boto3_ec2.run_instances(ImageId=image_id,
+                                                                                MinCount=num_instances,
+                                                                                MaxCount=num_instances,
+                                                                                **spec)['Instances']
+
+    return boto_instance_list
+
+
+def increase_instance_hop_limit(boto3_ec2: EC2Client, boto_instance_list: List[InstanceTypeDef]) -> None:
+    """
+    Increase the default HTTP hop limit, as we are running Toil and Kubernetes inside a Docker container, so the default
+    hop limit of 1 will not be enough when grabbing metadata information with ec2_metadata
+
+    Must be called after the instances are guaranteed to be running.
+
+    :param boto_instance_list: List of boto instances to modify
+    :return:
+    """
+    for boto_instance in boto_instance_list:
+        instance_id = boto_instance['InstanceId']
+        for attempt in retry_ec2():
+            with attempt:
+                # Increase hop limit from 1 to use Instance Metadata V2
+                boto3_ec2.modify_instance_metadata_options(InstanceId=instance_id, HttpPutResponseHopLimit=3)


 def prune(bushy: dict) -> dict:

@@ -289,6 +330,7 @@ def prune(bushy: dict) -> dict:
     # catch, and to wait on IAM items.
     iam_client = establish_boto3_session().client('iam')

+
     # exception is generated by a factory so we weirdly need a client instance to reference it
     @retry(errors=[iam_client.exceptions.NoSuchEntityException],
            intervals=[1, 1, 2, 4, 8, 16, 32, 64])

@@ -301,7 +343,7 @@ def wait_until_instance_profile_arn_exists(instance_profile_arn: str):


 @retry(intervals=[5, 5, 10, 20, 20, 20, 20], errors=INCONSISTENCY_ERRORS)
-def create_instances(ec2_resource:
+def create_instances(ec2_resource: EC2ServiceResource,
                      image_id: str,
                      key_name: str,
                      instance_type: str,

@@ -312,7 +354,7 @@ def create_instances(ec2_resource: ServiceResource,
                     instance_profile_arn: Optional[str] = None,
                     placement_az: Optional[str] = None,
                     subnet_id: str = None,
-                    tags: Optional[Dict[str, str]] = None) -> List[
+                    tags: Optional[Dict[str, str]] = None) -> List[Instance]:
     """
     Replaces create_ondemand_instances. Uses boto3 and returns a list of Boto3 instance dicts.

@@ -336,7 +378,10 @@ def create_instances(ec2_resource: ServiceResource,
                'InstanceType': instance_type,
                'UserData': user_data,
                'BlockDeviceMappings': block_device_map,
-               'SubnetId': subnet_id
+               'SubnetId': subnet_id,
+               # Metadata V2 defaults hops to 1, which is an issue when running inside a docker container
+               # https://github.com/adamchainz/ec2-metadata?tab=readme-ov-file#instance-metadata-service-version-2
+               'MetadataOptions': {'HttpPutResponseHopLimit': 3}}

     if instance_profile_arn:
         # We could just retry when we get an error because the ARN doesn't

@@ -357,8 +402,9 @@ def create_instances(ec2_resource: ServiceResource,

     return ec2_resource.create_instances(**prune(request))

+
 @retry(intervals=[5, 5, 10, 20, 20, 20, 20], errors=INCONSISTENCY_ERRORS)
-def create_launch_template(ec2_client:
+def create_launch_template(ec2_client: EC2Client,
                            template_name: str,
                            image_id: str,
                            key_name: str,

@@ -400,7 +446,10 @@ def create_launch_template(ec2_client: BaseClient,
                'InstanceType': instance_type,
                'UserData': user_data,
                'BlockDeviceMappings': block_device_map,
-               'SubnetId': subnet_id
+               'SubnetId': subnet_id,
+               # Increase hop limit from 1 to use Instance Metadata V2
+               'MetadataOptions': {'HttpPutResponseHopLimit': 3}
+               }

     if instance_profile_arn:
         # We could just retry when we get an error because the ARN doesn't

@@ -413,6 +462,7 @@ def create_launch_template(ec2_client: BaseClient,
     if placement_az:
         template['Placement'] = {'AvailabilityZone': placement_az}

+    flat_tags = []
     if tags:
         # Tag everything when we make it.
         flat_tags = flatten_tags(tags)

@@ -429,17 +479,16 @@


 @retry(intervals=[5, 5, 10, 20, 20, 20, 20], errors=INCONSISTENCY_ERRORS)
-def create_auto_scaling_group(autoscaling_client:
+def create_auto_scaling_group(autoscaling_client: AutoScalingClient,
                               asg_name: str,
                               launch_template_ids: Dict[str, str],
                               vpc_subnets: List[str],
                               min_size: int,
                               max_size: int,
-                              instance_types: Optional[
+                              instance_types: Optional[Iterable[str]] = None,
                               spot_bid: Optional[float] = None,
                               spot_cheapest: bool = False,
                               tags: Optional[Dict[str, str]] = None) -> None:
-
     """
     Create a new Auto Scaling Group with the given name (which is also its
     unique identifier).

@@ -472,7 +521,7 @@ def create_auto_scaling_group(autoscaling_client: BaseClient,
     """

     if instance_types is None:
-        instance_types = []
+        instance_types: List[str] = []

     if instance_types is not None and len(instance_types) > 20:
         raise RuntimeError(f"Too many instance types ({len(instance_types)}) in group; AWS supports only 20.")

@@ -493,8 +542,8 @@ def create_auto_scaling_group(autoscaling_client: BaseClient,
     # We need to use a launch template per instance type so that different
     # instance types with specified EBS storage size overrides will get their
     # storage.
-    mip = {'LaunchTemplate': {'LaunchTemplateSpecification': get_launch_template_spec(next(iter(instance_types))),
-           'Overrides': [{'InstanceType': t, 'LaunchTemplateSpecification': get_launch_template_spec(t)} for t in instance_types]}}
+    mip = {'LaunchTemplate': {'LaunchTemplateSpecification': get_launch_template_spec(next(iter(instance_types))),  # noqa
+           'Overrides': [{'InstanceType': t, 'LaunchTemplateSpecification': get_launch_template_spec(t)} for t in instance_types]}}  # noqa

     if spot_bid is not None:
         # Ask for spot instances by saying everything above base capacity of 0 should be spot.
toil/lib/expando.py
CHANGED
toil/lib/io.py
CHANGED
@@ -16,7 +16,7 @@ def mkdtemp(suffix: Optional[str] = None, prefix: Optional[str] = None, dir: Opt

     The permissions on the directory will be 711 instead of 700, allowing the
     group and all other users to traverse the directory. This is necessary if
-    the
+    the directory is on NFS and the Docker daemon would like to mount it or a
     file inside it into a container, because on NFS even the Docker daemon
     appears bound by the file permissions.

@@ -159,14 +159,26 @@ def atomic_copyobj(src_fh: BytesIO, dest_path: str, length: int = 16384, executa
         os.chmod(dest_path_tmp, os.stat(dest_path_tmp).st_mode | stat.S_IXUSR)


-def make_public_dir(in_directory: Optional[str] = None) -> str:
+def make_public_dir(in_directory: str, suggested_name: Optional[str] = None) -> str:
     """
+    Make a publicly-accessible directory in the given directory.
+
+    :param suggested_name: Use this directory name first if possible.
+
     Try to make a random directory name with length 4 that doesn't exist, with the given prefix.
     Otherwise, try length 5, length 6, etc, up to a max of 32 (len of uuid4 with dashes replaced).
     This function's purpose is mostly to avoid having long file names when generating directories.
     If somehow this fails, which should be incredibly unlikely, default to a normal uuid4, which was
     our old default.
     """
+    if suggested_name is not None:
+        generated_dir_path: str = os.path.join(in_directory, suggested_name)
+        try:
+            os.mkdir(generated_dir_path)
+            os.chmod(generated_dir_path, 0o777)
+            return generated_dir_path
+        except FileExistsError:
+            pass
     for i in range(4, 32 + 1):  # make random uuids and truncate to lengths starting at 4 and working up to max 32
         for _ in range(10):  # make 10 attempts for each length
             truncated_uuid: str = str(uuid.uuid4()).replace('-', '')[:i]
toil/lib/misc.py
CHANGED
@@ -11,8 +11,6 @@ import typing
 from contextlib import closing
 from typing import Iterator, List, Optional

-import pytz
-
 logger = logging.getLogger(__name__)


@@ -56,7 +54,7 @@ def get_user_name() -> str:

 def utc_now() -> datetime.datetime:
     """Return a datetime in the UTC timezone corresponding to right now."""
-    return datetime.datetime.utcnow().replace(tzinfo=
+    return datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)

 def unix_now_ms() -> float:
     """Return the current time in milliseconds since the Unix epoch."""
toil/lib/resources.py
CHANGED
@@ -18,29 +18,63 @@ import sys
 import resource
 from typing import List, Tuple

-
-def get_total_cpu_time_and_memory_usage() -> Tuple[float, int]:
+class ResourceMonitor:
     """
-
-
+    Global resource monitoring widget.
+
+    Presents class methods to get the resource usage of this process and child
+    processes, and other class methods to adjust the statistics so they can
+    account for e.g. resources used inside containers, or other resource usage
+    that *should* be billable to the current process.
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    # Store some extra usage to tack onto the stats as module-level globals
+    _extra_cpu_seconds: float = 0
+    _extra_memory_ki: int = 0
+
+    @classmethod
+    def record_extra_memory(cls, peak_ki: int) -> None:
+        """
+        Become responsible for the given peak memory usage, in kibibytes.
+
+        The memory will be treated as if it was used by a child process at the time
+        our real child processes were also using their peak memory.
+        """
+        cls._extra_memory_ki = max(cls._extra_memory_ki, peak_ki)
+
+    @classmethod
+    def record_extra_cpu(cls, seconds: float) -> None:
+        """
+        Become responsible for the given CPU time.
+
+        The CPU time will be treated as if it had been used by a child process.
+        """
+        cls._extra_cpu_seconds += seconds
+
+    @classmethod
+    def get_total_cpu_time_and_memory_usage(cls) -> Tuple[float, int]:
+        """
+        Gives the total cpu time of itself and all its children, and the maximum RSS memory usage of
+        itself and its single largest child (in kibibytes).
+        """
+        me = resource.getrusage(resource.RUSAGE_SELF)
+        children = resource.getrusage(resource.RUSAGE_CHILDREN)
+        total_cpu_time = me.ru_utime + me.ru_stime + children.ru_utime + children.ru_stime + cls._extra_cpu_seconds
+        total_memory_usage = me.ru_maxrss + children.ru_maxrss
+        if sys.platform == "darwin":
+            # On Linux, getrusage works in "kilobytes" (really kibibytes), but on
+            # Mac it works in bytes. See
+            # <https://github.com/python/cpython/issues/74698>
+            total_memory_usage = int(math.ceil(total_memory_usage / 1024))
+        total_memory_usage += cls._extra_memory_ki
+        return total_cpu_time, total_memory_usage
+
+    @classmethod
+    def get_total_cpu_time(cls) -> float:
+        """Gives the total cpu time, including the children."""
+        me = resource.getrusage(resource.RUSAGE_SELF)
+        childs = resource.getrusage(resource.RUSAGE_CHILDREN)
+        return me.ru_utime + me.ru_stime + childs.ru_utime + childs.ru_stime + cls._extra_cpu_seconds


 def glob(glob_pattern: str, directoryname: str) -> List[str]:
toil/lib/retry.py
CHANGED
@@ -142,7 +142,7 @@ from typing import (Any,
                     Sequence,
                     Tuple,
                     Type,
-                    Union)
+                    Union, TypeVar)

 import requests.exceptions
 import urllib3.exceptions

@@ -224,13 +224,16 @@ class ErrorCondition:
     )


+# There is a better way to type hint this with python 3.10
+# https://stackoverflow.com/a/68290080
+RT = TypeVar("RT")
 def retry(
     intervals: Optional[List] = None,
     infinite_retries: bool = False,
     errors: Optional[Sequence[Union[ErrorCondition, Type[Exception]]]] = None,
     log_message: Optional[Tuple[Callable, str]] = None,
     prepare: Optional[List[Callable]] = None,
-) -> Callable[[
+) -> Callable[[Callable[..., RT]], Callable[..., RT]]:
     """
     Retry a function if it fails with any Exception defined in "errors".

@@ -281,9 +284,9 @@ def retry(
         if error_condition.retry_on_this_condition:
             retriable_errors.add(error_condition.error)

-    def decorate(func):
+    def decorate(func: Callable[..., RT]) -> Callable[..., RT]:
         @functools.wraps(func)
-        def call(*args, **kwargs):
+        def call(*args, **kwargs) -> RT:
             intervals_remaining = copy.deepcopy(intervals)
             while True:
                 try:

@@ -488,13 +491,15 @@ def error_meets_conditions(e, error_conditions):
 DEFAULT_DELAYS = (0, 1, 1, 4, 16, 64)
 DEFAULT_TIMEOUT = 300

+E = TypeVar("E", bound=Exception)  # so mypy understands passed through types
+
 # TODO: Replace the use of this with retry()
 # The aws provisioner and jobstore need a large refactoring to be boto3 compliant, so this is
 # still used there to avoid the duplication of future work
 def old_retry(
     delays: Iterable[float] = DEFAULT_DELAYS,
     timeout: float = DEFAULT_TIMEOUT,
-    predicate: Callable[[
+    predicate: Callable[[E], bool] = lambda e: False,
 ) -> Generator[ContextManager, None, None]:
     """
     Deprecated.

@@ -567,6 +572,8 @@ def old_retry(
     >>> i
     1
     """
+    if timeout is None:
+        timeout = DEFAULT_TIMEOUT
     if timeout > 0:
         go = [ None ]
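A sketch of what the new RT TypeVar buys callers of retry(): the decorator now advertises Callable[..., RT] -> Callable[..., RT], so type checkers keep the wrapped function's return type instead of widening it to Any. The retried error and intervals below are illustrative.

from toil.lib.retry import retry

@retry(intervals=[1, 2, 4], errors=[ConnectionError])
def fetch_count() -> int:
    # a flaky operation that may raise ConnectionError
    return 42

n: int = fetch_count()  # inferred as int rather than Any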
toil/lib/threading.py
CHANGED
@@ -28,7 +28,7 @@ import traceback
 from contextlib import contextmanager
 from typing import Dict, Iterator, Optional, Union, cast

-import psutil
+import psutil

 from toil.lib.exceptions import raise_
 from toil.lib.io import robust_rmtree

@@ -41,7 +41,7 @@ class ExceptionalThread(threading.Thread):
     A thread whose join() method re-raises exceptions raised during run(). While join() is
     idempotent, the exception is only during the first invocation of join() that successfully
     joined the thread. If join() times out, no exception will be re reraised even though an
-    exception might already have
+    exception might already have occurred in run().

     When subclassing this thread, override tryRun() instead of run().