toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. toil/__init__.py +18 -13
  2. toil/batchSystems/abstractBatchSystem.py +39 -13
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
  4. toil/batchSystems/awsBatch.py +14 -14
  5. toil/batchSystems/cleanup_support.py +7 -3
  6. toil/batchSystems/contained_executor.py +3 -3
  7. toil/batchSystems/htcondor.py +0 -1
  8. toil/batchSystems/kubernetes.py +34 -31
  9. toil/batchSystems/local_support.py +3 -1
  10. toil/batchSystems/lsf.py +7 -7
  11. toil/batchSystems/mesos/batchSystem.py +7 -7
  12. toil/batchSystems/options.py +32 -83
  13. toil/batchSystems/registry.py +104 -23
  14. toil/batchSystems/singleMachine.py +16 -13
  15. toil/batchSystems/slurm.py +87 -16
  16. toil/batchSystems/torque.py +0 -1
  17. toil/bus.py +44 -8
  18. toil/common.py +544 -753
  19. toil/cwl/__init__.py +28 -32
  20. toil/cwl/cwltoil.py +595 -574
  21. toil/cwl/utils.py +55 -10
  22. toil/exceptions.py +1 -1
  23. toil/fileStores/__init__.py +2 -2
  24. toil/fileStores/abstractFileStore.py +88 -14
  25. toil/fileStores/cachingFileStore.py +610 -549
  26. toil/fileStores/nonCachingFileStore.py +46 -22
  27. toil/job.py +182 -101
  28. toil/jobStores/abstractJobStore.py +161 -95
  29. toil/jobStores/aws/jobStore.py +23 -9
  30. toil/jobStores/aws/utils.py +6 -6
  31. toil/jobStores/fileJobStore.py +116 -18
  32. toil/jobStores/googleJobStore.py +16 -7
  33. toil/jobStores/utils.py +5 -6
  34. toil/leader.py +87 -56
  35. toil/lib/accelerators.py +10 -5
  36. toil/lib/aws/__init__.py +3 -14
  37. toil/lib/aws/ami.py +22 -9
  38. toil/lib/aws/iam.py +21 -13
  39. toil/lib/aws/session.py +2 -16
  40. toil/lib/aws/utils.py +4 -5
  41. toil/lib/compatibility.py +1 -1
  42. toil/lib/conversions.py +26 -3
  43. toil/lib/docker.py +22 -23
  44. toil/lib/ec2.py +10 -6
  45. toil/lib/ec2nodes.py +106 -100
  46. toil/lib/encryption/_nacl.py +2 -1
  47. toil/lib/generatedEC2Lists.py +325 -18
  48. toil/lib/io.py +49 -2
  49. toil/lib/misc.py +1 -1
  50. toil/lib/resources.py +9 -2
  51. toil/lib/threading.py +101 -38
  52. toil/options/common.py +736 -0
  53. toil/options/cwl.py +336 -0
  54. toil/options/wdl.py +37 -0
  55. toil/provisioners/abstractProvisioner.py +9 -4
  56. toil/provisioners/aws/__init__.py +3 -6
  57. toil/provisioners/aws/awsProvisioner.py +6 -0
  58. toil/provisioners/clusterScaler.py +3 -2
  59. toil/provisioners/gceProvisioner.py +2 -2
  60. toil/realtimeLogger.py +2 -1
  61. toil/resource.py +24 -18
  62. toil/server/app.py +2 -3
  63. toil/server/cli/wes_cwl_runner.py +4 -4
  64. toil/server/utils.py +1 -1
  65. toil/server/wes/abstract_backend.py +3 -2
  66. toil/server/wes/amazon_wes_utils.py +5 -4
  67. toil/server/wes/tasks.py +2 -3
  68. toil/server/wes/toil_backend.py +2 -10
  69. toil/server/wsgi_app.py +2 -0
  70. toil/serviceManager.py +12 -10
  71. toil/statsAndLogging.py +41 -9
  72. toil/test/__init__.py +29 -54
  73. toil/test/batchSystems/batchSystemTest.py +11 -111
  74. toil/test/batchSystems/test_slurm.py +24 -8
  75. toil/test/cactus/__init__.py +0 -0
  76. toil/test/cactus/test_cactus_integration.py +58 -0
  77. toil/test/cwl/cwlTest.py +438 -223
  78. toil/test/cwl/glob_dir.cwl +15 -0
  79. toil/test/cwl/preemptible.cwl +21 -0
  80. toil/test/cwl/preemptible_expression.cwl +28 -0
  81. toil/test/cwl/revsort.cwl +1 -1
  82. toil/test/cwl/revsort2.cwl +1 -1
  83. toil/test/docs/scriptsTest.py +2 -3
  84. toil/test/jobStores/jobStoreTest.py +34 -21
  85. toil/test/lib/aws/test_iam.py +4 -14
  86. toil/test/lib/aws/test_utils.py +0 -3
  87. toil/test/lib/dockerTest.py +4 -4
  88. toil/test/lib/test_ec2.py +12 -17
  89. toil/test/mesos/helloWorld.py +4 -5
  90. toil/test/mesos/stress.py +1 -1
  91. toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
  92. toil/test/options/options.py +37 -0
  93. toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
  94. toil/test/provisioners/clusterScalerTest.py +6 -4
  95. toil/test/provisioners/clusterTest.py +23 -11
  96. toil/test/provisioners/gceProvisionerTest.py +0 -6
  97. toil/test/provisioners/restartScript.py +3 -2
  98. toil/test/server/serverTest.py +1 -1
  99. toil/test/sort/restart_sort.py +2 -1
  100. toil/test/sort/sort.py +2 -1
  101. toil/test/sort/sortTest.py +2 -13
  102. toil/test/src/autoDeploymentTest.py +45 -45
  103. toil/test/src/busTest.py +5 -5
  104. toil/test/src/checkpointTest.py +2 -2
  105. toil/test/src/deferredFunctionTest.py +1 -1
  106. toil/test/src/fileStoreTest.py +32 -16
  107. toil/test/src/helloWorldTest.py +1 -1
  108. toil/test/src/importExportFileTest.py +1 -1
  109. toil/test/src/jobDescriptionTest.py +2 -1
  110. toil/test/src/jobServiceTest.py +1 -1
  111. toil/test/src/jobTest.py +18 -18
  112. toil/test/src/miscTests.py +5 -3
  113. toil/test/src/promisedRequirementTest.py +3 -3
  114. toil/test/src/realtimeLoggerTest.py +1 -1
  115. toil/test/src/resourceTest.py +2 -2
  116. toil/test/src/restartDAGTest.py +1 -1
  117. toil/test/src/resumabilityTest.py +36 -2
  118. toil/test/src/retainTempDirTest.py +1 -1
  119. toil/test/src/systemTest.py +2 -2
  120. toil/test/src/toilContextManagerTest.py +2 -2
  121. toil/test/src/userDefinedJobArgTypeTest.py +1 -1
  122. toil/test/utils/toilDebugTest.py +98 -32
  123. toil/test/utils/toilKillTest.py +2 -2
  124. toil/test/utils/utilsTest.py +23 -3
  125. toil/test/wdl/wdltoil_test.py +223 -45
  126. toil/toilState.py +7 -6
  127. toil/utils/toilClean.py +1 -1
  128. toil/utils/toilConfig.py +36 -0
  129. toil/utils/toilDebugFile.py +60 -33
  130. toil/utils/toilDebugJob.py +39 -12
  131. toil/utils/toilDestroyCluster.py +1 -1
  132. toil/utils/toilKill.py +1 -1
  133. toil/utils/toilLaunchCluster.py +13 -2
  134. toil/utils/toilMain.py +3 -2
  135. toil/utils/toilRsyncCluster.py +1 -1
  136. toil/utils/toilSshCluster.py +1 -1
  137. toil/utils/toilStats.py +445 -305
  138. toil/utils/toilStatus.py +2 -5
  139. toil/version.py +10 -10
  140. toil/wdl/utils.py +2 -122
  141. toil/wdl/wdltoil.py +1257 -492
  142. toil/worker.py +55 -46
  143. toil-6.1.0.dist-info/METADATA +124 -0
  144. toil-6.1.0.dist-info/RECORD +241 -0
  145. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
  146. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
  147. toil/batchSystems/parasol.py +0 -379
  148. toil/batchSystems/tes.py +0 -459
  149. toil/test/batchSystems/parasolTestSupport.py +0 -117
  150. toil/test/wdl/builtinTest.py +0 -506
  151. toil/test/wdl/toilwdlTest.py +0 -522
  152. toil/wdl/toilwdl.py +0 -141
  153. toil/wdl/versions/dev.py +0 -107
  154. toil/wdl/versions/draft2.py +0 -980
  155. toil/wdl/versions/v1.py +0 -794
  156. toil/wdl/wdl_analysis.py +0 -116
  157. toil/wdl/wdl_functions.py +0 -997
  158. toil/wdl/wdl_synthesis.py +0 -1011
  159. toil/wdl/wdl_types.py +0 -243
  160. toil-5.12.0.dist-info/METADATA +0 -118
  161. toil-5.12.0.dist-info/RECORD +0 -244
  162. /toil/{wdl/versions → options}/__init__.py +0 -0
  163. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
  164. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/lib/conversions.py CHANGED
@@ -46,8 +46,10 @@ def convert_units(num: float,
46
46
  src_unit: str,
47
47
  dst_unit: str = 'B') -> float:
48
48
  """Returns a float representing the converted input in dst_units."""
49
- assert src_unit.lower() in VALID_PREFIXES, f"{src_unit} not a valid unit, valid units are {VALID_PREFIXES}."
50
- assert dst_unit.lower() in VALID_PREFIXES, f"{dst_unit} not a valid unit, valid units are {VALID_PREFIXES}."
49
+ if not src_unit.lower() in VALID_PREFIXES:
50
+ raise RuntimeError(f"{src_unit} not a valid unit, valid units are {VALID_PREFIXES}.")
51
+ if not dst_unit.lower() in VALID_PREFIXES:
52
+ raise RuntimeError(f"{dst_unit} not a valid unit, valid units are {VALID_PREFIXES}.")
51
53
  return (num * bytes_in_unit(src_unit)) / bytes_in_unit(dst_unit)
52
54
 
53
55
 
@@ -60,7 +62,8 @@ def parse_memory_string(string: str) -> Tuple[float, str]:
60
62
  # find the first character of the unit
61
63
  if character not in '0123456789.-_ ':
62
64
  units = string[i:].strip()
63
- assert units.lower() in VALID_PREFIXES, f"{units} not a valid unit, valid units are {VALID_PREFIXES}."
65
+ if not units.lower() in VALID_PREFIXES:
66
+ raise RuntimeError(f"{units} not a valid unit, valid units are {VALID_PREFIXES}.")
64
67
  return float(string[:i]), units
65
68
  return float(string), 'b'
66
69
 
@@ -71,6 +74,7 @@ def human2bytes(string: str) -> int:
71
74
  integer number of bytes.
72
75
  """
73
76
  value, unit = parse_memory_string(string)
77
+
74
78
  return int(convert_units(value, src_unit=unit, dst_unit='b'))
75
79
 
76
80
 
@@ -124,3 +128,22 @@ def hms_duration_to_seconds(hms: str) -> float:
124
128
  seconds += float(vals_to_convert[2])
125
129
 
126
130
  return seconds
131
+
132
+
133
+ def strtobool(val: str) -> bool:
134
+ """
135
+ Make a human-readable string into a bool.
136
+
137
+ Convert a string along the lines of "y", "1", "ON", "TrUe", or
138
+ "Yes" to True, and the corresponding false-ish values to False.
139
+ """
140
+ # We only track prefixes, so "y" covers "y", "yes",
141
+ # and "yeah no" and makes them all True.
142
+ TABLE = {True: ["1", "on", "y", "t"], False: ["0", "off", "n", "f"]}
143
+ lowered = val.lower()
144
+ for result, prefixes in TABLE.items():
145
+ for prefix in prefixes:
146
+ if lowered.startswith(prefix):
147
+ return result
148
+ raise ValueError(f"Cannot convert \"{val}\" to a bool")
149
+
toil/lib/docker.py CHANGED
@@ -17,7 +17,7 @@ import os
17
17
  import re
18
18
  import struct
19
19
  from shlex import quote
20
- from typing import Optional, List
20
+ from typing import List, Optional
21
21
 
22
22
  import requests
23
23
 
@@ -27,7 +27,6 @@ from docker.errors import (ContainerError,
27
27
  NotFound,
28
28
  create_api_error_from_http_exception)
29
29
  from docker.utils.socket import consume_socket_output, demux_adaptor
30
-
31
30
  from toil.lib.accelerators import get_host_accelerator_numbers
32
31
 
33
32
  logger = logging.getLogger(__name__)
@@ -84,16 +83,17 @@ def apiDockerCall(job,
84
83
  jobs, with the intention that failed/orphaned docker jobs be handled
85
84
  appropriately.
86
85
 
87
- Example of using dockerCall in toil to index a FASTA file with SAMtools:
88
- def toil_job(job):
89
- working_dir = job.fileStore.getLocalTempDir()
90
- path = job.fileStore.readGlobalFile(ref_id,
91
- os.path.join(working_dir, 'ref.fasta')
92
- parameters = ['faidx', path]
93
- apiDockerCall(job,
94
- image='quay.io/ucgc_cgl/samtools:latest',
95
- working_dir=working_dir,
96
- parameters=parameters)
86
+ Example of using dockerCall in toil to index a FASTA file with SAMtools::
87
+
88
+ def toil_job(job):
89
+ working_dir = job.fileStore.getLocalTempDir()
90
+ path = job.fileStore.readGlobalFile(ref_id,
91
+ os.path.join(working_dir, 'ref.fasta')
92
+ parameters = ['faidx', path]
93
+ apiDockerCall(job,
94
+ image='quay.io/ucgc_cgl/samtools:latest',
95
+ working_dir=working_dir,
96
+ parameters=parameters)
97
97
 
98
98
  Note that when run with detach=False, or with detach=True and stdout=True
99
99
  or stderr=True, this is a blocking call. When run with detach=True and
@@ -103,13 +103,13 @@ def apiDockerCall(job,
103
103
  :param toil.Job.job job: The Job instance for the calling function.
104
104
  :param str image: Name of the Docker image to be used.
105
105
  (e.g. 'quay.io/ucsc_cgl/samtools:latest')
106
- :param list[str] parameters: A list of string elements. If there are
106
+ :param list[str] parameters: A list of string elements. If there are
107
107
  multiple elements, these will be joined with
108
- spaces. This handling of multiple elements
108
+ spaces. This handling of multiple elements
109
109
  provides backwards compatibility with previous
110
110
  versions which called docker using
111
111
  subprocess.check_call().
112
- **If list of lists: list[list[str]], then treat
112
+ If list of lists: list[list[str]], then treat
113
113
  as successive commands chained with pipe.
114
114
  :param str working_dir: The working directory.
115
115
  :param int deferParam: Action to take on the container upon job completion.
@@ -225,8 +225,8 @@ def apiDockerCall(job,
225
225
  working_dir = os.path.abspath(working_dir)
226
226
 
227
227
  # Ensure the user has passed a valid value for deferParam
228
- assert deferParam in (None, FORGO, STOP, RM), \
229
- 'Please provide a valid value for deferParam.'
228
+ if deferParam not in (None, FORGO, STOP, RM):
229
+ raise RuntimeError('Please provide a valid value for deferParam.')
230
230
 
231
231
  client = docker.from_env(version='auto', timeout=timeout)
232
232
 
@@ -413,12 +413,11 @@ def containerIsRunning(container_name: str, timeout: int = 365 * 24 * 60 * 60):
413
413
 
414
414
  :param container_name: Name of the container being checked.
415
415
  :param int timeout: Use the given timeout in seconds for interactions with
416
- the Docker daemon. Note that the underlying docker module is
417
- not always able to abort ongoing reads and writes in order
418
- to respect the timeout. Defaults to 1 year (i.e. wait
419
- essentially indefinitely).
416
+ the Docker daemon. Note that the underlying docker module is not always
417
+ able to abort ongoing reads and writes in order to respect the timeout.
418
+ Defaults to 1 year (i.e. wait essentially indefinitely).
420
419
  :returns: True if status is 'running', False if status is anything else,
421
- and None if the container does not exist.
420
+ and None if the container does not exist.
422
421
  """
423
422
  client = docker.from_env(version='auto', timeout=timeout)
424
423
  try:
@@ -439,7 +438,7 @@ def containerIsRunning(container_name: str, timeout: int = 365 * 24 * 60 * 60):
439
438
  def getContainerName(job):
440
439
  """
441
440
  Create a random string including the job name, and return it. Name will
442
- match [a-zA-Z0-9][a-zA-Z0-9_.-]
441
+ match ``[a-zA-Z0-9][a-zA-Z0-9_.-]``.
443
442
  """
444
443
  parts = ['toil', str(job.description), base64.b64encode(os.urandom(9), b'-_').decode('utf-8')]
445
444
  name = re.sub('[^a-zA-Z0-9_.-]', '', '--'.join(parts))
toil/lib/ec2.py CHANGED
@@ -103,11 +103,13 @@ def wait_instances_running(ec2, instances: Iterable[Boto2Instance]) -> Iterable[
103
103
  if i.state == 'pending':
104
104
  pending_ids.add(i.id)
105
105
  elif i.state == 'running':
106
- assert i.id not in running_ids
106
+ if i.id in running_ids:
107
+ raise RuntimeError("An instance was already added to the list of running instance IDs. Maybe there is a duplicate.")
107
108
  running_ids.add(i.id)
108
109
  yield i
109
110
  else:
110
- assert i.id not in other_ids
111
+ if i.id in other_ids:
112
+ raise RuntimeError("An instance was already added to the list of other instances. Maybe there is a duplicate.")
111
113
  other_ids.add(i.id)
112
114
  yield i
113
115
  logger.info('%i instance(s) pending, %i running, %i other.',
@@ -130,10 +132,10 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
130
132
  :param requests: The requests to wait on.
131
133
 
132
134
  :param timeout: Maximum time in seconds to spend waiting or None to wait forever. If a
133
- timeout occurs, the remaining open requests will be cancelled.
135
+ timeout occurs, the remaining open requests will be cancelled.
134
136
 
135
137
  :param tentative: if True, give up on a spot request at the earliest indication of it
136
- not being fulfilled immediately
138
+ not being fulfilled immediately
137
139
 
138
140
  """
139
141
 
@@ -166,11 +168,13 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
166
168
  'Request %s entered status %s indicating that it will not be '
167
169
  'fulfilled anytime soon.', r.id, r.status.code)
168
170
  elif r.state == 'active':
169
- assert r.id not in active_ids
171
+ if r.id in active_ids:
172
+ raise RuntimeError("A request was already added to the list of active requests. Maybe there are duplicate requests.")
170
173
  active_ids.add(r.id)
171
174
  batch.append(r)
172
175
  else:
173
- assert r.id not in other_ids
176
+ if r.id in other_ids:
177
+ raise RuntimeError("A request was already added to the list of other IDs. Maybe there are duplicate requests.")
174
178
  other_ids.add(r.id)
175
179
  batch.append(r)
176
180
  if batch:
toil/lib/ec2nodes.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2015-2021 Regents of the University of California
1
+ # Copyright (C) 2015-2024 Regents of the University of California
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -17,12 +17,17 @@ import logging
17
17
  import os
18
18
  import re
19
19
  import textwrap
20
- from typing import Any, Dict, List, Tuple, Union
21
-
22
20
  import requests
21
+ import shutil
22
+ import enlighten # type: ignore
23
+
24
+ from typing import Dict, List, Tuple, Union, Any
25
+
23
26
 
24
27
  logger = logging.getLogger(__name__)
28
+ manager = enlighten.get_manager()
25
29
  dirname = os.path.dirname(__file__)
30
+ region_json_dirname = os.path.join(dirname, 'region_jsons')
26
31
 
27
32
 
28
33
  EC2Regions = {'us-west-1': 'US West (N. California)',
@@ -83,7 +88,7 @@ class InstanceType:
83
88
  return False
84
89
 
85
90
 
86
- def isNumber(s: str) -> bool:
91
+ def is_number(s: str) -> bool:
87
92
  """
88
93
  Determines if a unicode string (that may include commas) is a number.
89
94
 
@@ -105,7 +110,7 @@ def isNumber(s: str) -> bool:
105
110
  return False
106
111
 
107
112
 
108
- def parseStorage(storageData: str) -> Union[List[int], Tuple[Union[int, float], float]]:
113
+ def parse_storage(storage_info: str) -> Union[List[int], Tuple[Union[int, float], float]]:
109
114
  """
110
115
  Parses EC2 JSON storage param string into a number.
111
116
 
@@ -117,22 +122,22 @@ def parseStorage(storageData: str) -> Union[List[int], Tuple[Union[int, float],
117
122
  "8 x 1.9 NVMe SSD"
118
123
  "900 GB NVMe SSD"
119
124
 
120
- :param str storageData: EC2 JSON storage param string.
125
+ :param str storage_info: EC2 JSON storage param string.
121
126
  :return: Two floats representing: (# of disks), and (disk_capacity in GiB of each disk).
122
127
  """
123
- if storageData == "EBS only":
128
+ if storage_info == "EBS only":
124
129
  return [0, 0]
125
130
  else:
126
- specs = storageData.strip().split()
127
- if isNumber(specs[0]) and specs[1] == 'x' and isNumber(specs[2]):
131
+ specs = storage_info.strip().split()
132
+ if is_number(specs[0]) and specs[1] == 'x' and is_number(specs[2]):
128
133
  return float(specs[0].replace(',', '')), float(specs[2].replace(',', ''))
129
- elif isNumber(specs[0]) and specs[1] == 'GB' and specs[2] == 'NVMe' and specs[3] == 'SSD':
134
+ elif is_number(specs[0]) and specs[1] == 'GB' and specs[2] == 'NVMe' and specs[3] == 'SSD':
130
135
  return 1, float(specs[0].replace(',', ''))
131
136
  else:
132
137
  raise RuntimeError('EC2 JSON format has likely changed. Error parsing disk specs.')
133
138
 
134
139
 
135
- def parseMemory(memAttribute: str) -> float:
140
+ def parse_memory(mem_info: str) -> float:
136
141
  """
137
142
  Returns EC2 'memory' string as a float.
138
143
 
@@ -140,18 +145,19 @@ def parseMemory(memAttribute: str) -> float:
140
145
  Amazon loves to put commas in their numbers, so we have to accommodate that.
141
146
  If the syntax ever changes, this will raise.
142
147
 
143
- :param memAttribute: EC2 JSON memory param string.
148
+ :param mem_info: EC2 JSON memory param string.
144
149
  :return: A float representing memory in GiB.
145
150
  """
146
- mem = memAttribute.replace(',', '').split()
151
+ mem = mem_info.replace(',', '').split()
147
152
  if mem[1] == 'GiB':
148
153
  return float(mem[0])
149
154
  else:
150
155
  raise RuntimeError('EC2 JSON format has likely changed. Error parsing memory.')
151
156
 
152
157
 
153
- def fetchEC2Index(filename: str) -> None:
154
- """Downloads and writes the AWS Billing JSON to a file using the AWS pricing API.
158
+ def download_region_json(filename: str, region: str = 'us-east-1') -> None:
159
+ """
160
+ Downloads and writes the AWS Billing JSON to a file using the AWS pricing API.
155
161
 
156
162
  See: https://aws.amazon.com/blogs/aws/new-aws-price-list-api/
157
163
 
@@ -159,61 +165,45 @@ def fetchEC2Index(filename: str) -> None:
159
165
  aws instance name (example: 't2.micro'), and the value is an
160
166
  InstanceType object representing that aws instance name.
161
167
  """
162
- print('Downloading ~1Gb AWS billing file to parse for information.\n')
168
+ response = requests.get(f'https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/{region}/index.json', stream=True)
169
+ file_size = int(response.headers.get("content-length", 0))
170
+ print(f'Downloading ~{file_size / 1000000000}Gb {region} AWS billing file to: {filename}')
163
171
 
164
- response = requests.get('https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/index.json')
165
- if response.ok:
166
- with open(filename, 'w') as f:
167
- f.write(str(json.dumps(json.loads(response.text), indent=4)))
168
- print('Download completed successfully!\n')
169
- else:
170
- raise RuntimeError('Error: ' + str(response) + ' :: ' + str(response.text))
172
+ with manager.counter(total=file_size, desc=os.path.basename(filename), unit='bytes', leave=False) as progress_bar:
173
+ with open(filename, "wb") as file:
174
+ for data in response.iter_content(1048576):
175
+ progress_bar.update(len(data))
176
+ file.write(data)
171
177
 
172
178
 
173
- def fetchEC2InstanceDict(awsBillingJson: Dict[str, Any], region: str) -> Dict[str, InstanceType]:
179
+ def reduce_region_json_size(filename:str) -> List[Dict[str, Any]]:
174
180
  """
175
- Takes a JSON and returns a list of InstanceType objects representing EC2 instance params.
181
+ Deletes information in the json file that we don't need, and rewrites it. This makes the file smaller.
176
182
 
177
- :param region:
178
- :return:
183
+ The reason being: we used to download the unified AWS Bulk API JSON, which eventually crept up to 5.6Gb,
184
+ the loading of which could not be done on a 32Gb RAM machine. Now we download each region JSON individually
185
+ (with AWS's new Query API), but even those may eventually one day grow ridiculously large, so we do what we can to
186
+ keep the file sizes down (and thus also the amount loaded into memory) to keep this script working for longer.
179
187
  """
180
- ec2InstanceList = []
181
- for k, v in awsBillingJson['products'].items():
182
- i = v['attributes']
183
- # NOTES:
184
- #
185
- # 3 tenant types: 'Host' (always $0.00; just a template?)
186
- # 'Dedicated' (toil does not support; these are pricier)
187
- # 'Shared' (AWS default and what toil uses)
188
- #
189
- # The same instance can appear with multiple "operation" values;
190
- # "RunInstances" is normal
191
- # "RunInstances:<code>" is e.g. Linux with MS SQL Server installed.
192
- if (i.get('location') == region and
193
- i.get('tenancy') == 'Shared' and
194
- i.get('operatingSystem') == 'Linux' and
195
- i.get('operation') == 'RunInstances'):
196
-
197
- normal_use = i.get('usagetype').endswith('BoxUsage:' + i['instanceType']) # not reserved or unused
198
- if normal_use:
199
- disks, disk_capacity = parseStorage(v["attributes"]["storage"])
200
-
201
- # Determines whether the instance type is from an ARM or AMD family
202
- # ARM instance names include a digit followed by a 'g' before the instance size
203
- architecture = 'arm64' if re.search(r".*\dg.*\..*", i["instanceType"]) else 'amd64'
204
-
205
- instance = InstanceType(name=i["instanceType"],
206
- cores=i["vcpu"],
207
- memory=parseMemory(i["memory"]),
208
- disks=disks,
209
- disk_capacity=disk_capacity,
210
- architecture=architecture)
211
- if instance in ec2InstanceList:
212
- raise RuntimeError('EC2 JSON format has likely changed. '
213
- 'Duplicate instance {} found.'.format(instance))
214
- ec2InstanceList.append(instance)
215
- print('Finished for ' + str(region) + '. ' + str(len(ec2InstanceList)) + ' added.')
216
- return {_.name: _ for _ in ec2InstanceList}
188
+ with open(filename, 'r') as f:
189
+ aws_products = json.loads(f.read())['products']
190
+ aws_product_list = list()
191
+ for k in list(aws_products.keys()):
192
+ ec2_attributes = aws_products[k]['attributes']
193
+ if (ec2_attributes.get('tenancy') == 'Shared' and
194
+ ec2_attributes.get('operatingSystem') == 'Linux' and
195
+ ec2_attributes.get('operation') == 'RunInstances' and
196
+ ec2_attributes.get('usagetype').endswith('BoxUsage:' + ec2_attributes['instanceType'])):
197
+ aws_product_list.append(dict(disk=ec2_attributes["storage"],
198
+ loc=ec2_attributes["location"],
199
+ name=ec2_attributes["instanceType"],
200
+ mem=ec2_attributes["memory"],
201
+ cpu=ec2_attributes["vcpu"]))
202
+ del aws_products[k]
203
+ del aws_products
204
+ with open(filename, 'w') as f:
205
+ f.write(json.dumps(dict(aws=aws_product_list), indent=2))
206
+ return aws_product_list
217
207
 
218
208
 
219
209
  def updateStaticEC2Instances() -> None:
@@ -225,39 +215,58 @@ def updateStaticEC2Instances() -> None:
225
215
  :return: Nothing. Writes a new 'generatedEC2Lists.py' file.
226
216
  """
227
217
  print("Updating Toil's EC2 lists to the most current version from AWS's bulk API.\n"
228
- "This may take a while, depending on your internet connection (~1Gb file).\n")
218
+ "This may take a while, depending on your internet connection.\n")
229
219
 
230
- origFile = os.path.join(dirname, 'generatedEC2Lists.py') # original
231
- assert os.path.exists(origFile)
220
+ original_aws_instance_list = os.path.join(dirname, 'generatedEC2Lists.py') # original
221
+ if not os.path.exists(original_aws_instance_list):
222
+ raise RuntimeError(f"Path {original_aws_instance_list} does not exist.")
232
223
  # use a temporary file until all info is fetched
233
- genFile = os.path.join(dirname, 'generatedEC2Lists_tmp.py') # temp
234
- if os.path.exists(genFile):
235
- os.remove(genFile)
224
+ updated_aws_instance_list = os.path.join(dirname, 'generatedEC2Lists_tmp.py') # temp
225
+ if os.path.exists(updated_aws_instance_list):
226
+ os.remove(updated_aws_instance_list)
236
227
 
237
- # filepath to store the aws json request (will be cleaned up)
238
- # this is done because AWS changes their json format from time to time
239
- # and debugging is faster with the file stored locally
240
- awsJsonIndex = os.path.join(dirname, 'index.json')
241
-
242
- if not os.path.exists(awsJsonIndex):
243
- fetchEC2Index(filename=awsJsonIndex)
244
- else:
245
- print('Reusing previously downloaded json @: ' + awsJsonIndex)
246
-
247
- with open(awsJsonIndex) as f:
248
- awsProductDict = json.loads(f.read())
228
+ if not os.path.exists(region_json_dirname):
229
+ os.mkdir(region_json_dirname)
249
230
 
250
231
  currentEC2List = []
251
232
  instancesByRegion: Dict[str, List[str]] = {}
252
- for regionNickname in EC2Regions:
253
- currentEC2Dict = fetchEC2InstanceDict(awsProductDict, region=EC2Regions[regionNickname])
233
+ for region in EC2Regions.keys():
234
+ region_json = os.path.join(region_json_dirname, f'{region}.json')
235
+
236
+ if os.path.exists(region_json):
237
+ try:
238
+ with open(region_json, 'r') as f:
239
+ aws_products = json.loads(f.read())['aws']
240
+ print(f'Reusing previously downloaded json @: {region_json}')
241
+ except:
242
+ os.remove(region_json)
243
+ download_region_json(filename=region_json, region=region)
244
+ aws_products = reduce_region_json_size(filename=region_json)
245
+ else:
246
+ download_region_json(filename=region_json, region=region)
247
+ aws_products = reduce_region_json_size(filename=region_json)
248
+
249
+ ec2InstanceList = []
250
+ for i in aws_products:
251
+ disks, disk_capacity = parse_storage(i["disk"])
252
+ # Determines whether the instance type is from an ARM or AMD family
253
+ # ARM instance names include a digit followed by a 'g' before the instance size
254
+ architecture = 'arm64' if re.search(r".*\dg.*\..*", i["name"]) else 'amd64'
255
+ ec2InstanceList.append(InstanceType(name=i["name"],
256
+ cores=i["cpu"],
257
+ memory=parse_memory(i["mem"]),
258
+ disks=disks,
259
+ disk_capacity=disk_capacity,
260
+ architecture=architecture))
261
+ print('Finished for ' + str(region) + '. ' + str(len(ec2InstanceList)) + ' added.\n')
262
+ currentEC2Dict = {_.name: _ for _ in ec2InstanceList}
254
263
  for instanceName, instanceTypeObj in currentEC2Dict.items():
255
264
  if instanceTypeObj not in currentEC2List:
256
265
  currentEC2List.append(instanceTypeObj)
257
- instancesByRegion.setdefault(regionNickname, []).append(instanceName)
266
+ instancesByRegion.setdefault(region, []).append(instanceName)
258
267
 
259
268
  # write provenance note, copyright and imports
260
- with open(genFile, 'w') as f:
269
+ with open(updated_aws_instance_list, 'w') as f:
261
270
  f.write(textwrap.dedent('''
262
271
  # !!! AUTOGENERATED FILE !!!
263
272
  # Update with: src/toil/utils/toilUpdateEC2Instances.py
@@ -278,16 +287,13 @@ def updateStaticEC2Instances() -> None:
278
287
  from toil.lib.ec2nodes import InstanceType\n\n\n''').format(year=datetime.date.today().strftime("%Y"))[1:])
279
288
 
280
289
  # write header of total EC2 instance type list
281
- genString = "# {num} Instance Types. Generated {date}.\n".format(
282
- num=str(len(currentEC2List)), date=str(datetime.datetime.now()))
290
+ genString = f'# {len(currentEC2List)} Instance Types. Generated {datetime.datetime.now()}.\n'
283
291
  genString = genString + "E2Instances = {\n"
284
292
  sortedCurrentEC2List = sorted(currentEC2List, key=lambda x: x.name)
285
293
 
286
294
  # write the list of all instances types
287
295
  for i in sortedCurrentEC2List:
288
- z = " '{name}': InstanceType(name='{name}', cores={cores}, memory={memory}, disks={disks}, disk_capacity={disk_capacity}, architecture='{architecture}')," \
289
- "\n".format(name=i.name, cores=i.cores, memory=i.memory, disks=i.disks, disk_capacity=i.disk_capacity, architecture=i.architecture)
290
- genString = genString + z
296
+ genString = genString + f" '{i.name}': InstanceType(name='{i.name}', cores={i.cores}, memory={i.memory}, disks={i.disks}, disk_capacity={i.disk_capacity}, architecture='{i.architecture}'),\n"
291
297
  genString = genString + '}\n\n'
292
298
 
293
299
  genString = genString + 'regionDict = {\n'
@@ -301,19 +307,19 @@ def updateStaticEC2Instances() -> None:
301
307
  if genString.endswith(',\n'):
302
308
  genString = genString[:-len(',\n')]
303
309
  genString = genString + '}\n'
304
- with open(genFile, 'a+') as f:
310
+ with open(updated_aws_instance_list, 'a+') as f:
305
311
  f.write(genString)
306
312
 
307
313
  # append key for fetching at the end
308
314
  regionKey = '\nec2InstancesByRegion = {region: [E2Instances[i] for i in instances] for region, instances in regionDict.items()}\n'
309
315
 
310
- with open(genFile, 'a+') as f:
316
+ with open(updated_aws_instance_list, 'a+') as f:
311
317
  f.write(regionKey)
312
- # delete the original file
313
- if os.path.exists(origFile):
314
- os.remove(origFile)
318
+
315
319
  # replace the instance list with a current list
316
- os.rename(genFile, origFile)
317
- # delete the aws billing json file
318
- if os.path.exists(awsJsonIndex):
319
- os.remove(awsJsonIndex)
320
+ os.rename(updated_aws_instance_list, original_aws_instance_list)
321
+
322
+ # delete the aws region json file directory
323
+ if os.path.exists(region_json_dirname):
324
+ print(f'Update Successful! Removing AWS Region JSON Files @: {region_json_dirname}')
325
+ shutil.rmtree(region_json_dirname)
toil/lib/encryption/_nacl.py CHANGED
@@ -53,7 +53,8 @@ def encrypt(message: bytes, keyPath: str) -> bytes:
53
53
  # of a collision is astronomically low. (This approach is
54
54
  # recommended in the libsodium documentation.)
55
55
  nonce = nacl.utils.random(SecretBox.NONCE_SIZE)
56
- assert len(nonce) == SecretBox.NONCE_SIZE
56
+ if len(nonce) != SecretBox.NONCE_SIZE:
57
+ raise RuntimeError("Generated nonce is the wrong size.")
57
58
  return bytes(sb.encrypt(message, nonce))
58
59
 
59
60