toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. toil/batchSystems/abstractBatchSystem.py +13 -5
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
  3. toil/batchSystems/kubernetes.py +13 -2
  4. toil/batchSystems/mesos/batchSystem.py +33 -2
  5. toil/batchSystems/registry.py +15 -118
  6. toil/batchSystems/slurm.py +191 -16
  7. toil/common.py +20 -1
  8. toil/cwl/cwltoil.py +97 -119
  9. toil/cwl/utils.py +103 -3
  10. toil/fileStores/__init__.py +1 -1
  11. toil/fileStores/abstractFileStore.py +5 -2
  12. toil/fileStores/cachingFileStore.py +1 -1
  13. toil/job.py +30 -14
  14. toil/jobStores/abstractJobStore.py +35 -255
  15. toil/jobStores/aws/jobStore.py +864 -1964
  16. toil/jobStores/aws/utils.py +24 -270
  17. toil/jobStores/fileJobStore.py +2 -1
  18. toil/jobStores/googleJobStore.py +32 -13
  19. toil/jobStores/utils.py +0 -327
  20. toil/leader.py +27 -22
  21. toil/lib/accelerators.py +1 -1
  22. toil/lib/aws/config.py +22 -0
  23. toil/lib/aws/s3.py +477 -9
  24. toil/lib/aws/utils.py +22 -33
  25. toil/lib/checksum.py +88 -0
  26. toil/lib/conversions.py +33 -31
  27. toil/lib/directory.py +217 -0
  28. toil/lib/ec2.py +97 -29
  29. toil/lib/exceptions.py +2 -1
  30. toil/lib/expando.py +2 -2
  31. toil/lib/generatedEC2Lists.py +138 -19
  32. toil/lib/io.py +33 -2
  33. toil/lib/memoize.py +21 -7
  34. toil/lib/misc.py +1 -1
  35. toil/lib/pipes.py +385 -0
  36. toil/lib/plugins.py +106 -0
  37. toil/lib/retry.py +1 -1
  38. toil/lib/threading.py +1 -1
  39. toil/lib/url.py +320 -0
  40. toil/lib/web.py +4 -5
  41. toil/options/cwl.py +13 -1
  42. toil/options/runner.py +17 -10
  43. toil/options/wdl.py +12 -1
  44. toil/provisioners/__init__.py +5 -2
  45. toil/provisioners/aws/__init__.py +43 -36
  46. toil/provisioners/aws/awsProvisioner.py +47 -15
  47. toil/provisioners/node.py +60 -12
  48. toil/resource.py +3 -13
  49. toil/server/app.py +12 -6
  50. toil/server/cli/wes_cwl_runner.py +2 -2
  51. toil/server/wes/abstract_backend.py +21 -43
  52. toil/server/wes/toil_backend.py +2 -2
  53. toil/test/__init__.py +16 -18
  54. toil/test/batchSystems/batchSystemTest.py +2 -9
  55. toil/test/batchSystems/batch_system_plugin_test.py +7 -0
  56. toil/test/batchSystems/test_slurm.py +103 -14
  57. toil/test/cwl/cwlTest.py +181 -8
  58. toil/test/cwl/staging_cat.cwl +27 -0
  59. toil/test/cwl/staging_make_file.cwl +25 -0
  60. toil/test/cwl/staging_workflow.cwl +43 -0
  61. toil/test/cwl/zero_default.cwl +61 -0
  62. toil/test/docs/scripts/tutorial_staging.py +17 -8
  63. toil/test/docs/scriptsTest.py +2 -1
  64. toil/test/jobStores/jobStoreTest.py +23 -133
  65. toil/test/lib/aws/test_iam.py +7 -7
  66. toil/test/lib/aws/test_s3.py +30 -33
  67. toil/test/lib/aws/test_utils.py +9 -9
  68. toil/test/lib/test_url.py +69 -0
  69. toil/test/lib/url_plugin_test.py +105 -0
  70. toil/test/provisioners/aws/awsProvisionerTest.py +60 -7
  71. toil/test/provisioners/clusterTest.py +15 -2
  72. toil/test/provisioners/gceProvisionerTest.py +1 -1
  73. toil/test/server/serverTest.py +78 -36
  74. toil/test/src/autoDeploymentTest.py +2 -3
  75. toil/test/src/fileStoreTest.py +89 -87
  76. toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
  77. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
  78. toil/test/utils/toilKillTest.py +35 -28
  79. toil/test/wdl/md5sum/md5sum-gs.json +1 -1
  80. toil/test/wdl/md5sum/md5sum.json +1 -1
  81. toil/test/wdl/testfiles/read_file.wdl +18 -0
  82. toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
  83. toil/test/wdl/wdltoil_test.py +171 -162
  84. toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
  85. toil/utils/toilDebugFile.py +6 -3
  86. toil/utils/toilSshCluster.py +23 -0
  87. toil/utils/toilStats.py +17 -2
  88. toil/utils/toilUpdateEC2Instances.py +1 -0
  89. toil/version.py +10 -10
  90. toil/wdl/wdltoil.py +1179 -825
  91. toil/worker.py +16 -8
  92. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/METADATA +32 -32
  93. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/RECORD +97 -85
  94. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/WHEEL +1 -1
  95. toil/lib/iterables.py +0 -112
  96. toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
  97. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
  98. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
  99. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
toil/lib/checksum.py ADDED
@@ -0,0 +1,88 @@
1
+ # Copyright (C) 2015-2021 Regents of the University of California
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import logging
15
+ import hashlib
16
+
17
+ from io import BytesIO
18
+ from typing import BinaryIO, Union, List, TYPE_CHECKING
19
+
20
+ from toil.lib.aws.config import S3_PART_SIZE
21
+
22
+ if TYPE_CHECKING:
23
+ # mypy complaint: https://github.com/python/typeshed/issues/2928
24
+ from hashlib import _Hash
25
+
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
class ChecksumError(Exception):
    """Raised when downloaded data does not match its expected checksum."""
32
+
33
+
34
class Etag:
    """
    A hasher for S3 etags.

    S3 computes a multipart upload's etag as the MD5 of the concatenated
    MD5 digests of each part, suffixed with ``-<part count>``; a
    single-part object's etag is a plain MD5 hex digest. This class
    reproduces that scheme for a fixed part (chunk) size.
    """
    def __init__(self, chunk_size: int) -> None:
        # Number of bytes fed into the current, not-yet-finalized part.
        self.etag_bytes: int = 0
        # Finalized MD5 digests, one per completed part.
        self.etag_parts: List[bytes] = []
        # Hasher accumulating the part currently being filled.
        self.etag_hasher: "_Hash" = hashlib.md5()
        # Part size; must match the uploader's part size for identical etags.
        self.chunk_size: int = chunk_size

    def update(self, chunk: bytes) -> None:
        """
        Feed more data into the hash. Chunks may be any size; part
        boundaries are tracked internally.
        """
        # Finalize as many full parts as this chunk completes. The previous
        # implementation split the chunk only once, so a single chunk longer
        # than 2 * chunk_size would overfill a part and corrupt the etag;
        # looping handles arbitrarily large chunks.
        while self.etag_bytes + len(chunk) > self.chunk_size:
            take = self.chunk_size - self.etag_bytes
            self.etag_hasher.update(chunk[:take])
            self.etag_parts.append(self.etag_hasher.digest())
            self.etag_hasher = hashlib.md5()
            chunk = chunk[take:]
            self.etag_bytes = 0
        self.etag_hasher.update(chunk)
        self.etag_bytes += len(chunk)

    def hexdigest(self) -> str:
        """
        Return the etag for everything hashed so far: either a plain MD5
        hex digest (one part) or ``<md5-of-digests>-<part count>``.
        """
        if self.etag_bytes:
            # Close out the in-progress part.
            self.etag_parts.append(self.etag_hasher.digest())
            self.etag_bytes = 0
        if len(self.etag_parts) > 1:
            etag = hashlib.md5(b"".join(self.etag_parts)).hexdigest()
            return f'{etag}-{len(self.etag_parts)}'
        else:
            # Single part: the hasher still holds all the data.
            return self.etag_hasher.hexdigest()
64
+
65
+
66
# NOTE(review): these are shared, stateful hasher *instances* created once at
# import time, not factories. Anything that calls .update() on one of them
# mutates global state, so a second checksum computed through this table
# includes the bytes of the first. Prefer constructing a fresh hasher per use
# rather than reading from this table.
hashers = {'sha1': hashlib.sha1(),
           'sha256': hashlib.sha256(),
           'etag': Etag(chunk_size=S3_PART_SIZE)}
69
+
70
+
71
def compute_checksum_for_file(local_file_path: str, algorithm: str = 'sha1') -> str:
    """
    Open the file at local_file_path and return its checksum string as
    computed by compute_checksum_for_content.
    """
    with open(local_file_path, 'rb') as stream:
        return compute_checksum_for_content(stream, algorithm=algorithm)
75
+
76
+
77
def compute_checksum_for_content(fh: Union[BinaryIO, BytesIO], algorithm: str = 'sha1') -> str:
    """
    Hash the readable binary stream fh and return '<algorithm>$<hexdigest>'.

    Note: Chunk size matters for s3 etags, and must be the same to get the same hash from the same object.
    Therefore this buffer is not modifiable throughout Toil.

    A fresh hasher is constructed on every call. The previous implementation
    pulled shared hasher *instances* out of the module-level ``hashers``
    table, so a second call folded the first call's bytes into its digest.
    """
    if algorithm == 'etag':
        # Etag results depend on the part size, so always use S3_PART_SIZE
        # for both the part boundaries and the read size.
        hasher = Etag(chunk_size=S3_PART_SIZE)
        read_size = S3_PART_SIZE
    else:
        # Any algorithm hashlib knows about works here (superset of the old
        # 'sha1'/'sha256' table). Read size does not affect these digests;
        # 1 MiB keeps memory bounded.
        hasher = hashlib.new(algorithm)
        read_size = 1024 * 1024
    while chunk := fh.read(read_size):
        hasher.update(chunk)
    return f'{algorithm}${hasher.hexdigest()}'
toil/lib/conversions.py CHANGED
@@ -2,40 +2,28 @@
2
2
  Conversion utilities for mapping memory, disk, core declarations from strings to numbers and vice versa.
3
3
  Also contains general conversion functions
4
4
  """
5
-
6
5
  import math
7
- from typing import Optional, SupportsInt, Union
6
+ import urllib.parse
7
+
8
+ from typing import Optional, SupportsInt, Union, List
9
+
10
+ KIB = 1024
11
+ MIB = 1024 ** 2
12
+ GIB = 1024 ** 3
13
+ TIB = 1024 ** 4
14
+ PIB = 1024 ** 5
15
+ EIB = 1024 ** 6
16
+
17
+ KB = 1000
18
+ MB = 1000 ** 2
19
+ GB = 1000 ** 3
20
+ TB = 1000 ** 4
21
+ PB = 1000 ** 5
22
+ EB = 1000 ** 6
8
23
 
9
24
  # See https://en.wikipedia.org/wiki/Binary_prefix
10
- BINARY_PREFIXES = [
11
- "ki",
12
- "mi",
13
- "gi",
14
- "ti",
15
- "pi",
16
- "ei",
17
- "kib",
18
- "mib",
19
- "gib",
20
- "tib",
21
- "pib",
22
- "eib",
23
- ]
24
- DECIMAL_PREFIXES = [
25
- "b",
26
- "k",
27
- "m",
28
- "g",
29
- "t",
30
- "p",
31
- "e",
32
- "kb",
33
- "mb",
34
- "gb",
35
- "tb",
36
- "pb",
37
- "eb",
38
- ]
25
+ BINARY_PREFIXES = ['ki', 'mi', 'gi', 'ti', 'pi', 'ei', 'kib', 'mib', 'gib', 'tib', 'pib', 'eib']
26
+ DECIMAL_PREFIXES = ['b', 'k', 'm', 'g', 't', 'p', 'e', 'kb', 'mb', 'gb', 'tb', 'pb', 'eb']
39
27
  VALID_PREFIXES = BINARY_PREFIXES + DECIMAL_PREFIXES
40
28
 
41
29
 
@@ -185,3 +173,17 @@ def strtobool(val: str) -> bool:
185
173
  def opt_strtobool(b: Optional[str]) -> Optional[bool]:
186
174
  """Convert an optional string representation of bool to None or bool"""
187
175
  return b if b is None else strtobool(b)
176
+
177
+
178
def modify_url(url: str, remove: List[str]) -> str:
    """
    Given a valid URL string, drop every query parameter whose name appears
    in 'remove' and return the rebuilt URL.
    """
    split_result = urllib.parse.urlsplit(url)
    # Keep only the parameters not slated for removal.
    kept_params = {
        key: values
        for key, values in urllib.parse.parse_qs(split_result.query).items()
        if key not in remove
    }
    new_query = urllib.parse.urlencode(kept_params, doseq=True)
    return urllib.parse.urlunsplit(
        (split_result.scheme, split_result.netloc, split_result.path, new_query, split_result.fragment)
    )
toil/lib/directory.py ADDED
@@ -0,0 +1,217 @@
1
+ # Copyright (C) 2015-2025 Regents of the University of California
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import base64
17
+
18
+ from urllib.parse import quote, unquote
19
+
20
+ from typing import Iterator, Optional, Union
21
+
22
# URI scheme prefix (including the colon) marking an encoded Toil directory.
TOIL_DIR_URI_SCHEME = "toildir:"


# Recursive description of a directory: maps entry name to either a file URI
# (str) or a nested subdirectory (another DirectoryContents dict).
DirectoryContents = dict[str, Union[str, "DirectoryContents"]]
26
+
27
+
28
def check_directory_dict_invariants(contents: DirectoryContents) -> None:
    """
    Make sure a directory structure dict makes sense. Throws an error
    otherwise.

    Currently just checks to make sure no empty-string keys exist.
    """
    for entry_name, entry in contents.items():
        if not entry_name:
            raise RuntimeError(
                "Found nameless entry in directory: " + json.dumps(contents, indent=2)
            )
        if isinstance(entry, dict):
            # Recurse into subdirectories.
            check_directory_dict_invariants(entry)
43
+
44
+
45
def decode_directory(
    dir_path: str,
) -> tuple[DirectoryContents, Optional[str], str, Optional[str], Optional[str]]:
    """
    Decode a directory from a "toildir:" path to a directory (or a file in it).

    :returns: the decoded directory dict, the remaining part of the path
        (which may be None), an identifier string for the directory (the
        stored name URI if one was provided, otherwise the encoded data), and
        the name URI and source task info.
    """
    if not dir_path.startswith(TOIL_DIR_URI_SCHEME):
        raise RuntimeError(f"Cannot decode non-directory path: {dir_path}")

    # encode_directory packed the quoted name, quoted source, and base64
    # structure data into a single colon-separated, slash-free token, so the
    # first slash (if any) separates that token from the path within the
    # directory.
    after_scheme = dir_path[len(TOIL_DIR_URI_SCHEME):]
    pieces = after_scheme.split("/", 1)

    encoded_name, encoded_source, dir_data = pieces[0].split(":")
    # Empty strings encoded None; map them back.
    name: Optional[str] = unquote(encoded_name) or None
    source: Optional[str] = unquote(encoded_source) or None

    # Unique key identifying this directory: its stored name URI if present,
    # otherwise the encoded structure data itself.
    directory_identifier = dir_data if name is None else name

    # Decode the directory structure description.
    contents = json.loads(
        base64.urlsafe_b64decode(dir_data.encode("utf-8")).decode("utf-8")
    )
    check_directory_dict_invariants(contents)

    if len(pieces) > 1 and pieces[1] != "/":
        # There is a sub-path into the directory.
        return contents, pieces[1], directory_identifier, name, source
    # No sub-path; the URI refers to the directory itself.
    return contents, None, directory_identifier, name, source
93
+
94
def encode_directory(contents: DirectoryContents, name: Optional[str] = None, source: Optional[str] = None) -> str:
    """
    Encode a directory structure as a "toildir:" URI string.

    :param contents: the directory dict, which is a dict from name to URI for a
        file or dict for a subdirectory.
    :param name: the path or URI the directory belongs at, including its
        basename. May not be empty if set.
    :param source: the name of a workflow component that uploaded the
        directory. May not be empty if set.
    """
    check_directory_dict_invariants(contents)

    # Serialize the structure into a slash-free, colon-free token.
    structure_data = base64.urlsafe_b64encode(
        json.dumps(contents).encode("utf-8")
    ).decode("utf-8")

    # None name/source become empty strings; URL-quoting keeps ":" and "/"
    # out of the packed fields so decode_directory can split on them.
    return ":".join([
        TOIL_DIR_URI_SCHEME[:-1],
        quote(name or "", safe=""),
        quote(source or "", safe=""),
        structure_data,
    ])
118
+
119
+
120
def directory_item_exists(dir_path: str) -> bool:
    """
    Check whether a URL to a Toil directory, or to something inside one,
    names an entry actually present in the encoded directory structure.

    Assumes that all the pointed-to URLs exist; only the encoded structure
    is consulted.
    """
    try:
        get_directory_item(dir_path)
    except FileNotFoundError:
        return False
    else:
        return True
133
+
134
def get_directory_contents_item(contents: DirectoryContents, remaining_path: Optional[str]) -> Union[DirectoryContents, str]:
    """
    Resolve remaining_path inside a decoded directory structure.

    Returns the file URI string or subdirectory dict found there.

    :raises FileNotFoundError: if the path descends into a file or names a
        missing entry.
    """
    if remaining_path is None:
        # No path means the directory itself.
        return contents

    cursor: Union[str, DirectoryContents] = contents
    for component in remaining_path.split("/"):
        if not isinstance(cursor, dict) or component not in cursor:
            # Either we tried to go inside a file, or this path component
            # doesn't exist at the current level.
            raise FileNotFoundError(remaining_path)
        cursor = cursor[component]
    # Every component resolved; cursor is the requested item.
    return cursor
153
+
154
def get_directory_item(dir_path: str) -> Union[DirectoryContents, str]:
    """
    Look up the subdirectory or file a URL pointing to or into a toildir:
    directory refers to.
    """
    contents, remaining_path = decode_directory(dir_path)[:2]
    try:
        item = get_directory_contents_item(contents, remaining_path)
    except FileNotFoundError:
        # Report the full URL we were asked about, not just the sub-path.
        raise FileNotFoundError(dir_path)
    return item
166
+
167
def directory_contents_items(contents: DirectoryContents) -> Iterator[tuple[str, Union[str, None]]]:
    """
    Yield each file or directory under the given contents, including itself.

    Yields parent items before children.

    Yields each item as a str path from the root (empty for the root itself;
    subdirectory paths end with "/"), paired with the str URI for files or
    None for directories.
    """
    # The directory itself comes first.
    yield ("", None)

    for entry_name, entry_value in contents.items():
        if isinstance(entry_value, dict):
            # A subdirectory: recurse, prefixing each descendant's path.
            for sub_path, sub_value in directory_contents_items(entry_value):
                yield (f"{entry_name}/{sub_path}", sub_value)
        else:
            # A plain file.
            yield (entry_name, entry_value)
190
+
191
def directory_items(dir_path: str) -> Iterator[tuple[str, Union[str, None]]]:
    """
    Yield each file or directory at or under the given toildir: path,
    including the pointed-to item itself.

    Yields parent items before children.

    Yields each item as a str path from the root (possibly empty), paired
    with the str URI for files or None for directories.
    """
    root = get_directory_item(dir_path)
    if isinstance(root, dict):
        # A directory: walk everything inside it.
        yield from directory_contents_items(root)
    else:
        # A single file: it is the only item.
        yield ("", root)
211
+
212
+
213
+
214
+
215
+
216
+
217
+
toil/lib/ec2.py CHANGED
@@ -1,11 +1,19 @@
1
1
  import logging
2
2
  import time
3
- from base64 import b64encode
3
+ from base64 import b64encode, b64decode
4
+ import binascii
4
5
  from collections.abc import Generator, Iterable, Mapping
5
- from typing import TYPE_CHECKING, Any, Callable, Optional, Union
6
+ from typing import (
7
+ TYPE_CHECKING,
8
+ Any,
9
+ Callable,
10
+ Literal,
11
+ Optional,
12
+ Union,
13
+ )
6
14
 
7
15
  from toil.lib.aws.session import establish_boto3_session
8
- from toil.lib.aws.utils import flatten_tags
16
+ from toil.lib.aws.utils import flatten_tags, boto3_pager
9
17
  from toil.lib.exceptions import panic
10
18
  from toil.lib.retry import (
11
19
  ErrorCondition,
@@ -29,6 +37,19 @@ a_short_time = 5
29
37
  a_long_time = 60 * 60
30
38
  logger = logging.getLogger(__name__)
31
39
 
40
def is_base64(value: str) -> bool:
    """
    Return True if value is base64-decodeable, and False otherwise.
    """
    try:
        # validate=True rejects any character outside the base64 alphabet
        # instead of silently skipping it.
        b64decode(value.encode("utf-8"), validate=True)
    except binascii.Error:
        return False
    return True
52
+
32
53
 
33
54
  class UserError(RuntimeError):
34
55
  def __init__(self, message=None, cause=None):
@@ -129,7 +150,7 @@ def wait_instances_running(
129
150
  elif i["State"]["Name"] == "running":
130
151
  if i["InstanceId"] in running_ids:
131
152
  raise RuntimeError(
132
- "An instance was already added to the list of running instance IDs. Maybe there is a duplicate."
153
+ f"Instance {i['InstanceId']} was already added to the list of running instance IDs. Maybe there is a duplicate."
133
154
  )
134
155
  running_ids.add(i["InstanceId"])
135
156
  yield i
@@ -151,12 +172,15 @@ def wait_instances_running(
151
172
  time.sleep(seconds)
152
173
  for attempt in retry_ec2():
153
174
  with attempt:
154
- described_instances = boto3_ec2.describe_instances(
175
+ # describe_instances weirdly really describes reservations
176
+ reservations = boto3_pager(
177
+ boto3_ec2.describe_instances,
178
+ "Reservations",
155
179
  InstanceIds=list(pending_ids)
156
180
  )
157
181
  instances = [
158
182
  instance
159
- for reservation in described_instances["Reservations"]
183
+ for reservation in reservations
160
184
  for instance in reservation["Instances"]
161
185
  ]
162
186
 
@@ -184,6 +208,9 @@ def wait_spot_requests_active(
184
208
 
185
209
  if timeout is not None:
186
210
  timeout = time.time() + timeout
211
+
212
+ # These hold spot instance request IDs.
213
+ # Not to be confused with instance IDs.
187
214
  active_ids = set()
188
215
  other_ids = set()
189
216
  open_ids = None
@@ -201,34 +228,37 @@ def wait_spot_requests_active(
201
228
  batch = []
202
229
  for r in requests:
203
230
  r: "SpotInstanceRequestTypeDef" # pycharm thinks it is a string
231
+ assert isinstance(r, dict), f"Found garbage posing as a spot request: {r}"
204
232
  if r["State"] == "open":
205
- open_ids.add(r["InstanceId"])
206
- if r["Status"] == "pending-evaluation":
207
- eval_ids.add(r["InstanceId"])
208
- elif r["Status"] == "pending-fulfillment":
209
- fulfill_ids.add(r["InstanceId"])
233
+ open_ids.add(r["SpotInstanceRequestId"])
234
+ if r["Status"]["Code"] == "pending-evaluation":
235
+ eval_ids.add(r["SpotInstanceRequestId"])
236
+ elif r["Status"]["Code"] == "pending-fulfillment":
237
+ fulfill_ids.add(r["SpotInstanceRequestId"])
210
238
  else:
211
239
  logger.info(
212
240
  "Request %s entered status %s indicating that it will not be "
213
- "fulfilled anytime soon.",
214
- r["InstanceId"],
215
- r["Status"],
241
+ "fulfilled anytime soon. (Message: %s)",
242
+ r["SpotInstanceRequestId"],
243
+ r["Status"]["Code"],
244
+ r["Status"].get("Message"),
216
245
  )
217
246
  elif r["State"] == "active":
218
- if r["InstanceId"] in active_ids:
247
+ if r["SpotInstanceRequestId"] in active_ids:
219
248
  raise RuntimeError(
220
249
  "A request was already added to the list of active requests. Maybe there are duplicate requests."
221
250
  )
222
- active_ids.add(r["InstanceId"])
251
+ active_ids.add(r["SpotInstanceRequestId"])
223
252
  batch.append(r)
224
253
  else:
225
- if r["InstanceId"] in other_ids:
254
+ if r["SpotInstanceRequestId"] in other_ids:
226
255
  raise RuntimeError(
227
256
  "A request was already added to the list of other IDs. Maybe there are duplicate requests."
228
257
  )
229
- other_ids.add(r["InstanceId"])
258
+ other_ids.add(r["SpotInstanceRequestId"])
230
259
  batch.append(r)
231
260
  if batch:
261
+ logger.debug("Found %d new active/other spot requests", len(batch))
232
262
  yield batch
233
263
  logger.info(
234
264
  "%i spot requests(s) are open (%i of which are pending evaluation and %i "
@@ -247,8 +277,10 @@ def wait_spot_requests_active(
247
277
  time.sleep(sleep_time)
248
278
  for attempt in retry_ec2(retry_while=spot_request_not_found):
249
279
  with attempt:
250
- requests = boto3_ec2.describe_spot_instance_requests(
251
- SpotInstanceRequestIds=list(open_ids)
280
+ requests = boto3_pager(
281
+ boto3_ec2.describe_spot_instance_requests,
282
+ "SpotInstanceRequests",
283
+ SpotInstanceRequestIds=list(open_ids),
252
284
  )
253
285
  except BaseException:
254
286
  if open_ids:
@@ -264,14 +296,20 @@ def create_spot_instances(
264
296
  boto3_ec2: "EC2Client",
265
297
  price,
266
298
  image_id,
267
- spec,
299
+ spec: dict[Literal["LaunchSpecification"], dict[str, Any]],
268
300
  num_instances=1,
269
301
  timeout=None,
270
302
  tentative=False,
271
- tags=None,
303
+ tags: dict[str, str] = None,
272
304
  ) -> Generator["DescribeInstancesResultTypeDef", None, None]:
273
305
  """
274
306
  Create instances on the spot market.
307
+
308
+ The "UserData" field in "LaunchSpecification" in spec MUST ALREADY BE
309
+ base64-encoded. It will NOT be automatically encoded.
310
+
311
+ :param tags: Dict from tag key to tag value of tags to apply to the
312
+ request.
275
313
  """
276
314
 
277
315
  def spotRequestNotFound(e):
@@ -280,20 +318,27 @@ def create_spot_instances(
280
318
  spec["LaunchSpecification"].update(
281
319
  {"ImageId": image_id}
282
320
  ) # boto3 image id is in the launch specification
321
+
322
+ user_data = spec["LaunchSpecification"].get("UserData", "")
323
+ assert is_base64(user_data), f"Spot user data needs to be base64-encoded: {user_data}"
324
+
283
325
  for attempt in retry_ec2(
284
326
  retry_for=a_long_time, retry_while=inconsistencies_detected
285
327
  ):
286
328
  with attempt:
287
329
  requests_dict = boto3_ec2.request_spot_instances(
288
- SpotPrice=price, InstanceCount=num_instances, **spec
330
+ SpotPrice=str(price), InstanceCount=num_instances, **spec
289
331
  )
290
332
  requests = requests_dict["SpotInstanceRequests"]
291
333
 
334
+ assert isinstance(requests, list)
335
+
292
336
  if tags is not None:
337
+ flat_tags = flatten_tags(tags)
293
338
  for requestID in (request["SpotInstanceRequestId"] for request in requests):
294
339
  for attempt in retry_ec2(retry_while=spotRequestNotFound):
295
340
  with attempt:
296
- boto3_ec2.create_tags(Resources=[requestID], Tags=tags)
341
+ boto3_ec2.create_tags(Resources=[requestID], Tags=flat_tags)
297
342
 
298
343
  num_active, num_other = 0, 0
299
344
  # noinspection PyUnboundLocalVariable,PyTypeChecker
@@ -310,7 +355,7 @@ def create_spot_instances(
310
355
  else:
311
356
  logger.info(
312
357
  "Request %s in unexpected state %s.",
313
- request["InstanceId"],
358
+ request["SpotInstanceRequestId"],
314
359
  request["State"],
315
360
  )
316
361
  num_other += 1
@@ -324,7 +369,14 @@ def create_spot_instances(
324
369
  boto3_ec2.modify_instance_metadata_options(
325
370
  InstanceId=instance_id, HttpPutResponseHopLimit=3
326
371
  )
327
- yield boto3_ec2.describe_instances(InstanceIds=instance_ids)
372
+ # We can't use the normal boto3_pager here because we're weirdly
373
+ # specced as yielding the pages ourselves.
374
+ # TODO: Change this to just yield instance descriptions instead.
375
+ page = boto3_ec2.describe_instances(InstanceIds=instance_ids)
376
+ while page.get("NextToken") is not None:
377
+ yield page
378
+ page = boto3_ec2.describe_instances(InstanceIds=instance_ids, NextToken=page["NextToken"])
379
+ yield page
328
380
  if not num_active:
329
381
  message = "None of the spot requests entered the active state"
330
382
  if tentative:
@@ -335,6 +387,9 @@ def create_spot_instances(
335
387
  logger.warning("%i request(s) entered a state other than active.", num_other)
336
388
 
337
389
 
390
+ # TODO: Get rid of this and use create_instances instead.
391
+ # Right now we need it because we have code that needs an InstanceTypeDef for
392
+ # either a spot or an ondemand instance.
338
393
  def create_ondemand_instances(
339
394
  boto3_ec2: "EC2Client",
340
395
  image_id: str,
@@ -344,7 +399,19 @@ def create_ondemand_instances(
344
399
  """
345
400
  Requests the RunInstances EC2 API call but accounts for the race between recently created
346
401
  instance profiles, IAM roles and an instance creation that refers to them.
402
+
403
+ The "UserData" field in spec MUST NOT be base64 encoded; it will be
404
+ base64-encoded by boto3 automatically. See
405
+ <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2/client/run_instances.html>.
406
+
407
+ Replaced by create_instances.
347
408
  """
409
+
410
+ user_data: str = spec.get("UserData", "")
411
+ if user_data:
412
+ # Hope any real user data contains some characters not allowed in base64
413
+ assert not is_base64(user_data), f"On-demand user data needs to not be base64-encoded: {user_data}"
414
+
348
415
  instance_type = spec["InstanceType"]
349
416
  logger.info("Creating %s instance(s) ... ", instance_type)
350
417
  boto_instance_list = []
@@ -434,12 +501,13 @@ def create_instances(
434
501
  Not to be confused with "run_instances" (same input args; returns a dictionary):
435
502
  https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Client.run_instances
436
503
 
437
- Tags, if given, are applied to the instances, and all volumes.
504
+ :param user_data: non-base64-encoded user data to control instance startup.
505
+ :param tags: if given, these tags are applied to the instances, and all volumes.
438
506
  """
439
507
  logger.info("Creating %s instance(s) ... ", instance_type)
440
508
 
441
- if isinstance(user_data, str):
442
- user_data = user_data.encode("utf-8")
509
+ if isinstance(user_data, bytes):
510
+ user_data = user_data.decode("utf-8")
443
511
 
444
512
  request = {
445
513
  "ImageId": image_id,
toil/lib/exceptions.py CHANGED
@@ -18,6 +18,7 @@ import sys
18
18
  from typing import Optional
19
19
  import logging
20
20
  from urllib.parse import ParseResult
21
+ from types import TracebackType
21
22
 
22
23
 
23
24
  # TODO: isn't this built in to Python 3 now?
@@ -56,7 +57,7 @@ class panic:
56
57
  raise_(exc_type, exc_value, traceback)
57
58
 
58
59
 
59
- def raise_(exc_type, exc_value, traceback) -> None:
60
+ def raise_(exc_type: Optional[type[BaseException]], exc_value: Optional[BaseException], traceback: Optional[TracebackType]) -> None:
60
61
  if exc_value is not None:
61
62
  exc = exc_value
62
63
  else:
toil/lib/expando.py CHANGED
@@ -67,10 +67,10 @@ class Expando(dict):
67
67
  ...
68
68
  KeyError: 'foo'
69
69
 
70
- >>> del o.foo
70
+ >>> del o.foo # doctest: +IGNORE_EXCEPTION_DETAIL
71
71
  Traceback (most recent call last):
72
72
  ...
73
- AttributeError: foo
73
+ AttributeError: 'Expando' object has no attribute 'foo'
74
74
 
75
75
  And copied:
76
76