toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. toil/batchSystems/abstractBatchSystem.py +13 -5
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
  3. toil/batchSystems/kubernetes.py +13 -2
  4. toil/batchSystems/mesos/batchSystem.py +33 -2
  5. toil/batchSystems/registry.py +15 -118
  6. toil/batchSystems/slurm.py +191 -16
  7. toil/common.py +20 -1
  8. toil/cwl/cwltoil.py +97 -119
  9. toil/cwl/utils.py +103 -3
  10. toil/fileStores/__init__.py +1 -1
  11. toil/fileStores/abstractFileStore.py +5 -2
  12. toil/fileStores/cachingFileStore.py +1 -1
  13. toil/job.py +30 -14
  14. toil/jobStores/abstractJobStore.py +35 -255
  15. toil/jobStores/aws/jobStore.py +864 -1964
  16. toil/jobStores/aws/utils.py +24 -270
  17. toil/jobStores/fileJobStore.py +2 -1
  18. toil/jobStores/googleJobStore.py +32 -13
  19. toil/jobStores/utils.py +0 -327
  20. toil/leader.py +27 -22
  21. toil/lib/accelerators.py +1 -1
  22. toil/lib/aws/config.py +22 -0
  23. toil/lib/aws/s3.py +477 -9
  24. toil/lib/aws/utils.py +22 -33
  25. toil/lib/checksum.py +88 -0
  26. toil/lib/conversions.py +33 -31
  27. toil/lib/directory.py +217 -0
  28. toil/lib/ec2.py +97 -29
  29. toil/lib/exceptions.py +2 -1
  30. toil/lib/expando.py +2 -2
  31. toil/lib/generatedEC2Lists.py +138 -19
  32. toil/lib/io.py +33 -2
  33. toil/lib/memoize.py +21 -7
  34. toil/lib/misc.py +1 -1
  35. toil/lib/pipes.py +385 -0
  36. toil/lib/plugins.py +106 -0
  37. toil/lib/retry.py +1 -1
  38. toil/lib/threading.py +1 -1
  39. toil/lib/url.py +320 -0
  40. toil/lib/web.py +4 -5
  41. toil/options/cwl.py +13 -1
  42. toil/options/runner.py +17 -10
  43. toil/options/wdl.py +12 -1
  44. toil/provisioners/__init__.py +5 -2
  45. toil/provisioners/aws/__init__.py +43 -36
  46. toil/provisioners/aws/awsProvisioner.py +47 -15
  47. toil/provisioners/node.py +60 -12
  48. toil/resource.py +3 -13
  49. toil/server/app.py +12 -6
  50. toil/server/cli/wes_cwl_runner.py +2 -2
  51. toil/server/wes/abstract_backend.py +21 -43
  52. toil/server/wes/toil_backend.py +2 -2
  53. toil/test/__init__.py +16 -18
  54. toil/test/batchSystems/batchSystemTest.py +2 -9
  55. toil/test/batchSystems/batch_system_plugin_test.py +7 -0
  56. toil/test/batchSystems/test_slurm.py +103 -14
  57. toil/test/cwl/cwlTest.py +181 -8
  58. toil/test/cwl/staging_cat.cwl +27 -0
  59. toil/test/cwl/staging_make_file.cwl +25 -0
  60. toil/test/cwl/staging_workflow.cwl +43 -0
  61. toil/test/cwl/zero_default.cwl +61 -0
  62. toil/test/docs/scripts/tutorial_staging.py +17 -8
  63. toil/test/docs/scriptsTest.py +2 -1
  64. toil/test/jobStores/jobStoreTest.py +23 -133
  65. toil/test/lib/aws/test_iam.py +7 -7
  66. toil/test/lib/aws/test_s3.py +30 -33
  67. toil/test/lib/aws/test_utils.py +9 -9
  68. toil/test/lib/test_url.py +69 -0
  69. toil/test/lib/url_plugin_test.py +105 -0
  70. toil/test/provisioners/aws/awsProvisionerTest.py +60 -7
  71. toil/test/provisioners/clusterTest.py +15 -2
  72. toil/test/provisioners/gceProvisionerTest.py +1 -1
  73. toil/test/server/serverTest.py +78 -36
  74. toil/test/src/autoDeploymentTest.py +2 -3
  75. toil/test/src/fileStoreTest.py +89 -87
  76. toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
  77. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
  78. toil/test/utils/toilKillTest.py +35 -28
  79. toil/test/wdl/md5sum/md5sum-gs.json +1 -1
  80. toil/test/wdl/md5sum/md5sum.json +1 -1
  81. toil/test/wdl/testfiles/read_file.wdl +18 -0
  82. toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
  83. toil/test/wdl/wdltoil_test.py +171 -162
  84. toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
  85. toil/utils/toilDebugFile.py +6 -3
  86. toil/utils/toilSshCluster.py +23 -0
  87. toil/utils/toilStats.py +17 -2
  88. toil/utils/toilUpdateEC2Instances.py +1 -0
  89. toil/version.py +10 -10
  90. toil/wdl/wdltoil.py +1179 -825
  91. toil/worker.py +16 -8
  92. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/METADATA +32 -32
  93. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/RECORD +97 -85
  94. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/WHEEL +1 -1
  95. toil/lib/iterables.py +0 -112
  96. toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
  97. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
  98. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
  99. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
@@ -11,675 +11,804 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- import hashlib
- import itertools
- import logging
+ """
+ This file contains the AWS jobstore, which has its own docstring defining its use.
+
+ This docstring is about the organization of the file.
+
+ All direct AWS boto calls should live in toil.lib.aws, except for creating the
+ session instance and the resource/client (which should only be made ONCE in the jobstore).
+
+ Reasons for this
+ - DRY.
+ - All retries are on their individual boto functions, instead of here.
+ - Simple clear functions => simple clear unit tests (ideally).
+
+ Variables defining part size, parallelization, and other constants should live in toil.lib.aws.config.
+ """
  import os
+ import json
+ import logging
  import pickle
  import re
- import reprlib
  import stat
- import time
  import uuid
- from collections.abc import Generator
- from contextlib import contextmanager
+ import datetime
+
  from io import BytesIO
- from typing import IO, TYPE_CHECKING, Optional, Union, cast
- from urllib.parse import ParseResult, parse_qs, urlencode, urlsplit, urlunsplit
+ from contextlib import contextmanager
+ from urllib.parse import ParseResult, urlparse
+ from typing import (
+ ContextManager,
+ IO,
+ TYPE_CHECKING,
+ Optional,
+ Union,
+ cast,
+ Tuple,
+ Callable,
+ Dict,
+ Any,
+ Iterator,
+ Literal,
+ overload
+ )

+ # This file can't be imported if the AWS modules are not available.
  from botocore.exceptions import ClientError

- import toil.lib.encryption as encryption
  from toil.fileStores import FileID
- from toil.job import Job, JobDescription
- from toil.jobStores.abstractJobStore import (
- AbstractJobStore,
- ConcurrentFileModificationException,
- JobStoreExistsException,
- LocatorException,
- NoSuchFileException,
- NoSuchJobException,
- NoSuchJobStoreException,
- )
- from toil.jobStores.aws.utils import (
- SDBHelper,
- ServerSideCopyProhibitedError,
- copyKeyMultipart,
- fileSizeAndTime,
- no_such_sdb_domain,
- retry_sdb,
- sdb_unavailable,
- uploadFile,
- uploadFromPath,
- )
- from toil.jobStores.utils import ReadablePipe, ReadableTransformingPipe, WritablePipe
- from toil.lib.aws import build_tag_dict_from_env
- from toil.lib.aws.session import establish_boto3_session
- from toil.lib.aws.utils import (
- NoBucketLocationError,
- boto3_pager,
+ from toil.jobStores.abstractJobStore import (AbstractJobStore,
+ JobStoreExistsException,
+ NoSuchJobException,
+ NoSuchJobStoreException)
+ from toil.lib.aws.s3 import (
  create_s3_bucket,
- enable_public_objects,
- flatten_tags,
- get_bucket_region,
- get_item_from_attributes,
- get_object_for_url,
- list_objects_for_url,
- retry_s3,
- retryable_s3_errors,
+ delete_s3_bucket,
+ bucket_exists,
+ copy_s3_to_s3,
+ copy_local_to_s3,
+ copy_s3_to_local,
+ parse_s3_uri,
+ MultiPartPipe,
+ list_s3_items,
+ upload_to_s3,
+ download_stream,
+ s3_key_exists,
+ head_s3_object,
+ get_s3_object,
+ put_s3_object,
+ create_public_url,
+ AWSKeyNotFoundError,
  )
- from toil.lib.compatibility import compat_bytes
+ from toil.lib.aws.utils import get_object_for_url, list_objects_for_url
+ from toil.common import Config
+ from toil.jobStores.abstractJobStore import NoSuchFileException
  from toil.lib.ec2nodes import EC2Regions
- from toil.lib.exceptions import panic
- from toil.lib.io import AtomicFileCreate
- from toil.lib.memoize import strict_bool
- from toil.lib.objects import InnerClass
- from toil.lib.retry import get_error_code, get_error_status, retry
-
- if TYPE_CHECKING:
- from mypy_boto3_sdb.type_defs import (
- AttributeTypeDef,
- DeletableItemTypeDef,
- ItemTypeDef,
- ReplaceableAttributeTypeDef,
- ReplaceableItemTypeDef,
- UpdateConditionTypeDef,
- )
-
- from toil import Config
-
- boto3_session = establish_boto3_session()
- s3_boto3_resource = boto3_session.resource("s3")
- s3_boto3_client = boto3_session.client("s3")
+ from toil.lib.retry import get_error_status
+ from toil.version import version
+ from toil.lib.aws.session import establish_boto3_session
+ from toil.job import JobDescription, Job
+ from toil.lib.url import URLAccess
+
+
+ DEFAULT_AWS_PART_SIZE = 52428800
  logger = logging.getLogger(__name__)

- # Sometimes we have to wait for multipart uploads to become real. How long
- # should we wait?
- CONSISTENCY_TICKS = 5
- CONSISTENCY_TIME = 1

+ class AWSJobStore(AbstractJobStore, URLAccess):
+ """
+ The AWS jobstore can be thought of as an AWS s3 bucket, with functions to
+ centralize, store, and track files for the workflow.

- class ChecksumError(Exception):
- """Raised when a download from AWS does not contain the correct data."""
+ The AWS jobstore stores 4 things:

+ 1. Jobs: These are pickled as files, and contain the information necessary to run a job when unpickled.
+ A job's file is deleted when finished, and its absence means it completed.

- class DomainDoesNotExist(Exception):
- """Raised when a domain that is expected to exist does not exist."""
+ 2. Files: The inputs and outputs of jobs. Each file is written in s3 with the file pattern:
+ "files/{uuid4}/{original_filename}", where the file prefix
+ "files/{uuid4}" should only point to one file.
+ 3. Logs: The written log files of jobs that have run, plus the log file for the main Toil process.

- def __init__(self, domain_name):
- super().__init__(f"Expected domain {domain_name} to exist!")
+ 4. Shared Files: Files with human-readable names, used by Toil itself or Python workflows.
+ These include:

+ * environment.pickle (environment variables)

- class AWSJobStore(AbstractJobStore):
- """
- A job store that uses Amazon's S3 for file storage and SimpleDB for storing job info and
- enforcing strong consistency on the S3 file storage. There will be SDB domains for jobs and
- files and a versioned S3 bucket for file contents. Job objects are pickled, compressed,
- partitioned into chunks of 1024 bytes and each chunk is stored as a an attribute of the SDB
- item representing the job. UUIDs are used to identify jobs and files.
+ * config.pickle (user options)
+
+ * pid.log (process ID of the workflow; when it finishes, the workflow either succeeded/failed)
+ * userScript (hot deployment; this is the job module)
+
+ * rootJobReturnValue (workflow succeeded or not)
+
+ NOTES
+ - The AWS jobstore does not use a database (directly, at least) currently. We can get away with this because:
+
+ 1. AWS s3 has strong consistency.
+
+ 2. s3's filter/query speed is pretty good.
+
+ However, there may be reasons in the future to provide users with a database:
+
+ * s3 throttling has limits (3,500/5,000 requests (TODO: per
+ second?); something like dynamodb supports 100,000+ requests).
+
+ * Access and filtering would be sped up, though how much faster this would be needs testing.
+
+ ALSO NOTE: The caching filestore uses a local (per node) database with a very similar structure that maybe
+ could be synced up with this.
+
+ - TODO: Etags are s3's native checksum, so use that for file integrity checking since it's free when fetching
+ object headers from s3. Using an md5sum in addition to this would work well with the current filestore.
+ WARNING: Etag values differ for the same file when the part size changes, so part size should always
+ be Set In Stone, unless we hit s3's 10,000 part limit, and we need to account for that.
+
+ - This class fills in self.config only when initialized/restarted; it is None upon class instantiation. These
+ are the options/config set by the user. When jobs are loaded/unpickled, they must re-incorporate this.
+
+ - The config.sseKey field is the single source of truth for bucket encryption
+ status. The key is never stored inside this class; it is always read
+ from the file referenced by the config when needed. Modifying the config
+ at runtime will modify whether encryption is used. Note that files
+ written *without* encryption (i.e. config.pickle) can't be read when
+ encryption is enabled!
+
+ - TODO: In general, job stores should log the version of Toil they were
+ initialized with and warn the user if restarting with a different
+ version.
  """
+ def __init__(self, locator: str, partSize: int = DEFAULT_AWS_PART_SIZE) -> None:
+ super(AWSJobStore, self).__init__(locator)
+ # TODO: parsing of user options seems like it should be done outside of this class;
+ # pass in only the bucket name and region?
+ self.region, self.bucket_name = parse_jobstore_identifier(locator)
+ boto3_session = establish_boto3_session(region_name=self.region)
+ self.s3_resource = boto3_session.resource("s3")
+ self.s3_client = boto3_session.client("s3")
+ logger.info(f"Instantiating {self.__class__} with region: {self.region}")
+ self.part_size = DEFAULT_AWS_PART_SIZE # don't let users set the part size; it will throw off etag values
+
+ # created anew during self.initialize() or loaded using self.resume()
+ self.bucket = None
+
+ # pickled job files named with uuid4
+ self.job_key_prefix = 'jobs/'
+ # job-file associations; these are empty files mimicking a db w/naming convention: job_uuid4.file_uuid4
+ #
+ # TODO: a many-to-many system is implemented, but a simpler one-to-many
+ # system could be used, because each file should belong to at most one
+ # job. This should be changed to a hierarchical layout.
+ self.job_associations_key_prefix = 'job-associations/'
+ # input/output files named with uuid4
+ self.content_key_prefix = 'files/'
+ # these are special files, like 'environment.pickle'; place them in root
+ self.shared_key_prefix = ''
+ # read and unread; named with uuid4
+ self.logs_key_prefix = 'logs/'
+
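Taken together, these prefixes give a bucket layout roughly like the following (illustrative names and UUIDs only):

    <jobstore bucket>
      config.pickle                                # shared files sit at the bucket root (shared_key_prefix = '')
      environment.pickle
      jobs/<job-uuid4>                             # one pickled JobDescription per key
      job-associations/<job-uuid4>.<file-uuid4>    # empty marker keys tying files to the job that owns them
      files/<file-uuid4>/<original_filename>       # exactly one content key under each file prefix
      logs/<log-uuid4>                             # read and unread log files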
+ ###################################### CREATE/DESTROY JOBSTORE ######################################
+
+ def initialize(self, config: Config) -> None:
+ """
+ Called when starting a new jobstore with a non-existent bucket.

- # Dots in bucket names should be avoided because bucket names are used in HTTPS bucket
- # URLs where the may interfere with the certificate common name. We use a double
- # underscore as a separator instead.
- #
- bucketNameRe = re.compile(r"^[a-z0-9][a-z0-9-]+[a-z0-9]$")
+ Create bucket, raise if it already exists.
+ Set options from config.
+ """
+ logger.debug(f"Instantiating {self.__class__} for region {self.region} with bucket: '{self.bucket_name}'")
+ if bucket_exists(self.s3_resource, self.bucket_name):
+ raise JobStoreExistsException(self.locator, 'aws')
+ self.bucket = create_s3_bucket(self.s3_resource, self.bucket_name, region=self.region) # type: ignore
+ super(AWSJobStore, self).initialize(config)

- # See http://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html
- #
- minBucketNameLen = 3
- maxBucketNameLen = 63
- maxNameLen = 10
- nameSeparator = "--"
+ def resume(self) -> None:
+ """
+ Called when reusing an old jobstore with an existing bucket.

- def __init__(self, locator: str, partSize: int = 50 << 20) -> None:
+ :raise NoSuchJobStoreException: if the bucket doesn't exist.
  """
- Create a new job store in AWS or load an existing one from there.
+ if not bucket_exists(self.s3_resource, self.bucket_name):
+ raise NoSuchJobStoreException(self.locator, 'aws')
+ # This sets self.config to not be None and loads the encryption key
+ # path from the unencrypted config. So it needs the bucket to exist to
+ # read from.
+ super(AWSJobStore, self).resume()
+
+ def destroy(self) -> None:
+ delete_s3_bucket(self.s3_resource, self.bucket_name)

- :param int partSize: The size of each individual part used for multipart operations like
- upload and copy, must be >= 5 MiB but large enough to not exceed 10k parts for the
- whole file
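A hedged sketch of how these entry points fit together (the locator syntax shown follows the old region:name form and is only assumed to be what parse_jobstore_identifier expects):

    store = AWSJobStore("us-west-2:my-toil-run")
    store.initialize(config)   # fresh run: creates the bucket, or raises JobStoreExistsException
    # ... later, e.g. on --restart ...
    store.resume()             # reattaches; raises NoSuchJobStoreException if the bucket is gone
    store.destroy()            # deletes the bucket and everything stored in it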
+ ###################################### BUCKET UTIL API ######################################
+
+ def _key_in_bucket(
+ self,
+ identifier: str,
+ prefix: str,
+ ) -> str:
  """
- super().__init__(locator)
- region, namePrefix = locator.split(":")
- regions = EC2Regions.keys()
- if region not in regions:
- raise ValueError(f'Region "{region}" is not one of: {regions}')
- if not self.bucketNameRe.match(namePrefix):
- raise ValueError(
- "Invalid name prefix '%s'. Name prefixes must contain only digits, "
- "hyphens or lower-case letters and must not start or end in a "
- "hyphen." % namePrefix
- )
- # Reserve 13 for separator and suffix
- if len(namePrefix) > self.maxBucketNameLen - self.maxNameLen - len(
- self.nameSeparator
- ):
- raise ValueError(
- "Invalid name prefix '%s'. Name prefixes may not be longer than 50 "
- "characters." % namePrefix
- )
- if "--" in namePrefix:
- raise ValueError(
- "Invalid name prefix '%s'. Name prefixes may not contain "
- "%s." % (namePrefix, self.nameSeparator)
- )
- logger.debug(
- "Instantiating %s for region %s and name prefix '%s'",
- self.__class__,
- region,
- namePrefix,
+ Get the key in the bucket for the given identifier and prefix.
+
+ We have this so higher-level code doesn't need to worry about the
+ pasting together of prefixes and identifiers, so it never has to be
+ mixed with the identifier=/prefix= calling convention.
+ """
+ return f'{prefix}{identifier}'
+
+ def is_in_bucket(
+ self,
+ identifier: str,
+ prefix: str,
+ bucket: Optional[str] = None,
+ ) -> bool:
+ """
+ Check if the key for the given identifier and prefix is in the bucket.
+ """
+ bucket = bucket or self.bucket_name
+
+ return s3_key_exists(
+ s3_resource=self.s3_resource,
+ bucket=bucket,
+ key=self._key_in_bucket(identifier=identifier, prefix=prefix),
+ extra_args=self._get_encryption_args()
  )
- self.region = region
- self.name_prefix = namePrefix
- self.part_size = partSize
- self.jobs_domain_name: Optional[str] = None
- self.files_domain_name: Optional[str] = None
- self.files_bucket = None
- self.db = boto3_session.client(service_name="sdb", region_name=region)
-
- self.s3_resource = boto3_session.resource("s3", region_name=self.region)
- self.s3_client = self.s3_resource.meta.client
-
- def initialize(self, config: "Config") -> None:
- if self._registered:
- raise JobStoreExistsException(self.locator, "aws")
- self._registered = None
- try:
- self._bind(create=True)
- except:
- with panic(logger):
- self.destroy()
- else:
- super().initialize(config)
- # Only register after job store has been full initialized
- self._registered = True

- @property
- def sseKeyPath(self) -> Optional[str]:
- return self.config.sseKey

- def resume(self) -> None:
- if not self._registered:
- raise NoSuchJobStoreException(self.locator, "aws")
- self._bind(create=False)
- super().resume()
+ def write_to_bucket(
+ self,
+ identifier: str,
+ prefix: str,
+ data: Optional[Union[bytes, str, Dict[str, Any]]],
+ bucket: Optional[str] = None,
+ encrypted: Optional[bool] = None,
+ ) -> None:
+ """
+ Write something directly to a bucket.
+
+ Use for small files. Does not parallelize or use multipart.
+
+ :param encrypted: Can be set to False to disable encryption.
+ """
+ # only used if exporting to a URL
+ encryption_args = {} if encrypted is False else self._get_encryption_args()
+ bucket = bucket or self.bucket_name
+
+ if isinstance(data, dict):
+ data = json.dumps(data).encode('utf-8')
+ elif isinstance(data, str):
+ data = data.encode('utf-8')
+ elif data is None:
+ data = b''
+
+ assert isinstance(data, bytes)
+ put_s3_object(
+ s3_resource=self.s3_resource,
+ bucket=bucket,
+ key=self._key_in_bucket(identifier=identifier, prefix=prefix),
+ body=data,
+ extra_args=encryption_args,
+ )
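For instance (a hypothetical call, not copied from the file), a small JSON-able record can be dropped under the shared (root) prefix like this:

    store.write_to_bucket(identifier="rootJobReturnValue",
                          prefix=store.shared_key_prefix,
                          data={"exit_code": 0})
    # dicts are JSON-encoded, str is UTF-8 encoded, and None becomes an empty object body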
207
291
 
208
- def _bind(
292
+ def read_from_bucket(
209
293
  self,
210
- create: bool = False,
211
- block: bool = True,
212
- check_versioning_consistency: bool = True,
213
- ) -> None:
214
- def qualify(name):
215
- assert len(name) <= self.maxNameLen
216
- return self.name_prefix + self.nameSeparator + name
217
-
218
- # The order in which this sequence of events happens is important. We can easily handle the
219
- # inability to bind a domain, but it is a little harder to handle some cases of binding the
220
- # jobstore bucket. Maintaining this order allows for an easier `destroy` method.
221
- if self.jobs_domain_name is None:
222
- self.jobs_domain_name = qualify("jobs")
223
- self._bindDomain(self.jobs_domain_name, create=create, block=block)
224
- if self.files_domain_name is None:
225
- self.files_domain_name = qualify("files")
226
- self._bindDomain(self.files_domain_name, create=create, block=block)
227
- if self.files_bucket is None:
228
- self.files_bucket = self._bindBucket(
229
- qualify("files"),
230
- create=create,
231
- block=block,
232
- versioning=True,
233
- check_versioning_consistency=check_versioning_consistency,
234
- )
294
+ identifier: str,
295
+ prefix: str,
296
+ bucket: Optional[str] = None,
297
+ ) -> bytes:
298
+ """
299
+ Read something directly from a bucket.
235
300
 
236
- @property
237
- def _registered(self) -> Optional[bool]:
301
+ Use for small files. Does not parallelize or use multipart.
302
+
303
+ :raises NoSuchJobException: if the prefix is the job prefix and the
304
+ identifier is not found.
305
+ :raises NoSuchFileException: if the prefix is the content prefix and
306
+ the identifier is not found.
307
+ :raises self.s3_client.exceptions.NoSuchKey: in other cases where the
308
+ identifier is not found.
309
+ """
310
+ bucket = bucket or self.bucket_name
311
+
312
+ try:
313
+ return get_s3_object(
314
+ s3_resource=self.s3_resource,
315
+ bucket=bucket,
316
+ key=self._key_in_bucket(identifier=identifier, prefix=prefix),
317
+ extra_args=self._get_encryption_args(),
318
+ )['Body'].read()
319
+ except self.s3_client.exceptions.NoSuchKey:
320
+ if prefix == self.job_key_prefix:
321
+ raise NoSuchJobException(identifier)
322
+ elif prefix == self.content_key_prefix:
323
+ raise NoSuchFileException(identifier)
324
+ else:
325
+ raise
326
+ except ClientError as e:
327
+ if e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') == 404:
328
+ if prefix == self.job_key_prefix:
329
+ raise NoSuchJobException(identifier)
330
+ elif prefix == self.content_key_prefix:
331
+ raise NoSuchFileException(identifier)
332
+ else:
333
+ raise
334
+ else:
335
+ raise
336
+
337
+ ###################################### JOBS API ######################################
338
+
339
+ def assign_job_id(self, jobDescription: JobDescription) -> None:
340
+ jobDescription.jobStoreID = str(uuid.uuid4())
341
+ logger.debug("Assigning Job ID %s", jobDescription.jobStoreID)
342
+
343
+ def create_job(self, jobDescription: JobDescription) -> JobDescription:
344
+ """
345
+ Pickle a jobDescription object and write it to the jobstore as a file.
346
+
347
+ Responsible for calling :meth:`toil.job.JobDescription.pre_update_hook`
348
+ on the job description.
238
349
  """
239
- A optional boolean property indicating whether this job store is registered. The
240
- registry is the authority on deciding if a job store exists or not. If True, this job
241
- store exists, if None the job store is transitioning from True to False or vice versa,
242
- if False the job store doesn't exist.
243
350
 
244
- :type: bool|None
351
+ jobDescription.pre_update_hook()
352
+
353
+ self.write_to_bucket(identifier=str(jobDescription.jobStoreID),
354
+ prefix=self.job_key_prefix,
355
+ data=pickle.dumps(jobDescription, protocol=pickle.HIGHEST_PROTOCOL))
356
+ return jobDescription
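A sketch of the round trip these methods give (hypothetical usage; the JobDescription itself comes from the workflow machinery):

    desc: JobDescription = ...                      # built elsewhere by Toil
    store.assign_job_id(desc)                       # desc.jobStoreID becomes a fresh uuid4
    store.create_job(desc)                          # pickled and written under jobs/<uuid4>
    loaded = store.load_job(str(desc.jobStoreID))   # read back, unpickled, and re-bound to self.config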
357
+
358
+ def job_exists(self, job_id: str, check: bool = False) -> bool:
359
+ """
360
+ Checks if the job_id is found in s3.
361
+
362
+ :param check: If True, raise an exception instead of returning false
363
+ when a job does not exist.
245
364
  """
246
- # The weird mapping of the SDB item attribute value to the property value is due to
247
- # backwards compatibility. 'True' becomes True, that's easy. Toil < 3.3.0 writes this at
248
- # the end of job store creation. Absence of either the registry, the item or the
249
- # attribute becomes False, representing a truly absent, non-existing job store. An
250
- # attribute value of 'False', which is what Toil < 3.3.0 writes at the *beginning* of job
251
- # store destruction, indicates a job store in transition, reflecting the fact that 3.3.0
252
- # may leak buckets or domains even though the registry reports 'False' for them. We
253
- # can't handle job stores that were partially created by 3.3.0, though.
254
- registry_domain_name = "toil-registry"
255
365
  try:
256
- self._bindDomain(
257
- domain_name=registry_domain_name, create=False, block=False
366
+ self.s3_client.head_object(
367
+ Bucket=self.bucket_name,
368
+ Key=self._key_in_bucket(
369
+ identifier=job_id,
370
+ prefix=self.job_key_prefix,
371
+ ),
372
+ **self._get_encryption_args()
258
373
  )
259
- except DomainDoesNotExist:
260
- return False
261
-
262
- for attempt in retry_sdb():
263
- with attempt:
264
- get_result = self.db.get_attributes(
265
- DomainName=registry_domain_name,
266
- ItemName=self.name_prefix,
267
- AttributeNames=["exists"],
268
- ConsistentRead=True,
269
- )
270
- attributes: list["AttributeTypeDef"] = get_result.get(
271
- "Attributes", []
272
- ) # the documentation says 'Attributes' should always exist, but this is not true
273
- exists: Optional[str] = get_item_from_attributes(
274
- attributes=attributes, name="exists"
275
- )
276
- if exists is None:
277
- return False
278
- elif exists == "True":
279
- return True
280
- elif exists == "False":
281
- return None
282
- else:
283
- assert False
374
+ return True
375
+ except ClientError as e:
376
+ if e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') == 404:
377
+ if check:
378
+ raise NoSuchJobException(job_id)
379
+ else:
380
+ raise
381
+ except self.s3_client.exceptions.NoSuchKey:
382
+ if check:
383
+ raise NoSuchJobException(job_id)
384
+ else:
385
+ raise
386
+ return False
284
387
 
285
- @_registered.setter
286
- def _registered(self, value: bool) -> None:
287
- registry_domain_name = "toil-registry"
388
+ def jobs(self) -> Iterator[JobDescription]:
389
+ for result in list_s3_items(self.s3_resource, bucket=self.bucket_name, prefix=self.job_key_prefix):
390
+ try:
391
+ job_id = result['Key'][len(self.job_key_prefix):] # strip self.job_key_prefix
392
+ yield self.load_job(job_id)
393
+ except NoSuchJobException:
394
+ # job may have been deleted between showing up in the list and getting loaded
395
+ pass
396
+
397
+ def load_job(self, job_id: str) -> JobDescription:
398
+ """Use a job_id to get a job from the jobstore's s3 bucket, unpickle, and return it."""
288
399
  try:
289
- self._bindDomain(
290
- domain_name=registry_domain_name,
291
- # Only create registry domain when registering or
292
- # transitioning a store
293
- create=value is not False,
294
- block=False,
400
+ job = pickle.loads(self.read_from_bucket(identifier=job_id, prefix=self.job_key_prefix))
401
+ except NoSuchJobException:
402
+ raise
403
+
404
+ if not isinstance(job, JobDescription):
405
+ raise RuntimeError(
406
+ f"While trying to load a JobDescription for {job_id}, got a {type(job)} instead!",
295
407
  )
296
- except DomainDoesNotExist:
297
- pass
298
- else:
299
- for attempt in retry_sdb():
300
- with attempt:
301
- if value is False:
302
- self.db.delete_attributes(
303
- DomainName=registry_domain_name, ItemName=self.name_prefix
304
- )
305
- else:
306
- if value is True:
307
- attributes: list["ReplaceableAttributeTypeDef"] = [
308
- {"Name": "exists", "Value": "True", "Replace": True}
309
- ]
310
- elif value is None:
311
- attributes = [
312
- {"Name": "exists", "Value": "False", "Replace": True}
313
- ]
314
- else:
315
- assert False
316
- self.db.put_attributes(
317
- DomainName=registry_domain_name,
318
- ItemName=self.name_prefix,
319
- Attributes=attributes,
320
- )
321
-
322
- def _checkItem(self, item: "ItemTypeDef", enforce: bool = True) -> None:
323
- """
324
- Make sure that the given SimpleDB item actually has the attributes we think it should.
325
408
 
326
- Throw otherwise.
409
+ # Now we know it's the right type
410
+ job.assignConfig(self.config)
411
+ return job
327
412
 
328
- If enforce is false, log but don't throw.
329
- """
330
- self._checkAttributes(item["Attributes"], enforce)
413
+ def update_job(self, jobDescription: JobDescription) -> None:
414
+ self.create_job(jobDescription)
331
415
 
332
- def _checkAttributes(
333
- self, attributes: list["AttributeTypeDef"], enforce: bool = True
334
- ) -> None:
335
- if get_item_from_attributes(attributes=attributes, name="overlargeID") is None:
336
- logger.error(
337
- "overlargeID attribute isn't present: either SimpleDB entry is "
338
- "corrupt or jobstore is from an extremely old Toil: %s",
339
- attributes,
416
+ def delete_job(self, job_id: str) -> None:
417
+ logger.debug("Deleting job %s", job_id)
418
+
419
+ # delete the actual job file
420
+ self.s3_client.delete_object(
421
+ Bucket=self.bucket_name,
422
+ Key=self._key_in_bucket(
423
+ identifier=job_id,
424
+ prefix=self.job_key_prefix,
340
425
  )
341
- if enforce:
342
- raise RuntimeError(
343
- "encountered SimpleDB entry missing required attribute "
344
- "'overlargeID'; is your job store ancient?"
345
- )
426
+ )
346
427
 
347
- def _awsJobFromAttributes(self, attributes: list["AttributeTypeDef"]) -> Job:
348
- """
349
- Get a Toil Job object from attributes that are defined in an item from the DB
350
- :param attributes: List of attributes
351
- :return: Toil job
352
- """
353
- self._checkAttributes(attributes)
354
- overlarge_id_value = get_item_from_attributes(
355
- attributes=attributes, name="overlargeID"
428
+ # delete any files marked as associated with the job
429
+ job_file_associations_to_delete = []
430
+ root_key = self._key_in_bucket(
431
+ identifier=job_id,
432
+ prefix=self.job_associations_key_prefix,
356
433
  )
357
- if overlarge_id_value:
358
- assert self.file_exists(overlarge_id_value)
359
- # This is an overlarge job, download the actual attributes
360
- # from the file store
361
- logger.debug("Loading overlarge job from S3.")
362
- with self.read_file_stream(overlarge_id_value) as fh:
363
- binary = fh.read()
364
- else:
365
- binary, _ = SDBHelper.attributesToBinary(attributes)
366
- assert binary is not None
367
- job = pickle.loads(binary)
368
- if job is not None:
369
- job.assignConfig(self.config)
370
- return job
434
+ for associated_job_file in list_s3_items(self.s3_resource,
435
+ bucket=self.bucket_name,
436
+ prefix=root_key):
437
+ job_file_associations_to_delete.append(associated_job_file['Key'])
438
+ file_id = associated_job_file['Key'].split('.')[-1]
439
+ self.delete_file(file_id)
440
+
441
+ # delete the job-file association references (these are empty files that simply connect jobs to files)
442
+ for job_file_association in job_file_associations_to_delete:
443
+ self.s3_client.delete_object(Bucket=self.bucket_name, Key=f'{job_file_association}')
444
+
445
+ def associate_job_with_file(self, job_id: str, file_id: str) -> None:
446
+ # associate this job with this file; the file will be deleted when the job is
447
+ self.write_to_bucket(identifier=f'{job_id}.{file_id}', prefix=self.job_associations_key_prefix, data=None)
371
448
 
372
- def _awsJobFromItem(self, item: "ItemTypeDef") -> Job:
449
+ ###################################### FILES API ######################################
450
+
451
+ def write_file(self, local_path: str, job_id: Optional[str] = None, cleanup: bool = False) -> FileID:
373
452
  """
374
- Get a Toil Job object from an item from the DB
375
- :return: Toil Job
453
+ Write a local file into the jobstore and return a file_id referencing it.
454
+
455
+ :param job_id:
456
+ If job_id AND cleanup are supplied, associate this file with that job. When the job is deleted, the
457
+ file will be deleted as well.
458
+
459
+ :param cleanup:
460
+ If job_id AND cleanup are supplied, associate this file with that job. When the job is deleted, the
461
+ file will be deleted as well.
462
+ TODO: we don't need cleanup; remove it and only use job_id
376
463
  """
377
- return self._awsJobFromAttributes(item["Attributes"])
378
-
379
- def _awsJobToAttributes(self, job: JobDescription) -> list["AttributeTypeDef"]:
380
- binary = pickle.dumps(job, protocol=pickle.HIGHEST_PROTOCOL)
381
- if len(binary) > SDBHelper.maxBinarySize(extraReservedChunks=1):
382
- # Store as an overlarge job in S3
383
- with self.write_file_stream() as (writable, fileID):
384
- writable.write(binary)
385
- item = SDBHelper.binaryToAttributes(None)
386
- item["overlargeID"] = fileID
387
- else:
388
- item = SDBHelper.binaryToAttributes(binary)
389
- item["overlargeID"] = ""
390
- return SDBHelper.attributeDictToList(item)
464
+ # TODO: etag = compute_checksum_for_file(local_path, algorithm='etag')[len('etag$'):]
465
+ file_id = str(uuid.uuid4()) # mint a new file_id
466
+ file_attributes = os.stat(local_path)
467
+ size = file_attributes.st_size
468
+ executable = file_attributes.st_mode & stat.S_IXUSR != 0
469
+
470
+ if job_id and cleanup:
471
+ # associate this job with this file; then the file reference will be deleted when the job is
472
+ self.associate_job_with_file(job_id, file_id)
473
+
474
+ # Each file gets a prefix under which we put exactly one key, to hide
475
+ # metadata in the key.
476
+ prefix = self._key_in_bucket(
477
+ identifier=file_id,
478
+ prefix=self.content_key_prefix
479
+ )
391
480
 
392
- def _awsJobToItem(self, job: JobDescription, name: str) -> "ItemTypeDef":
393
- return {"Name": name, "Attributes": self._awsJobToAttributes(job)}
481
+ copy_local_to_s3(
482
+ s3_resource=self.s3_resource,
483
+ local_file_path=local_path,
484
+ dst_bucket=self.bucket_name,
485
+ dst_key=f'{prefix}/{os.path.basename(local_path)}',
486
+ extra_args=self._get_encryption_args()
487
+ )
488
+ return FileID(file_id, size, executable)
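So a call like the following (hypothetical path and IDs) produces one content key plus, when job_id and cleanup are given, one association marker:

    file_id = store.write_file("/tmp/out.bam", job_id=job_id, cleanup=True)
    # the bucket now holds:
    #   files/<file_id>/out.bam
    #   job-associations/<job_id>.<file_id>   (empty marker, so delete_job can clean the file up)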
394
489
 
395
- jobsPerBatchInsert = 25
490
+ def find_s3_key_from_file_id(self, file_id: str) -> str:
491
+ """This finds an s3 key for which file_id is the prefix, and which already exists."""
492
+ prefix = self._key_in_bucket(
493
+ identifier=file_id,
494
+ prefix=self.content_key_prefix
495
+ )
496
+ s3_keys = [s3_item for s3_item in list_s3_items(self.s3_resource, bucket=self.bucket_name, prefix=prefix)]
497
+ if len(s3_keys) == 0:
498
+ raise NoSuchFileException(file_id)
499
+ if len(s3_keys) > 1:
500
+ # There can be only one.
501
+ raise RuntimeError(f'File ID: {file_id} should be unique, but includes: {s3_keys}')
502
+ return s3_keys[0]['Key']
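In other words (illustrative values):

    # a file stored as files/3f2a.../sample.vcf is found again from just its ID:
    store.find_s3_key_from_file_id("3f2a...")   # -> "files/3f2a.../sample.vcf"
    # zero matches raises NoSuchFileException; more than one is treated as corruption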
396
503
 
397
504
  @contextmanager
398
- def batch(self) -> None:
399
- self._batchedUpdates = []
400
- yield
401
- batches = [
402
- self._batchedUpdates[i : i + self.jobsPerBatchInsert]
403
- for i in range(0, len(self._batchedUpdates), self.jobsPerBatchInsert)
404
- ]
405
-
406
- for batch in batches:
407
- items: list["ReplaceableItemTypeDef"] = []
408
- for jobDescription in batch:
409
- item_attributes: list["ReplaceableAttributeTypeDef"] = []
410
- jobDescription.pre_update_hook()
411
- item_name = compat_bytes(jobDescription.jobStoreID)
412
- got_job_attributes: list["AttributeTypeDef"] = self._awsJobToAttributes(
413
- jobDescription
414
- )
415
- for each_attribute in got_job_attributes:
416
- new_attribute: "ReplaceableAttributeTypeDef" = {
417
- "Name": each_attribute["Name"],
418
- "Value": each_attribute["Value"],
419
- "Replace": True,
420
- }
421
- item_attributes.append(new_attribute)
422
- items.append({"Name": item_name, "Attributes": item_attributes})
423
-
424
- for attempt in retry_sdb():
425
- with attempt:
426
- self.db.batch_put_attributes(
427
- DomainName=self.jobs_domain_name, Items=items
428
- )
429
- self._batchedUpdates = None
430
-
431
- def assign_job_id(self, job_description: JobDescription) -> None:
432
- jobStoreID = self._new_job_id()
433
- logger.debug("Assigning ID to job %s", jobStoreID)
434
- job_description.jobStoreID = jobStoreID
435
-
436
- def create_job(self, job_description: JobDescription) -> JobDescription:
437
- if hasattr(self, "_batchedUpdates") and self._batchedUpdates is not None:
438
- self._batchedUpdates.append(job_description)
439
- else:
440
- self.update_job(job_description)
441
- return job_description
442
-
443
- def job_exists(self, job_id: Union[bytes, str]) -> bool:
444
- for attempt in retry_sdb():
445
- with attempt:
446
- return (
447
- len(
448
- self.db.get_attributes(
449
- DomainName=self.jobs_domain_name,
450
- ItemName=compat_bytes(job_id),
451
- AttributeNames=[SDBHelper.presenceIndicator()],
452
- ConsistentRead=True,
453
- ).get("Attributes", [])
454
- )
455
- > 0
456
- )
457
-
458
- def jobs(self) -> Generator[Job, None, None]:
459
- job_items: Optional[list["ItemTypeDef"]] = None
460
- for attempt in retry_sdb():
461
- with attempt:
462
- job_items = boto3_pager(
463
- self.db.select,
464
- "Items",
465
- ConsistentRead=True,
466
- SelectExpression="select * from `%s`" % self.jobs_domain_name,
467
- )
468
- assert job_items is not None
469
- for jobItem in job_items:
470
- yield self._awsJobFromItem(jobItem)
471
-
472
- def load_job(self, job_id: FileID) -> Job:
473
- item_attributes = None
474
- for attempt in retry_sdb():
475
- with attempt:
476
- item_attributes = self.db.get_attributes(
477
- DomainName=self.jobs_domain_name,
478
- ItemName=compat_bytes(job_id),
479
- ConsistentRead=True,
480
- ).get("Attributes", [])
481
- if not item_attributes:
482
- raise NoSuchJobException(job_id)
483
- job = self._awsJobFromAttributes(item_attributes)
484
- if job is None:
485
- raise NoSuchJobException(job_id)
486
- logger.debug("Loaded job %s", job_id)
487
- return job
505
+ def write_file_stream(
506
+ self,
507
+ job_id: Optional[str] = None,
508
+ cleanup: bool = False,
509
+ basename: Optional[str] = None,
510
+ encoding: Optional[str] = None,
511
+ errors: Optional[str] = None,
512
+ ) -> Iterator[tuple[IO[bytes], str]]:
513
+ file_id = str(uuid.uuid4())
514
+ if job_id and cleanup:
515
+ self.associate_job_with_file(job_id, file_id)
516
+ prefix = self._key_in_bucket(
517
+ identifier=file_id,
518
+ prefix=self.content_key_prefix
519
+ )
488
520
 
489
- def update_job(self, job_description):
490
- logger.debug("Updating job %s", job_description.jobStoreID)
491
- job_description.pre_update_hook()
492
- job_attributes = self._awsJobToAttributes(job_description)
493
- update_attributes: list["ReplaceableAttributeTypeDef"] = [
494
- {"Name": attribute["Name"], "Value": attribute["Value"], "Replace": True}
495
- for attribute in job_attributes
496
- ]
497
- for attempt in retry_sdb():
498
- with attempt:
499
- self.db.put_attributes(
500
- DomainName=self.jobs_domain_name,
501
- ItemName=compat_bytes(job_description.jobStoreID),
502
- Attributes=update_attributes,
503
- )
521
+ pipe = MultiPartPipe(part_size=self.part_size,
522
+ s3_resource=self.s3_resource,
523
+ bucket_name=self.bucket_name,
524
+ file_id=f'{prefix}/{str(basename)}',
525
+ encryption_args=self._get_encryption_args(),
526
+ encoding=encoding,
527
+ errors=errors)
528
+ with pipe as writable:
529
+ yield writable, file_id
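A hedged usage sketch (argument names as defined above; the written bytes are streamed to s3 through MultiPartPipe in part_size chunks):

    with store.write_file_stream(job_id=job_id, cleanup=True, basename="stats.json") as (writable, file_id):
        writable.write(b'{"jobs": 42}')
    # file_id now refers to files/<file_id>/stats.json in the jobstore bucket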
504
530
 
505
- itemsPerBatchDelete = 25
531
+ @contextmanager
532
+ def update_file_stream(
533
+ self,
534
+ file_id: str,
535
+ encoding: Optional[str] = None,
536
+ errors: Optional[str] = None
537
+ ) -> Iterator[IO[Any]]:
538
+ logger.debug("Replacing file %s via multipart upload", file_id)
539
+ pipe = MultiPartPipe(
540
+ part_size=self.part_size,
541
+ s3_resource=self.s3_resource,
542
+ bucket_name=self.bucket_name,
543
+ file_id=self.find_s3_key_from_file_id(file_id),
544
+ encryption_args=self._get_encryption_args(),
545
+ encoding=encoding,
546
+ errors=errors,
547
+ )
548
+ with pipe as writable:
549
+ yield writable
506
550
 
507
- def delete_job(self, job_id):
508
- # remove job and replace with jobStoreId.
509
- logger.debug("Deleting job %s", job_id)
551
+ @contextmanager
552
+ def write_shared_file_stream(
553
+ self,
554
+ shared_file_name: str,
555
+ encrypted: Optional[bool] = None,
556
+ encoding: Optional[str] = None,
557
+ errors: Optional[str] = None,
558
+ ) -> Iterator[IO[bytes]]:
559
+ encryption_args = {} if encrypted is False else self._get_encryption_args()
560
+ pipe = MultiPartPipe(
561
+ part_size=self.part_size,
562
+ s3_resource=self.s3_resource,
563
+ bucket_name=self.bucket_name,
564
+ file_id=self._key_in_bucket(
565
+ identifier=shared_file_name,
566
+ prefix=self.shared_key_prefix,
567
+ ),
568
+ encryption_args=encryption_args,
569
+ encoding=encoding,
570
+ errors=errors,
571
+ )
572
+ with pipe as writable:
573
+ yield writable
510
574
 
511
- # If the job is overlarge, delete its file from the filestore
512
- for attempt in retry_sdb():
513
- with attempt:
514
- attributes = self.db.get_attributes(
515
- DomainName=self.jobs_domain_name,
516
- ItemName=compat_bytes(job_id),
517
- ConsistentRead=True,
518
- ).get("Attributes", [])
519
- # If the overlargeID has fallen off, maybe we partially deleted the
520
- # attributes of the item? Or raced on it? Or hit SimpleDB being merely
521
- # eventually consistent? We should still be able to get rid of it.
522
- self._checkAttributes(attributes, enforce=False)
523
- overlarge_id_value = get_item_from_attributes(
524
- attributes=attributes, name="overlargeID"
575
+ def update_file(self, file_id: str, local_path: str) -> None:
576
+ copy_local_to_s3(
577
+ s3_resource=self.s3_resource,
578
+ local_file_path=local_path,
579
+ dst_bucket=self.bucket_name,
580
+ dst_key=self.find_s3_key_from_file_id(file_id),
581
+ extra_args=self._get_encryption_args()
525
582
  )
526
- if overlarge_id_value:
527
- logger.debug("Deleting job from filestore")
528
- self.delete_file(overlarge_id_value)
529
- for attempt in retry_sdb():
530
- with attempt:
531
- self.db.delete_attributes(
532
- DomainName=self.jobs_domain_name, ItemName=compat_bytes(job_id)
533
- )
534
- items: Optional[list["ItemTypeDef"]] = None
535
- for attempt in retry_sdb():
536
- with attempt:
537
- items = list(
538
- boto3_pager(
539
- self.db.select,
540
- "Items",
541
- ConsistentRead=True,
542
- SelectExpression=f"select version from `{self.files_domain_name}` where ownerID='{job_id}'",
543
- )
544
- )
545
- assert items is not None
546
- if items:
547
- logger.debug(
548
- "Deleting %d file(s) associated with job %s", len(items), job_id
583
+
584
+ def file_exists(self, file_id: str) -> bool:
585
+ try:
586
+ # This throws if the file doesn't exist.
587
+ self.find_s3_key_from_file_id(file_id)
588
+ except NoSuchFileException:
589
+ # It didn't exist
590
+ return False
591
+ return True
592
+
593
+ def get_file_size(self, file_id: str) -> int:
594
+ """Do we need both get_file_size and _get_size???"""
595
+ full_s3_key = self.find_s3_key_from_file_id(file_id)
596
+ return self._get_size(url=urlparse(f's3://{self.bucket_name}/{full_s3_key}')) or 0
597
+
598
+ @classmethod
599
+ def _get_size(cls, url: ParseResult) -> Optional[int]:
600
+ """Do we need both get_file_size and _get_size???"""
601
+ try:
602
+ return get_object_for_url(url, existing=True).content_length
603
+ except (AWSKeyNotFoundError, NoSuchFileException):
604
+ return 0
605
+
606
+ def read_file(self, file_id: str, local_path: str, symlink: bool = False) -> None:
607
+ full_s3_key = self.find_s3_key_from_file_id(file_id)
608
+ executable = getattr(file_id, "executable", False)
609
+ try:
610
+ copy_s3_to_local(
611
+ s3_resource=self.s3_resource,
612
+ local_file_path=local_path,
613
+ src_bucket=self.bucket_name,
614
+ src_key=full_s3_key,
615
+ extra_args=self._get_encryption_args()
549
616
  )
550
- n = self.itemsPerBatchDelete
551
- batches = [items[i : i + n] for i in range(0, len(items), n)]
552
- for batch in batches:
553
- delete_items: list["DeletableItemTypeDef"] = [
554
- {"Name": item["Name"]} for item in batch
555
- ]
556
- for attempt in retry_sdb():
557
- with attempt:
558
- self.db.batch_delete_attributes(
559
- DomainName=self.files_domain_name, Items=delete_items
560
- )
561
- for item in items:
562
- item: "ItemTypeDef"
563
- version = get_item_from_attributes(
564
- attributes=item["Attributes"], name="version"
565
- )
566
- for attempt in retry_s3():
567
- with attempt:
568
- if version:
569
- self.s3_client.delete_object(
570
- Bucket=self.files_bucket.name,
571
- Key=compat_bytes(item["Name"]),
572
- VersionId=version,
573
- )
574
- else:
575
- self.s3_client.delete_object(
576
- Bucket=self.files_bucket.name,
577
- Key=compat_bytes(item["Name"]),
578
- )
617
+ if executable:
618
+ os.chmod(local_path, os.stat(local_path).st_mode | stat.S_IXUSR)
619
+ except self.s3_client.exceptions.NoSuchKey:
620
+ raise NoSuchFileException(file_id)
621
+ except ClientError as e:
622
+ if e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') == 404:
623
+ raise NoSuchFileException(file_id)
624
+ else:
625
+ raise
579
626
 
580
- def get_empty_file_store_id(
581
- self, jobStoreID=None, cleanup=False, basename=None
582
- ) -> FileID:
583
- info = self.FileInfo.create(jobStoreID if cleanup else None)
584
- with info.uploadStream() as _:
585
- # Empty
586
- pass
587
- info.save()
588
- logger.debug("Created %r.", info)
589
- return info.fileID
627
+ @contextmanager # type: ignore
628
+ def read_file_stream( # type: ignore
629
+ self,
630
+ file_id: Union[FileID, str],
631
+ encoding: Optional[str] = None,
632
+ errors: Optional[str] = None,
633
+ ) -> Union[ContextManager[IO[bytes]], ContextManager[IO[str]]]:
634
+ full_s3_key = self.find_s3_key_from_file_id(file_id)
635
+ try:
636
+ with download_stream(self.s3_resource,
637
+ bucket=self.bucket_name,
638
+ key=full_s3_key,
639
+ extra_args=self._get_encryption_args(),
640
+ encoding=encoding,
641
+ errors=errors) as readable:
642
+ yield readable
643
+ except self.s3_client.exceptions.NoSuchKey:
644
+ raise NoSuchFileException(file_id)
645
+ except ClientError as e:
646
+ if e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') == 404:
647
+ raise NoSuchFileException(file_id)
648
+ else:
649
+ raise
650
+
651
+ @overload
652
+ @contextmanager
653
+ def read_shared_file_stream(
654
+ self,
655
+ shared_file_name: str,
656
+ encoding: str,
657
+ errors: Optional[str] = None,
658
+ ) -> Iterator[IO[str]]: ...
659
+
660
+ @overload
661
+ @contextmanager
662
+ def read_shared_file_stream(
663
+ self,
664
+ shared_file_name: str,
665
+ encoding: Literal[None] = None,
666
+ errors: Optional[str] = None,
667
+ ) -> Iterator[IO[bytes]]: ...
668
+
669
+ @contextmanager
670
+ def read_shared_file_stream(
671
+ self,
672
+ shared_file_name: str,
673
+ encoding: Optional[str] = None,
674
+ errors: Optional[str] = None,
675
+ ) -> Iterator[Union[IO[bytes], IO[str]]]:
676
+ self._requireValidSharedFileName(shared_file_name)
677
+ key = self._key_in_bucket(identifier=shared_file_name, prefix=self.shared_key_prefix)
678
+ if not self.is_in_bucket(
679
+ identifier=shared_file_name,
680
+ prefix=self.shared_key_prefix,
681
+ ):
682
+ # TRAVIS=true TOIL_OWNER_TAG="shared" /home/quokka/git/toil/v3nv/bin/python -m pytest --durations=0 --log-level DEBUG --log-cli-level INFO -r s /home/quokka/git/toil/src/toil/test/jobStores/jobStoreTest.py::EncryptedAWSJobStoreTest::testJobDeletions
683
+ # throw NoSuchFileException in download_stream
684
+ raise NoSuchFileException(f's3://{self.bucket_name}/{key}')
685
+
686
+ try:
687
+ with download_stream(self.s3_resource,
688
+ bucket=self.bucket_name,
689
+ key=key,
690
+ encoding=encoding,
691
+ errors=errors,
692
+ extra_args=self._get_encryption_args()) as readable:
693
+ yield readable
694
+ except self.s3_client.exceptions.NoSuchKey:
695
+ raise NoSuchFileException(shared_file_name)
696
+ except ClientError as e:
697
+ if e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') == 404:
698
+ raise NoSuchFileException(shared_file_name)
699
+ else:
700
+ raise
701
+
702
+ def delete_file(self, file_id: str) -> None:
703
+ try:
704
+ full_s3_key = self.find_s3_key_from_file_id(file_id)
705
+ except NoSuchFileException:
706
+ # The file is gone. That's great, we're idempotent.
707
+ return
708
+ self.s3_client.delete_object(Bucket=self.bucket_name, Key=full_s3_key)
709
+
710
+ ###################################### URI API ######################################
590
711
 
591
712
  def _import_file(
592
713
  self,
593
- otherCls,
714
+ otherCls: type[URLAccess],
594
715
  uri: ParseResult,
595
716
  shared_file_name: Optional[str] = None,
596
717
  hardlink: bool = False,
597
718
  symlink: bool = True,
598
719
  ) -> Optional[FileID]:
599
- try:
600
- if issubclass(otherCls, AWSJobStore):
601
- srcObj = get_object_for_url(uri, existing=True)
602
- size = srcObj.content_length
603
- if shared_file_name is None:
604
- info = self.FileInfo.create(srcObj.key)
605
- else:
606
- self._requireValidSharedFileName(shared_file_name)
607
- jobStoreFileID = self._shared_file_id(shared_file_name)
608
- info = self.FileInfo.loadOrCreate(
609
- jobStoreFileID=jobStoreFileID,
610
- ownerID=str(self.sharedFileOwnerID),
611
- encrypted=None,
612
- )
613
- info.copyFrom(srcObj)
614
- info.save()
615
- return FileID(info.fileID, size) if shared_file_name is None else None
616
- except (NoBucketLocationError, ServerSideCopyProhibitedError):
617
- # AWS refuses to tell us where the bucket is or do this copy for us
618
- logger.warning(
619
- "Falling back to copying via the local machine. This could get expensive!"
720
+ """
721
+ Upload a file into the s3 bucket jobstore from the source uri.
722
+
723
+ This db entry's existence should always be in sync with the file's existence (when one exists,
724
+ so must the other).
725
+ """
726
+ # we are copying from s3 to s3
727
+ if isinstance(otherCls, AWSJobStore):
728
+ src_bucket_name, src_key_name = parse_s3_uri(uri.geturl())
729
+ response = head_s3_object(self.s3_resource, bucket=src_bucket_name, key=src_key_name, check=True)
730
+ content_length = response['ContentLength'] # e.g. 65536
731
+
732
+ file_id = str(uuid.uuid4())
733
+ if shared_file_name:
734
+ dst_key = self._key_in_bucket(identifier=shared_file_name, prefix=self.shared_key_prefix)
735
+ else:
736
+ # cannot determine exec bit from foreign s3 so default to False
737
+ dst_key = "/".join([
738
+ self._key_in_bucket(identifier=file_id, prefix=self.content_key_prefix),
739
+ src_key_name.split("/")[-1],
740
+ ])
741
+
742
+ copy_s3_to_s3(
743
+ s3_resource=self.s3_resource,
744
+ src_bucket=src_bucket_name,
745
+ src_key=src_key_name,
746
+ dst_bucket=self.bucket_name,
747
+ dst_key=dst_key,
748
+ extra_args=self._get_encryption_args()
620
749
  )
750
+ # TODO: verify etag after copying here?
621
751
 
622
- # copy if exception
623
- return super()._import_file(otherCls, uri, shared_file_name=shared_file_name)
752
+ return FileID(file_id, content_length) if not shared_file_name else None
753
+ else:
754
+ return super(AWSJobStore, self)._import_file(
755
+ otherCls=otherCls,
756
+ uri=uri,
757
+ shared_file_name=shared_file_name,
758
+ hardlink=hardlink,
759
+ symlink=symlink
760
+ )
624
761
 
625
- def _export_file(self, otherCls, file_id: FileID, uri: ParseResult) -> None:
626
- try:
627
- if issubclass(otherCls, AWSJobStore):
628
- dstObj = get_object_for_url(uri)
629
- info = self.FileInfo.loadOrFail(file_id)
630
- info.copyTo(dstObj)
631
- return
632
- except (NoBucketLocationError, ServerSideCopyProhibitedError):
633
- # AWS refuses to tell us where the bucket is or do this copy for us
634
- logger.warning(
635
- "Falling back to copying via the local machine. This could get expensive!"
762
+ def _export_file(
763
+ self, otherCls: type[URLAccess], jobStoreFileID: FileID, url: ParseResult
764
+ ) -> None:
765
+ """Export a file_id in the jobstore to the url."""
766
+ if isinstance(otherCls, AWSJobStore):
767
+ src_full_s3_key = self.find_s3_key_from_file_id(jobStoreFileID)
768
+ dst_bucket_name, dst_key_name = parse_s3_uri(url.geturl())
769
+ copy_s3_to_s3(
770
+ s3_resource=self.s3_resource,
771
+ src_bucket=self.bucket_name,
772
+ src_key=src_full_s3_key,
773
+ dst_bucket=dst_bucket_name,
774
+ dst_key=dst_key_name,
775
+ extra_args=self._get_encryption_args()
636
776
  )
637
777
  else:
638
- super()._default_export_file(otherCls, file_id, uri)
778
+ super(AWSJobStore, self)._default_export_file(otherCls, jobStoreFileID, url)
639
779
 
640
- ###
641
- # URL access implementation
642
- ###
780
+ @classmethod
781
+ def _read_from_url(
782
+ cls, url: ParseResult, writable: Union[IO[bytes], IO[str]]
783
+ ) -> tuple[int, bool]:
784
+ src_obj = get_object_for_url(url, existing=True)
785
+ src_obj.download_fileobj(writable)
786
+ executable = False
787
+ return src_obj.content_length, executable
643
788
 
644
- # URL access methods aren't used by the rest of the job store methods.
789
+ @classmethod
790
+ def _write_to_url(
791
+ cls,
792
+ readable: Union[IO[bytes], IO[str]],
793
+ url: ParseResult,
794
+ executable: bool = False,
795
+ ) -> None:
796
+ dst_obj = get_object_for_url(url)
797
+ upload_to_s3(readable=readable,
798
+ s3_resource=establish_boto3_session().resource("s3"),
799
+ bucket=dst_obj.bucket_name,
800
+ key=dst_obj.key)
645
801
 
646
802
  @classmethod
647
803
  def _url_exists(cls, url: ParseResult) -> bool:
648
804
  try:
649
- try:
650
- get_object_for_url(url, existing=True, anonymous=True)
651
- except PermissionError:
652
- # If we can't look anonymously, log in
653
- get_object_for_url(url, existing=True)
805
+ get_object_for_url(url, existing=True)
654
806
  return True
655
807
  except FileNotFoundError:
656
808
  # Not a file
657
- # Might be a directory. Or we might not have access to know.
658
- # See if it's a directory.
809
+ # Might be a directory.
659
810
  return cls._get_is_directory(url)
660
811
 
661
- @classmethod
662
- def _get_size(cls, url: ParseResult) -> int:
663
- try:
664
- src_obj = get_object_for_url(url, existing=True, anonymous=True)
665
- except PermissionError:
666
- src_obj = get_object_for_url(url, existing=True)
667
- return src_obj.content_length
668
-
669
- @classmethod
670
- def _read_from_url(cls, url: ParseResult, writable):
671
- try:
672
- src_obj = get_object_for_url(url, existing=True, anonymous=True)
673
- src_obj.download_fileobj(writable)
674
- except Exception as e:
675
- if isinstance(e, PermissionError) or (isinstance(e, ClientError) and get_error_status(e) == 403):
676
- # The object setup or the download does not have permission. Try again with a login.
677
- src_obj = get_object_for_url(url, existing=True)
678
- src_obj.download_fileobj(writable)
679
- else:
680
- raise
681
- return (src_obj.content_length, False) # executable bit is always False
682
-
683
812
  @classmethod
684
813
  def _open_url(cls, url: ParseResult) -> IO[bytes]:
685
814
  try:
@@ -695,1415 +824,186 @@ class AWSJobStore(AbstractJobStore):
695
824
  # We should get back a response with a stream in 'Body'
696
825
  if "Body" not in response:
697
826
  raise RuntimeError(f"Could not fetch body stream for {url}")
698
- return response["Body"]
827
+ return response["Body"] # type: ignore
699
828
 
700
829
  @classmethod
701
- def _write_to_url(
702
- cls, readable, url: ParseResult, executable: bool = False
703
- ) -> None:
704
- # Don't try to do anonymous writes.
705
- dstObj = get_object_for_url(url)
706
-
707
- logger.debug("Uploading %s", dstObj.key)
708
- # uploadFile takes care of using multipart upload if the file is larger than partSize (default to 5MB)
709
- uploadFile(
710
- readable=readable,
711
- resource=s3_boto3_resource,
712
- bucketName=dstObj.bucket_name,
713
- fileID=dstObj.key,
714
- partSize=5 * 1000 * 1000,
715
- )
830
+ def _list_url(cls, url: ParseResult) -> list[str]:
831
+ return list_objects_for_url(url)
716
832
 
717
833
  @classmethod
718
- def _list_url(cls, url: ParseResult) -> list[str]:
834
+ def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
835
+ # TODO: export seems unused
836
+ return url.scheme.lower() == 's3'
837
+
838
+ def get_public_url(self, file_id: str) -> str:
839
+ """Turn s3:// into http:// and put a public-read ACL on it."""
719
840
  try:
720
- return list_objects_for_url(url, anonymous=True)
721
- except PermissionError:
722
- return list_objects_for_url(url)
723
-
841
+ return create_public_url(
842
+ self.s3_resource,
843
+ bucket=self.bucket_name,
844
+ key=self._key_in_bucket(
845
+ identifier=file_id,
846
+ prefix=self.content_key_prefix,
847
+ ),
848
+ )
849
+ except self.s3_client.exceptions.NoSuchKey:
850
+ raise NoSuchFileException(file_id)
851
+ except ClientError as e:
852
+ if e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') == 404:
853
+ raise NoSuchFileException(file_id)
854
+ else:
855
+ raise
856
+
857
+ def get_shared_public_url(self, file_id: str) -> str:
858
+ """Turn s3:// into http:// and put a public-read ACL on it."""
859
+ # since this is only for a few files like "config.pickle"... why and what is this used for?
860
+ self._requireValidSharedFileName(file_id)
861
+ try:
862
+ return create_public_url(
863
+ self.s3_resource,
864
+ bucket=self.bucket_name,
865
+ key=self._key_in_bucket(
866
+ identifier=file_id,
867
+ prefix=self.shared_key_prefix,
868
+ ),
869
+ )
870
+ except self.s3_client.exceptions.NoSuchKey:
871
+ raise NoSuchFileException(file_id)
872
+ except ClientError as e:
873
+ if e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') == 404:
874
+ raise NoSuchFileException(file_id)
875
+ else:
876
+ raise
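`create_public_url` is a helper in `toil.lib.aws.s3`, so its exact behavior is not shown in this diff; the general pattern it replaces (visible in the removed `get_public_url` further down) is to mark the object public-read and hand back a presigned GET URL. A rough, purely illustrative sketch of that pattern with plain boto3:

    import boto3


    def make_object_public(bucket: str, key: str, expires_seconds: int = 3600) -> str:
        """Put a public-read ACL on an object and return a presigned GET URL for it."""
        s3 = boto3.resource("s3")
        s3.Object(bucket, key).Acl().put(ACL="public-read")
        return s3.meta.client.generate_presigned_url(
            "get_object",
            Params={"Bucket": bucket, "Key": key},
            ExpiresIn=expires_seconds,
        )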
724
877
 
725
878
  @classmethod
726
879
  def _get_is_directory(cls, url: ParseResult) -> bool:
727
880
  # We consider it a directory if anything is in it.
728
881
  # TODO: Can we just get the first item and not the whole list?
729
- return len(cls._list_url(url)) > 0
730
-
731
- @classmethod
732
- def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
733
- return url.scheme.lower() == "s3"
882
+ return len(list_objects_for_url(url)) > 0
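The TODO above asks whether the directory check can avoid listing every object; with `list_objects_v2` it can, by capping the listing at a single key. A sketch, assuming the s3:// URL has already been split into a bucket and a prefix:

    import boto3


    def s3_prefix_is_nonempty(bucket: str, prefix: str) -> bool:
        """Return True if at least one key exists under the prefix."""
        client = boto3.client("s3")
        response = client.list_objects_v2(Bucket=bucket, Prefix=prefix, MaxKeys=1)
        return response.get("KeyCount", 0) > 0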
734
883
 
735
- def write_file(
736
- self, local_path: FileID, job_id: Optional[FileID] = None, cleanup: bool = False
737
- ) -> FileID:
738
- info = self.FileInfo.create(job_id if cleanup else None)
739
- info.upload(local_path, not self.config.disableJobStoreChecksumVerification)
740
- info.save()
741
- logger.debug("Wrote %r of from %r", info, local_path)
742
- return info.fileID
743
-
744
- @contextmanager
745
- def write_file_stream(
884
+ def get_empty_file_store_id(
746
885
  self,
747
- job_id: Optional[FileID] = None,
886
+ job_id: Optional[str] = None,
748
887
  cleanup: bool = False,
749
- basename=None,
750
- encoding=None,
751
- errors=None,
752
- ):
753
- info = self.FileInfo.create(job_id if cleanup else None)
754
- with info.uploadStream(encoding=encoding, errors=errors) as writable:
755
- yield writable, info.fileID
756
- info.save()
757
- logger.debug("Wrote %r.", info)
758
-
759
- @contextmanager
760
- def write_shared_file_stream(
761
- self, shared_file_name, encrypted=None, encoding=None, errors=None
762
- ):
763
- self._requireValidSharedFileName(shared_file_name)
764
- info = self.FileInfo.loadOrCreate(
765
- jobStoreFileID=self._shared_file_id(shared_file_name),
766
- ownerID=str(self.sharedFileOwnerID),
767
- encrypted=encrypted,
768
- )
769
- with info.uploadStream(encoding=encoding, errors=errors) as writable:
770
- yield writable
771
- info.save()
772
- logger.debug("Wrote %r for shared file %r.", info, shared_file_name)
773
-
774
- def update_file(self, file_id, local_path):
775
- info = self.FileInfo.loadOrFail(file_id)
776
- info.upload(local_path, not self.config.disableJobStoreChecksumVerification)
777
- info.save()
778
- logger.debug("Wrote %r from path %r.", info, local_path)
779
-
780
- @contextmanager
781
- def update_file_stream(self, file_id, encoding=None, errors=None):
782
- info = self.FileInfo.loadOrFail(file_id)
783
- with info.uploadStream(encoding=encoding, errors=errors) as writable:
784
- yield writable
785
- info.save()
786
- logger.debug("Wrote %r from stream.", info)
787
-
788
- def file_exists(self, file_id):
789
- return self.FileInfo.exists(file_id)
790
-
791
- def get_file_size(self, file_id):
792
- if not self.file_exists(file_id):
793
- return 0
794
- info = self.FileInfo.loadOrFail(file_id)
795
- return info.getSize()
796
-
797
- def read_file(self, file_id, local_path, symlink=False):
798
- info = self.FileInfo.loadOrFail(file_id)
799
- logger.debug("Reading %r into %r.", info, local_path)
800
- info.download(local_path, not self.config.disableJobStoreChecksumVerification)
801
- if getattr(file_id, "executable", False):
802
- os.chmod(local_path, os.stat(local_path).st_mode | stat.S_IXUSR)
803
-
804
- @contextmanager
805
- def read_file_stream(self, file_id, encoding=None, errors=None):
806
- info = self.FileInfo.loadOrFail(file_id)
807
- logger.debug("Reading %r into stream.", info)
808
- with info.downloadStream(encoding=encoding, errors=errors) as readable:
809
- yield readable
810
-
811
- @contextmanager
812
- def read_shared_file_stream(self, shared_file_name, encoding=None, errors=None):
813
- self._requireValidSharedFileName(shared_file_name)
814
- jobStoreFileID = self._shared_file_id(shared_file_name)
815
- info = self.FileInfo.loadOrFail(jobStoreFileID, customName=shared_file_name)
816
- logger.debug(
817
- "Reading %r for shared file %r into stream.", info, shared_file_name
818
- )
819
- with info.downloadStream(encoding=encoding, errors=errors) as readable:
820
- yield readable
821
-
822
- def delete_file(self, file_id):
823
- info = self.FileInfo.load(file_id)
824
- if info is None:
825
- logger.debug("File %s does not exist, skipping deletion.", file_id)
826
- else:
827
- info.delete()
828
-
829
- def write_logs(self, msg):
830
- info = self.FileInfo.create(str(self.statsFileOwnerID))
831
- with info.uploadStream(multipart=False) as writeable:
832
- if isinstance(msg, str):
833
- # This stream is for binary data, so encode any non-encoded things
834
- msg = msg.encode("utf-8", errors="ignore")
835
- writeable.write(msg)
836
- info.save()
837
-
838
- def read_logs(self, callback, read_all=False):
839
- itemsProcessed = 0
840
-
841
- for info in self._read_logs(callback, self.statsFileOwnerID):
842
- info._ownerID = str(self.readStatsFileOwnerID) # boto3 requires strings
843
- info.save()
844
- itemsProcessed += 1
845
-
846
- if read_all:
847
- for _ in self._read_logs(callback, self.readStatsFileOwnerID):
848
- itemsProcessed += 1
849
-
850
- return itemsProcessed
851
-
852
- def _read_logs(self, callback, ownerId):
853
- items = None
854
- for attempt in retry_sdb():
855
- with attempt:
856
- items = boto3_pager(
857
- self.db.select,
858
- "Items",
859
- ConsistentRead=True,
860
- SelectExpression=f"select * from `{self.files_domain_name}` where ownerID='{str(ownerId)}'",
861
- )
862
- assert items is not None
863
- for item in items:
864
- info = self.FileInfo.fromItem(item)
865
- with info.downloadStream() as readable:
866
- callback(readable)
867
- yield info
868
-
869
- # TODO: Make this retry more specific?
870
- # example: https://github.com/DataBiosphere/toil/issues/3378
871
- @retry()
872
- def get_public_url(self, jobStoreFileID):
873
- info = self.FileInfo.loadOrFail(jobStoreFileID)
874
- if info.content is not None:
875
- with info.uploadStream(allowInlining=False) as f:
876
- f.write(info.content)
877
-
878
- self.files_bucket.Object(compat_bytes(jobStoreFileID)).Acl().put(
879
- ACL="public-read"
888
+ basename: Optional[str] = None,
889
+ ) -> str:
890
+ """Create an empty file in s3 and return a bare string file ID."""
891
+ file_id = str(uuid.uuid4())
892
+ self.write_to_bucket(
893
+ identifier=f'{file_id}/0/{basename}',
894
+ prefix=self.content_key_prefix,
895
+ data=None,
896
+ bucket=self.bucket_name
880
897
  )
881
-
882
- url = self.s3_client.generate_presigned_url(
883
- "get_object",
884
- Params={
885
- "Bucket": self.files_bucket.name,
886
- "Key": compat_bytes(jobStoreFileID),
887
- "VersionId": info.version,
888
- },
889
- ExpiresIn=self.publicUrlExpiration.total_seconds(),
898
+ if job_id and cleanup:
899
+ self.associate_job_with_file(job_id, file_id)
900
+ return file_id
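`write_to_bucket` with `data=None` is part of the new job store's internal API; with plain boto3 the same effect, reserving a key for an empty file, is a `put_object` with an empty body. The bucket name and key layout below are illustrative only:

    import uuid

    import boto3

    client = boto3.client("s3")
    file_id = str(uuid.uuid4())
    # A zero-byte object is enough to reserve the file ID's key in the bucket.
    client.put_object(
        Bucket="example-jobstore--toil",
        Key=f"content/{file_id}/0/output.txt",
        Body=b"",
    )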
901
+
902
+ ###################################### LOGGING API ######################################
903
+
904
+ def write_logs(self, log_msg: Union[bytes, str]) -> None:
905
+ if isinstance(log_msg, str):
906
+ log_msg = log_msg.encode('utf-8', errors='ignore')
907
+ file_obj = BytesIO(log_msg)
908
+
909
+ key_name = self._key_in_bucket(
910
+ identifier=f'{datetime.datetime.now()}{str(uuid.uuid4())}'.replace(
911
+ ' ', '_'
912
+ ),
913
+ prefix=self.logs_key_prefix,
890
914
  )
915
+ self.s3_client.upload_fileobj(Bucket=self.bucket_name,
916
+ Key=key_name,
917
+ ExtraArgs=self._get_encryption_args(),
918
+ Fileobj=file_obj)
891
919
 
892
- # boto doesn't properly remove the x-amz-security-token parameter when
893
- # query_auth is False when using an IAM role (see issue #2043). Including the
894
- # x-amz-security-token parameter without the access key results in a 403,
895
- # even if the resource is public, so we need to remove it.
896
- scheme, netloc, path, query, fragment = urlsplit(url)
897
- params = parse_qs(query)
898
- if "x-amz-security-token" in params:
899
- del params["x-amz-security-token"]
900
- if "AWSAccessKeyId" in params:
901
- del params["AWSAccessKeyId"]
902
- if "Signature" in params:
903
- del params["Signature"]
904
- query = urlencode(params, doseq=True)
905
- url = urlunsplit((scheme, netloc, path, query, fragment))
906
- return url
907
-
908
- def get_shared_public_url(self, shared_file_name):
909
- self._requireValidSharedFileName(shared_file_name)
910
- return self.get_public_url(self._shared_file_id(shared_file_name))
911
-
912
- def _bindBucket(
913
- self,
914
- bucket_name: str,
915
- create: bool = False,
916
- block: bool = True,
917
- versioning: bool = False,
918
- check_versioning_consistency: bool = True,
919
- ):
920
+ def read_logs(self, callback: Callable[..., Any], read_all: bool = False) -> int:
920
921
  """
921
- Return the Boto Bucket object representing the S3 bucket with the given name. If the
922
- bucket does not exist and `create` is True, it will be created.
923
-
924
- :param str bucket_name: the name of the bucket to bind to
925
-
926
- :param bool create: Whether to create the bucket if it doesn't exist
927
-
928
- :param bool block: If False, return None if the bucket doesn't exist. If True, wait until
929
- bucket appears. Ignored if `create` is True.
930
-
931
- :rtype: Bucket|None
932
- :raises botocore.exceptions.ClientError: If `block` is True and the bucket still doesn't exist after the
933
- retry timeout expires.
922
+ This fetches all referenced logs in the database from s3 as readable objects
923
+ and runs "callback()" on them.
934
924
  """
935
- assert self.minBucketNameLen <= len(bucket_name) <= self.maxBucketNameLen
936
- assert self.bucketNameRe.match(bucket_name)
937
- logger.debug("Binding to job store bucket '%s'.", bucket_name)
938
-
939
- def bucket_retry_predicate(error):
940
- """
941
- Decide, given an error, whether we should retry binding the bucket.
942
- """
943
-
944
- if isinstance(error, ClientError) and get_error_status(error) in (404, 409):
945
- # Handle cases where the bucket creation is in a weird state that might let us proceed.
946
- # https://github.com/BD2KGenomics/toil/issues/955
947
- # https://github.com/BD2KGenomics/toil/issues/995
948
- # https://github.com/BD2KGenomics/toil/issues/1093
949
-
950
- # BucketAlreadyOwnedByYou == 409
951
- # OperationAborted == 409
952
- # NoSuchBucket == 404
953
- return True
954
- if get_error_code(error) == "SlowDown":
955
- # We may get told to SlowDown by AWS when we try to create our
956
- # bucket. In that case, we should retry and use the exponential
957
- # backoff.
958
- return True
959
- return False
960
-
961
- bucketExisted = True
962
- for attempt in retry_s3(predicate=bucket_retry_predicate):
963
- with attempt:
964
- try:
965
- # the head_bucket() call makes sure that the bucket exists and the user can access it
966
- self.s3_client.head_bucket(Bucket=bucket_name)
967
-
968
- bucket = self.s3_resource.Bucket(bucket_name)
969
- except ClientError as e:
970
- error_http_status = get_error_status(e)
971
- if error_http_status == 404:
972
- bucketExisted = False
973
- logger.debug("Bucket '%s' does not exist.", bucket_name)
974
- if create:
975
- bucket = create_s3_bucket(
976
- self.s3_resource, bucket_name, self.region
977
- )
978
- # Wait until the bucket exists before checking the region and adding tags
979
- bucket.wait_until_exists()
980
-
981
- # It is possible for create_bucket to return but
982
- # for an immediate request for the bucket region to
983
- # produce an S3ResponseError with code
984
- # NoSuchBucket. We let that kick us back up to the
985
- # main retry loop.
986
- assert (
987
- get_bucket_region(bucket_name) == self.region
988
- ), f"bucket_name: {bucket_name}, {get_bucket_region(bucket_name)} != {self.region}"
989
-
990
- tags = build_tag_dict_from_env()
991
-
992
- if tags:
993
- flat_tags = flatten_tags(tags)
994
- bucket_tagging = self.s3_resource.BucketTagging(
995
- bucket_name
996
- )
997
- bucket_tagging.put(Tagging={"TagSet": flat_tags})
998
-
999
- # Configure bucket so that we can make objects in
1000
- # it public, which was the historical default.
1001
- enable_public_objects(bucket_name)
1002
- elif block:
1003
- raise
1004
- else:
1005
- return None
1006
- elif error_http_status == 301:
1007
- # This is raised if the user attempts to get a bucket in a region outside
1008
- # the specified one, if the specified one is not `us-east-1`. The us-east-1
1009
- # server allows a user to use buckets from any region.
1010
- raise BucketLocationConflictException(
1011
- get_bucket_region(bucket_name)
1012
- )
1013
- else:
1014
- raise
1015
- else:
1016
- bucketRegion = get_bucket_region(bucket_name)
1017
- if bucketRegion != self.region:
1018
- raise BucketLocationConflictException(bucketRegion)
1019
-
1020
- if versioning and not bucketExisted:
1021
- # only call this method on bucket creation
1022
- bucket.Versioning().enable()
1023
- # Now wait until versioning is actually on. Some uploads
1024
- # would come back with no versions; maybe they were
1025
- # happening too fast and this setting isn't sufficiently
1026
- # consistent?
1027
- time.sleep(1)
1028
- while not self._getBucketVersioning(bucket_name):
1029
- logger.warning(
1030
- f"Waiting for versioning activation on bucket '{bucket_name}'..."
1031
- )
1032
- time.sleep(1)
1033
- elif check_versioning_consistency:
1034
- # now test for versioning consistency
1035
- # we should never see any of these errors since 'versioning' should always be true
1036
- bucket_versioning = self._getBucketVersioning(bucket_name)
1037
- if bucket_versioning != versioning:
1038
- assert False, "Cannot modify versioning on existing bucket"
1039
- elif bucket_versioning is None:
1040
- assert False, "Cannot use a bucket with versioning suspended"
1041
- if bucketExisted:
1042
- logger.debug(
1043
- f"Using pre-existing job store bucket '{bucket_name}'."
1044
- )
1045
- else:
1046
- logger.debug(
1047
- f"Created new job store bucket '{bucket_name}' with versioning state {versioning}."
1048
- )
1049
-
1050
- return bucket
1051
-
1052
- def _bindDomain(
1053
- self, domain_name: str, create: bool = False, block: bool = True
1054
- ) -> None:
925
+ items_processed = 0
926
+ LOG_MARKER = "most_recently_read_log.marker"
927
+ read_log_marker = "0"
928
+ if not read_all:
929
+ # We want to pick up reading where we left off
930
+ try:
931
+ read_log_marker = self.read_from_bucket(
932
+ identifier=LOG_MARKER,
933
+ prefix=self.shared_key_prefix
934
+ ).decode('utf-8')
935
+ except self.s3_client.exceptions.NoSuchKey:
936
+ # We haven't recorded that we've read anything yet.
937
+ # Leave read_log_marker at "0"
938
+ pass
939
+
940
+ startafter = None if read_log_marker == "0" else read_log_marker
941
+ for result in list_s3_items(self.s3_resource, bucket=self.bucket_name, prefix=self.logs_key_prefix, startafter=startafter):
942
+ if result['Key'] > read_log_marker or read_all:
943
+ read_log_marker = result['Key']
944
+ with download_stream(self.s3_resource,
945
+ bucket=self.bucket_name,
946
+ key=result['Key'],
947
+ extra_args=self._get_encryption_args()) as readable:
948
+ callback(readable)
949
+ items_processed += 1
950
+
951
+ if items_processed > 0:
952
+ # We processed something, so we need to update the marker.
953
+ self.write_to_bucket(identifier=LOG_MARKER,
954
+ prefix=self.shared_key_prefix,
955
+ data=read_log_marker)
956
+ return items_processed
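The marker logic above works because S3 lists keys in ascending UTF-8 order and the log keys begin with a timestamp, so passing the last key read as `StartAfter` skips everything already processed. A sketch of that listing pattern with a paginator; the bucket, prefix, and marker values are illustrative:

    from typing import Optional

    import boto3


    def keys_after_marker(bucket: str, prefix: str, start_after: Optional[str] = None) -> list[str]:
        """List keys under a prefix, optionally skipping past a previously seen key."""
        client = boto3.client("s3")
        paginator = client.get_paginator("list_objects_v2")
        kwargs = {"Bucket": bucket, "Prefix": prefix}
        if start_after is not None:
            kwargs["StartAfter"] = start_after
        keys = []
        for page in paginator.paginate(**kwargs):
            for item in page.get("Contents", []):
                keys.append(item["Key"])
        return keys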
957
+
958
+ def _get_encryption_args(self) -> dict[str, Any]:
1055
959
  """
1056
- Return the Boto3 domain name representing the SDB domain. When create=True, it will
1057
- create the domain if it does not exist.
1058
- Return the Boto Domain object representing the SDB domain of the given name. If the
1059
- domain does not exist and `create` is True, it will be created.
1060
-
1061
- :param str domain_name: the name of the domain to bind to
960
+ Get the encryption arguments to pass to an AWS function.
1062
961
 
1063
- :param bool create: True if domain should be created if it doesn't exist
962
+ Reads live from the SSE key file referenced by the config.
1064
963
 
1065
- :param bool block: If False, raise DomainDoesNotExist if the domain doesn't exist. If True, wait until
1066
- domain appears. This parameter is ignored if create is True.
964
+ If the config is not available, returns an empty dict.
1067
965
 
1068
- :rtype: None
1069
- :raises ClientError: If `block` is True and the domain still doesn't exist after the
1070
- retry timeout expires.
1071
- """
1072
- logger.debug("Binding to job store domain '%s'.", domain_name)
1073
- retryargs = dict(
1074
- predicate=lambda e: no_such_sdb_domain(e) or sdb_unavailable(e)
1075
- )
1076
- if not block:
1077
- retryargs["timeout"] = 15
1078
- for attempt in retry_sdb(**retryargs):
1079
- with attempt:
1080
- try:
1081
- self.db.domain_metadata(DomainName=domain_name)
1082
- return
1083
- except ClientError as e:
1084
- if no_such_sdb_domain(e):
1085
- if create:
1086
- self.db.create_domain(DomainName=domain_name)
1087
- return
1088
- elif block:
1089
- raise
1090
- else:
1091
- raise DomainDoesNotExist(domain_name)
1092
- else:
1093
- raise
1094
-
1095
- def _new_job_id(self):
1096
- return str(uuid.uuid4())
1097
-
1098
- # A dummy job ID under which all shared files are stored
1099
- sharedFileOwnerID = uuid.UUID("891f7db6-e4d9-4221-a58e-ab6cc4395f94")
1100
-
1101
- # A dummy job ID under which all unread stats files are stored
1102
- statsFileOwnerID = uuid.UUID("bfcf5286-4bc7-41ef-a85d-9ab415b69d53")
1103
-
1104
- # A dummy job ID under which all read stats files are stored
1105
- readStatsFileOwnerID = uuid.UUID("e77fc3aa-d232-4255-ae04-f64ee8eb0bfa")
1106
-
1107
- def _shared_file_id(self, shared_file_name):
1108
- return str(uuid.uuid5(self.sharedFileOwnerID, shared_file_name))
1109
-
1110
- @InnerClass
1111
- class FileInfo(SDBHelper):
966
+ :raises ValueError: If the key data is not formatted correctly.
1112
967
  """
1113
- Represents a file in this job store.
1114
- """
1115
-
1116
- outer = None
1117
- """
1118
- :type: AWSJobStore
1119
- """
1120
-
1121
- def __init__(
1122
- self,
1123
- fileID,
1124
- ownerID,
1125
- encrypted,
1126
- version=None,
1127
- content=None,
1128
- numContentChunks=0,
1129
- checksum=None,
1130
- ):
1131
- """
1132
- :type fileID: str
1133
- :param fileID: the file's ID
1134
-
1135
- :type ownerID: str
1136
- :param ownerID: ID of the entity owning this file, typically a job ID aka jobStoreID
1137
-
1138
- :type encrypted: bool
1139
- :param encrypted: whether the file is stored in encrypted form
1140
-
1141
- :type version: str|None
1142
- :param version: a non-empty string containing the most recent version of the S3
1143
- object storing this file's content, None if the file is new, or empty string if the
1144
- file is inlined.
1145
-
1146
- :type content: str|None
1147
- :param content: this file's inlined content
1148
-
1149
- :type numContentChunks: int
1150
- :param numContentChunks: the number of SDB domain attributes occupied by this files
1151
-
1152
- :type checksum: str|None
1153
- :param checksum: the checksum of the file, if available. Formatted
1154
- as <algorithm>$<lowercase hex hash>.
1155
-
1156
- inlined content. Note that an inlined empty string still occupies one chunk.
1157
- """
1158
- super().__init__()
1159
- self._fileID = fileID
1160
- self._ownerID = ownerID
1161
- self.encrypted = encrypted
1162
- self._version = version
1163
- self._previousVersion = version
1164
- self._content = content
1165
- self._checksum = checksum
1166
- self._numContentChunks = numContentChunks
1167
-
1168
- @property
1169
- def fileID(self):
1170
- return self._fileID
1171
-
1172
- @property
1173
- def ownerID(self):
1174
- return self._ownerID
1175
-
1176
- @property
1177
- def version(self):
1178
- return self._version
1179
-
1180
- @version.setter
1181
- def version(self, version):
1182
- # Version should only change once
1183
- assert self._previousVersion == self._version
1184
- self._version = version
1185
- if version:
1186
- self.content = None
1187
-
1188
- @property
1189
- def previousVersion(self):
1190
- return self._previousVersion
1191
-
1192
- @property
1193
- def content(self):
1194
- return self._content
1195
-
1196
- @property
1197
- def checksum(self):
1198
- return self._checksum
1199
-
1200
- @checksum.setter
1201
- def checksum(self, checksum):
1202
- self._checksum = checksum
1203
-
1204
- @content.setter
1205
- def content(self, content):
1206
- assert content is None or isinstance(content, bytes)
1207
- self._content = content
1208
- if content is not None:
1209
- self.version = ""
1210
-
1211
- @classmethod
1212
- def create(cls, ownerID: str):
1213
- return cls(
1214
- str(uuid.uuid4()), ownerID, encrypted=cls.outer.sseKeyPath is not None
1215
- )
1216
-
1217
- @classmethod
1218
- def presenceIndicator(cls):
1219
- return "encrypted"
1220
-
1221
- @classmethod
1222
- def exists(cls, jobStoreFileID):
1223
- for attempt in retry_sdb():
1224
- with attempt:
1225
- return bool(
1226
- cls.outer.db.get_attributes(
1227
- DomainName=cls.outer.files_domain_name,
1228
- ItemName=compat_bytes(jobStoreFileID),
1229
- AttributeNames=[cls.presenceIndicator()],
1230
- ConsistentRead=True,
1231
- ).get("Attributes", [])
1232
- )
1233
-
1234
- @classmethod
1235
- def load(cls, jobStoreFileID):
1236
- for attempt in retry_sdb():
1237
- with attempt:
1238
- self = cls.fromItem(
1239
- {
1240
- "Name": compat_bytes(jobStoreFileID),
1241
- "Attributes": cls.outer.db.get_attributes(
1242
- DomainName=cls.outer.files_domain_name,
1243
- ItemName=compat_bytes(jobStoreFileID),
1244
- ConsistentRead=True,
1245
- ).get("Attributes", []),
1246
- }
1247
- )
1248
- return self
1249
-
1250
- @classmethod
1251
- def loadOrCreate(cls, jobStoreFileID, ownerID, encrypted):
1252
- self = cls.load(jobStoreFileID)
1253
- if encrypted is None:
1254
- encrypted = cls.outer.sseKeyPath is not None
1255
- if self is None:
1256
- self = cls(jobStoreFileID, ownerID, encrypted=encrypted)
1257
- else:
1258
- assert self.fileID == jobStoreFileID
1259
- assert self.ownerID == ownerID
1260
- self.encrypted = encrypted
1261
- return self
1262
-
1263
- @classmethod
1264
- def loadOrFail(cls, jobStoreFileID, customName=None):
1265
- """
1266
- :rtype: AWSJobStore.FileInfo
1267
- :return: an instance of this class representing the file with the given ID
1268
- :raises NoSuchFileException: if given file does not exist
1269
- """
1270
- self = cls.load(jobStoreFileID)
1271
- if self is None:
1272
- raise NoSuchFileException(jobStoreFileID, customName=customName)
1273
- else:
1274
- return self
1275
-
1276
- @classmethod
1277
- def fromItem(cls, item: "ItemTypeDef"):
1278
- """
1279
- Convert an SDB item to an instance of this class.
1280
-
1281
- :type item: Item
1282
- """
1283
- assert item is not None
1284
-
1285
- # Strings come back from SDB as unicode
1286
- def strOrNone(s):
1287
- return s if s is None else str(s)
1288
-
1289
- # ownerID and encrypted are the only mandatory attributes
1290
- ownerID, encrypted, version, checksum = SDBHelper.get_attributes_from_item(
1291
- item, ["ownerID", "encrypted", "version", "checksum"]
1292
- )
1293
- if ownerID is None:
1294
- assert encrypted is None
1295
- return None
1296
- else:
1297
- encrypted = strict_bool(encrypted)
1298
- content, numContentChunks = cls.attributesToBinary(item["Attributes"])
1299
- if encrypted:
1300
- sseKeyPath = cls.outer.sseKeyPath
1301
- if sseKeyPath is None:
1302
- raise AssertionError(
1303
- "Content is encrypted but no key was provided."
1304
- )
1305
- if content is not None:
1306
- content = encryption.decrypt(content, sseKeyPath)
1307
- self = cls(
1308
- fileID=item["Name"],
1309
- ownerID=ownerID,
1310
- encrypted=encrypted,
1311
- version=version,
1312
- content=content,
1313
- numContentChunks=numContentChunks,
1314
- checksum=checksum,
1315
- )
1316
- return self
1317
-
1318
- def toItem(self) -> tuple[dict[str, str], int]:
1319
- """
1320
- Convert this instance to a dictionary of attribute names to values
1321
-
1322
- :return: the attributes dict and an integer specifying the number of chunk
1323
- attributes in the dictionary that are used for storing inlined content.
1324
- """
1325
- content = self.content
1326
- assert content is None or isinstance(content, bytes)
1327
- if self.encrypted and content is not None:
1328
- sseKeyPath = self.outer.sseKeyPath
1329
- if sseKeyPath is None:
1330
- raise AssertionError(
1331
- "Encryption requested but no key was provided."
1332
- )
1333
- content = encryption.encrypt(content, sseKeyPath)
1334
- assert content is None or isinstance(content, bytes)
1335
- attributes = self.binaryToAttributes(content)
1336
- numChunks = int(attributes["numChunks"])
1337
- attributes.update(
1338
- dict(
1339
- ownerID=self.ownerID or "",
1340
- encrypted=str(self.encrypted),
1341
- version=self.version or "",
1342
- checksum=self.checksum or "",
1343
- )
1344
- )
1345
- return attributes, numChunks
1346
-
1347
- @classmethod
1348
- def _reservedAttributes(cls):
1349
- return 3 + super()._reservedAttributes()
1350
-
1351
- @staticmethod
1352
- def maxInlinedSize():
1353
- return 256
1354
-
1355
- def save(self):
1356
- attributes, numNewContentChunks = self.toItem()
1357
- attributes_boto3 = SDBHelper.attributeDictToList(attributes)
1358
- # False stands for absence
1359
- if self.previousVersion is None:
1360
- expected: "UpdateConditionTypeDef" = {
1361
- "Name": "version",
1362
- "Exists": False,
1363
- }
1364
- else:
1365
- expected = {"Name": "version", "Value": cast(str, self.previousVersion)}
1366
- try:
1367
- for attempt in retry_sdb():
1368
- with attempt:
1369
- self.outer.db.put_attributes(
1370
- DomainName=self.outer.files_domain_name,
1371
- ItemName=compat_bytes(self.fileID),
1372
- Attributes=[
1373
- {
1374
- "Name": attribute["Name"],
1375
- "Value": attribute["Value"],
1376
- "Replace": True,
1377
- }
1378
- for attribute in attributes_boto3
1379
- ],
1380
- Expected=expected,
1381
- )
1382
- # clean up the old version of the file if necessary and safe
1383
- if self.previousVersion and (self.previousVersion != self.version):
1384
- for attempt in retry_s3():
1385
- with attempt:
1386
- self.outer.s3_client.delete_object(
1387
- Bucket=self.outer.files_bucket.name,
1388
- Key=compat_bytes(self.fileID),
1389
- VersionId=self.previousVersion,
1390
- )
1391
- self._previousVersion = self._version
1392
- if numNewContentChunks < self._numContentChunks:
1393
- residualChunks = range(numNewContentChunks, self._numContentChunks)
1394
- residual_chunk_names = [self._chunkName(i) for i in residualChunks]
1395
- # boto3 requires providing the value as well as the name in the attribute, and we don't store it locally
1396
- # the php sdk resolves this issue by not requiring the Value key https://github.com/aws/aws-sdk-php/issues/185
1397
- # but this doesn't extend to boto3
1398
- delete_attributes = self.outer.db.get_attributes(
1399
- DomainName=self.outer.files_domain_name,
1400
- ItemName=compat_bytes(self.fileID),
1401
- AttributeNames=[chunk for chunk in residual_chunk_names],
1402
- ).get("Attributes")
1403
- for attempt in retry_sdb():
1404
- with attempt:
1405
- self.outer.db.delete_attributes(
1406
- DomainName=self.outer.files_domain_name,
1407
- ItemName=compat_bytes(self.fileID),
1408
- Attributes=delete_attributes,
1409
- )
1410
- self.outer.db.get_attributes(
1411
- DomainName=self.outer.files_domain_name,
1412
- ItemName=compat_bytes(self.fileID),
1413
- )
1414
-
1415
- self._numContentChunks = numNewContentChunks
1416
- except ClientError as e:
1417
- if get_error_code(e) == "ConditionalCheckFailed":
1418
- raise ConcurrentFileModificationException(self.fileID)
1419
- else:
1420
- raise
1421
-
1422
- def upload(self, localFilePath, calculateChecksum=True):
1423
- file_size, file_time = fileSizeAndTime(localFilePath)
1424
- if file_size <= self.maxInlinedSize():
1425
- with open(localFilePath, "rb") as f:
1426
- self.content = f.read()
1427
- # Clear out any old checksum in case of overwrite
1428
- self.checksum = ""
1429
- else:
1430
- headerArgs = self._s3EncryptionArgs()
1431
- # Create a new Resource in case it needs to be on its own thread
1432
- resource = boto3_session.resource("s3", region_name=self.outer.region)
1433
-
1434
- self.checksum = (
1435
- self._get_file_checksum(localFilePath)
1436
- if calculateChecksum
1437
- else None
1438
- )
1439
- self.version = uploadFromPath(
1440
- localFilePath,
1441
- resource=resource,
1442
- bucketName=self.outer.files_bucket.name,
1443
- fileID=compat_bytes(self.fileID),
1444
- headerArgs=headerArgs,
1445
- partSize=self.outer.part_size,
1446
- )
1447
-
1448
- def _start_checksum(self, to_match=None, algorithm="sha1"):
1449
- """
1450
- Get a hasher that can be used with _update_checksum and
1451
- _finish_checksum.
1452
-
1453
- If to_match is set, it is a precomputed checksum which we expect
1454
- the result to match.
1455
-
1456
- The right way to compare checksums is to feed in the checksum to be
1457
- matched, so we can see its algorithm, instead of getting a new one
1458
- and comparing. If a checksum to match is fed in, _finish_checksum()
1459
- will raise a ChecksumError if it isn't matched.
1460
- """
1461
-
1462
- # If we have an expected result it will go here.
1463
- expected = None
1464
-
1465
- if to_match is not None:
1466
- parts = to_match.split("$")
1467
- algorithm = parts[0]
1468
- expected = parts[1]
1469
-
1470
- wrapped = getattr(hashlib, algorithm)()
1471
- logger.debug(f"Starting {algorithm} checksum to match {expected}")
1472
- return algorithm, wrapped, expected
1473
-
1474
- def _update_checksum(self, checksum_in_progress, data):
1475
- """
1476
- Update a checksum in progress from _start_checksum with new data.
1477
- """
1478
- checksum_in_progress[1].update(data)
1479
-
1480
- def _finish_checksum(self, checksum_in_progress):
1481
- """
1482
- Complete a checksum in progress from _start_checksum and return the
1483
- checksum result string.
1484
- """
1485
-
1486
- result_hash = checksum_in_progress[1].hexdigest()
1487
-
1488
- logger.debug(
1489
- f"Completed checksum with hash {result_hash} vs. expected {checksum_in_progress[2]}"
1490
- )
1491
- if checksum_in_progress[2] is not None:
1492
- # We expected a particular hash
1493
- if result_hash != checksum_in_progress[2]:
1494
- raise ChecksumError(
1495
- "Checksum mismatch. Expected: %s Actual: %s"
1496
- % (checksum_in_progress[2], result_hash)
1497
- )
1498
-
1499
- return "$".join([checksum_in_progress[0], result_hash])
1500
-
1501
- def _get_file_checksum(self, localFilePath, to_match=None):
1502
- with open(localFilePath, "rb") as f:
1503
- hasher = self._start_checksum(to_match=to_match)
1504
- contents = f.read(1024 * 1024)
1505
- while contents != b"":
1506
- self._update_checksum(hasher, contents)
1507
- contents = f.read(1024 * 1024)
1508
- return self._finish_checksum(hasher)
1509
-
1510
- @contextmanager
1511
- def uploadStream(
1512
- self, multipart=True, allowInlining=True, encoding=None, errors=None
1513
- ):
1514
- """
1515
- Context manager that gives out a binary or text mode upload stream to upload data.
1516
- """
1517
-
1518
- # Note that we have to handle already having a content or a version
1519
- # if we are overwriting something.
1520
-
1521
- # But make sure we don't have both.
1522
- assert not (bool(self.version) and self.content is not None)
1523
-
1524
- info = self
1525
- store = self.outer
1526
-
1527
- class MultiPartPipe(WritablePipe):
1528
- def readFrom(self, readable):
1529
- # Get the first block of data we want to put
1530
- buf = readable.read(store.part_size)
1531
- assert isinstance(buf, bytes)
1532
-
1533
- if allowInlining and len(buf) <= info.maxInlinedSize():
1534
- logger.debug("Inlining content of %d bytes", len(buf))
1535
- info.content = buf
1536
- # There will be no checksum
1537
- info.checksum = ""
1538
- else:
1539
- # We will compute a checksum
1540
- hasher = info._start_checksum()
1541
- logger.debug("Updating checksum with %d bytes", len(buf))
1542
- info._update_checksum(hasher, buf)
1543
-
1544
- client = store.s3_client
1545
- bucket_name = store.files_bucket.name
1546
- headerArgs = info._s3EncryptionArgs()
1547
-
1548
- for attempt in retry_s3():
1549
- with attempt:
1550
- logger.debug("Starting multipart upload")
1551
- # low-level clients are thread safe
1552
- upload = client.create_multipart_upload(
1553
- Bucket=bucket_name,
1554
- Key=compat_bytes(info.fileID),
1555
- **headerArgs,
1556
- )
1557
- uploadId = upload["UploadId"]
1558
- parts = []
1559
- logger.debug("Multipart upload started as %s", uploadId)
1560
-
1561
- for attempt in retry_s3():
1562
- with attempt:
1563
- for i in range(CONSISTENCY_TICKS):
1564
- # Sometimes we can create a multipart upload and not see it. Wait around for it.
1565
- response = client.list_multipart_uploads(
1566
- Bucket=bucket_name,
1567
- MaxUploads=1,
1568
- Prefix=compat_bytes(info.fileID),
1569
- )
1570
- if (
1571
- "Uploads" in response
1572
- and len(response["Uploads"]) != 0
1573
- and response["Uploads"][0]["UploadId"]
1574
- == uploadId
1575
- ):
1576
-
1577
- logger.debug(
1578
- "Multipart upload visible as %s", uploadId
1579
- )
1580
- break
1581
- else:
1582
- logger.debug(
1583
- "Multipart upload %s is not visible; we see %s",
1584
- uploadId,
1585
- response.get("Uploads"),
1586
- )
1587
- time.sleep(CONSISTENCY_TIME * 2**i)
1588
-
1589
- try:
1590
- for part_num in itertools.count():
1591
- for attempt in retry_s3():
1592
- with attempt:
1593
- logger.debug(
1594
- "Uploading part %d of %d bytes to %s",
1595
- part_num + 1,
1596
- len(buf),
1597
- uploadId,
1598
- )
1599
- # TODO: include the Content-MD5 header:
1600
- # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.complete_multipart_upload
1601
- part = client.upload_part(
1602
- Bucket=bucket_name,
1603
- Key=compat_bytes(info.fileID),
1604
- PartNumber=part_num + 1,
1605
- UploadId=uploadId,
1606
- Body=BytesIO(buf),
1607
- **headerArgs,
1608
- )
1609
-
1610
- parts.append(
1611
- {
1612
- "PartNumber": part_num + 1,
1613
- "ETag": part["ETag"],
1614
- }
1615
- )
1616
-
1617
- # Get the next block of data we want to put
1618
- buf = readable.read(info.outer.part_size)
1619
- assert isinstance(buf, bytes)
1620
- if len(buf) == 0:
1621
- # Don't allow any part other than the very first to be empty.
1622
- break
1623
- info._update_checksum(hasher, buf)
1624
- except:
1625
- with panic(log=logger):
1626
- for attempt in retry_s3():
1627
- with attempt:
1628
- client.abort_multipart_upload(
1629
- Bucket=bucket_name,
1630
- Key=compat_bytes(info.fileID),
1631
- UploadId=uploadId,
1632
- )
1633
-
1634
- else:
1635
-
1636
- while not store._getBucketVersioning(
1637
- store.files_bucket.name
1638
- ):
1639
- logger.warning(
1640
- "Versioning does not appear to be enabled yet. Deferring multipart "
1641
- "upload completion..."
1642
- )
1643
- time.sleep(1)
1644
-
1645
- # Save the checksum
1646
- info.checksum = info._finish_checksum(hasher)
1647
-
1648
- for attempt in retry_s3(timeout=600):
1649
- # Wait here for a bit longer if S3 breaks,
1650
- # because we have been known to flake out here
1651
- # in tests
1652
- # (https://github.com/DataBiosphere/toil/issues/3894)
1653
- with attempt:
1654
- logger.debug("Attempting to complete upload...")
1655
- completed = client.complete_multipart_upload(
1656
- Bucket=bucket_name,
1657
- Key=compat_bytes(info.fileID),
1658
- UploadId=uploadId,
1659
- MultipartUpload={"Parts": parts},
1660
- )
1661
-
1662
- logger.debug(
1663
- "Completed upload object of type %s: %s",
1664
- str(type(completed)),
1665
- repr(completed),
1666
- )
1667
- info.version = completed.get("VersionId")
1668
- logger.debug(
1669
- "Completed upload with version %s",
1670
- str(info.version),
1671
- )
1672
-
1673
- if info.version is None:
1674
- # Somehow we don't know the version. Try and get it.
1675
- for attempt in retry_s3(
1676
- predicate=lambda e: retryable_s3_errors(e)
1677
- or isinstance(e, AssertionError)
1678
- ):
1679
- with attempt:
1680
- version = client.head_object(
1681
- Bucket=bucket_name,
1682
- Key=compat_bytes(info.fileID),
1683
- **headerArgs,
1684
- ).get("VersionId", None)
1685
- logger.warning(
1686
- "Loaded key for upload with no version and got version %s",
1687
- str(version),
1688
- )
1689
- info.version = version
1690
- assert info.version is not None
1691
-
1692
- # Make sure we actually wrote something, even if an empty file
1693
- assert bool(info.version) or info.content is not None
1694
-
1695
- class SinglePartPipe(WritablePipe):
1696
- def readFrom(self, readable):
1697
- buf = readable.read()
1698
- assert isinstance(buf, bytes)
1699
- dataLength = len(buf)
1700
- if allowInlining and dataLength <= info.maxInlinedSize():
1701
- logger.debug("Inlining content of %d bytes", len(buf))
1702
- info.content = buf
1703
- # There will be no checksum
1704
- info.checksum = ""
1705
- else:
1706
- # We will compute a checksum
1707
- hasher = info._start_checksum()
1708
- info._update_checksum(hasher, buf)
1709
- info.checksum = info._finish_checksum(hasher)
1710
-
1711
- bucket_name = store.files_bucket.name
1712
- headerArgs = info._s3EncryptionArgs()
1713
- client = store.s3_client
1714
-
1715
- buf = BytesIO(buf)
1716
-
1717
- while not store._getBucketVersioning(bucket_name):
1718
- logger.warning(
1719
- "Versioning does not appear to be enabled yet. Deferring single part "
1720
- "upload..."
1721
- )
1722
- time.sleep(1)
1723
-
1724
- for attempt in retry_s3():
1725
- with attempt:
1726
- logger.debug(
1727
- "Uploading single part of %d bytes", dataLength
1728
- )
1729
- client.upload_fileobj(
1730
- Bucket=bucket_name,
1731
- Key=compat_bytes(info.fileID),
1732
- Fileobj=buf,
1733
- ExtraArgs=headerArgs,
1734
- )
1735
-
1736
- # use head_object with the SSE headers to access versionId and content_length attributes
1737
- headObj = client.head_object(
1738
- Bucket=bucket_name,
1739
- Key=compat_bytes(info.fileID),
1740
- **headerArgs,
1741
- )
1742
- assert dataLength == headObj.get("ContentLength", None)
1743
- info.version = headObj.get("VersionId", None)
1744
- logger.debug(
1745
- "Upload received version %s", str(info.version)
1746
- )
1747
-
1748
- if info.version is None:
1749
- # Somehow we don't know the version
1750
- for attempt in retry_s3(
1751
- predicate=lambda e: retryable_s3_errors(e)
1752
- or isinstance(e, AssertionError)
1753
- ):
1754
- with attempt:
1755
- headObj = client.head_object(
1756
- Bucket=bucket_name,
1757
- Key=compat_bytes(info.fileID),
1758
- **headerArgs,
1759
- )
1760
- info.version = headObj.get("VersionId", None)
1761
- logger.warning(
1762
- "Reloaded key with no version and got version %s",
1763
- str(info.version),
1764
- )
1765
- assert info.version is not None
1766
-
1767
- # Make sure we actually wrote something, even if an empty file
1768
- assert bool(info.version) or info.content is not None
1769
-
1770
- if multipart:
1771
- pipe = MultiPartPipe(encoding=encoding, errors=errors)
1772
- else:
1773
- pipe = SinglePartPipe(encoding=encoding, errors=errors)
968
+ # TODO: Maybe memoize the file read, subject to config field changes?
1774
969
 
1775
- with pipe as writable:
1776
- yield writable
1777
-
1778
- if not pipe.reader_done:
1779
- logger.debug(f"Version: {self.version} Content: {self.content}")
1780
- raise RuntimeError(
1781
- "Escaped context manager without written data being read!"
970
+ try:
971
+ config = self.config
972
+ except AttributeError:
973
+ # The config isn't set yet. This happens during resume(), when we
974
+ # need to get the encryption args to talk to the job store to
975
+ # download the config, before we have it.
976
+ return {}
977
+
978
+ if config is not None and config.sseKey:
979
+ with open(config.sseKey, 'r') as f:
980
+ sse_key = f.read()
981
+ if not len(sse_key) == 32: # TODO: regex
982
+ raise ValueError(
983
+ f'Check that {self.config.sseKey} '
984
+ f'is the path to a real SSE key. '
985
+ f'(Key length {len(sse_key)} != 32)'
1782
986
  )
987
+ return {'SSECustomerAlgorithm': 'AES256', 'SSECustomerKey': sse_key}
988
+ else:
989
+ return {}
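The dict built above is what boto3 expects for SSE-C: it can be unpacked as keyword arguments on low-level client calls or passed as `ExtraArgs` to the managed transfer helpers, and boto3 base64-encodes the key and computes its MD5 on the wire. A sketch of both uses; the key path, bucket, and key names are illustrative:

    import boto3


    def sse_c_args(key_path: str) -> dict[str, str]:
        """Build SSE-C arguments from a file holding a 32-character AES key."""
        with open(key_path) as f:
            sse_key = f.read()
        if len(sse_key) != 32:
            raise ValueError(f"{key_path} does not hold a 32-character SSE-C key")
        return {"SSECustomerAlgorithm": "AES256", "SSECustomerKey": sse_key}


    client = boto3.client("s3")
    args = sse_c_args("/run/secrets/toil-sse-key")
    client.head_object(Bucket="example-bucket", Key="files/some-id", **args)
    client.download_file(
        Bucket="example-bucket",
        Key="files/some-id",
        Filename="/tmp/some-id",
        ExtraArgs=args,
    )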
1783
990
 
1784
- # We check our work to make sure we have exactly one of embedded
1785
- # content or a real object version.
1786
-
1787
- if self.content is None:
1788
- if not bool(self.version):
1789
- logger.debug(f"Version: {self.version} Content: {self.content}")
1790
- raise RuntimeError("No content added and no version created")
1791
- else:
1792
- if bool(self.version):
1793
- logger.debug(f"Version: {self.version} Content: {self.content}")
1794
- raise RuntimeError("Content added and version created")
1795
-
1796
- def copyFrom(self, srcObj):
1797
- """
1798
- Copies contents of source key into this file.
1799
-
1800
- :param S3.Object srcObj: The key (object) that will be copied from
1801
- """
1802
- assert srcObj.content_length is not None
1803
- if srcObj.content_length <= self.maxInlinedSize():
1804
- self.content = srcObj.get().get("Body").read()
1805
- else:
1806
- # Create a new Resource in case it needs to be on its own thread
1807
- resource = boto3_session.resource("s3", region_name=self.outer.region)
1808
- self.version = copyKeyMultipart(
1809
- resource,
1810
- srcBucketName=compat_bytes(srcObj.bucket_name),
1811
- srcKeyName=compat_bytes(srcObj.key),
1812
- srcKeyVersion=compat_bytes(srcObj.version_id),
1813
- dstBucketName=compat_bytes(self.outer.files_bucket.name),
1814
- dstKeyName=compat_bytes(self._fileID),
1815
- sseAlgorithm="AES256",
1816
- sseKey=self._getSSEKey(),
1817
- )
991
+ def parse_jobstore_identifier(jobstore_identifier: str) -> Tuple[str, str]:
992
+ region, jobstore_name = jobstore_identifier.split(':')
993
+ bucket_name = f'{jobstore_name}--toil'
1818
994
 
1819
- def copyTo(self, dstObj):
1820
- """
1821
- Copies contents of this file to the given key.
1822
-
1823
- :param S3.Object dstObj: The key (object) to copy this file's content to
1824
- """
1825
- if self.content is not None:
1826
- for attempt in retry_s3():
1827
- with attempt:
1828
- dstObj.put(Body=self.content)
1829
- elif self.version:
1830
- # Create a new Resource in case it needs to be on its own thread
1831
- resource = boto3_session.resource("s3", region_name=self.outer.region)
1832
-
1833
- for attempt in retry_s3():
1834
- # encrypted = True if self.outer.sseKeyPath else False
1835
- with attempt:
1836
- copyKeyMultipart(
1837
- resource,
1838
- srcBucketName=compat_bytes(self.outer.files_bucket.name),
1839
- srcKeyName=compat_bytes(self.fileID),
1840
- srcKeyVersion=compat_bytes(self.version),
1841
- dstBucketName=compat_bytes(dstObj.bucket_name),
1842
- dstKeyName=compat_bytes(dstObj.key),
1843
- copySourceSseAlgorithm="AES256",
1844
- copySourceSseKey=self._getSSEKey(),
1845
- )
1846
- else:
1847
- assert False
1848
-
1849
- def download(self, localFilePath, verifyChecksum=True):
1850
- if self.content is not None:
1851
- with AtomicFileCreate(localFilePath) as tmpPath:
1852
- with open(tmpPath, "wb") as f:
1853
- f.write(self.content)
1854
- elif self.version:
1855
- headerArgs = self._s3EncryptionArgs()
1856
- obj = self.outer.files_bucket.Object(compat_bytes(self.fileID))
1857
-
1858
- for attempt in retry_s3(
1859
- predicate=lambda e: retryable_s3_errors(e)
1860
- or isinstance(e, ChecksumError)
1861
- ):
1862
- with attempt:
1863
- with AtomicFileCreate(localFilePath) as tmpPath:
1864
- obj.download_file(
1865
- Filename=tmpPath,
1866
- ExtraArgs={"VersionId": self.version, **headerArgs},
1867
- )
1868
-
1869
- if verifyChecksum and self.checksum:
1870
- try:
1871
- # This automatically compares the result and matches the algorithm.
1872
- self._get_file_checksum(localFilePath, self.checksum)
1873
- except ChecksumError as e:
1874
- # Annotate checksum mismatches with file name
1875
- raise ChecksumError(
1876
- "Checksums do not match for file %s."
1877
- % localFilePath
1878
- ) from e
1879
- # The error will get caught and result in a retry of the download until we run out of retries.
1880
- # TODO: handle obviously truncated downloads by resuming instead.
1881
- else:
1882
- assert False
1883
-
1884
- @contextmanager
1885
- def downloadStream(self, verifyChecksum=True, encoding=None, errors=None):
1886
- """
1887
- Context manager that gives out a download stream to download data.
1888
- """
1889
- info = self
1890
-
1891
- class DownloadPipe(ReadablePipe):
1892
- def writeTo(self, writable):
1893
- if info.content is not None:
1894
- writable.write(info.content)
1895
- elif info.version:
1896
- headerArgs = info._s3EncryptionArgs()
1897
- obj = info.outer.files_bucket.Object(compat_bytes(info.fileID))
1898
- for attempt in retry_s3():
1899
- with attempt:
1900
- obj.download_fileobj(
1901
- writable,
1902
- ExtraArgs={"VersionId": info.version, **headerArgs},
1903
- )
1904
- else:
1905
- assert False
1906
-
1907
- class HashingPipe(ReadableTransformingPipe):
1908
- """
1909
- Class which checksums all the data read through it. If it
1910
- reaches EOF and the checksum isn't correct, raises
1911
- ChecksumError.
1912
-
1913
- Assumes info actually has a checksum.
1914
- """
1915
-
1916
- def transform(self, readable, writable):
1917
- hasher = info._start_checksum(to_match=info.checksum)
1918
- contents = readable.read(1024 * 1024)
1919
- while contents != b"":
1920
- info._update_checksum(hasher, contents)
1921
- try:
1922
- writable.write(contents)
1923
- except BrokenPipeError:
1924
- # Read was stopped early by user code.
1925
- # Can't check the checksum.
1926
- return
1927
- contents = readable.read(1024 * 1024)
1928
- # We reached EOF in the input.
1929
- # Finish checksumming and verify.
1930
- info._finish_checksum(hasher)
1931
- # Now stop so EOF happens in the output.
1932
-
1933
- if verifyChecksum and self.checksum:
1934
- with DownloadPipe() as readable:
1935
- # Interpose a pipe to check the hash
1936
- with HashingPipe(
1937
- readable, encoding=encoding, errors=errors
1938
- ) as verified:
1939
- yield verified
1940
- else:
1941
- # Readable end of pipe produces text mode output if encoding specified
1942
- with DownloadPipe(encoding=encoding, errors=errors) as readable:
1943
- # No true checksum available, so don't hash
1944
- yield readable
1945
-
1946
- def delete(self):
1947
- store = self.outer
1948
- if self.previousVersion is not None:
1949
- expected: "UpdateConditionTypeDef" = {
1950
- "Name": "version",
1951
- "Value": cast(str, self.previousVersion),
1952
- }
1953
- for attempt in retry_sdb():
1954
- with attempt:
1955
- store.db.delete_attributes(
1956
- DomainName=store.files_domain_name,
1957
- ItemName=compat_bytes(self.fileID),
1958
- Expected=expected,
1959
- )
1960
- if self.previousVersion:
1961
- for attempt in retry_s3():
1962
- with attempt:
1963
- store.s3_client.delete_object(
1964
- Bucket=store.files_bucket.name,
1965
- Key=compat_bytes(self.fileID),
1966
- VersionId=self.previousVersion,
1967
- )
1968
-
1969
- def getSize(self):
1970
- """
1971
- Return the size of the referenced item in bytes.
1972
- """
1973
- if self.content is not None:
1974
- return len(self.content)
1975
- elif self.version:
1976
- for attempt in retry_s3():
1977
- with attempt:
1978
- obj = self.outer.files_bucket.Object(compat_bytes(self.fileID))
1979
- return obj.content_length
1980
- else:
1981
- return 0
1982
-
1983
- def _getSSEKey(self) -> Optional[bytes]:
1984
- sseKeyPath = self.outer.sseKeyPath
1985
- if sseKeyPath:
1986
- with open(sseKeyPath, "rb") as f:
1987
- sseKey = f.read()
1988
- return sseKey
1989
-
1990
- def _s3EncryptionArgs(self):
1991
- # the keys of the returned dictionary are unpacked to the corresponding boto3 optional
1992
- # parameters and will be used to set the http headers
1993
- if self.encrypted:
1994
- sseKey = self._getSSEKey()
1995
- assert (
1996
- sseKey is not None
1997
- ), "Content is encrypted but no key was provided."
1998
- assert len(sseKey) == 32
1999
- # boto3 encodes the key and calculates the MD5 for us
2000
- return {"SSECustomerAlgorithm": "AES256", "SSECustomerKey": sseKey}
2001
- else:
2002
- return {}
2003
-
2004
- def __repr__(self):
2005
- r = custom_repr
2006
- d = (
2007
- ("fileID", r(self.fileID)),
2008
- ("ownerID", r(self.ownerID)),
2009
- ("encrypted", r(self.encrypted)),
2010
- ("version", r(self.version)),
2011
- ("previousVersion", r(self.previousVersion)),
2012
- ("content", r(self.content)),
2013
- ("checksum", r(self.checksum)),
2014
- ("_numContentChunks", r(self._numContentChunks)),
2015
- )
2016
- return "{}({})".format(
2017
- type(self).__name__, ", ".join(f"{k}={v}" for k, v in d)
2018
- )
995
+ regions = EC2Regions.keys()
996
+ if region not in regions:
997
+ raise ValueError(f'AWS Region "{region}" is not one of: {regions}')
2019
998
 
2020
- versionings = dict(Enabled=True, Disabled=False, Suspended=None)
999
+ if not 3 <= len(jobstore_name) <= 56:
1000
+ raise ValueError(f'AWS jobstore name must be between 3 and 56 chars: '
1001
+ f'{jobstore_name} (len: {len(jobstore_name)})')
2021
1002
 
2022
- def _getBucketVersioning(self, bucket_name):
2023
- """
2024
- The status attribute of BucketVersioning can be 'Enabled', 'Suspended' or None (Disabled)
2025
- which we map to True, None and False respectively. Note that we've never seen a versioning
2026
- status of 'Disabled', only the None return value. Calling BucketVersioning.suspend() will
2027
- cause BucketVersioning.status to then return 'Suspended' even on a new bucket that never
2028
- had versioning enabled.
1003
+ if not re.compile(r'^[a-z0-9][a-z0-9-]+[a-z0-9]$').match(jobstore_name):
1004
+ raise ValueError(f"Invalid AWS jobstore name: '{jobstore_name}'. Must contain only digits, "
1005
+ f"lower-case letters, and hyphens. Must also not start or end in a hyphen.")
2029
1006
 
2030
- :param bucket_name: str
2031
- """
2032
- for attempt in retry_s3():
2033
- with attempt:
2034
- status = self.s3_resource.BucketVersioning(bucket_name).status
2035
- return self.versionings.get(status) if status else False
2036
-
2037
- # TODO: Make this retry more specific?
2038
- # example: https://github.com/DataBiosphere/toil/issues/3378
2039
- @retry()
2040
- def destroy(self):
2041
- # FIXME: Destruction of encrypted stores only works after initialize() or .resume()
2042
- # See https://github.com/BD2KGenomics/toil/issues/1041
2043
- try:
2044
- self._bind(create=False, block=False, check_versioning_consistency=False)
2045
- except BucketLocationConflictException:
2046
- # If the unique jobstore bucket name existed, _bind would have raised a
2047
- # BucketLocationConflictException before calling destroy. Calling _bind here again
2048
- # would reraise the same exception so we need to catch and ignore that exception.
2049
- pass
2050
- # TODO: Add other failure cases to be ignored here.
2051
- self._registered = None
2052
- if self.files_bucket is not None:
2053
- self._delete_bucket(self.files_bucket)
2054
- self.files_bucket = None
2055
- for name in "files_domain_name", "jobs_domain_name":
2056
- domainName = getattr(self, name)
2057
- if domainName is not None:
2058
- self._delete_domain(domainName)
2059
- setattr(self, name, None)
2060
- self._registered = False
2061
-
2062
- def _delete_domain(self, domainName):
2063
- for attempt in retry_sdb():
2064
- with attempt:
2065
- try:
2066
- self.db.delete_domain(DomainName=domainName)
2067
- except ClientError as e:
2068
- if not no_such_sdb_domain(e):
2069
- raise
2070
-
2071
- @staticmethod
2072
- def _delete_bucket(bucket):
2073
- """
2074
- :param bucket: S3.Bucket
2075
- """
2076
- for attempt in retry_s3():
2077
- with attempt:
2078
- try:
2079
- uploads = s3_boto3_client.list_multipart_uploads(
2080
- Bucket=bucket.name
2081
- ).get("Uploads")
2082
- if uploads:
2083
- for u in uploads:
2084
- s3_boto3_client.abort_multipart_upload(
2085
- Bucket=bucket.name, Key=u["Key"], UploadId=u["UploadId"]
2086
- )
2087
-
2088
- bucket.objects.all().delete()
2089
- bucket.object_versions.delete()
2090
- bucket.delete()
2091
- except s3_boto3_client.exceptions.NoSuchBucket:
2092
- pass
2093
- except ClientError as e:
2094
- if get_error_status(e) != 404:
2095
- raise
2096
-
2097
-
2098
- aRepr = reprlib.Repr()
2099
- aRepr.maxstring = 38 # so UUIDs don't get truncated (36 for UUID plus 2 for quotes)
2100
- custom_repr = aRepr.repr
2101
-
2102
-
2103
- class BucketLocationConflictException(LocatorException):
2104
- def __init__(self, bucketRegion):
2105
- super().__init__(
2106
- "A bucket with the same name as the jobstore was found in another region (%s). "
2107
- "Cannot proceed as the unique bucket name is already in use.",
2108
- locator=bucketRegion,
2109
- )
1007
+ if '--' in jobstore_name:
1008
+ raise ValueError(f"AWS jobstore names may not contain '--': {jobstore_name}")
1009
+ return region, bucket_name
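In practice the identifier handed to this parser is the `<region>:<name>` portion of an `aws:<region>:<name>` job store locator, and the returned bucket name is the store name with a `--toil` suffix. A hypothetical example, assuming the function is reachable on `AWSJobStore` as laid out in this diff:

    # Locator "aws:us-west-2:my-analysis" hands "us-west-2:my-analysis" to the parser.
    region, bucket_name = AWSJobStore.parse_jobstore_identifier("us-west-2:my-analysis")
    assert region == "us-west-2"
    assert bucket_name == "my-analysis--toil"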