toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. toil/__init__.py +124 -86
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +137 -77
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
  5. toil/batchSystems/awsBatch.py +237 -128
  6. toil/batchSystems/cleanup_support.py +22 -16
  7. toil/batchSystems/contained_executor.py +30 -26
  8. toil/batchSystems/gridengine.py +85 -49
  9. toil/batchSystems/htcondor.py +164 -87
  10. toil/batchSystems/kubernetes.py +622 -386
  11. toil/batchSystems/local_support.py +17 -12
  12. toil/batchSystems/lsf.py +132 -79
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +288 -149
  16. toil/batchSystems/mesos/executor.py +77 -49
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +39 -29
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +293 -123
  21. toil/batchSystems/slurm.py +651 -155
  22. toil/batchSystems/torque.py +46 -32
  23. toil/bus.py +141 -73
  24. toil/common.py +784 -397
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1137 -534
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +62 -41
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +88 -57
  32. toil/fileStores/cachingFileStore.py +711 -247
  33. toil/fileStores/nonCachingFileStore.py +113 -75
  34. toil/job.py +1031 -349
  35. toil/jobStores/abstractJobStore.py +387 -243
  36. toil/jobStores/aws/jobStore.py +772 -412
  37. toil/jobStores/aws/utils.py +161 -109
  38. toil/jobStores/conftest.py +1 -0
  39. toil/jobStores/fileJobStore.py +289 -151
  40. toil/jobStores/googleJobStore.py +137 -70
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +614 -269
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +55 -28
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +204 -58
  49. toil/lib/aws/utils.py +290 -213
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +83 -49
  53. toil/lib/docker.py +131 -103
  54. toil/lib/dockstore.py +379 -0
  55. toil/lib/ec2.py +322 -209
  56. toil/lib/ec2nodes.py +174 -105
  57. toil/lib/encryption/_dummy.py +5 -3
  58. toil/lib/encryption/_nacl.py +10 -6
  59. toil/lib/encryption/conftest.py +1 -0
  60. toil/lib/exceptions.py +26 -7
  61. toil/lib/expando.py +4 -2
  62. toil/lib/ftp_utils.py +217 -0
  63. toil/lib/generatedEC2Lists.py +127 -19
  64. toil/lib/history.py +1271 -0
  65. toil/lib/history_submission.py +681 -0
  66. toil/lib/humanize.py +6 -2
  67. toil/lib/io.py +121 -12
  68. toil/lib/iterables.py +4 -2
  69. toil/lib/memoize.py +12 -8
  70. toil/lib/misc.py +83 -18
  71. toil/lib/objects.py +2 -2
  72. toil/lib/resources.py +19 -7
  73. toil/lib/retry.py +125 -87
  74. toil/lib/threading.py +282 -80
  75. toil/lib/throttle.py +15 -14
  76. toil/lib/trs.py +390 -0
  77. toil/lib/web.py +38 -0
  78. toil/options/common.py +850 -402
  79. toil/options/cwl.py +185 -90
  80. toil/options/runner.py +50 -0
  81. toil/options/wdl.py +70 -19
  82. toil/provisioners/__init__.py +111 -46
  83. toil/provisioners/abstractProvisioner.py +322 -157
  84. toil/provisioners/aws/__init__.py +62 -30
  85. toil/provisioners/aws/awsProvisioner.py +980 -627
  86. toil/provisioners/clusterScaler.py +541 -279
  87. toil/provisioners/gceProvisioner.py +283 -180
  88. toil/provisioners/node.py +147 -79
  89. toil/realtimeLogger.py +34 -22
  90. toil/resource.py +137 -75
  91. toil/server/app.py +127 -61
  92. toil/server/celery_app.py +3 -1
  93. toil/server/cli/wes_cwl_runner.py +84 -55
  94. toil/server/utils.py +56 -31
  95. toil/server/wes/abstract_backend.py +64 -26
  96. toil/server/wes/amazon_wes_utils.py +21 -15
  97. toil/server/wes/tasks.py +121 -63
  98. toil/server/wes/toil_backend.py +142 -107
  99. toil/server/wsgi_app.py +4 -3
  100. toil/serviceManager.py +58 -22
  101. toil/statsAndLogging.py +183 -65
  102. toil/test/__init__.py +263 -179
  103. toil/test/batchSystems/batchSystemTest.py +438 -195
  104. toil/test/batchSystems/batch_system_plugin_test.py +18 -7
  105. toil/test/batchSystems/test_gridengine.py +173 -0
  106. toil/test/batchSystems/test_lsf_helper.py +67 -58
  107. toil/test/batchSystems/test_slurm.py +265 -49
  108. toil/test/cactus/test_cactus_integration.py +20 -22
  109. toil/test/cwl/conftest.py +39 -0
  110. toil/test/cwl/cwlTest.py +375 -72
  111. toil/test/cwl/measure_default_memory.cwl +12 -0
  112. toil/test/cwl/not_run_required_input.cwl +29 -0
  113. toil/test/cwl/optional-file.cwl +18 -0
  114. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  115. toil/test/docs/scriptsTest.py +60 -34
  116. toil/test/jobStores/jobStoreTest.py +412 -235
  117. toil/test/lib/aws/test_iam.py +116 -48
  118. toil/test/lib/aws/test_s3.py +16 -9
  119. toil/test/lib/aws/test_utils.py +5 -6
  120. toil/test/lib/dockerTest.py +118 -141
  121. toil/test/lib/test_conversions.py +113 -115
  122. toil/test/lib/test_ec2.py +57 -49
  123. toil/test/lib/test_history.py +212 -0
  124. toil/test/lib/test_misc.py +12 -5
  125. toil/test/lib/test_trs.py +161 -0
  126. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  127. toil/test/mesos/helloWorld.py +7 -6
  128. toil/test/mesos/stress.py +25 -20
  129. toil/test/options/options.py +7 -2
  130. toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
  131. toil/test/provisioners/clusterScalerTest.py +440 -250
  132. toil/test/provisioners/clusterTest.py +81 -42
  133. toil/test/provisioners/gceProvisionerTest.py +174 -100
  134. toil/test/provisioners/provisionerTest.py +25 -13
  135. toil/test/provisioners/restartScript.py +5 -4
  136. toil/test/server/serverTest.py +188 -141
  137. toil/test/sort/restart_sort.py +137 -68
  138. toil/test/sort/sort.py +134 -66
  139. toil/test/sort/sortTest.py +91 -49
  140. toil/test/src/autoDeploymentTest.py +140 -100
  141. toil/test/src/busTest.py +20 -18
  142. toil/test/src/checkpointTest.py +8 -2
  143. toil/test/src/deferredFunctionTest.py +49 -35
  144. toil/test/src/dockerCheckTest.py +33 -26
  145. toil/test/src/environmentTest.py +20 -10
  146. toil/test/src/fileStoreTest.py +538 -271
  147. toil/test/src/helloWorldTest.py +7 -4
  148. toil/test/src/importExportFileTest.py +61 -31
  149. toil/test/src/jobDescriptionTest.py +32 -17
  150. toil/test/src/jobEncapsulationTest.py +2 -0
  151. toil/test/src/jobFileStoreTest.py +74 -50
  152. toil/test/src/jobServiceTest.py +187 -73
  153. toil/test/src/jobTest.py +120 -70
  154. toil/test/src/miscTests.py +19 -18
  155. toil/test/src/promisedRequirementTest.py +82 -36
  156. toil/test/src/promisesTest.py +7 -6
  157. toil/test/src/realtimeLoggerTest.py +6 -6
  158. toil/test/src/regularLogTest.py +71 -37
  159. toil/test/src/resourceTest.py +80 -49
  160. toil/test/src/restartDAGTest.py +36 -22
  161. toil/test/src/resumabilityTest.py +9 -2
  162. toil/test/src/retainTempDirTest.py +45 -14
  163. toil/test/src/systemTest.py +12 -8
  164. toil/test/src/threadingTest.py +44 -25
  165. toil/test/src/toilContextManagerTest.py +10 -7
  166. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  167. toil/test/src/workerTest.py +33 -16
  168. toil/test/utils/toilDebugTest.py +70 -58
  169. toil/test/utils/toilKillTest.py +4 -5
  170. toil/test/utils/utilsTest.py +239 -102
  171. toil/test/wdl/wdltoil_test.py +789 -148
  172. toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
  173. toil/toilState.py +52 -26
  174. toil/utils/toilConfig.py +13 -4
  175. toil/utils/toilDebugFile.py +44 -27
  176. toil/utils/toilDebugJob.py +85 -25
  177. toil/utils/toilDestroyCluster.py +11 -6
  178. toil/utils/toilKill.py +8 -3
  179. toil/utils/toilLaunchCluster.py +251 -145
  180. toil/utils/toilMain.py +37 -16
  181. toil/utils/toilRsyncCluster.py +27 -14
  182. toil/utils/toilSshCluster.py +45 -22
  183. toil/utils/toilStats.py +75 -36
  184. toil/utils/toilStatus.py +226 -119
  185. toil/utils/toilUpdateEC2Instances.py +3 -1
  186. toil/version.py +6 -6
  187. toil/wdl/utils.py +5 -5
  188. toil/wdl/wdltoil.py +3528 -1053
  189. toil/worker.py +370 -149
  190. toil-8.1.0b1.dist-info/METADATA +178 -0
  191. toil-8.1.0b1.dist-info/RECORD +259 -0
  192. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
  193. toil-7.0.0.dist-info/METADATA +0 -158
  194. toil-7.0.0.dist-info/RECORD +0 -244
  195. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
  196. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
  197. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/lib/aws/utils.py CHANGED
@@ -15,102 +15,53 @@ import errno
15
15
  import logging
16
16
  import os
17
17
  import socket
18
- from typing import (Any,
19
- Callable,
20
- ContextManager,
21
- Dict,
22
- Iterable,
23
- Iterator,
24
- List,
25
- Optional,
26
- Set,
27
- cast)
18
+ from collections.abc import Iterable, Iterator
19
+ from typing import TYPE_CHECKING, Any, Callable, ContextManager, Optional, cast
28
20
  from urllib.parse import ParseResult
29
21
 
30
- from mypy_boto3_sdb.type_defs import AttributeTypeDef
31
- from toil.lib.aws import session, AWSRegionName, AWSServerErrors
22
+ # To import toil.lib.aws.session, the AWS libraries must be installed
23
+ from toil.lib.aws import AWSRegionName, AWSServerErrors, session
24
+ from toil.lib.conversions import strtobool
25
+ from toil.lib.memoize import memoize
32
26
  from toil.lib.misc import printq
33
- from toil.lib.retry import (DEFAULT_DELAYS,
34
- DEFAULT_TIMEOUT,
35
- get_error_code,
36
- get_error_status,
37
- old_retry,
38
- retry, ErrorCondition)
39
-
40
- try:
41
- from botocore.exceptions import ClientError, EndpointConnectionError
42
- from mypy_boto3_iam import IAMClient, IAMServiceResource
43
- from mypy_boto3_s3 import S3Client, S3ServiceResource
44
- from mypy_boto3_s3.literals import BucketLocationConstraintType
45
- from mypy_boto3_s3.service_resource import Bucket, Object
46
- from mypy_boto3_sdb import SimpleDBClient
47
- except ImportError:
48
- ClientError = None # type: ignore
49
- EndpointConnectionError = None # type: ignore
50
- # AWS/boto extra is not installed
27
+ from toil.lib.retry import (
28
+ DEFAULT_DELAYS,
29
+ DEFAULT_TIMEOUT,
30
+ get_error_code,
31
+ get_error_status,
32
+ old_retry,
33
+ retry,
34
+ )
35
+
36
+ if TYPE_CHECKING:
37
+ from mypy_boto3_s3 import S3ServiceResource
38
+ from mypy_boto3_s3.service_resource import Bucket
39
+ from mypy_boto3_s3.service_resource import Object as S3Object
40
+ from mypy_boto3_sdb.type_defs import AttributeTypeDef
41
+
42
+ from botocore.exceptions import ClientError, EndpointConnectionError
51
43
 
52
44
  logger = logging.getLogger(__name__)
53
45
 
54
46
  # These are error codes we expect from AWS if we are making requests too fast.
55
47
  # https://github.com/boto/botocore/blob/49f87350d54f55b687969ec8bf204df785975077/botocore/retries/standard.py#L316
56
48
  THROTTLED_ERROR_CODES = [
57
- 'Throttling',
58
- 'ThrottlingException',
59
- 'ThrottledException',
60
- 'RequestThrottledException',
61
- 'TooManyRequestsException',
62
- 'ProvisionedThroughputExceededException',
63
- 'TransactionInProgressException',
64
- 'RequestLimitExceeded',
65
- 'BandwidthLimitExceeded',
66
- 'LimitExceededException',
67
- 'RequestThrottled',
68
- 'SlowDown',
69
- 'PriorRequestNotComplete',
70
- 'EC2ThrottledException',
49
+ "Throttling",
50
+ "ThrottlingException",
51
+ "ThrottledException",
52
+ "RequestThrottledException",
53
+ "TooManyRequestsException",
54
+ "ProvisionedThroughputExceededException",
55
+ "TransactionInProgressException",
56
+ "RequestLimitExceeded",
57
+ "BandwidthLimitExceeded",
58
+ "LimitExceededException",
59
+ "RequestThrottled",
60
+ "SlowDown",
61
+ "PriorRequestNotComplete",
62
+ "EC2ThrottledException",
71
63
  ]
72
64
 
73
- @retry(errors=[AWSServerErrors])
74
- def delete_iam_role(
75
- role_name: str, region: Optional[str] = None, quiet: bool = True
76
- ) -> None:
77
- # TODO: the Boto3 type hints are a bit oversealous here; they want hundreds
78
- # of overloads of the client-getting methods to exist based on the literal
79
- # string passed in, to return exactly the right kind of client or resource.
80
- # So we end up having to wrap all the calls in casts, which kind of defeats
81
- # the point of a nice fluent method you can call with the name of the thing
82
- # you want; we should have been calling iam_client() and so on all along if
83
- # we wanted MyPy to be able to understand us. So at some point we should
84
- # consider revising our API here to be less annoying to explain to the type
85
- # checker.
86
- iam_client = session.client('iam', region_name=region)
87
- iam_resource = session.resource('iam', region_name=region)
88
- role = iam_resource.Role(role_name)
89
- # normal policies
90
- for attached_policy in role.attached_policies.all():
91
- printq(f'Now dissociating policy: {attached_policy.policy_name} from role {role.name}', quiet)
92
- role.detach_policy(PolicyArn=attached_policy.arn)
93
- # inline policies
94
- for inline_policy in role.policies.all():
95
- printq(f'Deleting inline policy: {inline_policy.policy_name} from role {role.name}', quiet)
96
- iam_client.delete_role_policy(RoleName=role.name, PolicyName=inline_policy.policy_name)
97
- iam_client.delete_role(RoleName=role_name)
98
- printq(f'Role {role_name} successfully deleted.', quiet)
99
-
100
-
101
- @retry(errors=[AWSServerErrors])
102
- def delete_iam_instance_profile(
103
- instance_profile_name: str, region: Optional[str] = None, quiet: bool = True
104
- ) -> None:
105
- iam_resource = session.resource("iam", region_name=region)
106
- instance_profile = iam_resource.InstanceProfile(instance_profile_name)
107
- if instance_profile.roles is not None:
108
- for role in instance_profile.roles:
109
- printq(f'Now dissociating role: {role.name} from instance profile {instance_profile_name}', quiet)
110
- instance_profile.remove_role(RoleName=role.name)
111
- instance_profile.delete()
112
- printq(f'Instance profile "{instance_profile_name}" successfully deleted.', quiet)
113
-
114
65
 
115
66
  @retry(errors=[AWSServerErrors])
116
67
  def delete_sdb_domain(
@@ -130,12 +81,12 @@ def connection_reset(e: Exception) -> bool:
130
81
  # errno is listed as 104. To be safe, we check for both:
131
82
  return isinstance(e, socket.error) and e.errno in (errno.ECONNRESET, 104)
132
83
 
84
+
133
85
  def connection_error(e: Exception) -> bool:
134
86
  """
135
87
  Return True if an error represents a failure to make a network connection.
136
88
  """
137
- return (connection_reset(e)
138
- or isinstance(e, EndpointConnectionError))
89
+ return connection_reset(e) or isinstance(e, EndpointConnectionError)
139
90
 
140
91
 
141
92
  # TODO: Replace with: @retry and ErrorCondition
@@ -143,34 +94,47 @@ def retryable_s3_errors(e: Exception) -> bool:
143
94
  """
144
95
  Return true if this is an error from S3 that looks like we ought to retry our request.
145
96
  """
146
- return (connection_error(e)
147
- or (isinstance(e, ClientError) and get_error_status(e) in (429, 500))
148
- or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
149
- # boto3 errors
150
- or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
151
- or (isinstance(e, ClientError) and 'BucketNotEmpty' in str(e))
152
- or (isinstance(e, ClientError) and e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') == 409 and 'try again' in str(e))
153
- or (isinstance(e, ClientError) and e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') in (404, 429, 500, 502, 503, 504)))
97
+ return (
98
+ connection_error(e)
99
+ or (isinstance(e, ClientError) and get_error_status(e) in (429, 500))
100
+ or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
101
+ # boto3 errors
102
+ or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
103
+ or (isinstance(e, ClientError) and "BucketNotEmpty" in str(e))
104
+ or (
105
+ isinstance(e, ClientError)
106
+ and e.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 409
107
+ and "try again" in str(e)
108
+ )
109
+ or (
110
+ isinstance(e, ClientError)
111
+ and e.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
112
+ in (404, 429, 500, 502, 503, 504)
113
+ )
114
+ )
154
115
 
155
116
 
156
- def retry_s3(delays: Iterable[float] = DEFAULT_DELAYS, timeout: float = DEFAULT_TIMEOUT, predicate: Callable[[Exception], bool] = retryable_s3_errors) -> Iterator[ContextManager[None]]:
117
+ def retry_s3(
118
+ delays: Iterable[float] = DEFAULT_DELAYS,
119
+ timeout: float = DEFAULT_TIMEOUT,
120
+ predicate: Callable[[Exception], bool] = retryable_s3_errors,
121
+ ) -> Iterator[ContextManager[None]]:
157
122
  """
158
123
  Retry iterator of context managers specifically for S3 operations.
159
124
  """
160
125
  return old_retry(delays=delays, timeout=timeout, predicate=predicate)
161
126
 
127
+
162
128
  @retry(errors=[AWSServerErrors])
163
129
  def delete_s3_bucket(
164
- s3_resource: "S3ServiceResource",
165
- bucket: str,
166
- quiet: bool = True
130
+ s3_resource: "S3ServiceResource", bucket: str, quiet: bool = True
167
131
  ) -> None:
168
132
  """
169
133
  Delete the given S3 bucket.
170
134
  """
171
- printq(f'Deleting s3 bucket: {bucket}', quiet)
135
+ printq(f"Deleting s3 bucket: {bucket}", quiet)
172
136
 
173
- paginator = s3_resource.meta.client.get_paginator('list_object_versions')
137
+ paginator = s3_resource.meta.client.get_paginator("list_object_versions")
174
138
  try:
175
139
  for response in paginator.paginate(Bucket=bucket):
176
140
  # Versions and delete markers can both go in here to be deleted.
@@ -178,15 +142,20 @@ def delete_s3_bucket(
178
142
  # defined for them in the stubs to express that. See
179
143
  # <https://github.com/vemel/mypy_boto3_builder/issues/123>. So we
180
144
  # have to do gymnastics to get them into the same list.
181
- to_delete: List[Dict[str, Any]] = cast(List[Dict[str, Any]], response.get('Versions', [])) + \
182
- cast(List[Dict[str, Any]], response.get('DeleteMarkers', []))
145
+ to_delete: list[dict[str, Any]] = cast(
146
+ list[dict[str, Any]], response.get("Versions", [])
147
+ ) + cast(list[dict[str, Any]], response.get("DeleteMarkers", []))
183
148
  for entry in to_delete:
184
- printq(f" Deleting {entry['Key']} version {entry['VersionId']}", quiet)
185
- s3_resource.meta.client.delete_object(Bucket=bucket, Key=entry['Key'], VersionId=entry['VersionId'])
149
+ printq(
150
+ f" Deleting {entry['Key']} version {entry['VersionId']}", quiet
151
+ )
152
+ s3_resource.meta.client.delete_object(
153
+ Bucket=bucket, Key=entry["Key"], VersionId=entry["VersionId"]
154
+ )
186
155
  s3_resource.Bucket(bucket).delete()
187
- printq(f'\n * Deleted s3 bucket successfully: {bucket}\n\n', quiet)
156
+ printq(f"\n * Deleted s3 bucket successfully: {bucket}\n\n", quiet)
188
157
  except s3_resource.meta.client.exceptions.NoSuchBucket:
189
- printq(f'\n * S3 bucket no longer exists: {bucket}\n\n', quiet)
158
+ printq(f"\n * S3 bucket no longer exists: {bucket}\n\n", quiet)
190
159
 
191
160
 
192
161
  def create_s3_bucket(
@@ -202,7 +171,7 @@ def create_s3_bucket(
202
171
 
203
172
  *ALL* S3 bucket creation should use this function.
204
173
  """
205
- logger.debug("Creating bucket '%s' in region %s.", bucket_name, region)
174
+ logger.info("Creating bucket '%s' in region %s.", bucket_name, region)
206
175
  if region == "us-east-1": # see https://github.com/boto/boto3/issues/125
207
176
  bucket = s3_resource.create_bucket(Bucket=bucket_name)
208
177
  else:
@@ -212,6 +181,7 @@ def create_s3_bucket(
212
181
  )
213
182
  return bucket
214
183
 
184
+
215
185
  @retry(errors=[ClientError])
216
186
  def enable_public_objects(bucket_name: str) -> None:
217
187
  """
@@ -235,7 +205,7 @@ def enable_public_objects(bucket_name: str) -> None:
235
205
  would be a very awkward way to do it. So we restore the old behavior.
236
206
  """
237
207
 
238
- s3_client = session.client('s3')
208
+ s3_client = session.client("s3")
239
209
 
240
210
  # Even though the new default is for public access to be prohibited, this
241
211
  # is implemented by adding new things attached to the bucket. If we remove
@@ -249,22 +219,41 @@ def enable_public_objects(bucket_name: str) -> None:
249
219
  s3_client.delete_bucket_ownership_controls(Bucket=bucket_name)
250
220
 
251
221
 
252
- def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only_strategies: Optional[Set[int]] = None) -> str:
222
+ class NoBucketLocationError(Exception):
223
+ """
224
+ Error to represent that we could not get a location for a bucket.
253
225
  """
254
- Get the AWS region name associated with the given S3 bucket.
226
+
227
+
228
+ def get_bucket_region(
229
+ bucket_name: str,
230
+ endpoint_url: Optional[str] = None,
231
+ only_strategies: Optional[set[int]] = None,
232
+ anonymous: Optional[bool] = None
233
+ ) -> str:
234
+ """
235
+ Get the AWS region name associated with the given S3 bucket, or raise NoBucketLocationError.
236
+
237
+ Does not log at info level or above when this does not work; failures are expected in some contexts.
255
238
 
256
239
  Takes an optional S3 API URL override.
257
240
 
258
241
  :param only_strategies: For testing, use only strategies with 1-based numbers in this set.
242
+
243
+ :raises NoBucketLocationError: if the bucket's region cannot be determined
244
+ (possibly due to lack of permissions).
259
245
  """
260
246
 
261
- s3_client = session.client('s3', endpoint_url=endpoint_url)
247
+ config = session.ANONYMOUS_CONFIG if anonymous else None
248
+ s3_client = session.client("s3", endpoint_url=endpoint_url, config=config)
262
249
 
263
250
  def attempt_get_bucket_location() -> Optional[str]:
264
251
  """
265
252
  Try and get the bucket location from the normal API call.
266
253
  """
267
- return s3_client.get_bucket_location(Bucket=bucket_name).get('LocationConstraint', None)
254
+ return s3_client.get_bucket_location(Bucket=bucket_name).get(
255
+ "LocationConstraint", None
256
+ )
268
257
 
269
258
  def attempt_get_bucket_location_from_us_east_1() -> Optional[str]:
270
259
  """
@@ -280,8 +269,10 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
280
269
  # It could also be because AWS open data buckets (which we tend to
281
270
  # encounter this problem for) tend to actually themselves be in
282
271
  # us-east-1.
283
- backup_s3_client = session.client('s3', region_name='us-east-1')
284
- return backup_s3_client.get_bucket_location(Bucket=bucket_name).get('LocationConstraint', None)
272
+ backup_s3_client = session.client("s3", region_name="us-east-1", config=config)
273
+ return backup_s3_client.get_bucket_location(Bucket=bucket_name).get(
274
+ "LocationConstraint", None
275
+ )
285
276
 
286
277
  def attempt_head_bucket() -> Optional[str]:
287
278
  """
@@ -293,11 +284,11 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
293
284
  # us where the bucket is. See
294
285
  # <https://github.com/aws/aws-sdk-cpp/issues/844#issuecomment-383747871>
295
286
  info = s3_client.head_bucket(Bucket=bucket_name)
296
- return info['ResponseMetadata']['HTTPHeaders']['x-amz-bucket-region']
287
+ return info["ResponseMetadata"]["HTTPHeaders"]["x-amz-bucket-region"]
297
288
 
298
289
  # Compose a list of strategies we want to try in order, which may work.
299
290
  # None is an acceptable return type that actually means something.
300
- strategies: List[Callable[[], Optional[str]]] = []
291
+ strategies: list[Callable[[], Optional[str]]] = []
301
292
  strategies.append(attempt_get_bucket_location)
302
293
  if not endpoint_url:
303
294
  # We should only try to talk to us-east-1 if we don't have a custom
@@ -305,17 +296,25 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
305
296
  strategies.append(attempt_get_bucket_location_from_us_east_1)
306
297
  strategies.append(attempt_head_bucket)
307
298
 
299
+ error_logs: list[tuple[int, str]] = []
308
300
  for attempt in retry_s3():
309
301
  with attempt:
310
302
  for i, strategy in enumerate(strategies):
311
- if only_strategies is not None and i+1 not in only_strategies:
303
+ if only_strategies is not None and i + 1 not in only_strategies:
312
304
  # We want to test running without this strategy.
313
305
  continue
314
306
  try:
315
- return bucket_location_to_region(strategy())
307
+ location = bucket_location_to_region(strategy())
308
+ logger.debug("Got bucket location from strategy %d", i + 1)
309
+ return location
316
310
  except ClientError as e:
317
- if get_error_code(e) == 'AccessDenied' and not endpoint_url:
318
- logger.warning('Strategy %d to get bucket location did not work: %s', i + 1, e)
311
+ if get_error_code(e) == "AccessDenied" and not endpoint_url:
312
+ logger.debug(
313
+ "Strategy %d to get bucket location did not work: %s",
314
+ i + 1,
315
+ e,
316
+ )
317
+ error_logs.append((i + 1, str(e)))
319
318
  last_error: Exception = e
320
319
  # We were blocked with this strategy. Move on to the
321
320
  # next strategy which might work.
@@ -324,127 +323,202 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
324
323
  raise
325
324
  except KeyError as e:
326
325
  # If we get a weird head response we will have a KeyError
327
- logger.warning('Strategy %d to get bucket location did not work: %s', i + 1, e)
326
+ logger.debug(
327
+ "Strategy %d to get bucket location did not work: %s", i + 1, e
328
+ )
329
+ error_logs.append((i + 1, str(e)))
328
330
  last_error = e
329
- # If we get here we ran out of attempts. Raise whatever the last problem was.
330
- raise last_error
331
+
332
+ error_messages = []
333
+ for rank, message in error_logs:
334
+ error_messages.append(
335
+ f"Strategy {rank} failed to get bucket location because: {message}"
336
+ )
337
+ # If we get here we ran out of attempts.
338
+ raise NoBucketLocationError(
339
+ "Could not get bucket location: " + "\n".join(error_messages)
340
+ ) from last_error
341
+
342
+ @memoize
343
+ def get_bucket_region_if_available(
344
+ bucket_name: str,
345
+ endpoint_url: Optional[str] = None,
346
+ only_strategies: Optional[set[int]] = None,
347
+ anonymous: Optional[bool] = None
348
+ ) -> Optional[str]:
349
+ """
350
+ Get the AWS region name associated with the given S3 bucket, or return None.
351
+
352
+ Caches results, so may not return the location for a bucket that has been
353
+ created but was previously observed to be nonexistent.
354
+
355
+ :param only_strategies: For testing, use only strategies with 1-based numbers in this set.
356
+ """
357
+
358
+ try:
359
+ return get_bucket_region(bucket_name, endpoint_url, only_strategies, anonymous)
360
+ except Exception as e:
361
+ if isinstance(e, NoBucketLocationError) or (isinstance(e, ClientError) and get_error_status(e) == 403):
362
+ # We can't know
363
+ return None
364
+ else:
365
+ raise
331
366
 
332
367
  def region_to_bucket_location(region: str) -> str:
333
- return '' if region == 'us-east-1' else region
368
+ return "" if region == "us-east-1" else region
369
+
334
370
 
335
371
  def bucket_location_to_region(location: Optional[str]) -> str:
336
372
  return "us-east-1" if location == "" or location is None else location
337
373
 
338
- def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "Object":
339
- """
340
- Extracts a key (object) from a given parsed s3:// URL.
341
374
 
342
- If existing is true and the object does not exist, raises FileNotFoundError.
375
+ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None, anonymous: Optional[bool] = None) -> "S3Object":
376
+ """
377
+ Extracts a key (object) from a given parsed s3:// URL.
343
378
 
344
- :param bool existing: If True, key is expected to exist. If False, key is expected not to
345
- exists and it will be created. If None, the key will be created if it doesn't exist.
346
- """
379
+ If existing is true and the object does not exist, raises FileNotFoundError.
347
380
 
348
- key_name = url.path[1:]
349
- bucket_name = url.netloc
381
+ :param bool existing: If True, key is expected to exist. If False, key is expected not to
382
+ exists and it will be created. If None, the key will be created if it doesn't exist.
350
383
 
351
- # Decide if we need to override Boto's built-in URL here.
352
- endpoint_url: Optional[str] = None
353
- host = os.environ.get('TOIL_S3_HOST', None)
354
- port = os.environ.get('TOIL_S3_PORT', None)
355
- protocol = 'https'
356
- if os.environ.get('TOIL_S3_USE_SSL', True) == 'False':
357
- protocol = 'http'
358
- if host:
359
- endpoint_url = f'{protocol}://{host}' + f':{port}' if port else ''
384
+ :raises FileNotFoundError: when existing is True and the object does not exist.
385
+ :raises RuntimeError: when existing is False but the object exists.
386
+ :raises PermissionError: when we are not authorized to look at the object.
387
+ """
360
388
 
361
- # TODO: OrdinaryCallingFormat equivalent in boto3?
362
- # if botoargs:
363
- # botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
389
+ key_name = url.path[1:]
390
+ bucket_name = url.netloc
391
+
392
+ # Decide if we need to override Boto's built-in URL here.
393
+ endpoint_url: Optional[str] = None
394
+ host = os.environ.get("TOIL_S3_HOST", None)
395
+ port = os.environ.get("TOIL_S3_PORT", None)
396
+ protocol = "https"
397
+ if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
398
+ protocol = "http"
399
+ if host:
400
+ endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
401
+
402
+ # TODO: OrdinaryCallingFormat equivalent in boto3?
403
+ # if botoargs:
404
+ # botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
405
+
406
+ config = session.ANONYMOUS_CONFIG if anonymous else None
407
+ # Get the bucket's region to avoid a redirect per request.
408
+ # Cache the result
409
+ region = get_bucket_region_if_available(bucket_name, endpoint_url=endpoint_url, anonymous=anonymous)
410
+ if region is not None:
411
+ s3 = session.resource("s3", region_name=region, endpoint_url=endpoint_url, config=config)
412
+ else:
413
+ # We can't get the bucket location, perhaps because we don't have
414
+ # permission to do that.
415
+ logger.debug("Couldn't get bucket location")
416
+ logger.debug("Fall back to not specifying location")
417
+ s3 = session.resource("s3", endpoint_url=endpoint_url, config=config)
364
418
 
365
- try:
366
- # Get the bucket's region to avoid a redirect per request
367
- region = get_bucket_region(bucket_name, endpoint_url=endpoint_url)
368
- s3 = session.resource('s3', region_name=region, endpoint_url=endpoint_url)
369
- except ClientError:
370
- # Probably don't have permission.
371
- # TODO: check if it is that
372
- s3 = session.resource('s3', endpoint_url=endpoint_url)
419
+ obj = s3.Object(bucket_name, key_name)
420
+ objExists = True
373
421
 
374
- obj = s3.Object(bucket_name, key_name)
375
- objExists = True
422
+ try:
423
+ obj.load()
424
+ except ClientError as e:
425
+ if get_error_status(e) == 404:
426
+ objExists = False
427
+ elif get_error_status(e) == 403:
428
+ raise PermissionError(
429
+ f"Key '{key_name}' is not accessible in bucket '{bucket_name}'."
430
+ ) from e
431
+ else:
432
+ raise
433
+ if existing is True and not objExists:
434
+ raise FileNotFoundError(
435
+ f"Key '{key_name}' does not exist in bucket '{bucket_name}'."
436
+ )
437
+ elif existing is False and objExists:
438
+ raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
376
439
 
440
+ if not objExists:
377
441
  try:
378
- obj.load()
442
+ obj.put() # write an empty file
379
443
  except ClientError as e:
380
- if get_error_status(e) == 404:
381
- objExists = False
444
+ if get_error_status(e) == 403:
445
+ raise PermissionError(
446
+ f"Key '{key_name}' is not writable in bucket '{bucket_name}'."
447
+ ) from e
382
448
  else:
383
449
  raise
384
- if existing is True and not objExists:
385
- raise FileNotFoundError(f"Key '{key_name}' does not exist in bucket '{bucket_name}'.")
386
- elif existing is False and objExists:
387
- raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
388
-
389
- if not objExists:
390
- obj.put() # write an empty file
391
- return obj
450
+ return obj
392
451
 
393
452
 
394
453
@retry(errors=[AWSServerErrors])
def list_objects_for_url(url: ParseResult, anonymous: Optional[bool] = None) -> list[str]:
    """
    List the immediate children of a given parsed s3:// URL.

    Extracts a key (object) from a given parsed s3:// URL. The URL will be
    supplemented with a trailing slash if it is missing.

    :param url: parsed s3:// URL naming a bucket and an optional key prefix
    :param anonymous: if truthy, make the request without AWS credentials
    :raises PermissionError: when we are not authorized to do the list operation.
    :return: item names under the prefix, relative to the prefix
    """

    key_name = url.path[1:]
    bucket_name = url.netloc

    if key_name != "" and not key_name.endswith("/"):
        # Make sure to put the trailing slash on the key, or else we'll see
        # a prefix of just it.
        key_name = key_name + "/"

    # Decide if we need to override Boto's built-in URL here.
    # TODO: Deduplicate with get_object_for_url, or push down into session module
    endpoint_url: Optional[str] = None
    host = os.environ.get("TOIL_S3_HOST", None)
    port = os.environ.get("TOIL_S3_PORT", None)
    protocol = "https"
    if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
        protocol = "http"
    if host:
        # The port suffix must be parenthesized: without parentheses the
        # conditional expression binds the whole concatenation, so a host
        # with no TOIL_S3_PORT would produce endpoint_url == "" and the
        # custom host would be silently ignored.
        endpoint_url = f"{protocol}://{host}" + (f":{port}" if port else "")

    # Anonymous access needs an unsigned-request config; None means default
    # credential resolution.
    config = session.ANONYMOUS_CONFIG if anonymous else None
    client = session.client("s3", endpoint_url=endpoint_url, config=config)

    listing = []

    try:
        paginator = client.get_paginator("list_objects_v2")
        result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter="/")
        for page in result:
            if "CommonPrefixes" in page:
                # "Subdirectories" come back as common prefixes because of
                # the "/" delimiter.
                for prefix_item in page["CommonPrefixes"]:
                    listing.append(prefix_item["Prefix"][len(key_name):])
            if "Contents" in page:
                for content_item in page["Contents"]:
                    if content_item["Key"] == key_name:
                        # Ignore folder name itself
                        continue
                    listing.append(content_item["Key"][len(key_name):])
    except ClientError as e:
        if get_error_status(e) == 403:
            raise PermissionError(
                f"Prefix '{key_name}' is not authorized to be listed in bucket '{bucket_name}'."
            ) from e
        else:
            raise

    logger.debug("Found in %s items: %s", url, listing)
    return listing
438
511
 
439
def flatten_tags(tags: dict[str, str]) -> list[dict[str, str]]:
    """
    Convert tags from a key to value dict into a list of 'Key': xxx, 'Value': xxx dicts.
    """
    flattened = []
    for tag_key, tag_value in tags.items():
        flattened.append({"Key": tag_key, "Value": tag_value})
    return flattened
444
517
 
445
518
 
446
- def boto3_pager(requestor_callable: Callable[..., Any], result_attribute_name: str,
447
- **kwargs: Any) -> Iterable[Any]:
519
+ def boto3_pager(
520
+ requestor_callable: Callable[..., Any], result_attribute_name: str, **kwargs: Any
521
+ ) -> Iterable[Any]:
448
522
  """
449
523
  Yield all the results from calling the given Boto 3 method with the
450
524
  given keyword arguments, paging through the results using the Marker or
@@ -465,7 +539,7 @@ def boto3_pager(requestor_callable: Callable[..., Any], result_attribute_name: s
465
539
  yield from page.get(result_attribute_name, [])
466
540
 
467
541
 
468
def get_item_from_attributes(attributes: list["AttributeTypeDef"], name: str) -> Any:
    """
    Given a list of attributes, find the attribute associated with the name and return its corresponding value.

    If the attribute with the name does not exist, the function will return None.

    :param attributes: list of attributes
    :param name: name of the attribute
    :return: value of the attribute
    """
    # Scan in order and return the first match, mirroring next()'s behavior
    # over a generator.
    for attribute in attributes:
        if attribute["Name"] == name:
            return attribute["Value"]
    return None