toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +124 -86
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +39 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +651 -155
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +784 -397
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1137 -534
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +1031 -349
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +772 -412
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +204 -58
- toil/lib/aws/utils.py +290 -213
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/dockstore.py +379 -0
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -105
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/history.py +1271 -0
- toil/lib/history_submission.py +681 -0
- toil/lib/humanize.py +6 -2
- toil/lib/io.py +121 -12
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +83 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +125 -87
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/lib/trs.py +390 -0
- toil/lib/web.py +38 -0
- toil/options/common.py +850 -402
- toil/options/cwl.py +185 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +283 -180
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +84 -55
- toil/server/utils.py +56 -31
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +183 -65
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +265 -49
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +375 -72
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_history.py +212 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/lib/test_trs.py +161 -0
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +6 -6
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3528 -1053
- toil/worker.py +370 -149
- toil-8.1.0b1.dist-info/METADATA +178 -0
- toil-8.1.0b1.dist-info/RECORD +259 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/lib/aws/utils.py
CHANGED
|
@@ -15,102 +15,53 @@ import errno
|
|
|
15
15
|
import logging
|
|
16
16
|
import os
|
|
17
17
|
import socket
|
|
18
|
-
from
|
|
19
|
-
|
|
20
|
-
ContextManager,
|
|
21
|
-
Dict,
|
|
22
|
-
Iterable,
|
|
23
|
-
Iterator,
|
|
24
|
-
List,
|
|
25
|
-
Optional,
|
|
26
|
-
Set,
|
|
27
|
-
cast)
|
|
18
|
+
from collections.abc import Iterable, Iterator
|
|
19
|
+
from typing import TYPE_CHECKING, Any, Callable, ContextManager, Optional, cast
|
|
28
20
|
from urllib.parse import ParseResult
|
|
29
21
|
|
|
30
|
-
|
|
31
|
-
from toil.lib.aws import
|
|
22
|
+
# To import toil.lib.aws.session, the AWS libraries must be installed
|
|
23
|
+
from toil.lib.aws import AWSRegionName, AWSServerErrors, session
|
|
24
|
+
from toil.lib.conversions import strtobool
|
|
25
|
+
from toil.lib.memoize import memoize
|
|
32
26
|
from toil.lib.misc import printq
|
|
33
|
-
from toil.lib.retry import (
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
from mypy_boto3_s3 import
|
|
44
|
-
from mypy_boto3_s3.
|
|
45
|
-
from mypy_boto3_s3.service_resource import
|
|
46
|
-
from mypy_boto3_sdb import
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
EndpointConnectionError = None # type: ignore
|
|
50
|
-
# AWS/boto extra is not installed
|
|
27
|
+
from toil.lib.retry import (
|
|
28
|
+
DEFAULT_DELAYS,
|
|
29
|
+
DEFAULT_TIMEOUT,
|
|
30
|
+
get_error_code,
|
|
31
|
+
get_error_status,
|
|
32
|
+
old_retry,
|
|
33
|
+
retry,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
if TYPE_CHECKING:
|
|
37
|
+
from mypy_boto3_s3 import S3ServiceResource
|
|
38
|
+
from mypy_boto3_s3.service_resource import Bucket
|
|
39
|
+
from mypy_boto3_s3.service_resource import Object as S3Object
|
|
40
|
+
from mypy_boto3_sdb.type_defs import AttributeTypeDef
|
|
41
|
+
|
|
42
|
+
from botocore.exceptions import ClientError, EndpointConnectionError
|
|
51
43
|
|
|
52
44
|
logger = logging.getLogger(__name__)
|
|
53
45
|
|
|
54
46
|
# These are error codes we expect from AWS if we are making requests too fast.
|
|
55
47
|
# https://github.com/boto/botocore/blob/49f87350d54f55b687969ec8bf204df785975077/botocore/retries/standard.py#L316
|
|
56
48
|
THROTTLED_ERROR_CODES = [
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
49
|
+
"Throttling",
|
|
50
|
+
"ThrottlingException",
|
|
51
|
+
"ThrottledException",
|
|
52
|
+
"RequestThrottledException",
|
|
53
|
+
"TooManyRequestsException",
|
|
54
|
+
"ProvisionedThroughputExceededException",
|
|
55
|
+
"TransactionInProgressException",
|
|
56
|
+
"RequestLimitExceeded",
|
|
57
|
+
"BandwidthLimitExceeded",
|
|
58
|
+
"LimitExceededException",
|
|
59
|
+
"RequestThrottled",
|
|
60
|
+
"SlowDown",
|
|
61
|
+
"PriorRequestNotComplete",
|
|
62
|
+
"EC2ThrottledException",
|
|
71
63
|
]
|
|
72
64
|
|
|
73
|
-
@retry(errors=[AWSServerErrors])
|
|
74
|
-
def delete_iam_role(
|
|
75
|
-
role_name: str, region: Optional[str] = None, quiet: bool = True
|
|
76
|
-
) -> None:
|
|
77
|
-
# TODO: the Boto3 type hints are a bit oversealous here; they want hundreds
|
|
78
|
-
# of overloads of the client-getting methods to exist based on the literal
|
|
79
|
-
# string passed in, to return exactly the right kind of client or resource.
|
|
80
|
-
# So we end up having to wrap all the calls in casts, which kind of defeats
|
|
81
|
-
# the point of a nice fluent method you can call with the name of the thing
|
|
82
|
-
# you want; we should have been calling iam_client() and so on all along if
|
|
83
|
-
# we wanted MyPy to be able to understand us. So at some point we should
|
|
84
|
-
# consider revising our API here to be less annoying to explain to the type
|
|
85
|
-
# checker.
|
|
86
|
-
iam_client = session.client('iam', region_name=region)
|
|
87
|
-
iam_resource = session.resource('iam', region_name=region)
|
|
88
|
-
role = iam_resource.Role(role_name)
|
|
89
|
-
# normal policies
|
|
90
|
-
for attached_policy in role.attached_policies.all():
|
|
91
|
-
printq(f'Now dissociating policy: {attached_policy.policy_name} from role {role.name}', quiet)
|
|
92
|
-
role.detach_policy(PolicyArn=attached_policy.arn)
|
|
93
|
-
# inline policies
|
|
94
|
-
for inline_policy in role.policies.all():
|
|
95
|
-
printq(f'Deleting inline policy: {inline_policy.policy_name} from role {role.name}', quiet)
|
|
96
|
-
iam_client.delete_role_policy(RoleName=role.name, PolicyName=inline_policy.policy_name)
|
|
97
|
-
iam_client.delete_role(RoleName=role_name)
|
|
98
|
-
printq(f'Role {role_name} successfully deleted.', quiet)
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
@retry(errors=[AWSServerErrors])
|
|
102
|
-
def delete_iam_instance_profile(
|
|
103
|
-
instance_profile_name: str, region: Optional[str] = None, quiet: bool = True
|
|
104
|
-
) -> None:
|
|
105
|
-
iam_resource = session.resource("iam", region_name=region)
|
|
106
|
-
instance_profile = iam_resource.InstanceProfile(instance_profile_name)
|
|
107
|
-
if instance_profile.roles is not None:
|
|
108
|
-
for role in instance_profile.roles:
|
|
109
|
-
printq(f'Now dissociating role: {role.name} from instance profile {instance_profile_name}', quiet)
|
|
110
|
-
instance_profile.remove_role(RoleName=role.name)
|
|
111
|
-
instance_profile.delete()
|
|
112
|
-
printq(f'Instance profile "{instance_profile_name}" successfully deleted.', quiet)
|
|
113
|
-
|
|
114
65
|
|
|
115
66
|
@retry(errors=[AWSServerErrors])
|
|
116
67
|
def delete_sdb_domain(
|
|
@@ -130,12 +81,12 @@ def connection_reset(e: Exception) -> bool:
|
|
|
130
81
|
# errno is listed as 104. To be safe, we check for both:
|
|
131
82
|
return isinstance(e, socket.error) and e.errno in (errno.ECONNRESET, 104)
|
|
132
83
|
|
|
84
|
+
|
|
133
85
|
def connection_error(e: Exception) -> bool:
|
|
134
86
|
"""
|
|
135
87
|
Return True if an error represents a failure to make a network connection.
|
|
136
88
|
"""
|
|
137
|
-
return
|
|
138
|
-
or isinstance(e, EndpointConnectionError))
|
|
89
|
+
return connection_reset(e) or isinstance(e, EndpointConnectionError)
|
|
139
90
|
|
|
140
91
|
|
|
141
92
|
# TODO: Replace with: @retry and ErrorCondition
|
|
@@ -143,34 +94,47 @@ def retryable_s3_errors(e: Exception) -> bool:
|
|
|
143
94
|
"""
|
|
144
95
|
Return true if this is an error from S3 that looks like we ought to retry our request.
|
|
145
96
|
"""
|
|
146
|
-
return (
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
97
|
+
return (
|
|
98
|
+
connection_error(e)
|
|
99
|
+
or (isinstance(e, ClientError) and get_error_status(e) in (429, 500))
|
|
100
|
+
or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
|
|
101
|
+
# boto3 errors
|
|
102
|
+
or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
|
|
103
|
+
or (isinstance(e, ClientError) and "BucketNotEmpty" in str(e))
|
|
104
|
+
or (
|
|
105
|
+
isinstance(e, ClientError)
|
|
106
|
+
and e.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 409
|
|
107
|
+
and "try again" in str(e)
|
|
108
|
+
)
|
|
109
|
+
or (
|
|
110
|
+
isinstance(e, ClientError)
|
|
111
|
+
and e.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
|
|
112
|
+
in (404, 429, 500, 502, 503, 504)
|
|
113
|
+
)
|
|
114
|
+
)
|
|
154
115
|
|
|
155
116
|
|
|
156
|
-
def retry_s3(
|
|
117
|
+
def retry_s3(
|
|
118
|
+
delays: Iterable[float] = DEFAULT_DELAYS,
|
|
119
|
+
timeout: float = DEFAULT_TIMEOUT,
|
|
120
|
+
predicate: Callable[[Exception], bool] = retryable_s3_errors,
|
|
121
|
+
) -> Iterator[ContextManager[None]]:
|
|
157
122
|
"""
|
|
158
123
|
Retry iterator of context managers specifically for S3 operations.
|
|
159
124
|
"""
|
|
160
125
|
return old_retry(delays=delays, timeout=timeout, predicate=predicate)
|
|
161
126
|
|
|
127
|
+
|
|
162
128
|
@retry(errors=[AWSServerErrors])
|
|
163
129
|
def delete_s3_bucket(
|
|
164
|
-
s3_resource: "S3ServiceResource",
|
|
165
|
-
bucket: str,
|
|
166
|
-
quiet: bool = True
|
|
130
|
+
s3_resource: "S3ServiceResource", bucket: str, quiet: bool = True
|
|
167
131
|
) -> None:
|
|
168
132
|
"""
|
|
169
133
|
Delete the given S3 bucket.
|
|
170
134
|
"""
|
|
171
|
-
printq(f
|
|
135
|
+
printq(f"Deleting s3 bucket: {bucket}", quiet)
|
|
172
136
|
|
|
173
|
-
paginator = s3_resource.meta.client.get_paginator(
|
|
137
|
+
paginator = s3_resource.meta.client.get_paginator("list_object_versions")
|
|
174
138
|
try:
|
|
175
139
|
for response in paginator.paginate(Bucket=bucket):
|
|
176
140
|
# Versions and delete markers can both go in here to be deleted.
|
|
@@ -178,15 +142,20 @@ def delete_s3_bucket(
|
|
|
178
142
|
# defined for them in the stubs to express that. See
|
|
179
143
|
# <https://github.com/vemel/mypy_boto3_builder/issues/123>. So we
|
|
180
144
|
# have to do gymnastics to get them into the same list.
|
|
181
|
-
to_delete:
|
|
182
|
-
|
|
145
|
+
to_delete: list[dict[str, Any]] = cast(
|
|
146
|
+
list[dict[str, Any]], response.get("Versions", [])
|
|
147
|
+
) + cast(list[dict[str, Any]], response.get("DeleteMarkers", []))
|
|
183
148
|
for entry in to_delete:
|
|
184
|
-
printq(
|
|
185
|
-
|
|
149
|
+
printq(
|
|
150
|
+
f" Deleting {entry['Key']} version {entry['VersionId']}", quiet
|
|
151
|
+
)
|
|
152
|
+
s3_resource.meta.client.delete_object(
|
|
153
|
+
Bucket=bucket, Key=entry["Key"], VersionId=entry["VersionId"]
|
|
154
|
+
)
|
|
186
155
|
s3_resource.Bucket(bucket).delete()
|
|
187
|
-
printq(f
|
|
156
|
+
printq(f"\n * Deleted s3 bucket successfully: {bucket}\n\n", quiet)
|
|
188
157
|
except s3_resource.meta.client.exceptions.NoSuchBucket:
|
|
189
|
-
printq(f
|
|
158
|
+
printq(f"\n * S3 bucket no longer exists: {bucket}\n\n", quiet)
|
|
190
159
|
|
|
191
160
|
|
|
192
161
|
def create_s3_bucket(
|
|
@@ -202,7 +171,7 @@ def create_s3_bucket(
|
|
|
202
171
|
|
|
203
172
|
*ALL* S3 bucket creation should use this function.
|
|
204
173
|
"""
|
|
205
|
-
logger.
|
|
174
|
+
logger.info("Creating bucket '%s' in region %s.", bucket_name, region)
|
|
206
175
|
if region == "us-east-1": # see https://github.com/boto/boto3/issues/125
|
|
207
176
|
bucket = s3_resource.create_bucket(Bucket=bucket_name)
|
|
208
177
|
else:
|
|
@@ -212,6 +181,7 @@ def create_s3_bucket(
|
|
|
212
181
|
)
|
|
213
182
|
return bucket
|
|
214
183
|
|
|
184
|
+
|
|
215
185
|
@retry(errors=[ClientError])
|
|
216
186
|
def enable_public_objects(bucket_name: str) -> None:
|
|
217
187
|
"""
|
|
@@ -235,7 +205,7 @@ def enable_public_objects(bucket_name: str) -> None:
|
|
|
235
205
|
would be a very awkward way to do it. So we restore the old behavior.
|
|
236
206
|
"""
|
|
237
207
|
|
|
238
|
-
s3_client = session.client(
|
|
208
|
+
s3_client = session.client("s3")
|
|
239
209
|
|
|
240
210
|
# Even though the new default is for public access to be prohibited, this
|
|
241
211
|
# is implemented by adding new things attached to the bucket. If we remove
|
|
@@ -249,22 +219,41 @@ def enable_public_objects(bucket_name: str) -> None:
|
|
|
249
219
|
s3_client.delete_bucket_ownership_controls(Bucket=bucket_name)
|
|
250
220
|
|
|
251
221
|
|
|
252
|
-
|
|
222
|
+
class NoBucketLocationError(Exception):
|
|
223
|
+
"""
|
|
224
|
+
Error to represent that we could not get a location for a bucket.
|
|
253
225
|
"""
|
|
254
|
-
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def get_bucket_region(
|
|
229
|
+
bucket_name: str,
|
|
230
|
+
endpoint_url: Optional[str] = None,
|
|
231
|
+
only_strategies: Optional[set[int]] = None,
|
|
232
|
+
anonymous: Optional[bool] = None
|
|
233
|
+
) -> str:
|
|
234
|
+
"""
|
|
235
|
+
Get the AWS region name associated with the given S3 bucket, or raise NoBucketLocationError.
|
|
236
|
+
|
|
237
|
+
Does not log at info level or above when this does not work; failures are expected in some contexts.
|
|
255
238
|
|
|
256
239
|
Takes an optional S3 API URL override.
|
|
257
240
|
|
|
258
241
|
:param only_strategies: For testing, use only strategies with 1-based numbers in this set.
|
|
242
|
+
|
|
243
|
+
:raises NoBucketLocationError: if the bucket's region cannot be determined
|
|
244
|
+
(possibly due to lack of permissions).
|
|
259
245
|
"""
|
|
260
246
|
|
|
261
|
-
|
|
247
|
+
config = session.ANONYMOUS_CONFIG if anonymous else None
|
|
248
|
+
s3_client = session.client("s3", endpoint_url=endpoint_url, config=config)
|
|
262
249
|
|
|
263
250
|
def attempt_get_bucket_location() -> Optional[str]:
|
|
264
251
|
"""
|
|
265
252
|
Try and get the bucket location from the normal API call.
|
|
266
253
|
"""
|
|
267
|
-
return s3_client.get_bucket_location(Bucket=bucket_name).get(
|
|
254
|
+
return s3_client.get_bucket_location(Bucket=bucket_name).get(
|
|
255
|
+
"LocationConstraint", None
|
|
256
|
+
)
|
|
268
257
|
|
|
269
258
|
def attempt_get_bucket_location_from_us_east_1() -> Optional[str]:
|
|
270
259
|
"""
|
|
@@ -280,8 +269,10 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
|
|
|
280
269
|
# It could also be because AWS open data buckets (which we tend to
|
|
281
270
|
# encounter this problem for) tend to actually themselves be in
|
|
282
271
|
# us-east-1.
|
|
283
|
-
backup_s3_client = session.client(
|
|
284
|
-
return backup_s3_client.get_bucket_location(Bucket=bucket_name).get(
|
|
272
|
+
backup_s3_client = session.client("s3", region_name="us-east-1", config=config)
|
|
273
|
+
return backup_s3_client.get_bucket_location(Bucket=bucket_name).get(
|
|
274
|
+
"LocationConstraint", None
|
|
275
|
+
)
|
|
285
276
|
|
|
286
277
|
def attempt_head_bucket() -> Optional[str]:
|
|
287
278
|
"""
|
|
@@ -293,11 +284,11 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
|
|
|
293
284
|
# us where the bucket is. See
|
|
294
285
|
# <https://github.com/aws/aws-sdk-cpp/issues/844#issuecomment-383747871>
|
|
295
286
|
info = s3_client.head_bucket(Bucket=bucket_name)
|
|
296
|
-
return info[
|
|
287
|
+
return info["ResponseMetadata"]["HTTPHeaders"]["x-amz-bucket-region"]
|
|
297
288
|
|
|
298
289
|
# Compose a list of strategies we want to try in order, which may work.
|
|
299
290
|
# None is an acceptable return type that actually means something.
|
|
300
|
-
strategies:
|
|
291
|
+
strategies: list[Callable[[], Optional[str]]] = []
|
|
301
292
|
strategies.append(attempt_get_bucket_location)
|
|
302
293
|
if not endpoint_url:
|
|
303
294
|
# We should only try to talk to us-east-1 if we don't have a custom
|
|
@@ -305,17 +296,25 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
|
|
|
305
296
|
strategies.append(attempt_get_bucket_location_from_us_east_1)
|
|
306
297
|
strategies.append(attempt_head_bucket)
|
|
307
298
|
|
|
299
|
+
error_logs: list[tuple[int, str]] = []
|
|
308
300
|
for attempt in retry_s3():
|
|
309
301
|
with attempt:
|
|
310
302
|
for i, strategy in enumerate(strategies):
|
|
311
|
-
if only_strategies is not None and i+1 not in only_strategies:
|
|
303
|
+
if only_strategies is not None and i + 1 not in only_strategies:
|
|
312
304
|
# We want to test running without this strategy.
|
|
313
305
|
continue
|
|
314
306
|
try:
|
|
315
|
-
|
|
307
|
+
location = bucket_location_to_region(strategy())
|
|
308
|
+
logger.debug("Got bucket location from strategy %d", i + 1)
|
|
309
|
+
return location
|
|
316
310
|
except ClientError as e:
|
|
317
|
-
if get_error_code(e) ==
|
|
318
|
-
logger.
|
|
311
|
+
if get_error_code(e) == "AccessDenied" and not endpoint_url:
|
|
312
|
+
logger.debug(
|
|
313
|
+
"Strategy %d to get bucket location did not work: %s",
|
|
314
|
+
i + 1,
|
|
315
|
+
e,
|
|
316
|
+
)
|
|
317
|
+
error_logs.append((i + 1, str(e)))
|
|
319
318
|
last_error: Exception = e
|
|
320
319
|
# We were blocked with this strategy. Move on to the
|
|
321
320
|
# next strategy which might work.
|
|
@@ -324,127 +323,202 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
|
|
|
324
323
|
raise
|
|
325
324
|
except KeyError as e:
|
|
326
325
|
# If we get a weird head response we will have a KeyError
|
|
327
|
-
logger.
|
|
326
|
+
logger.debug(
|
|
327
|
+
"Strategy %d to get bucket location did not work: %s", i + 1, e
|
|
328
|
+
)
|
|
329
|
+
error_logs.append((i + 1, str(e)))
|
|
328
330
|
last_error = e
|
|
329
|
-
|
|
330
|
-
|
|
331
|
+
|
|
332
|
+
error_messages = []
|
|
333
|
+
for rank, message in error_logs:
|
|
334
|
+
error_messages.append(
|
|
335
|
+
f"Strategy {rank} failed to get bucket location because: {message}"
|
|
336
|
+
)
|
|
337
|
+
# If we get here we ran out of attempts.
|
|
338
|
+
raise NoBucketLocationError(
|
|
339
|
+
"Could not get bucket location: " + "\n".join(error_messages)
|
|
340
|
+
) from last_error
|
|
341
|
+
|
|
342
|
+
@memoize
|
|
343
|
+
def get_bucket_region_if_available(
|
|
344
|
+
bucket_name: str,
|
|
345
|
+
endpoint_url: Optional[str] = None,
|
|
346
|
+
only_strategies: Optional[set[int]] = None,
|
|
347
|
+
anonymous: Optional[bool] = None
|
|
348
|
+
) -> Optional[str]:
|
|
349
|
+
"""
|
|
350
|
+
Get the AWS region name associated with the given S3 bucket, or return None.
|
|
351
|
+
|
|
352
|
+
Caches results, so may not return the location for a bucket that has been
|
|
353
|
+
created but was previously observed to be nonexistent.
|
|
354
|
+
|
|
355
|
+
:param only_strategies: For testing, use only strategies with 1-based numbers in this set.
|
|
356
|
+
"""
|
|
357
|
+
|
|
358
|
+
try:
|
|
359
|
+
return get_bucket_region(bucket_name, endpoint_url, only_strategies, anonymous)
|
|
360
|
+
except Exception as e:
|
|
361
|
+
if isinstance(e, NoBucketLocationError) or (isinstance(e, ClientError) and get_error_status(e) == 403):
|
|
362
|
+
# We can't know
|
|
363
|
+
return None
|
|
364
|
+
else:
|
|
365
|
+
raise
|
|
331
366
|
|
|
332
367
|
def region_to_bucket_location(region: str) -> str:
|
|
333
|
-
return
|
|
368
|
+
return "" if region == "us-east-1" else region
|
|
369
|
+
|
|
334
370
|
|
|
335
371
|
def bucket_location_to_region(location: Optional[str]) -> str:
|
|
336
372
|
return "us-east-1" if location == "" or location is None else location
|
|
337
373
|
|
|
338
|
-
def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "Object":
|
|
339
|
-
"""
|
|
340
|
-
Extracts a key (object) from a given parsed s3:// URL.
|
|
341
374
|
|
|
342
|
-
|
|
375
|
+
def get_object_for_url(url: ParseResult, existing: Optional[bool] = None, anonymous: Optional[bool] = None) -> "S3Object":
|
|
376
|
+
"""
|
|
377
|
+
Extracts a key (object) from a given parsed s3:// URL.
|
|
343
378
|
|
|
344
|
-
|
|
345
|
-
exists and it will be created. If None, the key will be created if it doesn't exist.
|
|
346
|
-
"""
|
|
379
|
+
If existing is true and the object does not exist, raises FileNotFoundError.
|
|
347
380
|
|
|
348
|
-
|
|
349
|
-
|
|
381
|
+
:param bool existing: If True, key is expected to exist. If False, key is expected not to
|
|
382
|
+
exists and it will be created. If None, the key will be created if it doesn't exist.
|
|
350
383
|
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
protocol = 'https'
|
|
356
|
-
if os.environ.get('TOIL_S3_USE_SSL', True) == 'False':
|
|
357
|
-
protocol = 'http'
|
|
358
|
-
if host:
|
|
359
|
-
endpoint_url = f'{protocol}://{host}' + f':{port}' if port else ''
|
|
384
|
+
:raises FileNotFoundError: when existing is True and the object does not exist.
|
|
385
|
+
:raises RuntimeError: when existing is False but the object exists.
|
|
386
|
+
:raises PermissionError: when we are not authorized to look at the object.
|
|
387
|
+
"""
|
|
360
388
|
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
389
|
+
key_name = url.path[1:]
|
|
390
|
+
bucket_name = url.netloc
|
|
391
|
+
|
|
392
|
+
# Decide if we need to override Boto's built-in URL here.
|
|
393
|
+
endpoint_url: Optional[str] = None
|
|
394
|
+
host = os.environ.get("TOIL_S3_HOST", None)
|
|
395
|
+
port = os.environ.get("TOIL_S3_PORT", None)
|
|
396
|
+
protocol = "https"
|
|
397
|
+
if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
|
|
398
|
+
protocol = "http"
|
|
399
|
+
if host:
|
|
400
|
+
endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
|
|
401
|
+
|
|
402
|
+
# TODO: OrdinaryCallingFormat equivalent in boto3?
|
|
403
|
+
# if botoargs:
|
|
404
|
+
# botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
|
|
405
|
+
|
|
406
|
+
config = session.ANONYMOUS_CONFIG if anonymous else None
|
|
407
|
+
# Get the bucket's region to avoid a redirect per request.
|
|
408
|
+
# Cache the result
|
|
409
|
+
region = get_bucket_region_if_available(bucket_name, endpoint_url=endpoint_url, anonymous=anonymous)
|
|
410
|
+
if region is not None:
|
|
411
|
+
s3 = session.resource("s3", region_name=region, endpoint_url=endpoint_url, config=config)
|
|
412
|
+
else:
|
|
413
|
+
# We can't get the bucket location, perhaps because we don't have
|
|
414
|
+
# permission to do that.
|
|
415
|
+
logger.debug("Couldn't get bucket location")
|
|
416
|
+
logger.debug("Fall back to not specifying location")
|
|
417
|
+
s3 = session.resource("s3", endpoint_url=endpoint_url, config=config)
|
|
364
418
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
region = get_bucket_region(bucket_name, endpoint_url=endpoint_url)
|
|
368
|
-
s3 = session.resource('s3', region_name=region, endpoint_url=endpoint_url)
|
|
369
|
-
except ClientError:
|
|
370
|
-
# Probably don't have permission.
|
|
371
|
-
# TODO: check if it is that
|
|
372
|
-
s3 = session.resource('s3', endpoint_url=endpoint_url)
|
|
419
|
+
obj = s3.Object(bucket_name, key_name)
|
|
420
|
+
objExists = True
|
|
373
421
|
|
|
374
|
-
|
|
375
|
-
|
|
422
|
+
try:
|
|
423
|
+
obj.load()
|
|
424
|
+
except ClientError as e:
|
|
425
|
+
if get_error_status(e) == 404:
|
|
426
|
+
objExists = False
|
|
427
|
+
elif get_error_status(e) == 403:
|
|
428
|
+
raise PermissionError(
|
|
429
|
+
f"Key '{key_name}' is not accessible in bucket '{bucket_name}'."
|
|
430
|
+
) from e
|
|
431
|
+
else:
|
|
432
|
+
raise
|
|
433
|
+
if existing is True and not objExists:
|
|
434
|
+
raise FileNotFoundError(
|
|
435
|
+
f"Key '{key_name}' does not exist in bucket '{bucket_name}'."
|
|
436
|
+
)
|
|
437
|
+
elif existing is False and objExists:
|
|
438
|
+
raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
|
|
376
439
|
|
|
440
|
+
if not objExists:
|
|
377
441
|
try:
|
|
378
|
-
obj.
|
|
442
|
+
obj.put() # write an empty file
|
|
379
443
|
except ClientError as e:
|
|
380
|
-
if get_error_status(e) ==
|
|
381
|
-
|
|
444
|
+
if get_error_status(e) == 403:
|
|
445
|
+
raise PermissionError(
|
|
446
|
+
f"Key '{key_name}' is not writable in bucket '{bucket_name}'."
|
|
447
|
+
) from e
|
|
382
448
|
else:
|
|
383
449
|
raise
|
|
384
|
-
|
|
385
|
-
raise FileNotFoundError(f"Key '{key_name}' does not exist in bucket '{bucket_name}'.")
|
|
386
|
-
elif existing is False and objExists:
|
|
387
|
-
raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
|
|
388
|
-
|
|
389
|
-
if not objExists:
|
|
390
|
-
obj.put() # write an empty file
|
|
391
|
-
return obj
|
|
450
|
+
return obj
|
|
392
451
|
|
|
393
452
|
|
|
394
453
|
@retry(errors=[AWSServerErrors])
|
|
395
|
-
def list_objects_for_url(url: ParseResult) ->
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
#
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
454
|
+
def list_objects_for_url(url: ParseResult, anonymous: Optional[bool] = None) -> list[str]:
|
|
455
|
+
"""
|
|
456
|
+
Lists the keys (objects) under a given parsed s3:// URL. The URL will be
|
|
457
|
+
supplemented with a trailing slash if it is missing.
|
|
458
|
+
|
|
459
|
+
:raises PermissionError: when we are not authorized to do the list operation.
|
|
460
|
+
"""
|
|
461
|
+
|
|
462
|
+
key_name = url.path[1:]
|
|
463
|
+
bucket_name = url.netloc
|
|
464
|
+
|
|
465
|
+
if key_name != "" and not key_name.endswith("/"):
|
|
466
|
+
# Make sure to put the trailing slash on the key, or else we'll see
|
|
467
|
+
# a prefix of just it.
|
|
468
|
+
key_name = key_name + "/"
|
|
469
|
+
|
|
470
|
+
# Decide if we need to override Boto's built-in URL here.
|
|
471
|
+
# TODO: Deduplicate with get_object_for_url, or push down into session module
|
|
472
|
+
endpoint_url: Optional[str] = None
|
|
473
|
+
host = os.environ.get("TOIL_S3_HOST", None)
|
|
474
|
+
port = os.environ.get("TOIL_S3_PORT", None)
|
|
475
|
+
protocol = "https"
|
|
476
|
+
if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
|
|
477
|
+
protocol = "http"
|
|
478
|
+
if host:
|
|
479
|
+
endpoint_url = f"{protocol}://{host}" + (f":{port}" if port else "")
|
|
480
|
+
|
|
481
|
+
config = session.ANONYMOUS_CONFIG if anonymous else None
|
|
482
|
+
client = session.client("s3", endpoint_url=endpoint_url, config=config)
|
|
483
|
+
|
|
484
|
+
listing = []
|
|
485
|
+
|
|
486
|
+
try:
|
|
487
|
+
paginator = client.get_paginator("list_objects_v2")
|
|
488
|
+
result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter="/")
|
|
425
489
|
for page in result:
|
|
426
|
-
if
|
|
427
|
-
for prefix_item in page[
|
|
428
|
-
listing.append(prefix_item[
|
|
429
|
-
if
|
|
430
|
-
for content_item in page[
|
|
431
|
-
if content_item[
|
|
490
|
+
if "CommonPrefixes" in page:
|
|
491
|
+
for prefix_item in page["CommonPrefixes"]:
|
|
492
|
+
listing.append(prefix_item["Prefix"][len(key_name) :])
|
|
493
|
+
if "Contents" in page:
|
|
494
|
+
for content_item in page["Contents"]:
|
|
495
|
+
if content_item["Key"] == key_name:
|
|
432
496
|
# Ignore folder name itself
|
|
433
497
|
continue
|
|
434
|
-
listing.append(content_item[
|
|
498
|
+
listing.append(content_item["Key"][len(key_name) :])
|
|
499
|
+
except ClientError as e:
|
|
500
|
+
if get_error_status(e) == 403:
|
|
501
|
+
raise PermissionError(
|
|
502
|
+
f"Prefix '{key_name}' is not authorized to be listed in bucket '{bucket_name}'."
|
|
503
|
+
) from e
|
|
504
|
+
else:
|
|
505
|
+
raise
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
logger.debug("Found in %s items: %s", url, listing)
|
|
509
|
+
return listing
|
|
435
510
|
|
|
436
|
-
logger.debug('Found in %s items: %s', url, listing)
|
|
437
|
-
return listing
|
|
438
511
|
|
|
439
|
-
def flatten_tags(tags:
|
|
512
|
+
def flatten_tags(tags: dict[str, str]) -> list[dict[str, str]]:
|
|
440
513
|
"""
|
|
441
514
|
Convert tags from a key to value dict into a list of 'Key': xxx, 'Value': xxx dicts.
|
|
442
515
|
"""
|
|
443
|
-
return [{
|
|
516
|
+
return [{"Key": k, "Value": v} for k, v in tags.items()]
|
|
444
517
|
|
|
445
518
|
|
|
446
|
-
def boto3_pager(
|
|
447
|
-
|
|
519
|
+
def boto3_pager(
|
|
520
|
+
requestor_callable: Callable[..., Any], result_attribute_name: str, **kwargs: Any
|
|
521
|
+
) -> Iterable[Any]:
|
|
448
522
|
"""
|
|
449
523
|
Yield all the results from calling the given Boto 3 method with the
|
|
450
524
|
given keyword arguments, paging through the results using the Marker or
|
|
@@ -465,7 +539,7 @@ def boto3_pager(requestor_callable: Callable[..., Any], result_attribute_name: s
|
|
|
465
539
|
yield from page.get(result_attribute_name, [])
|
|
466
540
|
|
|
467
541
|
|
|
468
|
-
def get_item_from_attributes(attributes:
|
|
542
|
+
def get_item_from_attributes(attributes: list["AttributeTypeDef"], name: str) -> Any:
|
|
469
543
|
"""
|
|
470
544
|
Given a list of attributes, find the attribute associated with the name and return its corresponding value.
|
|
471
545
|
|
|
@@ -475,8 +549,11 @@ def get_item_from_attributes(attributes: List[AttributeTypeDef], name: str) -> A
|
|
|
475
549
|
|
|
476
550
|
If the attribute with the name does not exist, the function will return None.
|
|
477
551
|
|
|
478
|
-
:param attributes: list of attributes
|
|
552
|
+
:param attributes: list of attributes
|
|
479
553
|
:param name: name of the attribute
|
|
480
554
|
:return: value of the attribute
|
|
481
555
|
"""
|
|
482
|
-
return next(
|
|
556
|
+
return next(
|
|
557
|
+
(attribute["Value"] for attribute in attributes if attribute["Name"] == name),
|
|
558
|
+
None,
|
|
559
|
+
)
|