toil 7.0.0__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +121 -83
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +38 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +489 -137
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +630 -359
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1114 -532
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +988 -315
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +727 -403
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +193 -58
- toil/lib/aws/utils.py +238 -218
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +99 -11
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +65 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +115 -77
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/options/common.py +834 -401
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +148 -64
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +93 -47
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/cwlTest.py +271 -71
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +11 -11
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3513 -1052
- toil/worker.py +269 -128
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/lib/aws/utils.py
CHANGED
|
@@ -15,35 +15,30 @@ import errno
|
|
|
15
15
|
import logging
|
|
16
16
|
import os
|
|
17
17
|
import socket
|
|
18
|
-
from
|
|
19
|
-
|
|
20
|
-
ContextManager,
|
|
21
|
-
Dict,
|
|
22
|
-
Iterable,
|
|
23
|
-
Iterator,
|
|
24
|
-
List,
|
|
25
|
-
Optional,
|
|
26
|
-
Set,
|
|
27
|
-
cast)
|
|
18
|
+
from collections.abc import Iterable, Iterator
|
|
19
|
+
from typing import TYPE_CHECKING, Any, Callable, ContextManager, Optional, cast
|
|
28
20
|
from urllib.parse import ParseResult
|
|
29
21
|
|
|
30
|
-
from
|
|
31
|
-
from toil.lib.
|
|
22
|
+
from toil.lib.aws import AWSRegionName, AWSServerErrors, session
|
|
23
|
+
from toil.lib.conversions import strtobool
|
|
32
24
|
from toil.lib.misc import printq
|
|
33
|
-
from toil.lib.retry import (
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
25
|
+
from toil.lib.retry import (
|
|
26
|
+
DEFAULT_DELAYS,
|
|
27
|
+
DEFAULT_TIMEOUT,
|
|
28
|
+
get_error_code,
|
|
29
|
+
get_error_status,
|
|
30
|
+
old_retry,
|
|
31
|
+
retry,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
if TYPE_CHECKING:
|
|
35
|
+
from mypy_boto3_s3 import S3ServiceResource
|
|
36
|
+
from mypy_boto3_s3.service_resource import Bucket
|
|
37
|
+
from mypy_boto3_s3.service_resource import Object as S3Object
|
|
38
|
+
from mypy_boto3_sdb.type_defs import AttributeTypeDef
|
|
39
39
|
|
|
40
40
|
try:
|
|
41
41
|
from botocore.exceptions import ClientError, EndpointConnectionError
|
|
42
|
-
from mypy_boto3_iam import IAMClient, IAMServiceResource
|
|
43
|
-
from mypy_boto3_s3 import S3Client, S3ServiceResource
|
|
44
|
-
from mypy_boto3_s3.literals import BucketLocationConstraintType
|
|
45
|
-
from mypy_boto3_s3.service_resource import Bucket, Object
|
|
46
|
-
from mypy_boto3_sdb import SimpleDBClient
|
|
47
42
|
except ImportError:
|
|
48
43
|
ClientError = None # type: ignore
|
|
49
44
|
EndpointConnectionError = None # type: ignore
|
|
@@ -54,63 +49,22 @@ logger = logging.getLogger(__name__)
|
|
|
54
49
|
# These are error codes we expect from AWS if we are making requests too fast.
|
|
55
50
|
# https://github.com/boto/botocore/blob/49f87350d54f55b687969ec8bf204df785975077/botocore/retries/standard.py#L316
|
|
56
51
|
THROTTLED_ERROR_CODES = [
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
52
|
+
"Throttling",
|
|
53
|
+
"ThrottlingException",
|
|
54
|
+
"ThrottledException",
|
|
55
|
+
"RequestThrottledException",
|
|
56
|
+
"TooManyRequestsException",
|
|
57
|
+
"ProvisionedThroughputExceededException",
|
|
58
|
+
"TransactionInProgressException",
|
|
59
|
+
"RequestLimitExceeded",
|
|
60
|
+
"BandwidthLimitExceeded",
|
|
61
|
+
"LimitExceededException",
|
|
62
|
+
"RequestThrottled",
|
|
63
|
+
"SlowDown",
|
|
64
|
+
"PriorRequestNotComplete",
|
|
65
|
+
"EC2ThrottledException",
|
|
71
66
|
]
|
|
72
67
|
|
|
73
|
-
@retry(errors=[AWSServerErrors])
|
|
74
|
-
def delete_iam_role(
|
|
75
|
-
role_name: str, region: Optional[str] = None, quiet: bool = True
|
|
76
|
-
) -> None:
|
|
77
|
-
# TODO: the Boto3 type hints are a bit oversealous here; they want hundreds
|
|
78
|
-
# of overloads of the client-getting methods to exist based on the literal
|
|
79
|
-
# string passed in, to return exactly the right kind of client or resource.
|
|
80
|
-
# So we end up having to wrap all the calls in casts, which kind of defeats
|
|
81
|
-
# the point of a nice fluent method you can call with the name of the thing
|
|
82
|
-
# you want; we should have been calling iam_client() and so on all along if
|
|
83
|
-
# we wanted MyPy to be able to understand us. So at some point we should
|
|
84
|
-
# consider revising our API here to be less annoying to explain to the type
|
|
85
|
-
# checker.
|
|
86
|
-
iam_client = session.client('iam', region_name=region)
|
|
87
|
-
iam_resource = session.resource('iam', region_name=region)
|
|
88
|
-
role = iam_resource.Role(role_name)
|
|
89
|
-
# normal policies
|
|
90
|
-
for attached_policy in role.attached_policies.all():
|
|
91
|
-
printq(f'Now dissociating policy: {attached_policy.policy_name} from role {role.name}', quiet)
|
|
92
|
-
role.detach_policy(PolicyArn=attached_policy.arn)
|
|
93
|
-
# inline policies
|
|
94
|
-
for inline_policy in role.policies.all():
|
|
95
|
-
printq(f'Deleting inline policy: {inline_policy.policy_name} from role {role.name}', quiet)
|
|
96
|
-
iam_client.delete_role_policy(RoleName=role.name, PolicyName=inline_policy.policy_name)
|
|
97
|
-
iam_client.delete_role(RoleName=role_name)
|
|
98
|
-
printq(f'Role {role_name} successfully deleted.', quiet)
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
@retry(errors=[AWSServerErrors])
|
|
102
|
-
def delete_iam_instance_profile(
|
|
103
|
-
instance_profile_name: str, region: Optional[str] = None, quiet: bool = True
|
|
104
|
-
) -> None:
|
|
105
|
-
iam_resource = session.resource("iam", region_name=region)
|
|
106
|
-
instance_profile = iam_resource.InstanceProfile(instance_profile_name)
|
|
107
|
-
if instance_profile.roles is not None:
|
|
108
|
-
for role in instance_profile.roles:
|
|
109
|
-
printq(f'Now dissociating role: {role.name} from instance profile {instance_profile_name}', quiet)
|
|
110
|
-
instance_profile.remove_role(RoleName=role.name)
|
|
111
|
-
instance_profile.delete()
|
|
112
|
-
printq(f'Instance profile "{instance_profile_name}" successfully deleted.', quiet)
|
|
113
|
-
|
|
114
68
|
|
|
115
69
|
@retry(errors=[AWSServerErrors])
|
|
116
70
|
def delete_sdb_domain(
|
|
@@ -130,12 +84,12 @@ def connection_reset(e: Exception) -> bool:
|
|
|
130
84
|
# errno is listed as 104. To be safe, we check for both:
|
|
131
85
|
return isinstance(e, socket.error) and e.errno in (errno.ECONNRESET, 104)
|
|
132
86
|
|
|
87
|
+
|
|
133
88
|
def connection_error(e: Exception) -> bool:
|
|
134
89
|
"""
|
|
135
90
|
Return True if an error represents a failure to make a network connection.
|
|
136
91
|
"""
|
|
137
|
-
return
|
|
138
|
-
or isinstance(e, EndpointConnectionError))
|
|
92
|
+
return connection_reset(e) or isinstance(e, EndpointConnectionError)
|
|
139
93
|
|
|
140
94
|
|
|
141
95
|
# TODO: Replace with: @retry and ErrorCondition
|
|
@@ -143,34 +97,47 @@ def retryable_s3_errors(e: Exception) -> bool:
|
|
|
143
97
|
"""
|
|
144
98
|
Return true if this is an error from S3 that looks like we ought to retry our request.
|
|
145
99
|
"""
|
|
146
|
-
return (
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
100
|
+
return (
|
|
101
|
+
connection_error(e)
|
|
102
|
+
or (isinstance(e, ClientError) and get_error_status(e) in (429, 500))
|
|
103
|
+
or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
|
|
104
|
+
# boto3 errors
|
|
105
|
+
or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
|
|
106
|
+
or (isinstance(e, ClientError) and "BucketNotEmpty" in str(e))
|
|
107
|
+
or (
|
|
108
|
+
isinstance(e, ClientError)
|
|
109
|
+
and e.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 409
|
|
110
|
+
and "try again" in str(e)
|
|
111
|
+
)
|
|
112
|
+
or (
|
|
113
|
+
isinstance(e, ClientError)
|
|
114
|
+
and e.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
|
|
115
|
+
in (404, 429, 500, 502, 503, 504)
|
|
116
|
+
)
|
|
117
|
+
)
|
|
154
118
|
|
|
155
119
|
|
|
156
|
-
def retry_s3(
|
|
120
|
+
def retry_s3(
|
|
121
|
+
delays: Iterable[float] = DEFAULT_DELAYS,
|
|
122
|
+
timeout: float = DEFAULT_TIMEOUT,
|
|
123
|
+
predicate: Callable[[Exception], bool] = retryable_s3_errors,
|
|
124
|
+
) -> Iterator[ContextManager[None]]:
|
|
157
125
|
"""
|
|
158
126
|
Retry iterator of context managers specifically for S3 operations.
|
|
159
127
|
"""
|
|
160
128
|
return old_retry(delays=delays, timeout=timeout, predicate=predicate)
|
|
161
129
|
|
|
130
|
+
|
|
162
131
|
@retry(errors=[AWSServerErrors])
|
|
163
132
|
def delete_s3_bucket(
|
|
164
|
-
s3_resource: "S3ServiceResource",
|
|
165
|
-
bucket: str,
|
|
166
|
-
quiet: bool = True
|
|
133
|
+
s3_resource: "S3ServiceResource", bucket: str, quiet: bool = True
|
|
167
134
|
) -> None:
|
|
168
135
|
"""
|
|
169
136
|
Delete the given S3 bucket.
|
|
170
137
|
"""
|
|
171
|
-
printq(f
|
|
138
|
+
printq(f"Deleting s3 bucket: {bucket}", quiet)
|
|
172
139
|
|
|
173
|
-
paginator = s3_resource.meta.client.get_paginator(
|
|
140
|
+
paginator = s3_resource.meta.client.get_paginator("list_object_versions")
|
|
174
141
|
try:
|
|
175
142
|
for response in paginator.paginate(Bucket=bucket):
|
|
176
143
|
# Versions and delete markers can both go in here to be deleted.
|
|
@@ -178,15 +145,20 @@ def delete_s3_bucket(
|
|
|
178
145
|
# defined for them in the stubs to express that. See
|
|
179
146
|
# <https://github.com/vemel/mypy_boto3_builder/issues/123>. So we
|
|
180
147
|
# have to do gymnastics to get them into the same list.
|
|
181
|
-
to_delete:
|
|
182
|
-
|
|
148
|
+
to_delete: list[dict[str, Any]] = cast(
|
|
149
|
+
list[dict[str, Any]], response.get("Versions", [])
|
|
150
|
+
) + cast(list[dict[str, Any]], response.get("DeleteMarkers", []))
|
|
183
151
|
for entry in to_delete:
|
|
184
|
-
printq(
|
|
185
|
-
|
|
152
|
+
printq(
|
|
153
|
+
f" Deleting {entry['Key']} version {entry['VersionId']}", quiet
|
|
154
|
+
)
|
|
155
|
+
s3_resource.meta.client.delete_object(
|
|
156
|
+
Bucket=bucket, Key=entry["Key"], VersionId=entry["VersionId"]
|
|
157
|
+
)
|
|
186
158
|
s3_resource.Bucket(bucket).delete()
|
|
187
|
-
printq(f
|
|
159
|
+
printq(f"\n * Deleted s3 bucket successfully: {bucket}\n\n", quiet)
|
|
188
160
|
except s3_resource.meta.client.exceptions.NoSuchBucket:
|
|
189
|
-
printq(f
|
|
161
|
+
printq(f"\n * S3 bucket no longer exists: {bucket}\n\n", quiet)
|
|
190
162
|
|
|
191
163
|
|
|
192
164
|
def create_s3_bucket(
|
|
@@ -202,7 +174,7 @@ def create_s3_bucket(
|
|
|
202
174
|
|
|
203
175
|
*ALL* S3 bucket creation should use this function.
|
|
204
176
|
"""
|
|
205
|
-
logger.
|
|
177
|
+
logger.info("Creating bucket '%s' in region %s.", bucket_name, region)
|
|
206
178
|
if region == "us-east-1": # see https://github.com/boto/boto3/issues/125
|
|
207
179
|
bucket = s3_resource.create_bucket(Bucket=bucket_name)
|
|
208
180
|
else:
|
|
@@ -212,6 +184,7 @@ def create_s3_bucket(
|
|
|
212
184
|
)
|
|
213
185
|
return bucket
|
|
214
186
|
|
|
187
|
+
|
|
215
188
|
@retry(errors=[ClientError])
|
|
216
189
|
def enable_public_objects(bucket_name: str) -> None:
|
|
217
190
|
"""
|
|
@@ -235,7 +208,7 @@ def enable_public_objects(bucket_name: str) -> None:
|
|
|
235
208
|
would be a very awkward way to do it. So we restore the old behavior.
|
|
236
209
|
"""
|
|
237
210
|
|
|
238
|
-
s3_client = session.client(
|
|
211
|
+
s3_client = session.client("s3")
|
|
239
212
|
|
|
240
213
|
# Even though the new default is for public access to be prohibited, this
|
|
241
214
|
# is implemented by adding new things attached to the bucket. If we remove
|
|
@@ -249,22 +222,36 @@ def enable_public_objects(bucket_name: str) -> None:
|
|
|
249
222
|
s3_client.delete_bucket_ownership_controls(Bucket=bucket_name)
|
|
250
223
|
|
|
251
224
|
|
|
252
|
-
|
|
225
|
+
class NoBucketLocationError(Exception):
|
|
226
|
+
"""
|
|
227
|
+
Error to represent that we could not get a location for a bucket.
|
|
228
|
+
"""
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def get_bucket_region(
|
|
232
|
+
bucket_name: str,
|
|
233
|
+
endpoint_url: Optional[str] = None,
|
|
234
|
+
only_strategies: Optional[set[int]] = None,
|
|
235
|
+
) -> str:
|
|
253
236
|
"""
|
|
254
|
-
Get the AWS region name associated with the given S3 bucket.
|
|
237
|
+
Get the AWS region name associated with the given S3 bucket, or raise NoBucketLocationError.
|
|
238
|
+
|
|
239
|
+
Does not log at info level or above when this does not work; failures are expected in some contexts.
|
|
255
240
|
|
|
256
241
|
Takes an optional S3 API URL override.
|
|
257
242
|
|
|
258
243
|
:param only_strategies: For testing, use only strategies with 1-based numbers in this set.
|
|
259
244
|
"""
|
|
260
245
|
|
|
261
|
-
s3_client = session.client(
|
|
246
|
+
s3_client = session.client("s3", endpoint_url=endpoint_url)
|
|
262
247
|
|
|
263
248
|
def attempt_get_bucket_location() -> Optional[str]:
|
|
264
249
|
"""
|
|
265
250
|
Try and get the bucket location from the normal API call.
|
|
266
251
|
"""
|
|
267
|
-
return s3_client.get_bucket_location(Bucket=bucket_name).get(
|
|
252
|
+
return s3_client.get_bucket_location(Bucket=bucket_name).get(
|
|
253
|
+
"LocationConstraint", None
|
|
254
|
+
)
|
|
268
255
|
|
|
269
256
|
def attempt_get_bucket_location_from_us_east_1() -> Optional[str]:
|
|
270
257
|
"""
|
|
@@ -280,8 +267,10 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
|
|
|
280
267
|
# It could also be because AWS open data buckets (which we tend to
|
|
281
268
|
# encounter this problem for) tend to actually themselves be in
|
|
282
269
|
# us-east-1.
|
|
283
|
-
backup_s3_client = session.client(
|
|
284
|
-
return backup_s3_client.get_bucket_location(Bucket=bucket_name).get(
|
|
270
|
+
backup_s3_client = session.client("s3", region_name="us-east-1")
|
|
271
|
+
return backup_s3_client.get_bucket_location(Bucket=bucket_name).get(
|
|
272
|
+
"LocationConstraint", None
|
|
273
|
+
)
|
|
285
274
|
|
|
286
275
|
def attempt_head_bucket() -> Optional[str]:
|
|
287
276
|
"""
|
|
@@ -293,11 +282,11 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
|
|
|
293
282
|
# us where the bucket is. See
|
|
294
283
|
# <https://github.com/aws/aws-sdk-cpp/issues/844#issuecomment-383747871>
|
|
295
284
|
info = s3_client.head_bucket(Bucket=bucket_name)
|
|
296
|
-
return info[
|
|
285
|
+
return info["ResponseMetadata"]["HTTPHeaders"]["x-amz-bucket-region"]
|
|
297
286
|
|
|
298
287
|
# Compose a list of strategies we want to try in order, which may work.
|
|
299
288
|
# None is an acceptable return type that actually means something.
|
|
300
|
-
strategies:
|
|
289
|
+
strategies: list[Callable[[], Optional[str]]] = []
|
|
301
290
|
strategies.append(attempt_get_bucket_location)
|
|
302
291
|
if not endpoint_url:
|
|
303
292
|
# We should only try to talk to us-east-1 if we don't have a custom
|
|
@@ -305,17 +294,25 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
|
|
|
305
294
|
strategies.append(attempt_get_bucket_location_from_us_east_1)
|
|
306
295
|
strategies.append(attempt_head_bucket)
|
|
307
296
|
|
|
297
|
+
error_logs: list[tuple[int, str]] = []
|
|
308
298
|
for attempt in retry_s3():
|
|
309
299
|
with attempt:
|
|
310
300
|
for i, strategy in enumerate(strategies):
|
|
311
|
-
if only_strategies is not None and i+1 not in only_strategies:
|
|
301
|
+
if only_strategies is not None and i + 1 not in only_strategies:
|
|
312
302
|
# We want to test running without this strategy.
|
|
313
303
|
continue
|
|
314
304
|
try:
|
|
315
|
-
|
|
305
|
+
location = bucket_location_to_region(strategy())
|
|
306
|
+
logger.debug("Got bucket location from strategy %d", i + 1)
|
|
307
|
+
return location
|
|
316
308
|
except ClientError as e:
|
|
317
|
-
if get_error_code(e) ==
|
|
318
|
-
logger.
|
|
309
|
+
if get_error_code(e) == "AccessDenied" and not endpoint_url:
|
|
310
|
+
logger.debug(
|
|
311
|
+
"Strategy %d to get bucket location did not work: %s",
|
|
312
|
+
i + 1,
|
|
313
|
+
e,
|
|
314
|
+
)
|
|
315
|
+
error_logs.append((i + 1, str(e)))
|
|
319
316
|
last_error: Exception = e
|
|
320
317
|
# We were blocked with this strategy. Move on to the
|
|
321
318
|
# next strategy which might work.
|
|
@@ -324,127 +321,147 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
|
|
|
324
321
|
raise
|
|
325
322
|
except KeyError as e:
|
|
326
323
|
# If we get a weird head response we will have a KeyError
|
|
327
|
-
logger.
|
|
324
|
+
logger.debug(
|
|
325
|
+
"Strategy %d to get bucket location did not work: %s", i + 1, e
|
|
326
|
+
)
|
|
327
|
+
error_logs.append((i + 1, str(e)))
|
|
328
328
|
last_error = e
|
|
329
|
-
|
|
330
|
-
|
|
329
|
+
|
|
330
|
+
error_messages = []
|
|
331
|
+
for rank, message in error_logs:
|
|
332
|
+
error_messages.append(
|
|
333
|
+
f"Strategy {rank} failed to get bucket location because: {message}"
|
|
334
|
+
)
|
|
335
|
+
# If we get here we ran out of attempts.
|
|
336
|
+
raise NoBucketLocationError(
|
|
337
|
+
"Could not get bucket location: " + "\n".join(error_messages)
|
|
338
|
+
) from last_error
|
|
339
|
+
|
|
331
340
|
|
|
332
341
|
def region_to_bucket_location(region: str) -> str:
|
|
333
|
-
return
|
|
342
|
+
return "" if region == "us-east-1" else region
|
|
343
|
+
|
|
334
344
|
|
|
335
345
|
def bucket_location_to_region(location: Optional[str]) -> str:
|
|
336
346
|
return "us-east-1" if location == "" or location is None else location
|
|
337
347
|
|
|
338
|
-
def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "Object":
|
|
339
|
-
"""
|
|
340
|
-
Extracts a key (object) from a given parsed s3:// URL.
|
|
341
348
|
|
|
342
|
-
|
|
349
|
+
def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3Object":
|
|
350
|
+
"""
|
|
351
|
+
Extracts a key (object) from a given parsed s3:// URL.
|
|
343
352
|
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
353
|
+
If existing is true and the object does not exist, raises FileNotFoundError.
|
|
354
|
+
|
|
355
|
+
:param bool existing: If True, key is expected to exist. If False, key is expected not to
|
|
356
|
+
exists and it will be created. If None, the key will be created if it doesn't exist.
|
|
357
|
+
"""
|
|
358
|
+
|
|
359
|
+
key_name = url.path[1:]
|
|
360
|
+
bucket_name = url.netloc
|
|
361
|
+
|
|
362
|
+
# Decide if we need to override Boto's built-in URL here.
|
|
363
|
+
endpoint_url: Optional[str] = None
|
|
364
|
+
host = os.environ.get("TOIL_S3_HOST", None)
|
|
365
|
+
port = os.environ.get("TOIL_S3_PORT", None)
|
|
366
|
+
protocol = "https"
|
|
367
|
+
if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
|
|
368
|
+
protocol = "http"
|
|
369
|
+
if host:
|
|
370
|
+
endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
|
|
371
|
+
|
|
372
|
+
# TODO: OrdinaryCallingFormat equivalent in boto3?
|
|
373
|
+
# if botoargs:
|
|
374
|
+
# botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
|
|
375
|
+
|
|
376
|
+
try:
|
|
377
|
+
# Get the bucket's region to avoid a redirect per request
|
|
378
|
+
region = get_bucket_region(bucket_name, endpoint_url=endpoint_url)
|
|
379
|
+
s3 = session.resource("s3", region_name=region, endpoint_url=endpoint_url)
|
|
380
|
+
except NoBucketLocationError as e:
|
|
381
|
+
# Probably don't have permission.
|
|
382
|
+
# TODO: check if it is that
|
|
383
|
+
logger.debug("Couldn't get bucket location: %s", e)
|
|
384
|
+
logger.debug("Fall back to not specifying location")
|
|
385
|
+
s3 = session.resource("s3", endpoint_url=endpoint_url)
|
|
386
|
+
|
|
387
|
+
obj = s3.Object(bucket_name, key_name)
|
|
388
|
+
objExists = True
|
|
389
|
+
|
|
390
|
+
try:
|
|
391
|
+
obj.load()
|
|
392
|
+
except ClientError as e:
|
|
393
|
+
if get_error_status(e) == 404:
|
|
394
|
+
objExists = False
|
|
395
|
+
else:
|
|
396
|
+
raise
|
|
397
|
+
if existing is True and not objExists:
|
|
398
|
+
raise FileNotFoundError(
|
|
399
|
+
f"Key '{key_name}' does not exist in bucket '{bucket_name}'."
|
|
400
|
+
)
|
|
401
|
+
elif existing is False and objExists:
|
|
402
|
+
raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
|
|
347
403
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
# Decide if we need to override Boto's built-in URL here.
|
|
352
|
-
endpoint_url: Optional[str] = None
|
|
353
|
-
host = os.environ.get('TOIL_S3_HOST', None)
|
|
354
|
-
port = os.environ.get('TOIL_S3_PORT', None)
|
|
355
|
-
protocol = 'https'
|
|
356
|
-
if os.environ.get('TOIL_S3_USE_SSL', True) == 'False':
|
|
357
|
-
protocol = 'http'
|
|
358
|
-
if host:
|
|
359
|
-
endpoint_url = f'{protocol}://{host}' + f':{port}' if port else ''
|
|
360
|
-
|
|
361
|
-
# TODO: OrdinaryCallingFormat equivalent in boto3?
|
|
362
|
-
# if botoargs:
|
|
363
|
-
# botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
|
|
364
|
-
|
|
365
|
-
try:
|
|
366
|
-
# Get the bucket's region to avoid a redirect per request
|
|
367
|
-
region = get_bucket_region(bucket_name, endpoint_url=endpoint_url)
|
|
368
|
-
s3 = session.resource('s3', region_name=region, endpoint_url=endpoint_url)
|
|
369
|
-
except ClientError:
|
|
370
|
-
# Probably don't have permission.
|
|
371
|
-
# TODO: check if it is that
|
|
372
|
-
s3 = session.resource('s3', endpoint_url=endpoint_url)
|
|
373
|
-
|
|
374
|
-
obj = s3.Object(bucket_name, key_name)
|
|
375
|
-
objExists = True
|
|
376
|
-
|
|
377
|
-
try:
|
|
378
|
-
obj.load()
|
|
379
|
-
except ClientError as e:
|
|
380
|
-
if get_error_status(e) == 404:
|
|
381
|
-
objExists = False
|
|
382
|
-
else:
|
|
383
|
-
raise
|
|
384
|
-
if existing is True and not objExists:
|
|
385
|
-
raise FileNotFoundError(f"Key '{key_name}' does not exist in bucket '{bucket_name}'.")
|
|
386
|
-
elif existing is False and objExists:
|
|
387
|
-
raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
|
|
388
|
-
|
|
389
|
-
if not objExists:
|
|
390
|
-
obj.put() # write an empty file
|
|
391
|
-
return obj
|
|
404
|
+
if not objExists:
|
|
405
|
+
obj.put() # write an empty file
|
|
406
|
+
return obj
|
|
392
407
|
|
|
393
408
|
|
|
394
409
|
@retry(errors=[AWSServerErrors])
|
|
395
|
-
def list_objects_for_url(url: ParseResult) ->
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
410
|
+
def list_objects_for_url(url: ParseResult) -> list[str]:
|
|
411
|
+
"""
|
|
412
|
+
Extracts a key (object) from a given parsed s3:// URL. The URL will be
|
|
413
|
+
supplemented with a trailing slash if it is missing.
|
|
414
|
+
"""
|
|
415
|
+
key_name = url.path[1:]
|
|
416
|
+
bucket_name = url.netloc
|
|
417
|
+
|
|
418
|
+
if key_name != "" and not key_name.endswith("/"):
|
|
419
|
+
# Make sure to put the trailing slash on the key, or else we'll see
|
|
420
|
+
# a prefix of just it.
|
|
421
|
+
key_name = key_name + "/"
|
|
422
|
+
|
|
423
|
+
# Decide if we need to override Boto's built-in URL here.
|
|
424
|
+
# TODO: Deduplicate with get_object_for_url, or push down into session module
|
|
425
|
+
endpoint_url: Optional[str] = None
|
|
426
|
+
host = os.environ.get("TOIL_S3_HOST", None)
|
|
427
|
+
port = os.environ.get("TOIL_S3_PORT", None)
|
|
428
|
+
protocol = "https"
|
|
429
|
+
if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
|
|
430
|
+
protocol = "http"
|
|
431
|
+
if host:
|
|
432
|
+
endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
|
|
433
|
+
|
|
434
|
+
client = session.client("s3", endpoint_url=endpoint_url)
|
|
435
|
+
|
|
436
|
+
listing = []
|
|
437
|
+
|
|
438
|
+
paginator = client.get_paginator("list_objects_v2")
|
|
439
|
+
result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter="/")
|
|
440
|
+
for page in result:
|
|
441
|
+
if "CommonPrefixes" in page:
|
|
442
|
+
for prefix_item in page["CommonPrefixes"]:
|
|
443
|
+
listing.append(prefix_item["Prefix"][len(key_name) :])
|
|
444
|
+
if "Contents" in page:
|
|
445
|
+
for content_item in page["Contents"]:
|
|
446
|
+
if content_item["Key"] == key_name:
|
|
447
|
+
# Ignore folder name itself
|
|
448
|
+
continue
|
|
449
|
+
listing.append(content_item["Key"][len(key_name) :])
|
|
450
|
+
|
|
451
|
+
logger.debug("Found in %s items: %s", url, listing)
|
|
452
|
+
return listing
|
|
435
453
|
|
|
436
|
-
logger.debug('Found in %s items: %s', url, listing)
|
|
437
|
-
return listing
|
|
438
454
|
|
|
439
|
-
def flatten_tags(tags:
|
|
455
|
+
def flatten_tags(tags: dict[str, str]) -> list[dict[str, str]]:
|
|
440
456
|
"""
|
|
441
457
|
Convert tags from a key to value dict into a list of 'Key': xxx, 'Value': xxx dicts.
|
|
442
458
|
"""
|
|
443
|
-
return [{
|
|
459
|
+
return [{"Key": k, "Value": v} for k, v in tags.items()]
|
|
444
460
|
|
|
445
461
|
|
|
446
|
-
def boto3_pager(
|
|
447
|
-
|
|
462
|
+
def boto3_pager(
|
|
463
|
+
requestor_callable: Callable[..., Any], result_attribute_name: str, **kwargs: Any
|
|
464
|
+
) -> Iterable[Any]:
|
|
448
465
|
"""
|
|
449
466
|
Yield all the results from calling the given Boto 3 method with the
|
|
450
467
|
given keyword arguments, paging through the results using the Marker or
|
|
@@ -465,7 +482,7 @@ def boto3_pager(requestor_callable: Callable[..., Any], result_attribute_name: s
|
|
|
465
482
|
yield from page.get(result_attribute_name, [])
|
|
466
483
|
|
|
467
484
|
|
|
468
|
-
def get_item_from_attributes(attributes:
|
|
485
|
+
def get_item_from_attributes(attributes: list["AttributeTypeDef"], name: str) -> Any:
|
|
469
486
|
"""
|
|
470
487
|
Given a list of attributes, find the attribute associated with the name and return its corresponding value.
|
|
471
488
|
|
|
@@ -475,8 +492,11 @@ def get_item_from_attributes(attributes: List[AttributeTypeDef], name: str) -> A
|
|
|
475
492
|
|
|
476
493
|
If the attribute with the name does not exist, the function will return None.
|
|
477
494
|
|
|
478
|
-
:param attributes: list of attributes
|
|
495
|
+
:param attributes: list of attributes
|
|
479
496
|
:param name: name of the attribute
|
|
480
497
|
:return: value of the attribute
|
|
481
498
|
"""
|
|
482
|
-
return next(
|
|
499
|
+
return next(
|
|
500
|
+
(attribute["Value"] for attribute in attributes if attribute["Name"] == name),
|
|
501
|
+
None,
|
|
502
|
+
)
|