toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/lib/aws/utils.py
CHANGED
|
@@ -15,45 +15,33 @@ import errno
|
|
|
15
15
|
import logging
|
|
16
16
|
import os
|
|
17
17
|
import socket
|
|
18
|
-
import
|
|
19
|
-
from typing import
|
|
20
|
-
Callable,
|
|
21
|
-
ContextManager,
|
|
22
|
-
Dict,
|
|
23
|
-
Iterable,
|
|
24
|
-
Iterator,
|
|
25
|
-
List,
|
|
26
|
-
Optional,
|
|
27
|
-
Set,
|
|
28
|
-
Union,
|
|
29
|
-
cast)
|
|
18
|
+
from collections.abc import Iterable, Iterator
|
|
19
|
+
from typing import TYPE_CHECKING, Any, Callable, ContextManager, Optional, cast
|
|
30
20
|
from urllib.parse import ParseResult
|
|
31
21
|
|
|
32
|
-
from toil.lib.aws import session
|
|
22
|
+
from toil.lib.aws import AWSRegionName, AWSServerErrors, session
|
|
23
|
+
from toil.lib.conversions import strtobool
|
|
33
24
|
from toil.lib.misc import printq
|
|
34
|
-
from toil.lib.retry import (
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
from
|
|
25
|
+
from toil.lib.retry import (
|
|
26
|
+
DEFAULT_DELAYS,
|
|
27
|
+
DEFAULT_TIMEOUT,
|
|
28
|
+
get_error_code,
|
|
29
|
+
get_error_status,
|
|
30
|
+
old_retry,
|
|
31
|
+
retry,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
if TYPE_CHECKING:
|
|
35
|
+
from mypy_boto3_s3 import S3ServiceResource
|
|
36
|
+
from mypy_boto3_s3.service_resource import Bucket
|
|
37
|
+
from mypy_boto3_s3.service_resource import Object as S3Object
|
|
38
|
+
from mypy_boto3_sdb.type_defs import AttributeTypeDef
|
|
45
39
|
|
|
46
40
|
try:
|
|
47
|
-
from
|
|
48
|
-
from botocore.exceptions import ClientError
|
|
49
|
-
from mypy_boto3_iam import IAMClient, IAMServiceResource
|
|
50
|
-
from mypy_boto3_s3 import S3Client, S3ServiceResource
|
|
51
|
-
from mypy_boto3_s3.literals import BucketLocationConstraintType
|
|
52
|
-
from mypy_boto3_s3.service_resource import Bucket, Object
|
|
53
|
-
from mypy_boto3_sdb import SimpleDBClient
|
|
41
|
+
from botocore.exceptions import ClientError, EndpointConnectionError
|
|
54
42
|
except ImportError:
|
|
55
|
-
BotoServerError = None # type: ignore
|
|
56
43
|
ClientError = None # type: ignore
|
|
44
|
+
EndpointConnectionError = None # type: ignore
|
|
57
45
|
# AWS/boto extra is not installed
|
|
58
46
|
|
|
59
47
|
logger = logging.getLogger(__name__)
|
|
@@ -61,73 +49,28 @@ logger = logging.getLogger(__name__)
|
|
|
61
49
|
# These are error codes we expect from AWS if we are making requests too fast.
|
|
62
50
|
# https://github.com/boto/botocore/blob/49f87350d54f55b687969ec8bf204df785975077/botocore/retries/standard.py#L316
|
|
63
51
|
THROTTLED_ERROR_CODES = [
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
52
|
+
"Throttling",
|
|
53
|
+
"ThrottlingException",
|
|
54
|
+
"ThrottledException",
|
|
55
|
+
"RequestThrottledException",
|
|
56
|
+
"TooManyRequestsException",
|
|
57
|
+
"ProvisionedThroughputExceededException",
|
|
58
|
+
"TransactionInProgressException",
|
|
59
|
+
"RequestLimitExceeded",
|
|
60
|
+
"BandwidthLimitExceeded",
|
|
61
|
+
"LimitExceededException",
|
|
62
|
+
"RequestThrottled",
|
|
63
|
+
"SlowDown",
|
|
64
|
+
"PriorRequestNotComplete",
|
|
65
|
+
"EC2ThrottledException",
|
|
78
66
|
]
|
|
79
67
|
|
|
80
|
-
@retry(errors=[BotoServerError])
|
|
81
|
-
def delete_iam_role(
|
|
82
|
-
role_name: str, region: Optional[str] = None, quiet: bool = True
|
|
83
|
-
) -> None:
|
|
84
|
-
from boto.iam.connection import IAMConnection
|
|
85
|
-
|
|
86
|
-
# TODO: the Boto3 type hints are a bit oversealous here; they want hundreds
|
|
87
|
-
# of overloads of the client-getting methods to exist based on the literal
|
|
88
|
-
# string passed in, to return exactly the right kind of client or resource.
|
|
89
|
-
# So we end up having to wrap all the calls in casts, which kind of defeats
|
|
90
|
-
# the point of a nice fluent method you can call with the name of the thing
|
|
91
|
-
# you want; we should have been calling iam_client() and so on all along if
|
|
92
|
-
# we wanted MyPy to be able to understand us. So at some point we should
|
|
93
|
-
# consider revising our API here to be less annoying to explain to the type
|
|
94
|
-
# checker.
|
|
95
|
-
iam_client = cast(IAMClient, session.client('iam', region_name=region))
|
|
96
|
-
iam_resource = cast(IAMServiceResource, session.resource('iam', region_name=region))
|
|
97
|
-
boto_iam_connection = IAMConnection()
|
|
98
|
-
role = iam_resource.Role(role_name)
|
|
99
|
-
# normal policies
|
|
100
|
-
for attached_policy in role.attached_policies.all():
|
|
101
|
-
printq(f'Now dissociating policy: {attached_policy.policy_name} from role {role.name}', quiet)
|
|
102
|
-
role.detach_policy(PolicyArn=attached_policy.arn)
|
|
103
|
-
# inline policies
|
|
104
|
-
for inline_policy in role.policies.all():
|
|
105
|
-
printq(f'Deleting inline policy: {inline_policy.policy_name} from role {role.name}', quiet)
|
|
106
|
-
# couldn't find an easy way to remove inline policies with boto3; use boto
|
|
107
|
-
boto_iam_connection.delete_role_policy(role.name, inline_policy.policy_name)
|
|
108
|
-
iam_client.delete_role(RoleName=role_name)
|
|
109
|
-
printq(f'Role {role_name} successfully deleted.', quiet)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
@retry(errors=[BotoServerError])
|
|
113
|
-
def delete_iam_instance_profile(
|
|
114
|
-
instance_profile_name: str, region: Optional[str] = None, quiet: bool = True
|
|
115
|
-
) -> None:
|
|
116
|
-
iam_resource = cast(IAMServiceResource, session.resource("iam", region_name=region))
|
|
117
|
-
instance_profile = iam_resource.InstanceProfile(instance_profile_name)
|
|
118
|
-
if instance_profile.roles is not None:
|
|
119
|
-
for role in instance_profile.roles:
|
|
120
|
-
printq(f'Now dissociating role: {role.name} from instance profile {instance_profile_name}', quiet)
|
|
121
|
-
instance_profile.remove_role(RoleName=role.name)
|
|
122
|
-
instance_profile.delete()
|
|
123
|
-
printq(f'Instance profile "{instance_profile_name}" successfully deleted.', quiet)
|
|
124
68
|
|
|
125
|
-
|
|
126
|
-
@retry(errors=[BotoServerError])
|
|
69
|
+
@retry(errors=[AWSServerErrors])
|
|
127
70
|
def delete_sdb_domain(
|
|
128
71
|
sdb_domain_name: str, region: Optional[str] = None, quiet: bool = True
|
|
129
72
|
) -> None:
|
|
130
|
-
sdb_client =
|
|
73
|
+
sdb_client = session.client("sdb", region_name=region)
|
|
131
74
|
sdb_client.delete_domain(DomainName=sdb_domain_name)
|
|
132
75
|
printq(f'SBD Domain: "{sdb_domain_name}" successfully deleted.', quiet)
|
|
133
76
|
|
|
@@ -141,39 +84,60 @@ def connection_reset(e: Exception) -> bool:
|
|
|
141
84
|
# errno is listed as 104. To be safe, we check for both:
|
|
142
85
|
return isinstance(e, socket.error) and e.errno in (errno.ECONNRESET, 104)
|
|
143
86
|
|
|
87
|
+
|
|
88
|
+
def connection_error(e: Exception) -> bool:
|
|
89
|
+
"""
|
|
90
|
+
Return True if an error represents a failure to make a network connection.
|
|
91
|
+
"""
|
|
92
|
+
return connection_reset(e) or isinstance(e, EndpointConnectionError)
|
|
93
|
+
|
|
94
|
+
|
|
144
95
|
# TODO: Replace with: @retry and ErrorCondition
|
|
145
96
|
def retryable_s3_errors(e: Exception) -> bool:
|
|
146
97
|
"""
|
|
147
98
|
Return true if this is an error from S3 that looks like we ought to retry our request.
|
|
148
99
|
"""
|
|
149
|
-
return (
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
100
|
+
return (
|
|
101
|
+
connection_error(e)
|
|
102
|
+
or (isinstance(e, ClientError) and get_error_status(e) in (429, 500))
|
|
103
|
+
or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
|
|
104
|
+
# boto3 errors
|
|
105
|
+
or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
|
|
106
|
+
or (isinstance(e, ClientError) and "BucketNotEmpty" in str(e))
|
|
107
|
+
or (
|
|
108
|
+
isinstance(e, ClientError)
|
|
109
|
+
and e.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 409
|
|
110
|
+
and "try again" in str(e)
|
|
111
|
+
)
|
|
112
|
+
or (
|
|
113
|
+
isinstance(e, ClientError)
|
|
114
|
+
and e.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
|
|
115
|
+
in (404, 429, 500, 502, 503, 504)
|
|
116
|
+
)
|
|
117
|
+
)
|
|
157
118
|
|
|
158
119
|
|
|
159
|
-
def retry_s3(
|
|
120
|
+
def retry_s3(
|
|
121
|
+
delays: Iterable[float] = DEFAULT_DELAYS,
|
|
122
|
+
timeout: float = DEFAULT_TIMEOUT,
|
|
123
|
+
predicate: Callable[[Exception], bool] = retryable_s3_errors,
|
|
124
|
+
) -> Iterator[ContextManager[None]]:
|
|
160
125
|
"""
|
|
161
126
|
Retry iterator of context managers specifically for S3 operations.
|
|
162
127
|
"""
|
|
163
128
|
return old_retry(delays=delays, timeout=timeout, predicate=predicate)
|
|
164
129
|
|
|
165
|
-
|
|
130
|
+
|
|
131
|
+
@retry(errors=[AWSServerErrors])
|
|
166
132
|
def delete_s3_bucket(
|
|
167
|
-
s3_resource: "S3ServiceResource",
|
|
168
|
-
bucket: str,
|
|
169
|
-
quiet: bool = True
|
|
133
|
+
s3_resource: "S3ServiceResource", bucket: str, quiet: bool = True
|
|
170
134
|
) -> None:
|
|
171
135
|
"""
|
|
172
136
|
Delete the given S3 bucket.
|
|
173
137
|
"""
|
|
174
|
-
printq(f
|
|
138
|
+
printq(f"Deleting s3 bucket: {bucket}", quiet)
|
|
175
139
|
|
|
176
|
-
paginator = s3_resource.meta.client.get_paginator(
|
|
140
|
+
paginator = s3_resource.meta.client.get_paginator("list_object_versions")
|
|
177
141
|
try:
|
|
178
142
|
for response in paginator.paginate(Bucket=bucket):
|
|
179
143
|
# Versions and delete markers can both go in here to be deleted.
|
|
@@ -181,21 +145,26 @@ def delete_s3_bucket(
|
|
|
181
145
|
# defined for them in the stubs to express that. See
|
|
182
146
|
# <https://github.com/vemel/mypy_boto3_builder/issues/123>. So we
|
|
183
147
|
# have to do gymnastics to get them into the same list.
|
|
184
|
-
to_delete:
|
|
185
|
-
|
|
148
|
+
to_delete: list[dict[str, Any]] = cast(
|
|
149
|
+
list[dict[str, Any]], response.get("Versions", [])
|
|
150
|
+
) + cast(list[dict[str, Any]], response.get("DeleteMarkers", []))
|
|
186
151
|
for entry in to_delete:
|
|
187
|
-
printq(
|
|
188
|
-
|
|
152
|
+
printq(
|
|
153
|
+
f" Deleting {entry['Key']} version {entry['VersionId']}", quiet
|
|
154
|
+
)
|
|
155
|
+
s3_resource.meta.client.delete_object(
|
|
156
|
+
Bucket=bucket, Key=entry["Key"], VersionId=entry["VersionId"]
|
|
157
|
+
)
|
|
189
158
|
s3_resource.Bucket(bucket).delete()
|
|
190
|
-
printq(f
|
|
159
|
+
printq(f"\n * Deleted s3 bucket successfully: {bucket}\n\n", quiet)
|
|
191
160
|
except s3_resource.meta.client.exceptions.NoSuchBucket:
|
|
192
|
-
printq(f
|
|
161
|
+
printq(f"\n * S3 bucket no longer exists: {bucket}\n\n", quiet)
|
|
193
162
|
|
|
194
163
|
|
|
195
164
|
def create_s3_bucket(
|
|
196
165
|
s3_resource: "S3ServiceResource",
|
|
197
166
|
bucket_name: str,
|
|
198
|
-
region:
|
|
167
|
+
region: AWSRegionName,
|
|
199
168
|
) -> "Bucket":
|
|
200
169
|
"""
|
|
201
170
|
Create an AWS S3 bucket, using the given Boto3 S3 session, with the
|
|
@@ -205,7 +174,7 @@ def create_s3_bucket(
|
|
|
205
174
|
|
|
206
175
|
*ALL* S3 bucket creation should use this function.
|
|
207
176
|
"""
|
|
208
|
-
logger.
|
|
177
|
+
logger.info("Creating bucket '%s' in region %s.", bucket_name, region)
|
|
209
178
|
if region == "us-east-1": # see https://github.com/boto/boto3/issues/125
|
|
210
179
|
bucket = s3_resource.create_bucket(Bucket=bucket_name)
|
|
211
180
|
else:
|
|
@@ -215,6 +184,7 @@ def create_s3_bucket(
|
|
|
215
184
|
)
|
|
216
185
|
return bucket
|
|
217
186
|
|
|
187
|
+
|
|
218
188
|
@retry(errors=[ClientError])
|
|
219
189
|
def enable_public_objects(bucket_name: str) -> None:
|
|
220
190
|
"""
|
|
@@ -238,7 +208,7 @@ def enable_public_objects(bucket_name: str) -> None:
|
|
|
238
208
|
would be a very awkward way to do it. So we restore the old behavior.
|
|
239
209
|
"""
|
|
240
210
|
|
|
241
|
-
s3_client =
|
|
211
|
+
s3_client = session.client("s3")
|
|
242
212
|
|
|
243
213
|
# Even though the new default is for public access to be prohibited, this
|
|
244
214
|
# is implemented by adding new things attached to the bucket. If we remove
|
|
@@ -252,22 +222,36 @@ def enable_public_objects(bucket_name: str) -> None:
|
|
|
252
222
|
s3_client.delete_bucket_ownership_controls(Bucket=bucket_name)
|
|
253
223
|
|
|
254
224
|
|
|
255
|
-
|
|
225
|
+
class NoBucketLocationError(Exception):
|
|
226
|
+
"""
|
|
227
|
+
Error to represent that we could not get a location for a bucket.
|
|
256
228
|
"""
|
|
257
|
-
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def get_bucket_region(
|
|
232
|
+
bucket_name: str,
|
|
233
|
+
endpoint_url: Optional[str] = None,
|
|
234
|
+
only_strategies: Optional[set[int]] = None,
|
|
235
|
+
) -> str:
|
|
236
|
+
"""
|
|
237
|
+
Get the AWS region name associated with the given S3 bucket, or raise NoBucketLocationError.
|
|
238
|
+
|
|
239
|
+
Does not log at info level or above when this does not work; failures are expected in some contexts.
|
|
258
240
|
|
|
259
241
|
Takes an optional S3 API URL override.
|
|
260
242
|
|
|
261
243
|
:param only_strategies: For testing, use only strategies with 1-based numbers in this set.
|
|
262
244
|
"""
|
|
263
245
|
|
|
264
|
-
s3_client =
|
|
246
|
+
s3_client = session.client("s3", endpoint_url=endpoint_url)
|
|
265
247
|
|
|
266
248
|
def attempt_get_bucket_location() -> Optional[str]:
|
|
267
249
|
"""
|
|
268
250
|
Try and get the bucket location from the normal API call.
|
|
269
251
|
"""
|
|
270
|
-
return s3_client.get_bucket_location(Bucket=bucket_name).get(
|
|
252
|
+
return s3_client.get_bucket_location(Bucket=bucket_name).get(
|
|
253
|
+
"LocationConstraint", None
|
|
254
|
+
)
|
|
271
255
|
|
|
272
256
|
def attempt_get_bucket_location_from_us_east_1() -> Optional[str]:
|
|
273
257
|
"""
|
|
@@ -283,8 +267,10 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
|
|
|
283
267
|
# It could also be because AWS open data buckets (which we tend to
|
|
284
268
|
# encounter this problem for) tend to actually themselves be in
|
|
285
269
|
# us-east-1.
|
|
286
|
-
backup_s3_client =
|
|
287
|
-
return backup_s3_client.get_bucket_location(Bucket=bucket_name).get(
|
|
270
|
+
backup_s3_client = session.client("s3", region_name="us-east-1")
|
|
271
|
+
return backup_s3_client.get_bucket_location(Bucket=bucket_name).get(
|
|
272
|
+
"LocationConstraint", None
|
|
273
|
+
)
|
|
288
274
|
|
|
289
275
|
def attempt_head_bucket() -> Optional[str]:
|
|
290
276
|
"""
|
|
@@ -296,11 +282,11 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
|
|
|
296
282
|
# us where the bucket is. See
|
|
297
283
|
# <https://github.com/aws/aws-sdk-cpp/issues/844#issuecomment-383747871>
|
|
298
284
|
info = s3_client.head_bucket(Bucket=bucket_name)
|
|
299
|
-
return info[
|
|
285
|
+
return info["ResponseMetadata"]["HTTPHeaders"]["x-amz-bucket-region"]
|
|
300
286
|
|
|
301
287
|
# Compose a list of strategies we want to try in order, which may work.
|
|
302
288
|
# None is an acceptable return type that actually means something.
|
|
303
|
-
strategies:
|
|
289
|
+
strategies: list[Callable[[], Optional[str]]] = []
|
|
304
290
|
strategies.append(attempt_get_bucket_location)
|
|
305
291
|
if not endpoint_url:
|
|
306
292
|
# We should only try to talk to us-east-1 if we don't have a custom
|
|
@@ -308,17 +294,25 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
|
|
|
308
294
|
strategies.append(attempt_get_bucket_location_from_us_east_1)
|
|
309
295
|
strategies.append(attempt_head_bucket)
|
|
310
296
|
|
|
297
|
+
error_logs: list[tuple[int, str]] = []
|
|
311
298
|
for attempt in retry_s3():
|
|
312
299
|
with attempt:
|
|
313
300
|
for i, strategy in enumerate(strategies):
|
|
314
|
-
if only_strategies is not None and i+1 not in only_strategies:
|
|
301
|
+
if only_strategies is not None and i + 1 not in only_strategies:
|
|
315
302
|
# We want to test running without this strategy.
|
|
316
303
|
continue
|
|
317
304
|
try:
|
|
318
|
-
|
|
305
|
+
location = bucket_location_to_region(strategy())
|
|
306
|
+
logger.debug("Got bucket location from strategy %d", i + 1)
|
|
307
|
+
return location
|
|
319
308
|
except ClientError as e:
|
|
320
|
-
if get_error_code(e) ==
|
|
321
|
-
logger.
|
|
309
|
+
if get_error_code(e) == "AccessDenied" and not endpoint_url:
|
|
310
|
+
logger.debug(
|
|
311
|
+
"Strategy %d to get bucket location did not work: %s",
|
|
312
|
+
i + 1,
|
|
313
|
+
e,
|
|
314
|
+
)
|
|
315
|
+
error_logs.append((i + 1, str(e)))
|
|
322
316
|
last_error: Exception = e
|
|
323
317
|
# We were blocked with this strategy. Move on to the
|
|
324
318
|
# next strategy which might work.
|
|
@@ -327,120 +321,182 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
|
|
|
327
321
|
raise
|
|
328
322
|
except KeyError as e:
|
|
329
323
|
# If we get a weird head response we will have a KeyError
|
|
330
|
-
logger.
|
|
324
|
+
logger.debug(
|
|
325
|
+
"Strategy %d to get bucket location did not work: %s", i + 1, e
|
|
326
|
+
)
|
|
327
|
+
error_logs.append((i + 1, str(e)))
|
|
331
328
|
last_error = e
|
|
332
|
-
|
|
333
|
-
|
|
329
|
+
|
|
330
|
+
error_messages = []
|
|
331
|
+
for rank, message in error_logs:
|
|
332
|
+
error_messages.append(
|
|
333
|
+
f"Strategy {rank} failed to get bucket location because: {message}"
|
|
334
|
+
)
|
|
335
|
+
# If we get here we ran out of attempts.
|
|
336
|
+
raise NoBucketLocationError(
|
|
337
|
+
"Could not get bucket location: " + "\n".join(error_messages)
|
|
338
|
+
) from last_error
|
|
339
|
+
|
|
334
340
|
|
|
335
341
|
def region_to_bucket_location(region: str) -> str:
|
|
336
|
-
return
|
|
342
|
+
return "" if region == "us-east-1" else region
|
|
343
|
+
|
|
337
344
|
|
|
338
345
|
def bucket_location_to_region(location: Optional[str]) -> str:
|
|
339
346
|
return "us-east-1" if location == "" or location is None else location
|
|
340
347
|
|
|
341
|
-
def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "Object":
|
|
342
|
-
"""
|
|
343
|
-
Extracts a key (object) from a given parsed s3:// URL.
|
|
344
348
|
|
|
345
|
-
|
|
349
|
+
def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3Object":
|
|
350
|
+
"""
|
|
351
|
+
Extracts a key (object) from a given parsed s3:// URL.
|
|
346
352
|
|
|
347
|
-
|
|
348
|
-
exists and it will be created. If None, the key will be created if it doesn't exist.
|
|
349
|
-
"""
|
|
353
|
+
If existing is true and the object does not exist, raises FileNotFoundError.
|
|
350
354
|
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
355
|
+
:param bool existing: If True, key is expected to exist. If False, key is expected not to
|
|
356
|
+
exists and it will be created. If None, the key will be created if it doesn't exist.
|
|
357
|
+
"""
|
|
358
|
+
|
|
359
|
+
key_name = url.path[1:]
|
|
360
|
+
bucket_name = url.netloc
|
|
361
|
+
|
|
362
|
+
# Decide if we need to override Boto's built-in URL here.
|
|
363
|
+
endpoint_url: Optional[str] = None
|
|
364
|
+
host = os.environ.get("TOIL_S3_HOST", None)
|
|
365
|
+
port = os.environ.get("TOIL_S3_PORT", None)
|
|
366
|
+
protocol = "https"
|
|
367
|
+
if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
|
|
368
|
+
protocol = "http"
|
|
369
|
+
if host:
|
|
370
|
+
endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
|
|
371
|
+
|
|
372
|
+
# TODO: OrdinaryCallingFormat equivalent in boto3?
|
|
373
|
+
# if botoargs:
|
|
374
|
+
# botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
|
|
375
|
+
|
|
376
|
+
try:
|
|
377
|
+
# Get the bucket's region to avoid a redirect per request
|
|
378
|
+
region = get_bucket_region(bucket_name, endpoint_url=endpoint_url)
|
|
379
|
+
s3 = session.resource("s3", region_name=region, endpoint_url=endpoint_url)
|
|
380
|
+
except NoBucketLocationError as e:
|
|
381
|
+
# Probably don't have permission.
|
|
382
|
+
# TODO: check if it is that
|
|
383
|
+
logger.debug("Couldn't get bucket location: %s", e)
|
|
384
|
+
logger.debug("Fall back to not specifying location")
|
|
385
|
+
s3 = session.resource("s3", endpoint_url=endpoint_url)
|
|
386
|
+
|
|
387
|
+
obj = s3.Object(bucket_name, key_name)
|
|
388
|
+
objExists = True
|
|
389
|
+
|
|
390
|
+
try:
|
|
391
|
+
obj.load()
|
|
392
|
+
except ClientError as e:
|
|
393
|
+
if get_error_status(e) == 404:
|
|
394
|
+
objExists = False
|
|
395
|
+
else:
|
|
396
|
+
raise
|
|
397
|
+
if existing is True and not objExists:
|
|
398
|
+
raise FileNotFoundError(
|
|
399
|
+
f"Key '{key_name}' does not exist in bucket '{bucket_name}'."
|
|
400
|
+
)
|
|
401
|
+
elif existing is False and objExists:
|
|
402
|
+
raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
|
|
403
|
+
|
|
404
|
+
if not objExists:
|
|
405
|
+
obj.put() # write an empty file
|
|
406
|
+
return obj
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
@retry(errors=[AWSServerErrors])
|
|
410
|
+
def list_objects_for_url(url: ParseResult) -> list[str]:
|
|
411
|
+
"""
|
|
412
|
+
Extracts a key (object) from a given parsed s3:// URL. The URL will be
|
|
413
|
+
supplemented with a trailing slash if it is missing.
|
|
414
|
+
"""
|
|
415
|
+
key_name = url.path[1:]
|
|
416
|
+
bucket_name = url.netloc
|
|
417
|
+
|
|
418
|
+
if key_name != "" and not key_name.endswith("/"):
|
|
419
|
+
# Make sure to put the trailing slash on the key, or else we'll see
|
|
420
|
+
# a prefix of just it.
|
|
421
|
+
key_name = key_name + "/"
|
|
422
|
+
|
|
423
|
+
# Decide if we need to override Boto's built-in URL here.
|
|
424
|
+
# TODO: Deduplicate with get_object_for_url, or push down into session module
|
|
425
|
+
endpoint_url: Optional[str] = None
|
|
426
|
+
host = os.environ.get("TOIL_S3_HOST", None)
|
|
427
|
+
port = os.environ.get("TOIL_S3_PORT", None)
|
|
428
|
+
protocol = "https"
|
|
429
|
+
if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
|
|
430
|
+
protocol = "http"
|
|
431
|
+
if host:
|
|
432
|
+
endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
|
|
433
|
+
|
|
434
|
+
client = session.client("s3", endpoint_url=endpoint_url)
|
|
435
|
+
|
|
436
|
+
listing = []
|
|
437
|
+
|
|
438
|
+
paginator = client.get_paginator("list_objects_v2")
|
|
439
|
+
result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter="/")
|
|
440
|
+
for page in result:
|
|
441
|
+
if "CommonPrefixes" in page:
|
|
442
|
+
for prefix_item in page["CommonPrefixes"]:
|
|
443
|
+
listing.append(prefix_item["Prefix"][len(key_name) :])
|
|
444
|
+
if "Contents" in page:
|
|
445
|
+
for content_item in page["Contents"]:
|
|
446
|
+
if content_item["Key"] == key_name:
|
|
447
|
+
# Ignore folder name itself
|
|
448
|
+
continue
|
|
449
|
+
listing.append(content_item["Key"][len(key_name) :])
|
|
438
450
|
|
|
439
|
-
|
|
440
|
-
|
|
451
|
+
logger.debug("Found in %s items: %s", url, listing)
|
|
452
|
+
return listing
|
|
441
453
|
|
|
442
|
-
|
|
454
|
+
|
|
455
|
+
def flatten_tags(tags: dict[str, str]) -> list[dict[str, str]]:
|
|
443
456
|
"""
|
|
444
457
|
Convert tags from a key to value dict into a list of 'Key': xxx, 'Value': xxx dicts.
|
|
445
458
|
"""
|
|
446
|
-
return [{
|
|
459
|
+
return [{"Key": k, "Value": v} for k, v in tags.items()]
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def boto3_pager(
|
|
463
|
+
requestor_callable: Callable[..., Any], result_attribute_name: str, **kwargs: Any
|
|
464
|
+
) -> Iterable[Any]:
|
|
465
|
+
"""
|
|
466
|
+
Yield all the results from calling the given Boto 3 method with the
|
|
467
|
+
given keyword arguments, paging through the results using the Marker or
|
|
468
|
+
NextToken, and fetching out and looping over the list in the response
|
|
469
|
+
with the given attribute name.
|
|
470
|
+
"""
|
|
471
|
+
|
|
472
|
+
# Recover the Boto3 client, and the name of the operation
|
|
473
|
+
client = requestor_callable.__self__ # type: ignore[attr-defined]
|
|
474
|
+
op_name = requestor_callable.__name__
|
|
475
|
+
|
|
476
|
+
# grab a Boto 3 built-in paginator. See
|
|
477
|
+
# <https://boto3.amazonaws.com/v1/documentation/api/latest/guide/paginators.html>
|
|
478
|
+
paginator = client.get_paginator(op_name)
|
|
479
|
+
|
|
480
|
+
for page in paginator.paginate(**kwargs):
|
|
481
|
+
# Invoke it and go through the pages, yielding from them
|
|
482
|
+
yield from page.get(result_attribute_name, [])
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def get_item_from_attributes(attributes: list["AttributeTypeDef"], name: str) -> Any:
|
|
486
|
+
"""
|
|
487
|
+
Given a list of attributes, find the attribute associated with the name and return its corresponding value.
|
|
488
|
+
|
|
489
|
+
The `attribute_list` will be a list of TypedDict's (which boto3 SDB functions commonly return),
|
|
490
|
+
where each TypedDict has a "Name" and "Value" key value pair.
|
|
491
|
+
This function grabs the value out of the associated TypedDict.
|
|
492
|
+
|
|
493
|
+
If the attribute with the name does not exist, the function will return None.
|
|
494
|
+
|
|
495
|
+
:param attributes: list of attributes
|
|
496
|
+
:param name: name of the attribute
|
|
497
|
+
:return: value of the attribute
|
|
498
|
+
"""
|
|
499
|
+
return next(
|
|
500
|
+
(attribute["Value"] for attribute in attributes if attribute["Name"] == name),
|
|
501
|
+
None,
|
|
502
|
+
)
|