toil 7.0.0-py3-none-any.whl → 8.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +121 -83
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +38 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +489 -137
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +630 -359
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1114 -532
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +988 -315
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +727 -403
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +193 -58
- toil/lib/aws/utils.py +238 -218
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +99 -11
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +65 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +115 -77
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/options/common.py +834 -401
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +148 -64
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +93 -47
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/cwlTest.py +271 -71
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +11 -11
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3513 -1052
- toil/worker.py +269 -128
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/jobStores/aws/utils.py
CHANGED
@@ -17,25 +17,28 @@ import logging
 import os
 import types
 from ssl import SSLError
-from typing import
+from typing import TYPE_CHECKING, Optional, cast

 from boto3.s3.transfer import TransferConfig
 from botocore.client import Config
 from botocore.exceptions import ClientError
-from mypy_boto3_sdb.type_defs import ItemTypeDef, AttributeTypeDef

-from toil.lib.aws import
+from toil.lib.aws import AWSServerErrors, session
 from toil.lib.aws.utils import connection_error, get_bucket_region
 from toil.lib.compatibility import compat_bytes
-from toil.lib.retry import (DEFAULT_DELAYS,
-                            DEFAULT_TIMEOUT,
-                            get_error_code,
-                            get_error_message,
-                            get_error_status,
-                            old_retry,
-                            retry)
+from toil.lib.retry import (
+    DEFAULT_DELAYS,
+    DEFAULT_TIMEOUT,
+    get_error_code,
+    get_error_message,
+    get_error_status,
+    old_retry,
+    retry,
+)
+
 if TYPE_CHECKING:
     from mypy_boto3_s3 import S3ServiceResource
+    from mypy_boto3_sdb.type_defs import AttributeTypeDef, ItemTypeDef

 logger = logging.getLogger(__name__)

@@ -46,10 +49,10 @@ logger = logging.getLogger(__name__)
 # also need to set a special flag to make sure we don't use the generic
 # s3.amazonaws.com for us-east-1, or else we might not actually end up talking
 # to us-east-1 when a bucket is there.
-DIAL_SPECIFIC_REGION_CONFIG = Config(s3={
-    'addressing_style': 'path',
-    'us_east_1_regional_endpoint': 'regional'
-})
+DIAL_SPECIFIC_REGION_CONFIG = Config(
+    s3={"addressing_style": "path", "us_east_1_regional_endpoint": "regional"}
+)
+

 class SDBHelper:
     """
@@ -89,6 +92,7 @@ class SDBHelper:
     True

     """
+
     # The SDB documentation is not clear as to whether the attribute value size limit of 1024
     # applies to the base64-encoded value or the raw value. It suggests that responses are
     # automatically encoded from which I conclude that the limit should apply to the raw,
@@ -101,8 +105,8 @@
     maxValueSize = 1024
     maxRawValueSize = maxValueSize * 3 // 4
     # Just make sure we don't have a problem with padding or integer truncation:
-    assert len(base64.b64encode(b' ' * maxRawValueSize)) == 1024
-    assert len(base64.b64encode(b' ' * (1 + maxRawValueSize))) > 1024
+    assert len(base64.b64encode(b" " * maxRawValueSize)) == 1024
+    assert len(base64.b64encode(b" " * (1 + maxRawValueSize))) > 1024

     @classmethod
     def _reservedAttributes(cls):
@@ -118,59 +122,72 @@

     @classmethod
     def maxBinarySize(cls, extraReservedChunks=0):
-        return (cls._maxChunks() - extraReservedChunks) * cls.maxRawValueSize - 1  # for the 'C' or 'U' prefix
+        return (
+            cls._maxChunks() - extraReservedChunks
+        ) * cls.maxRawValueSize - 1  # for the 'C' or 'U' prefix

     @classmethod
     def _maxEncodedSize(cls):
         return cls._maxChunks() * cls.maxValueSize

     @classmethod
-    def binaryToAttributes(cls, binary) -> Dict[str, str]:
+    def binaryToAttributes(cls, binary) -> dict[str, str]:
         """
         Turn a bytestring, or None, into SimpleDB attributes.
         """
-        if binary is None: return {'numChunks': '0'}
+        if binary is None:
+            return {"numChunks": "0"}
         assert isinstance(binary, bytes)
         assert len(binary) <= cls.maxBinarySize()
         # The use of compression is just an optimization. We can't include it in the maxValueSize
         # computation because the compression ratio depends on the input.
         compressed = bz2.compress(binary)
         if len(compressed) > len(binary):
-            compressed = b'U' + binary
+            compressed = b"U" + binary
         else:
-            compressed = b'C' + compressed
+            compressed = b"C" + compressed
         encoded = base64.b64encode(compressed)
         assert len(encoded) <= cls._maxEncodedSize()
         n = cls.maxValueSize
-        chunks = (encoded[i:i + n] for i in range(0, len(encoded), n))
-        attributes = {cls._chunkName(i): chunk.decode('utf-8') for i, chunk in enumerate(chunks)}
-        attributes.update({'numChunks': str(len(attributes))})
+        chunks = (encoded[i : i + n] for i in range(0, len(encoded), n))
+        attributes = {
+            cls._chunkName(i): chunk.decode("utf-8") for i, chunk in enumerate(chunks)
+        }
+        attributes.update({"numChunks": str(len(attributes))})
         return attributes

     @classmethod
-    def attributeDictToList(cls, attributes: Dict[str, str]) -> List[AttributeTypeDef]:
+    def attributeDictToList(
+        cls, attributes: dict[str, str]
+    ) -> list["AttributeTypeDef"]:
         """
         Convert the attribute dict (ex: from binaryToAttributes) into a list of attribute typed dicts
         to be compatible with boto3 argument syntax
         :param attributes: Dict[str, str], attribute in object form
-        :return:
+        :return: list of attributes in typed dict form
         """
         return [{"Name": name, "Value": value} for name, value in attributes.items()]

     @classmethod
-    def attributeListToDict(cls, attributes: List[AttributeTypeDef]) -> Dict[str, str]:
+    def attributeListToDict(
+        cls, attributes: list["AttributeTypeDef"]
+    ) -> dict[str, str]:
         """
         Convert the attribute boto3 representation of list of attribute typed dicts
         back to a dictionary with name, value pairs
-        :param attribute:
+        :param attribute: attribute in typed dict form
         :return: Dict[str, str], attribute in dict form
         """
         return {attribute["Name"]: attribute["Value"] for attribute in attributes}

     @classmethod
-    def get_attributes_from_item(cls, item: ItemTypeDef, keys: List[str]) -> List[Optional[str]]:
-        return_values: List[Optional[str]] = [None for _ in keys]
-        mapped_indices: Dict[str, int] = {name: index for index, name in enumerate(keys)}
+    def get_attributes_from_item(
+        cls, item: "ItemTypeDef", keys: list[str]
+    ) -> list[Optional[str]]:
+        return_values: list[Optional[str]] = [None for _ in keys]
+        mapped_indices: dict[str, int] = {
+            name: index for index, name in enumerate(keys)
+        }
         for attribute in item["Attributes"]:
             name = attribute["Name"]
             value = attribute["Value"]
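
The SDBHelper hunks above are mostly mechanical reformatting, but they restate the scheme Toil uses to squeeze arbitrary bytes through SimpleDB's 1024-character attribute limit: bz2-compress when it pays off, tag the payload with a one-byte "C" (compressed) or "U" (uncompressed) prefix, base64-encode, split into 1024-character chunk attributes, and record the count under "numChunks". A minimal, self-contained round-trip sketch of that scheme (the zero-padded chunk names here are a stand-in for Toil's _chunkName()):

    import base64
    import bz2

    MAX_VALUE_SIZE = 1024  # SimpleDB's limit, applied to the base64 text

    def encode_chunks(binary: bytes) -> dict[str, str]:
        compressed = bz2.compress(binary)
        # Compression is only an optimization; keep the raw bytes if it grows.
        payload = (b"C" + compressed) if len(compressed) <= len(binary) else (b"U" + binary)
        encoded = base64.b64encode(payload)
        chunks = {
            f"{i:03d}": encoded[i * MAX_VALUE_SIZE : (i + 1) * MAX_VALUE_SIZE].decode()
            for i in range((len(encoded) + MAX_VALUE_SIZE - 1) // MAX_VALUE_SIZE)
        }
        chunks["numChunks"] = str(len(chunks))
        return chunks

    def decode_chunks(attributes: dict[str, str]) -> bytes:
        attrs = dict(attributes)
        n = int(attrs.pop("numChunks"))
        encoded = "".join(attrs[k] for k in sorted(attrs)[:n]).encode()
        payload = base64.b64decode(encoded)
        return bz2.decompress(payload[1:]) if payload[:1] == b"C" else payload[1:]

    data = b"hello world" * 200
    assert decode_chunks(encode_chunks(data)) == data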
@@ -193,10 +210,12 @@
         Assuming that binaryToAttributes() is used with SDB's PutAttributes, the return value of
         this method could be used to detect the presence/absence of an item in SDB.
         """
-        return 'numChunks'
+        return "numChunks"

     @classmethod
-    def attributesToBinary(cls, attributes: List[AttributeTypeDef]) -> Tuple[bytes, int]:
+    def attributesToBinary(
+        cls, attributes: list["AttributeTypeDef"]
+    ) -> tuple[bytes, int]:
         """
         :rtype: (str|None,int)
         :return: the binary data and the number of chunks it was composed from
@@ -212,14 +231,14 @@
                 numChunks = int(value)
         chunks.sort()
         if numChunks:
-            serializedJob = b''.join(v.encode() for k, v in chunks)
+            serializedJob = b"".join(v.encode() for k, v in chunks)
             compressed = base64.b64decode(serializedJob)
-            if compressed[0] == b'C'[0]:
+            if compressed[0] == b"C"[0]:
                 binary = bz2.decompress(compressed[1:])
-            elif compressed[0] == b'U'[0]:
+            elif compressed[0] == b"U"[0]:
                 binary = compressed[1:]
             else:
-                raise RuntimeError(f'Unexpected prefix {compressed[0]}')
+                raise RuntimeError(f"Unexpected prefix {compressed[0]}")
         else:
             binary = None
         return binary, numChunks
@@ -231,12 +250,14 @@ def fileSizeAndTime(localFilePath):


 @retry(errors=[AWSServerErrors])
-def uploadFromPath(localFilePath: str,
-                   resource,
-                   bucketName: str,
-                   fileID: str,
-                   headerArgs: Optional[dict] = None,
-                   partSize: int = 50 << 20):
+def uploadFromPath(
+    localFilePath: str,
+    resource,
+    bucketName: str,
+    fileID: str,
+    headerArgs: Optional[dict] = None,
+    partSize: int = 50 << 20,
+):
     """
     Uploads a file to s3, using multipart uploading if applicable

@@ -255,9 +276,13 @@ def uploadFromPath(localFilePath: str,
     client = resource.meta.client
     file_size, file_time = fileSizeAndTime(localFilePath)

-    version = uploadFile(localFilePath, resource, bucketName, fileID, headerArgs, partSize)
-    info = client.head_object(Bucket=bucketName, Key=compat_bytes(fileID), VersionId=version, **headerArgs)
-    size = info.get('ContentLength')
+    version = uploadFile(
+        localFilePath, resource, bucketName, fileID, headerArgs, partSize
+    )
+    info = client.head_object(
+        Bucket=bucketName, Key=compat_bytes(fileID), VersionId=version, **headerArgs
+    )
+    size = info.get("ContentLength")

     assert size == file_size

@@ -267,12 +292,14 @@ def uploadFromPath(localFilePath: str,


 @retry(errors=[AWSServerErrors])
-def uploadFile(readable,
-               resource,
-               bucketName: str,
-               fileID: str,
-               headerArgs: Optional[dict] = None,
-               partSize: int = 50 << 20):
+def uploadFile(
+    readable,
+    resource,
+    bucketName: str,
+    fileID: str,
+    headerArgs: Optional[dict] = None,
+    partSize: int = 50 << 20,
+):
     """
     Upload a readable object to s3, using multipart uploading if applicable.
     :param readable: a readable stream or a file path to upload to s3
@@ -288,29 +315,32 @@ def uploadFile(readable,

     client = resource.meta.client
     config = TransferConfig(
-        multipart_threshold=partSize,
-        multipart_chunksize=partSize,
-        use_threads=True
+        multipart_threshold=partSize, multipart_chunksize=partSize, use_threads=True
     )
     if isinstance(readable, str):
-        client.upload_file(Filename=readable,
-                           Bucket=bucketName,
-                           Key=compat_bytes(fileID),
-                           ExtraArgs=headerArgs,
-                           Config=config)
+        client.upload_file(
+            Filename=readable,
+            Bucket=bucketName,
+            Key=compat_bytes(fileID),
+            ExtraArgs=headerArgs,
+            Config=config,
+        )
     else:
-        client.upload_fileobj(Fileobj=readable,
-                              Bucket=bucketName,
-                              Key=compat_bytes(fileID),
-                              ExtraArgs=headerArgs,
-                              Config=config)
+        client.upload_fileobj(
+            Fileobj=readable,
+            Bucket=bucketName,
+            Key=compat_bytes(fileID),
+            ExtraArgs=headerArgs,
+            Config=config,
+        )

     # Wait until the object exists before calling head_object
     object_summary = resource.ObjectSummary(bucketName, compat_bytes(fileID))
     object_summary.wait_until_exists(**headerArgs)

     info = client.head_object(Bucket=bucketName, Key=compat_bytes(fileID), **headerArgs)
-    return info.get('VersionId', None)
+    return info.get("VersionId", None)
+

 class ServerSideCopyProhibitedError(RuntimeError):
     """
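
A note on the uploadFile() changes above: boto3 picks between a single PUT and a multipart upload based on the thresholds in boto3.s3.transfer.TransferConfig, so the function only has to build one config and choose upload_file() for paths versus upload_fileobj() for streams. A standalone sketch of the same pattern, with placeholder bucket and key names:

    import boto3
    from boto3.s3.transfer import TransferConfig

    part_size = 50 << 20  # 50 MiB, matching the default partSize in the diff
    config = TransferConfig(
        multipart_threshold=part_size, multipart_chunksize=part_size, use_threads=True
    )
    s3 = boto3.client("s3")
    with open("example.dat", "rb") as stream:  # any binary file-like object works
        s3.upload_fileobj(
            Fileobj=stream, Bucket="example-bucket", Key="example-key", Config=config
        )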
@@ -318,17 +348,20 @@ class ServerSideCopyProhibitedError(RuntimeError):
     insists that you pay to download and upload the data yourself instead.
     """

+
 @retry(errors=[AWSServerErrors])
-def copyKeyMultipart(resource: "S3ServiceResource",
-                     srcBucketName: str,
-                     srcKeyName: str,
-                     srcKeyVersion: str,
-                     dstBucketName: str,
-                     dstKeyName: str,
-                     sseAlgorithm: Optional[str] = None,
-                     sseKey: Optional[str] = None,
-                     copySourceSseAlgorithm: Optional[str] = None,
-                     copySourceSseKey: Optional[str] = None):
+def copyKeyMultipart(
+    resource: "S3ServiceResource",
+    srcBucketName: str,
+    srcKeyName: str,
+    srcKeyVersion: str,
+    dstBucketName: str,
+    dstKeyName: str,
+    sseAlgorithm: Optional[str] = None,
+    sseKey: Optional[str] = None,
+    copySourceSseAlgorithm: Optional[str] = None,
+    copySourceSseKey: Optional[str] = None,
+):
     """
     Copies a key from a source key to a destination key in multiple parts. Note that if the
     destination key exists it will be overwritten implicitly, and if it does not exist a new
@@ -365,9 +398,12 @@ def copyKeyMultipart(resource: "S3ServiceResource",
     """
     dstBucket = resource.Bucket(compat_bytes(dstBucketName))
     dstObject = dstBucket.Object(compat_bytes(dstKeyName))
-    copySource = {'Bucket': compat_bytes(srcBucketName), 'Key': compat_bytes(srcKeyName)}
+    copySource = {
+        "Bucket": compat_bytes(srcBucketName),
+        "Key": compat_bytes(srcKeyName),
+    }
     if srcKeyVersion is not None:
-        copySource['VersionId'] = compat_bytes(srcKeyVersion)
+        copySource["VersionId"] = compat_bytes(srcKeyVersion)

     # Get a client to the source region, which may not be the same as the one
     # this resource is connected to. We should probably talk to it for source
@@ -377,10 +413,8 @@ def copyKeyMultipart(resource: "S3ServiceResource",
     source_client = cast(
         "S3Client",
         session.client(
-            's3',
-            region_name=source_region,
-            config=DIAL_SPECIFIC_REGION_CONFIG
-        )
+            "s3", region_name=source_region, config=DIAL_SPECIFIC_REGION_CONFIG
+        ),
     )

     # The boto3 functions don't allow passing parameters as None to
@@ -389,19 +423,28 @@ def copyKeyMultipart(resource: "S3ServiceResource",
     # required.
     destEncryptionArgs = {}
     if sseKey is not None:
-        destEncryptionArgs.update({'SSECustomerAlgorithm': sseAlgorithm,
-                                   'SSECustomerKey': sseKey})
+        destEncryptionArgs.update(
+            {"SSECustomerAlgorithm": sseAlgorithm, "SSECustomerKey": sseKey}
+        )
     copyEncryptionArgs = {}
     if copySourceSseKey is not None:
-        copyEncryptionArgs.update({'CopySourceSSECustomerAlgorithm': copySourceSseAlgorithm,
-                                   'CopySourceSSECustomerKey': copySourceSseKey})
+        copyEncryptionArgs.update(
+            {
+                "CopySourceSSECustomerAlgorithm": copySourceSseAlgorithm,
+                "CopySourceSSECustomerKey": copySourceSseKey,
+            }
+        )
     copyEncryptionArgs.update(destEncryptionArgs)

     try:
         # Kick off a server-side copy operation
-        dstObject.copy(copySource, SourceClient=source_client, ExtraArgs=copyEncryptionArgs)
+        dstObject.copy(
+            copySource, SourceClient=source_client, ExtraArgs=copyEncryptionArgs
+        )
     except ClientError as e:
-        if get_error_code(e) == 'AccessDenied' and 'cross-region' in get_error_message(e):
+        if get_error_code(e) == "AccessDenied" and "cross-region" in get_error_message(
+            e
+        ):
             # We have this problem: <https://aws.amazon.com/premiumsupport/knowledge-center/s3-troubleshoot-copy-between-buckets/#Cross-Region_request_issues_with_VPC_endpoints_for_Amazon_S3>
             # The Internet and AWS docs say that we just can't do a
             # cross-region CopyObject from inside a VPC with an endpoint. The
@@ -411,13 +454,16 @@ def copyKeyMultipart(resource: "S3ServiceResource",
             # the source region's API servers, they reject it and tell us to
             # talk to the destination region's API servers instead. Which we
             # can't reach.
-            logger.error("Amazon is refusing to perform a server-side copy of %s: %s", copySource, e)
+            logger.error(
+                "Amazon is refusing to perform a server-side copy of %s: %s",
+                copySource,
+                e,
+            )
             raise ServerSideCopyProhibitedError()
         else:
             # Some other ClientError happened
             raise

-
     # Wait until the object exists before calling head_object
     object_summary = resource.ObjectSummary(dstObject.bucket_name, dstObject.key)
     object_summary.wait_until_exists(**destEncryptionArgs)
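
The copyKeyMultipart() hunks above keep the copy server-side: the destination Object.copy() drives the transfer while SourceClient is a client pinned to the source bucket's region (built with DIAL_SPECIFIC_REGION_CONFIG so even us-east-1 is dialed regionally), and a cross-region AccessDenied from inside a VPC endpoint is surfaced as ServerSideCopyProhibitedError so callers can fall back to download-and-reupload. A minimal sketch of that boto3 pattern, with placeholder buckets and region:

    import boto3

    # Client for the *source* bucket's region; the destination resource runs the copy.
    source_client = boto3.client("s3", region_name="us-west-2")
    dst = boto3.resource("s3").Object("dest-bucket", "dest-key")
    dst.copy(
        {"Bucket": "src-bucket", "Key": "src-key"},
        SourceClient=source_client,
    )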
@@ -427,14 +473,15 @@ def copyKeyMultipart(resource: "S3ServiceResource",
     # after, leaving open the possibility that it may have been
     # modified again in the few seconds since the copy finished. There
     # isn't much we can do about it.
-    info = resource.meta.client.head_object(Bucket=dstObject.bucket_name,
-                                            Key=dstObject.key,
-                                            **destEncryptionArgs)
-    return info.get('VersionId', None)
+    info = resource.meta.client.head_object(
+        Bucket=dstObject.bucket_name, Key=dstObject.key, **destEncryptionArgs
+    )
+    return info.get("VersionId", None)


-def _put_attributes_using_post(self, domain_or_name, item_name, attributes,
-                               replace=True, expected_value=None):
+def _put_attributes_using_post(
+    self, domain_or_name, item_name, attributes, replace=True, expected_value=None
+):
     """
     Monkey-patched version of SDBConnection.put_attributes that uses POST instead of GET

@@ -444,13 +491,12 @@ def _put_attributes_using_post(self, domain_or_name, item_name, attributes,
     https://github.com/BD2KGenomics/toil/issues/502
     """
     domain, domain_name = self.get_domain_and_name(domain_or_name)
-    params = {'DomainName': domain_name,
-              'ItemName': item_name}
+    params = {"DomainName": domain_name, "ItemName": item_name}
     self._build_name_value_list(params, attributes, replace)
     if expected_value:
         self._build_expected_value(params, expected_value)
     # The addition of the verb keyword argument is the only difference to put_attributes (Hannes)
-    return self.get_status('PutAttributes', params, verb='POST')
+    return self.get_status("PutAttributes", params, verb="POST")


 def monkeyPatchSdbConnection(sdb):
@@ -459,6 +505,7 @@ def monkeyPatchSdbConnection(sdb):
     """
     sdb.put_attributes = types.MethodType(_put_attributes_using_post, sdb)

+
 def sdb_unavailable(e):
     # Since we're checking against a collection here we absolutely need an
     # integer status code. This is probably a BotoServerError, but other 500s
@@ -467,23 +514,28 @@ def sdb_unavailable(e):


 def no_such_sdb_domain(e):
-    return (isinstance(e, ClientError)
-            and get_error_code(e)
-            and get_error_code(e).endswith('NoSuchDomain'))
+    return (
+        isinstance(e, ClientError)
+        and get_error_code(e)
+        and get_error_code(e).endswith("NoSuchDomain")
+    )


 def retryable_ssl_error(e):
     # https://github.com/BD2KGenomics/toil/issues/978
-    return isinstance(e, SSLError) and e.reason ==
+    return isinstance(e, SSLError) and e.reason == "DECRYPTION_FAILED_OR_BAD_RECORD_MAC"


 def retryable_sdb_errors(e):
-    return (sdb_unavailable(e)
-            or no_such_sdb_domain(e)
-            or connection_error(e)
-            or retryable_ssl_error(e))
+    return (
+        sdb_unavailable(e)
+        or no_such_sdb_domain(e)
+        or connection_error(e)
+        or retryable_ssl_error(e)
+    )


-def retry_sdb(delays=DEFAULT_DELAYS, timeout=DEFAULT_TIMEOUT, predicate=retryable_sdb_errors):
+def retry_sdb(
+    delays=DEFAULT_DELAYS, timeout=DEFAULT_TIMEOUT, predicate=retryable_sdb_errors
+):
     return old_retry(delays=delays, timeout=timeout, predicate=predicate)
-
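
The retry helpers at the end compose small predicates (sdb_unavailable, no_such_sdb_domain, connection_error, retryable_ssl_error) into one policy that retry_sdb() hands to old_retry() along with a delay schedule and timeout. Reduced to a hedged sketch (this is not Toil's actual old_retry(); the delay schedule here is illustrative):

    import time

    def call_with_retries(fn, predicate, delays=(0, 1, 1, 4, 16, 64)):
        # Retry fn() only for errors the predicate classifies as transient.
        for i, delay in enumerate(delays):
            try:
                return fn()
            except Exception as e:
                if i == len(delays) - 1 or not predicate(e):
                    raise
                time.sleep(delay)

    # Usage mirroring retryable_sdb_errors(): retry only matching errors.
    result = call_with_retries(lambda: 42, predicate=lambda e: isinstance(e, OSError))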