toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/jobStores/aws/utils.py
CHANGED
@@ -17,25 +17,28 @@ import logging
 import os
 import types
 from ssl import SSLError
-from typing import Optional, cast
+from typing import TYPE_CHECKING, Optional, cast
 
 from boto3.s3.transfer import TransferConfig
-from boto.exception import SDBResponseError
 from botocore.client import Config
 from botocore.exceptions import ClientError
-from mypy_boto3_s3 import S3Client, S3ServiceResource
 
-from toil.lib.aws import session
-from toil.lib.aws.utils import
+from toil.lib.aws import AWSServerErrors, session
+from toil.lib.aws.utils import connection_error, get_bucket_region
 from toil.lib.compatibility import compat_bytes
-from toil.lib.retry import (
-    DEFAULT_DELAYS,
-    DEFAULT_TIMEOUT,
-    get_error_code,
-    get_error_message,
-    get_error_status,
-    old_retry,
-    retry)
+from toil.lib.retry import (
+    DEFAULT_DELAYS,
+    DEFAULT_TIMEOUT,
+    get_error_code,
+    get_error_message,
+    get_error_status,
+    old_retry,
+    retry,
+)
+
+if TYPE_CHECKING:
+    from mypy_boto3_s3 import S3ServiceResource
+    from mypy_boto3_sdb.type_defs import AttributeTypeDef, ItemTypeDef
 
 logger = logging.getLogger(__name__)
 
@@ -46,10 +49,10 @@ logger = logging.getLogger(__name__)
 # also need to set a special flag to make sure we don't use the generic
 # s3.amazonaws.com for us-east-1, or else we might not actually end up talking
 # to us-east-1 when a bucket is there.
-DIAL_SPECIFIC_REGION_CONFIG = Config(
-    s3={'addressing_style': 'path',
-        'us_east_1_regional_endpoint': 'regional'}
-)
+DIAL_SPECIFIC_REGION_CONFIG = Config(
+    s3={"addressing_style": "path", "us_east_1_regional_endpoint": "regional"}
+)
+
 
 class SDBHelper:
     """
@@ -89,6 +92,7 @@ class SDBHelper:
     True
 
     """
+
     # The SDB documentation is not clear as to whether the attribute value size limit of 1024
     # applies to the base64-encoded value or the raw value. It suggests that responses are
     # automatically encoded from which I conclude that the limit should apply to the raw,
@@ -101,8 +105,8 @@ class SDBHelper:
     maxValueSize = 1024
    maxRawValueSize = maxValueSize * 3 // 4
     # Just make sure we don't have a problem with padding or integer truncation:
-    assert len(base64.b64encode(b' ' * maxRawValueSize)) == 1024
-    assert len(base64.b64encode(b' ' * (1 + maxRawValueSize))) > 1024
+    assert len(base64.b64encode(b" " * maxRawValueSize)) == 1024
+    assert len(base64.b64encode(b" " * (1 + maxRawValueSize))) > 1024
 
     @classmethod
     def _reservedAttributes(cls):
@@ -118,35 +122,79 @@ class SDBHelper:
 
     @classmethod
     def maxBinarySize(cls, extraReservedChunks=0):
-        return (cls._maxChunks() - extraReservedChunks) * cls.maxRawValueSize - 1  # for the 'C' or 'U' prefix
+        return (
+            cls._maxChunks() - extraReservedChunks
+        ) * cls.maxRawValueSize - 1  # for the 'C' or 'U' prefix
 
     @classmethod
     def _maxEncodedSize(cls):
         return cls._maxChunks() * cls.maxValueSize
 
     @classmethod
-    def binaryToAttributes(cls, binary):
+    def binaryToAttributes(cls, binary) -> dict[str, str]:
         """
         Turn a bytestring, or None, into SimpleDB attributes.
         """
-        if binary is None: return {'numChunks': 0}
+        if binary is None:
+            return {"numChunks": "0"}
         assert isinstance(binary, bytes)
         assert len(binary) <= cls.maxBinarySize()
         # The use of compression is just an optimization. We can't include it in the maxValueSize
         # computation because the compression ratio depends on the input.
         compressed = bz2.compress(binary)
         if len(compressed) > len(binary):
-            compressed = b'U' + binary
+            compressed = b"U" + binary
         else:
-            compressed = b'C' + compressed
+            compressed = b"C" + compressed
         encoded = base64.b64encode(compressed)
         assert len(encoded) <= cls._maxEncodedSize()
         n = cls.maxValueSize
-        chunks = (encoded[i:i + n] for i in range(0, len(encoded), n))
-        attributes = {cls._chunkName(i): chunk.decode('utf-8') for i, chunk in enumerate(chunks)}
-        attributes.update({'numChunks': str(len(attributes))})
+        chunks = (encoded[i : i + n] for i in range(0, len(encoded), n))
+        attributes = {
+            cls._chunkName(i): chunk.decode("utf-8") for i, chunk in enumerate(chunks)
+        }
+        attributes.update({"numChunks": str(len(attributes))})
         return attributes
 
+    @classmethod
+    def attributeDictToList(
+        cls, attributes: dict[str, str]
+    ) -> list["AttributeTypeDef"]:
+        """
+        Convert the attribute dict (ex: from binaryToAttributes) into a list of attribute typed dicts
+        to be compatible with boto3 argument syntax
+        :param attributes: Dict[str, str], attribute in object form
+        :return: list of attributes in typed dict form
+        """
+        return [{"Name": name, "Value": value} for name, value in attributes.items()]
+
+    @classmethod
+    def attributeListToDict(
+        cls, attributes: list["AttributeTypeDef"]
+    ) -> dict[str, str]:
+        """
+        Convert the attribute boto3 representation of list of attribute typed dicts
+        back to a dictionary with name, value pairs
+        :param attribute: attribute in typed dict form
+        :return: Dict[str, str], attribute in dict form
+        """
+        return {attribute["Name"]: attribute["Value"] for attribute in attributes}
+
+    @classmethod
+    def get_attributes_from_item(
+        cls, item: "ItemTypeDef", keys: list[str]
+    ) -> list[Optional[str]]:
+        return_values: list[Optional[str]] = [None for _ in keys]
+        mapped_indices: dict[str, int] = {
+            name: index for index, name in enumerate(keys)
+        }
+        for attribute in item["Attributes"]:
+            name = attribute["Name"]
+            value = attribute["Value"]
+            if name in mapped_indices:
+                return_values[mapped_indices[name]] = value
+        return return_values
+
     @classmethod
     def _chunkName(cls, i):
         return str(i).zfill(3)
@@ -162,26 +210,35 @@
         Assuming that binaryToAttributes() is used with SDB's PutAttributes, the return value of
         this method could be used to detect the presence/absence of an item in SDB.
         """
-        return 'numChunks'
+        return "numChunks"
 
     @classmethod
-    def attributesToBinary(cls, attributes):
+    def attributesToBinary(
+        cls, attributes: list["AttributeTypeDef"]
+    ) -> tuple[bytes, int]:
         """
         :rtype: (str|None,int)
         :return: the binary data and the number of chunks it was composed from
         """
-        chunks = [(int(k), v) for k, v in attributes.items() if cls._isValidChunkName(k)]
+        chunks = []
+        numChunks: int = 0
+        for attribute in attributes:
+            name = attribute["Name"]
+            value = attribute["Value"]
+            if cls._isValidChunkName(name):
+                chunks.append((int(name), value))
+            if name == "numChunks":
+                numChunks = int(value)
         chunks.sort()
-        numChunks = int(attributes['numChunks'])
         if numChunks:
-            serializedJob = b''.join(v.encode() for k, v in chunks)
+            serializedJob = b"".join(v.encode() for k, v in chunks)
             compressed = base64.b64decode(serializedJob)
-            if compressed[0] == b'C'[0]:
+            if compressed[0] == b"C"[0]:
                 binary = bz2.decompress(compressed[1:])
-            elif compressed[0] == b'U'[0]:
+            elif compressed[0] == b"U"[0]:
                 binary = compressed[1:]
             else:
-                raise RuntimeError(f'Unexpected prefix {compressed[0]}')
+                raise RuntimeError(f"Unexpected prefix {compressed[0]}")
         else:
             binary = None
         return binary, numChunks
@@ -192,16 +249,15 @@ def fileSizeAndTime(localFilePath):
     return file_stat.st_size, file_stat.st_mtime
 
 
-@retry(errors=[
-
-
-
-
-
-
-
-
-                   partSize: int = 50 << 20):
+@retry(errors=[AWSServerErrors])
+def uploadFromPath(
+    localFilePath: str,
+    resource,
+    bucketName: str,
+    fileID: str,
+    headerArgs: Optional[dict] = None,
+    partSize: int = 50 << 20,
+):
     """
     Uploads a file to s3, using multipart uploading if applicable
 
@@ -220,9 +276,13 @@ def uploadFromPath(localFilePath: str,
     client = resource.meta.client
     file_size, file_time = fileSizeAndTime(localFilePath)
 
-    version = uploadFile(localFilePath, resource, bucketName, fileID, headerArgs, partSize)
-    info = client.head_object(Bucket=bucketName, Key=compat_bytes(fileID), VersionId=version, **headerArgs)
-    size = info.get('ContentLength')
+    version = uploadFile(
+        localFilePath, resource, bucketName, fileID, headerArgs, partSize
+    )
+    info = client.head_object(
+        Bucket=bucketName, Key=compat_bytes(fileID), VersionId=version, **headerArgs
+    )
+    size = info.get("ContentLength")
 
     assert size == file_size
 
@@ -231,16 +291,15 @@ def uploadFromPath(localFilePath: str,
     return version
 
 
-@retry(errors=[
-
-
-
-
-
-
-
-
-               partSize: int = 50 << 20):
+@retry(errors=[AWSServerErrors])
+def uploadFile(
+    readable,
+    resource,
+    bucketName: str,
+    fileID: str,
+    headerArgs: Optional[dict] = None,
+    partSize: int = 50 << 20,
+):
     """
     Upload a readable object to s3, using multipart uploading if applicable.
     :param readable: a readable stream or a file path to upload to s3
@@ -256,29 +315,32 @@ def uploadFile(readable,
 
     client = resource.meta.client
     config = TransferConfig(
-        multipart_threshold=partSize,
-        multipart_chunksize=partSize,
-        use_threads=True
+        multipart_threshold=partSize, multipart_chunksize=partSize, use_threads=True
    )
     if isinstance(readable, str):
-        client.upload_file(Filename=readable,
-                           Bucket=bucketName,
-                           Key=compat_bytes(fileID),
-                           ExtraArgs=headerArgs,
-                           Config=config)
+        client.upload_file(
+            Filename=readable,
+            Bucket=bucketName,
+            Key=compat_bytes(fileID),
+            ExtraArgs=headerArgs,
+            Config=config,
+        )
     else:
-        client.upload_fileobj(Fileobj=readable,
-                              Bucket=bucketName,
-                              Key=compat_bytes(fileID),
-                              ExtraArgs=headerArgs,
-                              Config=config)
+        client.upload_fileobj(
+            Fileobj=readable,
+            Bucket=bucketName,
+            Key=compat_bytes(fileID),
+            ExtraArgs=headerArgs,
+            Config=config,
+        )
 
     # Wait until the object exists before calling head_object
     object_summary = resource.ObjectSummary(bucketName, compat_bytes(fileID))
     object_summary.wait_until_exists(**headerArgs)
 
     info = client.head_object(Bucket=bucketName, Key=compat_bytes(fileID), **headerArgs)
-    return info.get('VersionId', None)
+    return info.get("VersionId", None)
+
 
 class ServerSideCopyProhibitedError(RuntimeError):
     """
@@ -286,20 +348,20 @@ class ServerSideCopyProhibitedError(RuntimeError):
     insists that you pay to download and upload the data yourself instead.
     """
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+@retry(errors=[AWSServerErrors])
+def copyKeyMultipart(
+    resource: "S3ServiceResource",
+    srcBucketName: str,
+    srcKeyName: str,
+    srcKeyVersion: str,
+    dstBucketName: str,
+    dstKeyName: str,
+    sseAlgorithm: Optional[str] = None,
+    sseKey: Optional[str] = None,
+    copySourceSseAlgorithm: Optional[str] = None,
+    copySourceSseKey: Optional[str] = None,
+):
     """
     Copies a key from a source key to a destination key in multiple parts. Note that if the
     destination key exists it will be overwritten implicitly, and if it does not exist a new
@@ -336,9 +398,12 @@ def copyKeyMultipart(resource: S3ServiceResource,
     """
     dstBucket = resource.Bucket(compat_bytes(dstBucketName))
     dstObject = dstBucket.Object(compat_bytes(dstKeyName))
-    copySource = {'Bucket': compat_bytes(srcBucketName), 'Key': compat_bytes(srcKeyName)}
+    copySource = {
+        "Bucket": compat_bytes(srcBucketName),
+        "Key": compat_bytes(srcKeyName),
+    }
     if srcKeyVersion is not None:
-        copySource['VersionId'] = compat_bytes(srcKeyVersion)
+        copySource["VersionId"] = compat_bytes(srcKeyVersion)
 
     # Get a client to the source region, which may not be the same as the one
     # this resource is connected to. We should probably talk to it for source
@@ -346,12 +411,10 @@ def copyKeyMultipart(resource: S3ServiceResource,
     # not wherever the bucket virtual hostnames go.
     source_region = get_bucket_region(srcBucketName)
     source_client = cast(
-        S3Client,
+        "S3Client",
         session.client(
-            's3',
-            region_name=source_region,
-            config=DIAL_SPECIFIC_REGION_CONFIG
-        )
+            "s3", region_name=source_region, config=DIAL_SPECIFIC_REGION_CONFIG
+        ),
     )
 
     # The boto3 functions don't allow passing parameters as None to
@@ -360,19 +423,28 @@ def copyKeyMultipart(resource: S3ServiceResource,
     # required.
     destEncryptionArgs = {}
     if sseKey is not None:
-        destEncryptionArgs.update({'SSECustomerAlgorithm': sseAlgorithm,
-                                   'SSECustomerKey': sseKey})
+        destEncryptionArgs.update(
+            {"SSECustomerAlgorithm": sseAlgorithm, "SSECustomerKey": sseKey}
+        )
     copyEncryptionArgs = {}
     if copySourceSseKey is not None:
-        copyEncryptionArgs.update({'CopySourceSSECustomerAlgorithm': copySourceSseAlgorithm,
-                                   'CopySourceSSECustomerKey': copySourceSseKey})
+        copyEncryptionArgs.update(
+            {
+                "CopySourceSSECustomerAlgorithm": copySourceSseAlgorithm,
+                "CopySourceSSECustomerKey": copySourceSseKey,
+            }
+        )
     copyEncryptionArgs.update(destEncryptionArgs)
 
     try:
         # Kick off a server-side copy operation
-        dstObject.copy(copySource, SourceClient=source_client, ExtraArgs=copyEncryptionArgs)
+        dstObject.copy(
+            copySource, SourceClient=source_client, ExtraArgs=copyEncryptionArgs
+        )
     except ClientError as e:
-        if get_error_code(e) == 'AccessDenied' and 'cross-region' in get_error_message(e):
+        if get_error_code(e) == "AccessDenied" and "cross-region" in get_error_message(
+            e
+        ):
            # We have this problem: <https://aws.amazon.com/premiumsupport/knowledge-center/s3-troubleshoot-copy-between-buckets/#Cross-Region_request_issues_with_VPC_endpoints_for_Amazon_S3>
             # The Internet and AWS docs say that we just can't do a
             # cross-region CopyObject from inside a VPC with an endpoint. The
@@ -382,13 +454,16 @@ def copyKeyMultipart(resource: S3ServiceResource,
             # the source region's API servers, they reject it and tell us to
             # talk to the destination region's API servers instead. Which we
             # can't reach.
-            logger.error('Amazon is refusing to perform a server-side copy of %s: %s', copySource, e)
+            logger.error(
+                "Amazon is refusing to perform a server-side copy of %s: %s",
+                copySource,
+                e,
+            )
             raise ServerSideCopyProhibitedError()
         else:
             # Some other ClientError happened
             raise
 
-
     # Wait until the object exists before calling head_object
     object_summary = resource.ObjectSummary(dstObject.bucket_name, dstObject.key)
     object_summary.wait_until_exists(**destEncryptionArgs)
@@ -398,14 +473,15 @@ def copyKeyMultipart(resource: S3ServiceResource,
     # after, leaving open the possibility that it may have been
     # modified again in the few seconds since the copy finished. There
     # isn't much we can do about it.
-    info = resource.meta.client.head_object(Bucket=dstObject.bucket_name,
-                                            Key=dstObject.key,
-                                            **destEncryptionArgs)
-    return info.get('VersionId', None)
+    info = resource.meta.client.head_object(
+        Bucket=dstObject.bucket_name, Key=dstObject.key, **destEncryptionArgs
+    )
+    return info.get("VersionId", None)
 
 
-def _put_attributes_using_post(self, domain_or_name, item_name, attributes,
-                               replace=True, expected_value=None):
+def _put_attributes_using_post(
+    self, domain_or_name, item_name, attributes, replace=True, expected_value=None
+):
     """
     Monkey-patched version of SDBConnection.put_attributes that uses POST instead of GET
 
@@ -415,13 +491,12 @@ def _put_attributes_using_post(self, domain_or_name, item_name, attributes,
     https://github.com/BD2KGenomics/toil/issues/502
     """
     domain, domain_name = self.get_domain_and_name(domain_or_name)
-    params = {'DomainName': domain_name,
-              'ItemName': item_name}
+    params = {"DomainName": domain_name, "ItemName": item_name}
     self._build_name_value_list(params, attributes, replace)
     if expected_value:
         self._build_expected_value(params, expected_value)
     # The addition of the verb keyword argument is the only difference to put_attributes (Hannes)
-    return self.get_status('PutAttributes', params, verb='POST')
+    return self.get_status("PutAttributes", params, verb="POST")
 
 
 def monkeyPatchSdbConnection(sdb):
@@ -430,6 +505,7 @@ def monkeyPatchSdbConnection(sdb):
     """
     sdb.put_attributes = types.MethodType(_put_attributes_using_post, sdb)
 
+
 def sdb_unavailable(e):
     # Since we're checking against a collection here we absolutely need an
     # integer status code. This is probably a BotoServerError, but other 500s
@@ -438,23 +514,28 @@ def sdb_unavailable(e):
 
 
 def no_such_sdb_domain(e):
-    return (
-
-
+    return (
+        isinstance(e, ClientError)
+        and get_error_code(e)
+        and get_error_code(e).endswith("NoSuchDomain")
+    )
 
 
 def retryable_ssl_error(e):
     # https://github.com/BD2KGenomics/toil/issues/978
-    return isinstance(e, SSLError) and e.reason == 'DECRYPTION_FAILED_OR_BAD_RECORD_MAC'
+    return isinstance(e, SSLError) and e.reason == "DECRYPTION_FAILED_OR_BAD_RECORD_MAC"
 
 
 def retryable_sdb_errors(e):
-    return (sdb_unavailable(e)
-            or no_such_sdb_domain(e)
-            or connection_error(e)
-            or retryable_ssl_error(e))
+    return (
+        sdb_unavailable(e)
+        or no_such_sdb_domain(e)
+        or connection_error(e)
+        or retryable_ssl_error(e)
+    )
 
 
-def retry_sdb(delays=DEFAULT_DELAYS, timeout=DEFAULT_TIMEOUT, predicate=retryable_sdb_errors):
+def retry_sdb(
+    delays=DEFAULT_DELAYS, timeout=DEFAULT_TIMEOUT, predicate=retryable_sdb_errors
+):
     return old_retry(delays=delays, timeout=timeout, predicate=predicate)
-
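The SDBHelper changes above come from the boto to boto3 migration: SimpleDB attributes move from a plain name-to-value mapping to boto3's list of {"Name": ..., "Value": ...} typed dicts, which is what the new attributeDictToList and attributeListToDict classmethods convert between. A minimal standalone sketch of that round trip (the free functions below are illustrative stand-ins, not part of the package):

    def attribute_dict_to_list(attributes: dict[str, str]) -> list[dict[str, str]]:
        # Dict form -> the [{"Name": ..., "Value": ...}] list form boto3's SimpleDB API uses.
        return [{"Name": name, "Value": value} for name, value in attributes.items()]

    def attribute_list_to_dict(attributes: list[dict[str, str]]) -> dict[str, str]:
        # Inverse: boto3's typed-dict list back to a plain mapping.
        return {attribute["Name"]: attribute["Value"] for attribute in attributes}

    # The round trip preserves the original mapping.
    assert attribute_list_to_dict(attribute_dict_to_list({"numChunks": "0"})) == {"numChunks": "0"}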
toil/jobStores/conftest.py
CHANGED