toil 9.0.0__py3-none-any.whl → 9.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/batchSystems/abstractBatchSystem.py +13 -5
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
- toil/batchSystems/kubernetes.py +13 -2
- toil/batchSystems/mesos/batchSystem.py +33 -2
- toil/batchSystems/slurm.py +191 -16
- toil/cwl/cwltoil.py +17 -82
- toil/fileStores/__init__.py +1 -1
- toil/fileStores/abstractFileStore.py +5 -2
- toil/fileStores/cachingFileStore.py +1 -1
- toil/job.py +30 -14
- toil/jobStores/abstractJobStore.py +24 -19
- toil/jobStores/aws/jobStore.py +862 -1963
- toil/jobStores/aws/utils.py +24 -270
- toil/jobStores/googleJobStore.py +25 -9
- toil/jobStores/utils.py +0 -327
- toil/leader.py +27 -22
- toil/lib/aws/config.py +22 -0
- toil/lib/aws/s3.py +477 -9
- toil/lib/aws/utils.py +22 -33
- toil/lib/checksum.py +88 -0
- toil/lib/conversions.py +33 -31
- toil/lib/directory.py +217 -0
- toil/lib/ec2.py +97 -29
- toil/lib/exceptions.py +2 -1
- toil/lib/expando.py +2 -2
- toil/lib/generatedEC2Lists.py +73 -16
- toil/lib/io.py +33 -2
- toil/lib/memoize.py +21 -7
- toil/lib/pipes.py +385 -0
- toil/lib/retry.py +1 -1
- toil/lib/threading.py +1 -1
- toil/lib/web.py +4 -5
- toil/provisioners/__init__.py +5 -2
- toil/provisioners/aws/__init__.py +43 -36
- toil/provisioners/aws/awsProvisioner.py +22 -13
- toil/provisioners/node.py +60 -12
- toil/resource.py +3 -13
- toil/test/__init__.py +14 -16
- toil/test/batchSystems/test_slurm.py +103 -14
- toil/test/cwl/staging_cat.cwl +27 -0
- toil/test/cwl/staging_make_file.cwl +25 -0
- toil/test/cwl/staging_workflow.cwl +43 -0
- toil/test/cwl/zero_default.cwl +61 -0
- toil/test/docs/scripts/tutorial_staging.py +17 -8
- toil/test/jobStores/jobStoreTest.py +23 -133
- toil/test/lib/aws/test_iam.py +7 -7
- toil/test/lib/aws/test_s3.py +30 -33
- toil/test/lib/aws/test_utils.py +9 -9
- toil/test/provisioners/aws/awsProvisionerTest.py +59 -6
- toil/test/src/autoDeploymentTest.py +2 -3
- toil/test/src/fileStoreTest.py +89 -87
- toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
- toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
- toil/test/utils/toilKillTest.py +35 -28
- toil/test/wdl/md5sum/md5sum.json +1 -1
- toil/test/wdl/wdltoil_test.py +98 -38
- toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
- toil/utils/toilDebugFile.py +6 -3
- toil/utils/toilStats.py +17 -2
- toil/version.py +6 -6
- toil/wdl/wdltoil.py +1032 -546
- toil/worker.py +5 -2
- {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/METADATA +12 -12
- {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/RECORD +68 -61
- toil/lib/iterables.py +0 -112
- toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
- {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/WHEEL +0 -0
- {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
- {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
- {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
toil/jobStores/aws/utils.py
CHANGED
@@ -17,7 +17,7 @@ import logging
 import os
 import types
 from ssl import SSLError
-from typing import TYPE_CHECKING, Optional, cast
+from typing import TYPE_CHECKING, IO, Optional, cast, Any
 
 from boto3.s3.transfer import TransferConfig
 from botocore.client import Config
@@ -37,8 +37,8 @@ from toil.lib.retry import (
 )
 
 if TYPE_CHECKING:
-    from mypy_boto3_s3 import S3ServiceResource
-    from
+    from mypy_boto3_s3 import S3Client, S3ServiceResource
+    from mypy_boto3_s3.type_defs import CopySourceTypeDef
 
 logger = logging.getLogger(__name__)
 
@@ -54,210 +54,21 @@ DIAL_SPECIFIC_REGION_CONFIG = Config(
 )
 
 
-
-    """
-    A mixin with methods for storing limited amounts of binary data in an SDB item
-
-    >>> import os
-    >>> H=SDBHelper
-    >>> H.presenceIndicator() # doctest: +ALLOW_UNICODE
-    u'numChunks'
-    >>> H.binaryToAttributes(None)['numChunks']
-    0
-    >>> H.attributesToBinary({u'numChunks': 0})
-    (None, 0)
-    >>> H.binaryToAttributes(b'') # doctest: +ALLOW_UNICODE +ALLOW_BYTES
-    {u'000': b'VQ==', u'numChunks': 1}
-    >>> H.attributesToBinary({u'numChunks': 1, u'000': b'VQ=='}) # doctest: +ALLOW_BYTES
-    (b'', 1)
-
-    Good pseudo-random data is very likely smaller than its bzip2ed form. Subtract 1 for the type
-    character, i.e 'C' or 'U', with which the string is prefixed. We should get one full chunk:
-
-    >>> s = os.urandom(H.maxRawValueSize-1)
-    >>> d = H.binaryToAttributes(s)
-    >>> len(d), len(d['000'])
-    (2, 1024)
-    >>> H.attributesToBinary(d) == (s, 1)
-    True
-
-    One byte more and we should overflow four bytes into the second chunk, two bytes for
-    base64-encoding the additional character and two bytes for base64-padding to the next quartet.
-
-    >>> s += s[0:1]
-    >>> d = H.binaryToAttributes(s)
-    >>> len(d), len(d['000']), len(d['001'])
-    (3, 1024, 4)
-    >>> H.attributesToBinary(d) == (s, 2)
-    True
-
-    """
-
-    # The SDB documentation is not clear as to whether the attribute value size limit of 1024
-    # applies to the base64-encoded value or the raw value. It suggests that responses are
-    # automatically encoded from which I conclude that the limit should apply to the raw,
-    # unencoded value. However, there seems to be a discrepancy between how Boto computes the
-    # request signature if a value contains a binary data, and how SDB does it. This causes
-    # requests to fail signature verification, resulting in a 403. We therefore have to
-    # base64-encode values ourselves even if that means we loose a quarter of capacity.
-
-    maxAttributesPerItem = 256
-    maxValueSize = 1024
-    maxRawValueSize = maxValueSize * 3 // 4
-    # Just make sure we don't have a problem with padding or integer truncation:
-    assert len(base64.b64encode(b" " * maxRawValueSize)) == 1024
-    assert len(base64.b64encode(b" " * (1 + maxRawValueSize))) > 1024
-
-    @classmethod
-    def _reservedAttributes(cls):
-        """
-        Override in subclass to reserve a certain number of attributes that can't be used for
-        chunks.
-        """
-        return 1
-
-    @classmethod
-    def _maxChunks(cls):
-        return cls.maxAttributesPerItem - cls._reservedAttributes()
-
-    @classmethod
-    def maxBinarySize(cls, extraReservedChunks=0):
-        return (
-            cls._maxChunks() - extraReservedChunks
-        ) * cls.maxRawValueSize - 1 # for the 'C' or 'U' prefix
-
-    @classmethod
-    def _maxEncodedSize(cls):
-        return cls._maxChunks() * cls.maxValueSize
-
-    @classmethod
-    def binaryToAttributes(cls, binary) -> dict[str, str]:
-        """
-        Turn a bytestring, or None, into SimpleDB attributes.
-        """
-        if binary is None:
-            return {"numChunks": "0"}
-        assert isinstance(binary, bytes)
-        assert len(binary) <= cls.maxBinarySize()
-        # The use of compression is just an optimization. We can't include it in the maxValueSize
-        # computation because the compression ratio depends on the input.
-        compressed = bz2.compress(binary)
-        if len(compressed) > len(binary):
-            compressed = b"U" + binary
-        else:
-            compressed = b"C" + compressed
-        encoded = base64.b64encode(compressed)
-        assert len(encoded) <= cls._maxEncodedSize()
-        n = cls.maxValueSize
-        chunks = (encoded[i : i + n] for i in range(0, len(encoded), n))
-        attributes = {
-            cls._chunkName(i): chunk.decode("utf-8") for i, chunk in enumerate(chunks)
-        }
-        attributes.update({"numChunks": str(len(attributes))})
-        return attributes
-
-    @classmethod
-    def attributeDictToList(
-        cls, attributes: dict[str, str]
-    ) -> list["AttributeTypeDef"]:
-        """
-        Convert the attribute dict (ex: from binaryToAttributes) into a list of attribute typed dicts
-        to be compatible with boto3 argument syntax
-        :param attributes: Dict[str, str], attribute in object form
-        :return: list of attributes in typed dict form
-        """
-        return [{"Name": name, "Value": value} for name, value in attributes.items()]
-
-    @classmethod
-    def attributeListToDict(
-        cls, attributes: list["AttributeTypeDef"]
-    ) -> dict[str, str]:
-        """
-        Convert the attribute boto3 representation of list of attribute typed dicts
-        back to a dictionary with name, value pairs
-        :param attribute: attribute in typed dict form
-        :return: Dict[str, str], attribute in dict form
-        """
-        return {attribute["Name"]: attribute["Value"] for attribute in attributes}
-
-    @classmethod
-    def get_attributes_from_item(
-        cls, item: "ItemTypeDef", keys: list[str]
-    ) -> list[Optional[str]]:
-        return_values: list[Optional[str]] = [None for _ in keys]
-        mapped_indices: dict[str, int] = {
-            name: index for index, name in enumerate(keys)
-        }
-        for attribute in item["Attributes"]:
-            name = attribute["Name"]
-            value = attribute["Value"]
-            if name in mapped_indices:
-                return_values[mapped_indices[name]] = value
-        return return_values
-
-    @classmethod
-    def _chunkName(cls, i):
-        return str(i).zfill(3)
-
-    @classmethod
-    def _isValidChunkName(cls, s):
-        return len(s) == 3 and s.isdigit()
-
-    @classmethod
-    def presenceIndicator(cls):
-        """
-        The key that is guaranteed to be present in the return value of binaryToAttributes().
-        Assuming that binaryToAttributes() is used with SDB's PutAttributes, the return value of
-        this method could be used to detect the presence/absence of an item in SDB.
-        """
-        return "numChunks"
-
-    @classmethod
-    def attributesToBinary(
-        cls, attributes: list["AttributeTypeDef"]
-    ) -> tuple[bytes, int]:
-        """
-        :rtype: (str|None,int)
-        :return: the binary data and the number of chunks it was composed from
-        """
-        chunks = []
-        numChunks: int = 0
-        for attribute in attributes:
-            name = attribute["Name"]
-            value = attribute["Value"]
-            if cls._isValidChunkName(name):
-                chunks.append((int(name), value))
-            if name == "numChunks":
-                numChunks = int(value)
-        chunks.sort()
-        if numChunks:
-            serializedJob = b"".join(v.encode() for k, v in chunks)
-            compressed = base64.b64decode(serializedJob)
-            if compressed[0] == b"C"[0]:
-                binary = bz2.decompress(compressed[1:])
-            elif compressed[0] == b"U"[0]:
-                binary = compressed[1:]
-            else:
-                raise RuntimeError(f"Unexpected prefix {compressed[0]}")
-        else:
-            binary = None
-        return binary, numChunks
-
-
-def fileSizeAndTime(localFilePath):
+def fileSizeAndTime(localFilePath: str) -> tuple[int, float]:
     file_stat = os.stat(localFilePath)
     return file_stat.st_size, file_stat.st_mtime
 
 
+# TODO: This function is unused.
 @retry(errors=[AWSServerErrors])
 def uploadFromPath(
     localFilePath: str,
-    resource,
+    resource: "S3ServiceResource",
     bucketName: str,
     fileID: str,
-    headerArgs: Optional[dict] = None,
+    headerArgs: Optional[dict[str, Any]] = None,
     partSize: int = 50 << 20,
-):
+) -> Optional[str]:
     """
     Uploads a file to s3, using multipart uploading if applicable
 
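Note on the removal above: the deleted SDBHelper mixin packed binary job data into SimpleDB items by bz2-compressing when that helps, prefixing the payload with 'C' (compressed) or 'U' (uncompressed), base64-encoding it, and splitting the result into numbered 1024-character attributes plus a numChunks counter. Below is a condensed, self-contained sketch of that round trip; the function names are illustrative and not part of Toil.

    import base64
    import bz2

    MAX_VALUE_SIZE = 1024  # SimpleDB attribute value limit assumed by the removed helper

    def to_chunks(binary: bytes) -> dict[str, str]:
        # Compress only when it actually shrinks the payload, and record which form was stored.
        compressed = bz2.compress(binary)
        payload = b"C" + compressed if len(compressed) <= len(binary) else b"U" + binary
        encoded = base64.b64encode(payload)
        chunks = {
            str(i // MAX_VALUE_SIZE).zfill(3): encoded[i : i + MAX_VALUE_SIZE].decode()
            for i in range(0, len(encoded), MAX_VALUE_SIZE)
        }
        chunks["numChunks"] = str(len(chunks))
        return chunks

    def from_chunks(attributes: dict[str, str]) -> bytes:
        # Reassemble the zero-padded chunk names in order, then undo the encoding.
        ordered = [attributes[k] for k in sorted(k for k in attributes if k.isdigit())]
        payload = base64.b64decode("".join(ordered))
        return bz2.decompress(payload[1:]) if payload[:1] == b"C" else payload[1:]

    assert from_chunks(to_chunks(b"hello world")) == b"hello world"

Dropping this helper fits the large rewrite of toil/jobStores/aws/jobStore.py shown in the file list at the top.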
@@ -279,8 +90,12 @@ def uploadFromPath(
     version = uploadFile(
         localFilePath, resource, bucketName, fileID, headerArgs, partSize
     )
+
+    # Only pass along version if we got one.
+    version_args: dict[str, Any] = {"VersionId": version} if version is not None else {}
+
     info = client.head_object(
-        Bucket=bucketName, Key=compat_bytes(fileID),
+        Bucket=bucketName, Key=compat_bytes(fileID), **version_args, **headerArgs
     )
     size = info.get("ContentLength")
 
@@ -293,13 +108,13 @@ def uploadFromPath(
 
 @retry(errors=[AWSServerErrors])
 def uploadFile(
-    readable,
-    resource,
+    readable: IO[bytes],
+    resource: "S3ServiceResource",
     bucketName: str,
     fileID: str,
-    headerArgs: Optional[dict] = None,
+    headerArgs: Optional[dict[str, Any]] = None,
     partSize: int = 50 << 20,
-):
+) -> Optional[str]:
     """
     Upload a readable object to s3, using multipart uploading if applicable.
     :param readable: a readable stream or a file path to upload to s3
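The uploadFromPath change above now only includes VersionId in the follow-up head_object call when the upload actually returned a version; unversioned buckets report None, which boto3 rejects as a parameter value. A minimal sketch of that pattern against a plain boto3 S3 client follows; the helper, bucket, and key names are placeholders, not Toil code.

    from typing import Any, Optional

    def object_size(client: Any, bucket: str, key: str, version: Optional[str]) -> int:
        # Only pass VersionId when a version was actually reported back by the upload.
        version_args: dict[str, Any] = {"VersionId": version} if version is not None else {}
        info = client.head_object(Bucket=bucket, Key=key, **version_args)
        return info["ContentLength"]

    # Usage sketch:
    #   import boto3
    #   size = object_size(boto3.client("s3"), "example-bucket", "example-key", None)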
@@ -361,7 +176,7 @@ def copyKeyMultipart(
     sseKey: Optional[str] = None,
     copySourceSseAlgorithm: Optional[str] = None,
     copySourceSseKey: Optional[str] = None,
-):
+) -> Optional[str]:
     """
     Copies a key from a source key to a destination key in multiple parts. Note that if the
     destination key exists it will be overwritten implicitly, and if it does not exist a new
@@ -393,12 +208,11 @@ def copyKeyMultipart(
     :param str copySourceSseAlgorithm: Server-side encryption algorithm for the source.
     :param str copySourceSseKey: Server-side encryption key for the source.
 
-    :rtype: str
     :return: The version of the copied file (or None if versioning is not enabled for dstBucket).
     """
     dstBucket = resource.Bucket(compat_bytes(dstBucketName))
     dstObject = dstBucket.Object(compat_bytes(dstKeyName))
-    copySource = {
+    copySource: "CopySourceTypeDef" = {
         "Bucket": compat_bytes(srcBucketName),
         "Key": compat_bytes(srcKeyName),
     }
@@ -410,23 +224,20 @@ def copyKeyMultipart(
     # object metadata. And we really want it to talk to the source region and
     # not wherever the bucket virtual hostnames go.
     source_region = get_bucket_region(srcBucketName)
-    source_client =
-        "
-        session.client(
-            "s3", region_name=source_region, config=DIAL_SPECIFIC_REGION_CONFIG
-        ),
+    source_client = session.client(
+        "s3", region_name=source_region, config=DIAL_SPECIFIC_REGION_CONFIG
     )
 
     # The boto3 functions don't allow passing parameters as None to
     # indicate they weren't provided. So we have to do a bit of work
     # to ensure we only provide the parameters when they are actually
     # required.
-    destEncryptionArgs = {}
+    destEncryptionArgs: dict[str, Any] = {}
     if sseKey is not None:
         destEncryptionArgs.update(
             {"SSECustomerAlgorithm": sseAlgorithm, "SSECustomerKey": sseKey}
         )
-    copyEncryptionArgs = {}
+    copyEncryptionArgs: dict[str, Any] = {}
     if copySourceSseKey is not None:
         copyEncryptionArgs.update(
             {
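The copyKeyMultipart hunk above simplifies how source_client is built: it is an S3 client dialed to the source bucket's own region rather than wherever the bucket's virtual hostname resolves. DIAL_SPECIFIC_REGION_CONFIG is defined earlier in this module and its contents are not shown here, so the sketch below uses a plain botocore Config as a stand-in; the helper and bucket names are likewise illustrative.

    import boto3
    from botocore.client import Config

    def client_for_bucket_region(bucket_name: str):
        # Ask S3 where the bucket lives, then build a client pinned to that region.
        # get_bucket_location returns None as the constraint for us-east-1.
        region = (
            boto3.client("s3").get_bucket_location(Bucket=bucket_name)["LocationConstraint"]
            or "us-east-1"
        )
        session = boto3.session.Session()
        return session.client("s3", region_name=region, config=Config(retries={"max_attempts": 5}))

    # source_client = client_for_bucket_region("example-source-bucket")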
@@ -479,63 +290,6 @@ def copyKeyMultipart(
     return info.get("VersionId", None)
 
 
-def
-    self, domain_or_name, item_name, attributes, replace=True, expected_value=None
-):
-    """
-    Monkey-patched version of SDBConnection.put_attributes that uses POST instead of GET
-
-    The GET version is subject to the URL length limit which kicks in before the 256 x 1024 limit
-    for attribute values. Using POST prevents that.
-
-    https://github.com/BD2KGenomics/toil/issues/502
-    """
-    domain, domain_name = self.get_domain_and_name(domain_or_name)
-    params = {"DomainName": domain_name, "ItemName": item_name}
-    self._build_name_value_list(params, attributes, replace)
-    if expected_value:
-        self._build_expected_value(params, expected_value)
-    # The addition of the verb keyword argument is the only difference to put_attributes (Hannes)
-    return self.get_status("PutAttributes", params, verb="POST")
-
-
-def monkeyPatchSdbConnection(sdb):
-    """
-    :type sdb: SDBConnection
-    """
-    sdb.put_attributes = types.MethodType(_put_attributes_using_post, sdb)
-
-
-def sdb_unavailable(e):
-    # Since we're checking against a collection here we absolutely need an
-    # integer status code. This is probably a BotoServerError, but other 500s
-    # and 503s probably ought to be retried too.
-    return get_error_status(e) in (500, 503)
-
-
-def no_such_sdb_domain(e):
-    return (
-        isinstance(e, ClientError)
-        and get_error_code(e)
-        and get_error_code(e).endswith("NoSuchDomain")
-    )
-
-
-def retryable_ssl_error(e):
+def retryable_ssl_error(e: BaseException) -> bool:
     # https://github.com/BD2KGenomics/toil/issues/978
     return isinstance(e, SSLError) and e.reason == "DECRYPTION_FAILED_OR_BAD_RECORD_MAC"
-
-
-def retryable_sdb_errors(e):
-    return (
-        sdb_unavailable(e)
-        or no_such_sdb_domain(e)
-        or connection_error(e)
-        or retryable_ssl_error(e)
-    )
-
-
-def retry_sdb(
-    delays=DEFAULT_DELAYS, timeout=DEFAULT_TIMEOUT, predicate=retryable_sdb_errors
-):
-    return old_retry(delays=delays, timeout=timeout, predicate=predicate)
toil/jobStores/googleJobStore.py
CHANGED
@@ -39,7 +39,7 @@ from toil.jobStores.abstractJobStore import (
     NoSuchJobException,
     NoSuchJobStoreException,
 )
-from toil.
+from toil.lib.pipes import ReadablePipe, WritablePipe
 from toil.lib.compatibility import compat_bytes
 from toil.lib.io import AtomicFileCreate
 from toil.lib.misc import truncExpBackoff
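ReadablePipe and WritablePipe are now imported from the new toil/lib/pipes.py (the truncated old import above pointed somewhere else under toil, and the file list shows toil/jobStores/utils.py going away as toil/lib/pipes.py appears). Conceptually these helpers connect a background producer to a streaming consumer so neither side has to buffer a whole file. A minimal sketch of that idea with a raw OS pipe, not the actual Toil classes, is shown here.

    import os
    import threading
    from typing import Callable, IO

    def stream(producer: Callable[[IO[bytes]], None], consumer: Callable[[IO[bytes]], None]) -> None:
        # Run the producer in a thread; the consumer reads until the write end is closed.
        read_fd, write_fd = os.pipe()
        writable = os.fdopen(write_fd, "wb")
        readable = os.fdopen(read_fd, "rb")

        def run_producer() -> None:
            try:
                producer(writable)
            finally:
                writable.close()  # signals EOF to the reader

        worker = threading.Thread(target=run_producer)
        worker.start()
        try:
            consumer(readable)
        finally:
            readable.close()
            worker.join()

    # stream(lambda w: w.write(b"x" * (1 << 20)), lambda r: print(len(r.read())))  # prints 1048576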
@@ -115,6 +115,10 @@ def permission_error_reporter(url: ParseResult, notes: str) -> Iterator[None]:
     So we take the URL and any notes from client setup here, and if something
     goes wrong that looks like a permission problem we complain with the notes
     attached.
+
+    Also, if you don't have a project available, you can't use the Python API
+    for storage with authentication at all. `gsutil` can do it but you can't.
+    TODO: Fall back on command-line gsutil for authenticated reads???
     """
     try:
         yield
@@ -125,7 +129,9 @@ def permission_error_reporter(url: ParseResult, notes: str) -> Iterator[None]:
                 "Are you sure you have set up your Google Account login "
                 "for applications with permission to access "
                 f"{urlunparse(url)}? "
-                "Maybe try `gcloud auth application-default login
+                "Maybe try `gcloud auth application-default login` and "
+                "providing the (mandatory for the Python API) Google Cloud "
+                "project? "
                 f"Client setup said: {notes}"
             ) from e
         else:
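For context, permission_error_reporter is a context manager that wraps Google Storage calls and, on permission-looking failures, re-raises with the client-setup notes and the `gcloud auth application-default login` hint extended in this hunk. The function's actual error-detection logic is not visible in the diff, so the string-matching check below is only a placeholder; the rest is a hedged sketch of the same shape, not Toil's implementation.

    from contextlib import contextmanager
    from typing import Iterator

    @contextmanager
    def report_permission_errors(target: str, notes: str) -> Iterator[None]:
        try:
            yield
        except Exception as e:
            # Placeholder heuristic; the real function inspects the Google client error.
            if "403" in str(e) or "forbidden" in str(e).lower():
                raise RuntimeError(
                    f"Access to {target} was denied. "
                    "Maybe try `gcloud auth application-default login` and "
                    "providing the (mandatory for the Python API) Google Cloud project? "
                    f"Client setup said: {notes}"
                ) from e
            raise

    # with report_permission_errors("gs://example-bucket/key", "used application-default credentials"):
    #     ...  # any storage call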
@@ -261,6 +267,8 @@ class GoogleJobStore(AbstractJobStore, URLAccess):
             raise NoSuchJobStoreException(self.locator, "google")
         super().resume()
 
+    # TODO: Don't we need to set up encryption here???
+
     @google_retry
     def destroy(self):
         try:
@@ -394,8 +402,13 @@ class GoogleJobStore(AbstractJobStore, URLAccess):
         ) as writable:
             yield writable, fileID
 
-    def get_empty_file_store_id(
-
+    def get_empty_file_store_id(
+        self,
+        job_id=None,
+        cleanup=False,
+        basename=None,
+    ):
+        fileID = self._new_id(isFile=True, jobStoreID=job_id if cleanup else None)
         self._write_file(fileID, BytesIO(b""))
         return fileID
 
@@ -617,7 +630,10 @@ class GoogleJobStore(AbstractJobStore, URLAccess):
         return filesRead
 
     @staticmethod
-    def _new_id(
+    def _new_id(
+        isFile=False,
+        jobStoreID=None,
+    ) -> str:
         if isFile and jobStoreID: # file associated with job
             return jobStoreID + str(uuid.uuid4())
         elif isFile: # nonassociated file
@@ -671,7 +687,7 @@ class GoogleJobStore(AbstractJobStore, URLAccess):
     ):
         """
         Yields a context manager that can be used to write to the bucket
-        with a stream. See :class:`~toil.
+        with a stream. See :class:`~toil.lib.pipes.WritablePipe` for an example.
 
         Will throw assertion error if the file shouldn't be updated
         and yet exists.
@@ -692,7 +708,7 @@ class GoogleJobStore(AbstractJobStore, URLAccess):
             are the same as for open(). Defaults to 'strict' when an encoding is specified.
 
         :return: an instance of WritablePipe.
-        :rtype: :class:`~toil.
+        :rtype: :class:`~toil.lib.pipes.WritablePipe`
         """
         blob = self.bucket.blob(
             compat_bytes(fileName), encryption_key=self.sseKey if encrypt else None
@@ -715,7 +731,7 @@ class GoogleJobStore(AbstractJobStore, URLAccess):
     def _download_stream(self, fileName, encrypt=True, encoding=None, errors=None):
         """
         Yields a context manager that can be used to read from the bucket
-        with a stream. See :class:`~toil.
+        with a stream. See :class:`~toil.lib.pipes.WritablePipe` for an example.
 
         :param fileName: name of file in bucket to be read
         :type fileName: str
@@ -730,7 +746,7 @@ class GoogleJobStore(AbstractJobStore, URLAccess):
             are the same as for open(). Defaults to 'strict' when an encoding is specified.
 
         :return: an instance of ReadablePipe.
-        :rtype: :class:`~toil.
+        :rtype: :class:`~toil.lib.pipes.ReadablePipe`
         """
 
         blob = self.bucket.get_blob(