toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/batchSystems/abstractBatchSystem.py +13 -5
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
- toil/batchSystems/kubernetes.py +13 -2
- toil/batchSystems/mesos/batchSystem.py +33 -2
- toil/batchSystems/registry.py +15 -118
- toil/batchSystems/slurm.py +191 -16
- toil/common.py +20 -1
- toil/cwl/cwltoil.py +97 -119
- toil/cwl/utils.py +103 -3
- toil/fileStores/__init__.py +1 -1
- toil/fileStores/abstractFileStore.py +5 -2
- toil/fileStores/cachingFileStore.py +1 -1
- toil/job.py +30 -14
- toil/jobStores/abstractJobStore.py +35 -255
- toil/jobStores/aws/jobStore.py +864 -1964
- toil/jobStores/aws/utils.py +24 -270
- toil/jobStores/fileJobStore.py +2 -1
- toil/jobStores/googleJobStore.py +32 -13
- toil/jobStores/utils.py +0 -327
- toil/leader.py +27 -22
- toil/lib/accelerators.py +1 -1
- toil/lib/aws/config.py +22 -0
- toil/lib/aws/s3.py +477 -9
- toil/lib/aws/utils.py +22 -33
- toil/lib/checksum.py +88 -0
- toil/lib/conversions.py +33 -31
- toil/lib/directory.py +217 -0
- toil/lib/ec2.py +97 -29
- toil/lib/exceptions.py +2 -1
- toil/lib/expando.py +2 -2
- toil/lib/generatedEC2Lists.py +138 -19
- toil/lib/io.py +33 -2
- toil/lib/memoize.py +21 -7
- toil/lib/misc.py +1 -1
- toil/lib/pipes.py +385 -0
- toil/lib/plugins.py +106 -0
- toil/lib/retry.py +1 -1
- toil/lib/threading.py +1 -1
- toil/lib/url.py +320 -0
- toil/lib/web.py +4 -5
- toil/options/cwl.py +13 -1
- toil/options/runner.py +17 -10
- toil/options/wdl.py +12 -1
- toil/provisioners/__init__.py +5 -2
- toil/provisioners/aws/__init__.py +43 -36
- toil/provisioners/aws/awsProvisioner.py +47 -15
- toil/provisioners/node.py +60 -12
- toil/resource.py +3 -13
- toil/server/app.py +12 -6
- toil/server/cli/wes_cwl_runner.py +2 -2
- toil/server/wes/abstract_backend.py +21 -43
- toil/server/wes/toil_backend.py +2 -2
- toil/test/__init__.py +16 -18
- toil/test/batchSystems/batchSystemTest.py +2 -9
- toil/test/batchSystems/batch_system_plugin_test.py +7 -0
- toil/test/batchSystems/test_slurm.py +103 -14
- toil/test/cwl/cwlTest.py +181 -8
- toil/test/cwl/staging_cat.cwl +27 -0
- toil/test/cwl/staging_make_file.cwl +25 -0
- toil/test/cwl/staging_workflow.cwl +43 -0
- toil/test/cwl/zero_default.cwl +61 -0
- toil/test/docs/scripts/tutorial_staging.py +17 -8
- toil/test/docs/scriptsTest.py +2 -1
- toil/test/jobStores/jobStoreTest.py +23 -133
- toil/test/lib/aws/test_iam.py +7 -7
- toil/test/lib/aws/test_s3.py +30 -33
- toil/test/lib/aws/test_utils.py +9 -9
- toil/test/lib/test_url.py +69 -0
- toil/test/lib/url_plugin_test.py +105 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +60 -7
- toil/test/provisioners/clusterTest.py +15 -2
- toil/test/provisioners/gceProvisionerTest.py +1 -1
- toil/test/server/serverTest.py +78 -36
- toil/test/src/autoDeploymentTest.py +2 -3
- toil/test/src/fileStoreTest.py +89 -87
- toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
- toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
- toil/test/utils/toilKillTest.py +35 -28
- toil/test/wdl/md5sum/md5sum-gs.json +1 -1
- toil/test/wdl/md5sum/md5sum.json +1 -1
- toil/test/wdl/testfiles/read_file.wdl +18 -0
- toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
- toil/test/wdl/wdltoil_test.py +171 -162
- toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
- toil/utils/toilDebugFile.py +6 -3
- toil/utils/toilSshCluster.py +23 -0
- toil/utils/toilStats.py +17 -2
- toil/utils/toilUpdateEC2Instances.py +1 -0
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +1179 -825
- toil/worker.py +16 -8
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/METADATA +32 -32
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/RECORD +97 -85
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/WHEEL +1 -1
- toil/lib/iterables.py +0 -112
- toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
toil/jobStores/aws/utils.py
CHANGED
```diff
@@ -17,7 +17,7 @@ import logging
 import os
 import types
 from ssl import SSLError
-from typing import TYPE_CHECKING, Optional, cast
+from typing import TYPE_CHECKING, IO, Optional, cast, Any
 
 from boto3.s3.transfer import TransferConfig
 from botocore.client import Config
@@ -37,8 +37,8 @@ from toil.lib.retry import (
 )
 
 if TYPE_CHECKING:
-    from mypy_boto3_s3 import S3ServiceResource
-    from
+    from mypy_boto3_s3 import S3Client, S3ServiceResource
+    from mypy_boto3_s3.type_defs import CopySourceTypeDef
 
 logger = logging.getLogger(__name__)
 
@@ -54,210 +54,21 @@ DIAL_SPECIFIC_REGION_CONFIG = Config(
 )
 
 
-class SDBHelper:
-    """
-    A mixin with methods for storing limited amounts of binary data in an SDB item
-
-    >>> import os
-    >>> H=SDBHelper
-    >>> H.presenceIndicator() # doctest: +ALLOW_UNICODE
-    u'numChunks'
-    >>> H.binaryToAttributes(None)['numChunks']
-    0
-    >>> H.attributesToBinary({u'numChunks': 0})
-    (None, 0)
-    >>> H.binaryToAttributes(b'') # doctest: +ALLOW_UNICODE +ALLOW_BYTES
-    {u'000': b'VQ==', u'numChunks': 1}
-    >>> H.attributesToBinary({u'numChunks': 1, u'000': b'VQ=='}) # doctest: +ALLOW_BYTES
-    (b'', 1)
-
-    Good pseudo-random data is very likely smaller than its bzip2ed form. Subtract 1 for the type
-    character, i.e 'C' or 'U', with which the string is prefixed. We should get one full chunk:
-
-    >>> s = os.urandom(H.maxRawValueSize-1)
-    >>> d = H.binaryToAttributes(s)
-    >>> len(d), len(d['000'])
-    (2, 1024)
-    >>> H.attributesToBinary(d) == (s, 1)
-    True
-
-    One byte more and we should overflow four bytes into the second chunk, two bytes for
-    base64-encoding the additional character and two bytes for base64-padding to the next quartet.
-
-    >>> s += s[0:1]
-    >>> d = H.binaryToAttributes(s)
-    >>> len(d), len(d['000']), len(d['001'])
-    (3, 1024, 4)
-    >>> H.attributesToBinary(d) == (s, 2)
-    True
-
-    """
-
-    # The SDB documentation is not clear as to whether the attribute value size limit of 1024
-    # applies to the base64-encoded value or the raw value. It suggests that responses are
-    # automatically encoded from which I conclude that the limit should apply to the raw,
-    # unencoded value. However, there seems to be a discrepancy between how Boto computes the
-    # request signature if a value contains a binary data, and how SDB does it. This causes
-    # requests to fail signature verification, resulting in a 403. We therefore have to
-    # base64-encode values ourselves even if that means we loose a quarter of capacity.
-
-    maxAttributesPerItem = 256
-    maxValueSize = 1024
-    maxRawValueSize = maxValueSize * 3 // 4
-    # Just make sure we don't have a problem with padding or integer truncation:
-    assert len(base64.b64encode(b" " * maxRawValueSize)) == 1024
-    assert len(base64.b64encode(b" " * (1 + maxRawValueSize))) > 1024
-
-    @classmethod
-    def _reservedAttributes(cls):
-        """
-        Override in subclass to reserve a certain number of attributes that can't be used for
-        chunks.
-        """
-        return 1
-
-    @classmethod
-    def _maxChunks(cls):
-        return cls.maxAttributesPerItem - cls._reservedAttributes()
-
-    @classmethod
-    def maxBinarySize(cls, extraReservedChunks=0):
-        return (
-            cls._maxChunks() - extraReservedChunks
-        ) * cls.maxRawValueSize - 1  # for the 'C' or 'U' prefix
-
-    @classmethod
-    def _maxEncodedSize(cls):
-        return cls._maxChunks() * cls.maxValueSize
-
-    @classmethod
-    def binaryToAttributes(cls, binary) -> dict[str, str]:
-        """
-        Turn a bytestring, or None, into SimpleDB attributes.
-        """
-        if binary is None:
-            return {"numChunks": "0"}
-        assert isinstance(binary, bytes)
-        assert len(binary) <= cls.maxBinarySize()
-        # The use of compression is just an optimization. We can't include it in the maxValueSize
-        # computation because the compression ratio depends on the input.
-        compressed = bz2.compress(binary)
-        if len(compressed) > len(binary):
-            compressed = b"U" + binary
-        else:
-            compressed = b"C" + compressed
-        encoded = base64.b64encode(compressed)
-        assert len(encoded) <= cls._maxEncodedSize()
-        n = cls.maxValueSize
-        chunks = (encoded[i : i + n] for i in range(0, len(encoded), n))
-        attributes = {
-            cls._chunkName(i): chunk.decode("utf-8") for i, chunk in enumerate(chunks)
-        }
-        attributes.update({"numChunks": str(len(attributes))})
-        return attributes
-
-    @classmethod
-    def attributeDictToList(
-        cls, attributes: dict[str, str]
-    ) -> list["AttributeTypeDef"]:
-        """
-        Convert the attribute dict (ex: from binaryToAttributes) into a list of attribute typed dicts
-        to be compatible with boto3 argument syntax
-        :param attributes: Dict[str, str], attribute in object form
-        :return: list of attributes in typed dict form
-        """
-        return [{"Name": name, "Value": value} for name, value in attributes.items()]
-
-    @classmethod
-    def attributeListToDict(
-        cls, attributes: list["AttributeTypeDef"]
-    ) -> dict[str, str]:
-        """
-        Convert the attribute boto3 representation of list of attribute typed dicts
-        back to a dictionary with name, value pairs
-        :param attribute: attribute in typed dict form
-        :return: Dict[str, str], attribute in dict form
-        """
-        return {attribute["Name"]: attribute["Value"] for attribute in attributes}
-
-    @classmethod
-    def get_attributes_from_item(
-        cls, item: "ItemTypeDef", keys: list[str]
-    ) -> list[Optional[str]]:
-        return_values: list[Optional[str]] = [None for _ in keys]
-        mapped_indices: dict[str, int] = {
-            name: index for index, name in enumerate(keys)
-        }
-        for attribute in item["Attributes"]:
-            name = attribute["Name"]
-            value = attribute["Value"]
-            if name in mapped_indices:
-                return_values[mapped_indices[name]] = value
-        return return_values
-
-    @classmethod
-    def _chunkName(cls, i):
-        return str(i).zfill(3)
-
-    @classmethod
-    def _isValidChunkName(cls, s):
-        return len(s) == 3 and s.isdigit()
-
-    @classmethod
-    def presenceIndicator(cls):
-        """
-        The key that is guaranteed to be present in the return value of binaryToAttributes().
-        Assuming that binaryToAttributes() is used with SDB's PutAttributes, the return value of
-        this method could be used to detect the presence/absence of an item in SDB.
-        """
-        return "numChunks"
-
-    @classmethod
-    def attributesToBinary(
-        cls, attributes: list["AttributeTypeDef"]
-    ) -> tuple[bytes, int]:
-        """
-        :rtype: (str|None,int)
-        :return: the binary data and the number of chunks it was composed from
-        """
-        chunks = []
-        numChunks: int = 0
-        for attribute in attributes:
-            name = attribute["Name"]
-            value = attribute["Value"]
-            if cls._isValidChunkName(name):
-                chunks.append((int(name), value))
-            if name == "numChunks":
-                numChunks = int(value)
-        chunks.sort()
-        if numChunks:
-            serializedJob = b"".join(v.encode() for k, v in chunks)
-            compressed = base64.b64decode(serializedJob)
-            if compressed[0] == b"C"[0]:
-                binary = bz2.decompress(compressed[1:])
-            elif compressed[0] == b"U"[0]:
-                binary = compressed[1:]
-            else:
-                raise RuntimeError(f"Unexpected prefix {compressed[0]}")
-        else:
-            binary = None
-        return binary, numChunks
-
-
-def fileSizeAndTime(localFilePath):
+def fileSizeAndTime(localFilePath: str) -> tuple[int, float]:
     file_stat = os.stat(localFilePath)
     return file_stat.st_size, file_stat.st_mtime
 
 
+# TODO: This function is unused.
 @retry(errors=[AWSServerErrors])
 def uploadFromPath(
     localFilePath: str,
-    resource,
+    resource: "S3ServiceResource",
     bucketName: str,
     fileID: str,
-    headerArgs: Optional[dict] = None,
+    headerArgs: Optional[dict[str, Any]] = None,
     partSize: int = 50 << 20,
-):
+) -> Optional[str]:
     """
     Uploads a file to s3, using multipart uploading if applicable
 
@@ -279,8 +90,12 @@ def uploadFromPath(
     version = uploadFile(
         localFilePath, resource, bucketName, fileID, headerArgs, partSize
     )
+
+    # Only pass along version if we got one.
+    version_args: dict[str, Any] = {"VersionId": version} if version is not None else {}
+
     info = client.head_object(
-        Bucket=bucketName, Key=compat_bytes(fileID),
+        Bucket=bucketName, Key=compat_bytes(fileID), **version_args, **headerArgs
     )
     size = info.get("ContentLength")
 
@@ -293,13 +108,13 @@ def uploadFromPath(
 
 @retry(errors=[AWSServerErrors])
 def uploadFile(
-    readable,
-    resource,
+    readable: IO[bytes],
+    resource: "S3ServiceResource",
     bucketName: str,
     fileID: str,
-    headerArgs: Optional[dict] = None,
+    headerArgs: Optional[dict[str, Any]] = None,
     partSize: int = 50 << 20,
-):
+) -> Optional[str]:
     """
     Upload a readable object to s3, using multipart uploading if applicable.
     :param readable: a readable stream or a file path to upload to s3
@@ -361,7 +176,7 @@ def copyKeyMultipart(
     sseKey: Optional[str] = None,
     copySourceSseAlgorithm: Optional[str] = None,
     copySourceSseKey: Optional[str] = None,
-):
+) -> Optional[str]:
     """
     Copies a key from a source key to a destination key in multiple parts. Note that if the
     destination key exists it will be overwritten implicitly, and if it does not exist a new
@@ -393,12 +208,11 @@ def copyKeyMultipart(
     :param str copySourceSseAlgorithm: Server-side encryption algorithm for the source.
     :param str copySourceSseKey: Server-side encryption key for the source.
 
-    :rtype: str
     :return: The version of the copied file (or None if versioning is not enabled for dstBucket).
     """
     dstBucket = resource.Bucket(compat_bytes(dstBucketName))
     dstObject = dstBucket.Object(compat_bytes(dstKeyName))
-    copySource = {
+    copySource: "CopySourceTypeDef" = {
         "Bucket": compat_bytes(srcBucketName),
         "Key": compat_bytes(srcKeyName),
     }
@@ -410,23 +224,20 @@ def copyKeyMultipart(
     # object metadata. And we really want it to talk to the source region and
     # not wherever the bucket virtual hostnames go.
     source_region = get_bucket_region(srcBucketName)
-    source_client =
-        "
-        session.client(
-            "s3", region_name=source_region, config=DIAL_SPECIFIC_REGION_CONFIG
-        ),
+    source_client = session.client(
+        "s3", region_name=source_region, config=DIAL_SPECIFIC_REGION_CONFIG
     )
 
     # The boto3 functions don't allow passing parameters as None to
     # indicate they weren't provided. So we have to do a bit of work
     # to ensure we only provide the parameters when they are actually
     # required.
-    destEncryptionArgs = {}
+    destEncryptionArgs: dict[str, Any] = {}
     if sseKey is not None:
         destEncryptionArgs.update(
             {"SSECustomerAlgorithm": sseAlgorithm, "SSECustomerKey": sseKey}
         )
-    copyEncryptionArgs = {}
+    copyEncryptionArgs: dict[str, Any] = {}
     if copySourceSseKey is not None:
         copyEncryptionArgs.update(
             {
@@ -479,63 +290,6 @@ def copyKeyMultipart(
     return info.get("VersionId", None)
 
 
-def _put_attributes_using_post(
-    self, domain_or_name, item_name, attributes, replace=True, expected_value=None
-):
-    """
-    Monkey-patched version of SDBConnection.put_attributes that uses POST instead of GET
-
-    The GET version is subject to the URL length limit which kicks in before the 256 x 1024 limit
-    for attribute values. Using POST prevents that.
-
-    https://github.com/BD2KGenomics/toil/issues/502
-    """
-    domain, domain_name = self.get_domain_and_name(domain_or_name)
-    params = {"DomainName": domain_name, "ItemName": item_name}
-    self._build_name_value_list(params, attributes, replace)
-    if expected_value:
-        self._build_expected_value(params, expected_value)
-    # The addition of the verb keyword argument is the only difference to put_attributes (Hannes)
-    return self.get_status("PutAttributes", params, verb="POST")
-
-
-def monkeyPatchSdbConnection(sdb):
-    """
-    :type sdb: SDBConnection
-    """
-    sdb.put_attributes = types.MethodType(_put_attributes_using_post, sdb)
-
-
-def sdb_unavailable(e):
-    # Since we're checking against a collection here we absolutely need an
-    # integer status code. This is probably a BotoServerError, but other 500s
-    # and 503s probably ought to be retried too.
-    return get_error_status(e) in (500, 503)
-
-
-def no_such_sdb_domain(e):
-    return (
-        isinstance(e, ClientError)
-        and get_error_code(e)
-        and get_error_code(e).endswith("NoSuchDomain")
-    )
-
-
-def retryable_ssl_error(e):
+def retryable_ssl_error(e: BaseException) -> bool:
     # https://github.com/BD2KGenomics/toil/issues/978
     return isinstance(e, SSLError) and e.reason == "DECRYPTION_FAILED_OR_BAD_RECORD_MAC"
-
-
-def retryable_sdb_errors(e):
-    return (
-        sdb_unavailable(e)
-        or no_such_sdb_domain(e)
-        or connection_error(e)
-        or retryable_ssl_error(e)
-    )
-
-
-def retry_sdb(
-    delays=DEFAULT_DELAYS, timeout=DEFAULT_TIMEOUT, predicate=retryable_sdb_errors
-):
-    return old_retry(delays=delays, timeout=timeout, predicate=predicate)
```
toil/jobStores/fileJobStore.py
CHANGED
```diff
@@ -42,11 +42,12 @@ from toil.lib.io import (
     mkdtemp,
     robust_rmtree,
 )
+from toil.lib.url import URLAccess
 
 logger = logging.getLogger(__name__)
 
 
-class FileJobStore(AbstractJobStore):
+class FileJobStore(AbstractJobStore, URLAccess):
     """
     A job store that uses a directory on a locally attached file system. To be compatible with
     distributed batch systems, that file system must be shared by all worker nodes.
```
toil/jobStores/googleJobStore.py
CHANGED
```diff
@@ -28,9 +28,10 @@ from google.api_core.exceptions import (
     InternalServerError,
     ServiceUnavailable,
 )
-from google.auth.exceptions import DefaultCredentialsError
+from google.auth.exceptions import DefaultCredentialsError, InvalidOperation
 from google.cloud import exceptions, storage
 
+from toil import memoize
 from toil.jobStores.abstractJobStore import (
     AbstractJobStore,
     JobStoreExistsException,
@@ -38,11 +39,12 @@ from toil.jobStores.abstractJobStore import (
     NoSuchJobException,
     NoSuchJobStoreException,
 )
-from toil.
+from toil.lib.pipes import ReadablePipe, WritablePipe
 from toil.lib.compatibility import compat_bytes
 from toil.lib.io import AtomicFileCreate
 from toil.lib.misc import truncExpBackoff
 from toil.lib.retry import old_retry
+from toil.lib.url import URLAccess
 
 log = logging.getLogger(__name__)
 
@@ -113,17 +115,23 @@ def permission_error_reporter(url: ParseResult, notes: str) -> Iterator[None]:
     So we take the URL and any notes from client setup here, and if something
     goes wrong that looks like a permission problem we complain with the notes
     attached.
+
+    Also, if you don't have a project available, you can't use the Python API
+    for storage with authentication at all. `gsutil` can do it but you can't.
+    TODO: Fall back on command-line gsutil for authenticated reads???
     """
     try:
         yield
-    except
+    except InvalidOperation as e:
         if "Anonymous credentials cannot be refreshed" in str(e):
             raise RuntimeError(
                 "Google Storage tried to refresh anonymous credentials. "
                 "Are you sure you have set up your Google Account login "
                 "for applications with permission to access "
                 f"{urlunparse(url)}? "
-                "Maybe try `gcloud auth application-default login
+                "Maybe try `gcloud auth application-default login` and "
+                "providing the (mandatory for the Python API) Google Cloud "
+                "project? "
                 f"Client setup said: {notes}"
             ) from e
         else:
@@ -131,7 +139,7 @@ def permission_error_reporter(url: ParseResult, notes: str) -> Iterator[None]:
 
 
 
-class GoogleJobStore(AbstractJobStore):
+class GoogleJobStore(AbstractJobStore, URLAccess):
 
     nodeServiceAccountJson = "/root/service_account.json"
 
@@ -160,9 +168,10 @@ class GoogleJobStore(AbstractJobStore):
         self.storageClient, self.auth_notes = self.create_client()
 
     @classmethod
+    @memoize
     def create_client(cls) -> tuple[storage.Client, str]:
         """
-        Produce a client for Google
+        Produce a client for Google Storage with the highest level of access we can get.
 
         Fall back to anonymous access if no project is available, unlike the
         Google Storage module's behavior.
@@ -258,6 +267,8 @@ class GoogleJobStore(AbstractJobStore):
             raise NoSuchJobStoreException(self.locator, "google")
         super().resume()
 
+        # TODO: Don't we need to set up encryption here???
+
     @google_retry
     def destroy(self):
         try:
@@ -391,8 +402,13 @@ class GoogleJobStore(AbstractJobStore):
         ) as writable:
             yield writable, fileID
 
-    def get_empty_file_store_id(
-
+    def get_empty_file_store_id(
+        self,
+        job_id=None,
+        cleanup=False,
+        basename=None,
+    ):
+        fileID = self._new_id(isFile=True, jobStoreID=job_id if cleanup else None)
         self._write_file(fileID, BytesIO(b""))
         return fileID
 
@@ -614,7 +630,10 @@ class GoogleJobStore(AbstractJobStore):
         return filesRead
 
     @staticmethod
-    def _new_id(
+    def _new_id(
+        isFile=False,
+        jobStoreID=None,
+    ) -> str:
         if isFile and jobStoreID:  # file associated with job
             return jobStoreID + str(uuid.uuid4())
         elif isFile:  # nonassociated file
@@ -668,7 +687,7 @@ class GoogleJobStore(AbstractJobStore):
     ):
         """
         Yields a context manager that can be used to write to the bucket
-        with a stream. See :class:`~toil.
+        with a stream. See :class:`~toil.lib.pipes.WritablePipe` for an example.
 
         Will throw assertion error if the file shouldn't be updated
         and yet exists.
@@ -689,7 +708,7 @@ class GoogleJobStore(AbstractJobStore):
             are the same as for open(). Defaults to 'strict' when an encoding is specified.
 
         :return: an instance of WritablePipe.
-        :rtype: :class:`~toil.
+        :rtype: :class:`~toil.lib.pipes.WritablePipe`
         """
         blob = self.bucket.blob(
             compat_bytes(fileName), encryption_key=self.sseKey if encrypt else None
@@ -712,7 +731,7 @@ class GoogleJobStore(AbstractJobStore):
     def _download_stream(self, fileName, encrypt=True, encoding=None, errors=None):
         """
         Yields a context manager that can be used to read from the bucket
-        with a stream. See :class:`~toil.
+        with a stream. See :class:`~toil.lib.pipes.WritablePipe` for an example.
 
         :param fileName: name of file in bucket to be read
         :type fileName: str
@@ -727,7 +746,7 @@ class GoogleJobStore(AbstractJobStore):
             are the same as for open(). Defaults to 'strict' when an encoding is specified.
 
         :return: an instance of ReadablePipe.
-        :rtype: :class:`~toil.
+        :rtype: :class:`~toil.lib.pipes.ReadablePipe`
         """
 
         blob = self.bucket.get_blob(
```