toil 6.1.0__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +1 -232
- toil/batchSystems/abstractBatchSystem.py +22 -13
- toil/batchSystems/abstractGridEngineBatchSystem.py +59 -45
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/contained_executor.py +4 -5
- toil/batchSystems/gridengine.py +1 -1
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +25 -11
- toil/batchSystems/local_support.py +3 -3
- toil/batchSystems/lsf.py +2 -2
- toil/batchSystems/mesos/batchSystem.py +4 -4
- toil/batchSystems/mesos/executor.py +3 -2
- toil/batchSystems/options.py +9 -0
- toil/batchSystems/singleMachine.py +11 -10
- toil/batchSystems/slurm.py +64 -22
- toil/batchSystems/torque.py +1 -1
- toil/bus.py +7 -3
- toil/common.py +36 -13
- toil/cwl/cwltoil.py +365 -312
- toil/deferred.py +1 -1
- toil/fileStores/abstractFileStore.py +17 -17
- toil/fileStores/cachingFileStore.py +2 -2
- toil/fileStores/nonCachingFileStore.py +1 -1
- toil/job.py +228 -60
- toil/jobStores/abstractJobStore.py +18 -10
- toil/jobStores/aws/jobStore.py +280 -218
- toil/jobStores/aws/utils.py +57 -29
- toil/jobStores/conftest.py +2 -2
- toil/jobStores/fileJobStore.py +2 -2
- toil/jobStores/googleJobStore.py +3 -4
- toil/leader.py +72 -24
- toil/lib/aws/__init__.py +26 -10
- toil/lib/aws/iam.py +2 -2
- toil/lib/aws/session.py +62 -22
- toil/lib/aws/utils.py +73 -37
- toil/lib/conversions.py +5 -1
- toil/lib/ec2.py +118 -69
- toil/lib/expando.py +1 -1
- toil/lib/io.py +14 -2
- toil/lib/misc.py +1 -3
- toil/lib/resources.py +55 -21
- toil/lib/retry.py +12 -5
- toil/lib/threading.py +2 -2
- toil/lib/throttle.py +1 -1
- toil/options/common.py +27 -24
- toil/provisioners/__init__.py +9 -3
- toil/provisioners/abstractProvisioner.py +9 -7
- toil/provisioners/aws/__init__.py +20 -15
- toil/provisioners/aws/awsProvisioner.py +406 -329
- toil/provisioners/gceProvisioner.py +2 -2
- toil/provisioners/node.py +13 -5
- toil/server/app.py +1 -1
- toil/statsAndLogging.py +58 -16
- toil/test/__init__.py +27 -12
- toil/test/batchSystems/batchSystemTest.py +40 -33
- toil/test/batchSystems/batch_system_plugin_test.py +79 -0
- toil/test/batchSystems/test_slurm.py +1 -1
- toil/test/cwl/cwlTest.py +8 -91
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +10 -13
- toil/test/jobStores/jobStoreTest.py +33 -49
- toil/test/lib/aws/test_iam.py +2 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
- toil/test/provisioners/clusterTest.py +90 -8
- toil/test/server/serverTest.py +2 -2
- toil/test/src/autoDeploymentTest.py +1 -1
- toil/test/src/dockerCheckTest.py +2 -1
- toil/test/src/environmentTest.py +125 -0
- toil/test/src/fileStoreTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +18 -8
- toil/test/src/jobTest.py +1 -1
- toil/test/src/realtimeLoggerTest.py +4 -0
- toil/test/src/workerTest.py +52 -19
- toil/test/utils/toilDebugTest.py +61 -3
- toil/test/utils/utilsTest.py +20 -18
- toil/test/wdl/wdltoil_test.py +24 -71
- toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
- toil/toilState.py +68 -9
- toil/utils/toilDebugJob.py +153 -26
- toil/utils/toilLaunchCluster.py +12 -2
- toil/utils/toilRsyncCluster.py +7 -2
- toil/utils/toilSshCluster.py +7 -3
- toil/utils/toilStats.py +2 -1
- toil/utils/toilStatus.py +97 -51
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +318 -51
- toil/worker.py +96 -69
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/METADATA +55 -21
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/RECORD +93 -90
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/jobStores/aws/jobStore.py
CHANGED
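The dominant change in this file is the move off the legacy `boto` SimpleDB bindings (`boto.sdb`, `SDBResponseError`, `monkeyPatchSdbConnection`) onto the boto3 `sdb` client, with `mypy_boto3_sdb` type definitions and attributes passed around as lists of `{"Name": ..., "Value": ...}` dicts. A minimal sketch of the call pattern the new code relies on — the domain, item, and attribute names here are illustrative, not taken from Toil:

```python
import boto3

# The job store now talks to SimpleDB through the plain boto3 client.
sdb = boto3.client("sdb", region_name="us-west-2")

# Writes pass attributes as a list of {"Name": ..., "Value": ...} dicts.
sdb.put_attributes(
    DomainName="example-domain",   # illustrative, not a real Toil domain name
    ItemName="example-item",
    Attributes=[{"Name": "exists", "Value": "True", "Replace": True}],
)

# Reads return {"Attributes": [...]}, and the key can be absent for empty items,
# hence the .get("Attributes", []) pattern that recurs throughout the diff below.
attrs = sdb.get_attributes(
    DomainName="example-domain",
    ItemName="example-item",
    ConsistentRead=True,
).get("Attributes", [])
```

In the diff itself every such call is additionally wrapped in a `retry_sdb()` attempt loop.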
```diff
@@ -23,27 +23,26 @@ import time
 import uuid
 from contextlib import contextmanager
 from io import BytesIO
-from typing import List, Optional, IO
+from typing import List, Optional, IO, Dict, Union, Generator, Tuple, cast, TYPE_CHECKING
 from urllib.parse import ParseResult, parse_qs, urlencode, urlsplit, urlunsplit
 
-import boto.s3.connection
-import boto.sdb
-from boto.exception import SDBResponseError
 from botocore.exceptions import ClientError
+from mypy_boto3_sdb import SimpleDBClient
+from mypy_boto3_sdb.type_defs import ReplaceableItemTypeDef, ReplaceableAttributeTypeDef, SelectResultTypeDef, ItemTypeDef, AttributeTypeDef, DeletableItemTypeDef, UpdateConditionTypeDef
 
 import toil.lib.encryption as encryption
 from toil.fileStores import FileID
+from toil.job import Job, JobDescription
 from toil.jobStores.abstractJobStore import (AbstractJobStore,
                                              ConcurrentFileModificationException,
                                              JobStoreExistsException,
                                              NoSuchFileException,
                                              NoSuchJobException,
-                                             NoSuchJobStoreException)
+                                             NoSuchJobStoreException, LocatorException)
 from toil.jobStores.aws.utils import (SDBHelper,
                                       ServerSideCopyProhibitedError,
                                       copyKeyMultipart,
                                       fileSizeAndTime,
-                                      monkeyPatchSdbConnection,
                                       no_such_sdb_domain,
                                       retry_sdb,
                                       sdb_unavailable,
@@ -61,7 +60,7 @@ from toil.lib.aws.utils import (create_s3_bucket,
                                 get_object_for_url,
                                 list_objects_for_url,
                                 retry_s3,
-                                retryable_s3_errors)
+                                retryable_s3_errors, boto3_pager, get_item_from_attributes)
 from toil.lib.compatibility import compat_bytes
 from toil.lib.ec2nodes import EC2Regions
 from toil.lib.exceptions import panic
@@ -70,6 +69,9 @@ from toil.lib.memoize import strict_bool
 from toil.lib.objects import InnerClass
 from toil.lib.retry import get_error_code, get_error_status, retry
 
+if TYPE_CHECKING:
+    from toil import Config
+
 boto3_session = establish_boto3_session()
 s3_boto3_resource = boto3_session.resource('s3')
 s3_boto3_client = boto3_session.client('s3')
@@ -85,6 +87,12 @@ class ChecksumError(Exception):
     """Raised when a download from AWS does not contain the correct data."""
 
 
+class DomainDoesNotExist(Exception):
+    """Raised when a domain that is expected to exist does not exist."""
+    def __init__(self, domain_name):
+        super().__init__(f"Expected domain {domain_name} to exist!")
+
+
 class AWSJobStore(AbstractJobStore):
     """
     A job store that uses Amazon's S3 for file storage and SimpleDB for storing job info and
@@ -134,19 +142,19 @@ class AWSJobStore(AbstractJobStore):
         logger.debug("Instantiating %s for region %s and name prefix '%s'",
                      self.__class__, region, namePrefix)
         self.region = region
-        self.
-        self.
-        self.
-        self.
-        self.
-        self.db =
+        self.name_prefix = namePrefix
+        self.part_size = partSize
+        self.jobs_domain_name: Optional[str] = None
+        self.files_domain_name: Optional[str] = None
+        self.files_bucket = None
+        self.db = boto3_session.client(service_name="sdb", region_name=region)
 
         self.s3_resource = boto3_session.resource('s3', region_name=self.region)
         self.s3_client = self.s3_resource.meta.client
 
-    def initialize(self, config):
+    def initialize(self, config: "Config") -> None:
         if self._registered:
-            raise JobStoreExistsException(self.locator)
+            raise JobStoreExistsException(self.locator, "aws")
         self._registered = None
        try:
            self._bind(create=True)
@@ -159,36 +167,38 @@ class AWSJobStore(AbstractJobStore):
        self._registered = True
 
    @property
-    def sseKeyPath(self):
+    def sseKeyPath(self) -> Optional[str]:
        return self.config.sseKey
 
-    def resume(self):
+    def resume(self) -> None:
        if not self._registered:
-            raise NoSuchJobStoreException(self.locator)
+            raise NoSuchJobStoreException(self.locator, "aws")
        self._bind(create=False)
        super().resume()
 
-    def _bind(self, create=False, block=True, check_versioning_consistency=True):
+    def _bind(self, create: bool = False, block: bool = True, check_versioning_consistency: bool = True) -> None:
        def qualify(name):
            assert len(name) <= self.maxNameLen
-            return self.
+            return self.name_prefix + self.nameSeparator + name
 
        # The order in which this sequence of events happens is important. We can easily handle the
        # inability to bind a domain, but it is a little harder to handle some cases of binding the
        # jobstore bucket. Maintaining this order allows for an easier `destroy` method.
-        if self.
-        self.
-
-
-
-        self.
-
-
-
-
+        if self.jobs_domain_name is None:
+            self.jobs_domain_name = qualify("jobs")
+            self._bindDomain(self.jobs_domain_name, create=create, block=block)
+        if self.files_domain_name is None:
+            self.files_domain_name = qualify("files")
+            self._bindDomain(self.files_domain_name, create=create, block=block)
+        if self.files_bucket is None:
+            self.files_bucket = self._bindBucket(qualify('files'),
+                                                 create=create,
+                                                 block=block,
+                                                 versioning=True,
+                                                 check_versioning_consistency=check_versioning_consistency)
 
    @property
-    def _registered(self):
+    def _registered(self) -> Optional[bool]:
        """
        A optional boolean property indicating whether this job store is registered. The
        registry is the authority on deciding if a job store exists or not. If True, this job
@@ -205,55 +215,60 @@ class AWSJobStore(AbstractJobStore):
        # store destruction, indicates a job store in transition, reflecting the fact that 3.3.0
        # may leak buckets or domains even though the registry reports 'False' for them. We
        # can't handle job stores that were partially created by 3.3.0, though.
-
-
-
-
+        registry_domain_name = "toil-registry"
+        try:
+            self._bindDomain(domain_name=registry_domain_name,
+                             create=False,
+                             block=False)
+        except DomainDoesNotExist:
            return False
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+        for attempt in retry_sdb():
+            with attempt:
+                get_result = self.db.get_attributes(DomainName=registry_domain_name,
+                                                    ItemName=self.name_prefix,
+                                                    AttributeNames=['exists'],
+                                                    ConsistentRead=True)
+                attributes: List[AttributeTypeDef] = get_result.get("Attributes", [])  # the documentation says 'Attributes' should always exist, but this is not true
+                exists: Optional[str] = get_item_from_attributes(attributes=attributes, name="exists")
+                if exists is None:
+                    return False
+                elif exists == 'True':
+                    return True
+                elif exists == 'False':
+                    return None
+                else:
+                    assert False
 
    @_registered.setter
-    def _registered(self, value):
-
-
-
-
-
-
-
+    def _registered(self, value: bool) -> None:
+        registry_domain_name = "toil-registry"
+        try:
+            self._bindDomain(domain_name=registry_domain_name,
+                             # Only create registry domain when registering or
+                             # transitioning a store
+                             create=value is not False,
+                             block=False)
+        except DomainDoesNotExist:
            pass
        else:
            for attempt in retry_sdb():
                with attempt:
                    if value is False:
-
+                        self.db.delete_attributes(DomainName=registry_domain_name,
+                                                  ItemName=self.name_prefix)
                    else:
                        if value is True:
-                            attributes =
+                            attributes: List[ReplaceableAttributeTypeDef] = [{"Name": "exists", "Value": "True", "Replace": True}]
                        elif value is None:
-                            attributes =
+                            attributes = [{"Name": "exists", "Value": "False", "Replace": True}]
                        else:
                            assert False
-
-
+                        self.db.put_attributes(DomainName=registry_domain_name,
+                                               ItemName=self.name_prefix,
+                                               Attributes=attributes)
 
-    def _checkItem(self, item, enforce: bool = True):
+    def _checkItem(self, item: ItemTypeDef, enforce: bool = True) -> None:
        """
        Make sure that the given SimpleDB item actually has the attributes we think it should.
 
@@ -261,32 +276,48 @@ class AWSJobStore(AbstractJobStore):
 
        If enforce is false, log but don't throw.
        """
+        self._checkAttributes(item["Attributes"], enforce)
 
-
+    def _checkAttributes(self, attributes: List[AttributeTypeDef], enforce: bool = True) -> None:
+        if get_item_from_attributes(attributes=attributes, name="overlargeID") is None:
            logger.error("overlargeID attribute isn't present: either SimpleDB entry is "
-                         "corrupt or jobstore is from an extremely old Toil: %s",
+                         "corrupt or jobstore is from an extremely old Toil: %s", attributes)
            if enforce:
                raise RuntimeError("encountered SimpleDB entry missing required attribute "
                                   "'overlargeID'; is your job store ancient?")
 
-    def
-
-
-
+    def _awsJobFromAttributes(self, attributes: List[AttributeTypeDef]) -> Job:
+        """
+        Get a Toil Job object from attributes that are defined in an item from the DB
+        :param attributes: List of attributes
+        :return: Toil job
+        """
+        self._checkAttributes(attributes)
+        overlarge_id_value = get_item_from_attributes(attributes=attributes, name="overlargeID")
+        if overlarge_id_value:
+            assert self.file_exists(overlarge_id_value)
            # This is an overlarge job, download the actual attributes
            # from the file store
            logger.debug("Loading overlarge job from S3.")
-            with self.read_file_stream(
+            with self.read_file_stream(overlarge_id_value) as fh:
                binary = fh.read()
        else:
-            binary, _ = SDBHelper.attributesToBinary(
+            binary, _ = SDBHelper.attributesToBinary(attributes)
        assert binary is not None
        job = pickle.loads(binary)
        if job is not None:
            job.assignConfig(self.config)
        return job
 
-    def
+    def _awsJobFromItem(self, item: ItemTypeDef) -> Job:
+        """
+        Get a Toil Job object from an item from the DB
+        :param item: ItemTypeDef
+        :return: Toil Job
+        """
+        return self._awsJobFromAttributes(item["Attributes"])
+
+    def _awsJobToAttributes(self, job: JobDescription) -> List[AttributeTypeDef]:
        binary = pickle.dumps(job, protocol=pickle.HIGHEST_PROTOCOL)
        if len(binary) > SDBHelper.maxBinarySize(extraReservedChunks=1):
            # Store as an overlarge job in S3
@@ -297,66 +328,82 @@ class AWSJobStore(AbstractJobStore):
        else:
            item = SDBHelper.binaryToAttributes(binary)
            item["overlargeID"] = ""
-            return item
+            return SDBHelper.attributeDictToList(item)
+
+    def _awsJobToItem(self, job: JobDescription, name: str) -> ItemTypeDef:
+        return {"Name": name, "Attributes": self._awsJobToAttributes(job)}
 
    jobsPerBatchInsert = 25
 
    @contextmanager
-    def batch(self):
+    def batch(self) -> None:
        self._batchedUpdates = []
        yield
        batches = [self._batchedUpdates[i:i + self.jobsPerBatchInsert] for i in
                   range(0, len(self._batchedUpdates), self.jobsPerBatchInsert)]
 
        for batch in batches:
+            items: List[ReplaceableItemTypeDef] = []
            for jobDescription in batch:
+                item_attributes: List[ReplaceableAttributeTypeDef] = []
                jobDescription.pre_update_hook()
-
+                item_name = compat_bytes(jobDescription.jobStoreID)
+                got_job_attributes: List[AttributeTypeDef] = self._awsJobToAttributes(jobDescription)
+                for each_attribute in got_job_attributes:
+                    new_attribute: ReplaceableAttributeTypeDef = {"Name": each_attribute["Name"],
+                                                                  "Value": each_attribute["Value"],
+                                                                  "Replace": True}
+                    item_attributes.append(new_attribute)
+                items.append({"Name": item_name,
+                              "Attributes": item_attributes})
+
            for attempt in retry_sdb():
                with attempt:
-
+                    self.db.batch_put_attributes(DomainName=self.jobs_domain_name, Items=items)
            self._batchedUpdates = None
 
-    def assign_job_id(self, job_description):
+    def assign_job_id(self, job_description: JobDescription) -> None:
        jobStoreID = self._new_job_id()
-        logger.debug("Assigning ID to job %s
-                     jobStoreID, '<no command>' if job_description.command is None else job_description.command)
+        logger.debug("Assigning ID to job %s", jobStoreID)
        job_description.jobStoreID = jobStoreID
 
-    def create_job(self, job_description):
+    def create_job(self, job_description: JobDescription) -> JobDescription:
        if hasattr(self, "_batchedUpdates") and self._batchedUpdates is not None:
            self._batchedUpdates.append(job_description)
        else:
            self.update_job(job_description)
        return job_description
 
-    def job_exists(self, job_id):
+    def job_exists(self, job_id: Union[bytes, str]) -> bool:
        for attempt in retry_sdb():
            with attempt:
-                return
-
-
-
+                return len(self.db.get_attributes(DomainName=self.jobs_domain_name,
+                                                  ItemName=compat_bytes(job_id),
+                                                  AttributeNames=[SDBHelper.presenceIndicator()],
+                                                  ConsistentRead=True).get("Attributes", [])) > 0
 
-    def jobs(self):
-
+    def jobs(self) -> Generator[Job, None, None]:
+        job_items: Optional[List[ItemTypeDef]] = None
        for attempt in retry_sdb():
            with attempt:
-
-
-
-
-
+                job_items = boto3_pager(self.db.select,
+                                        "Items",
+                                        ConsistentRead=True,
+                                        SelectExpression="select * from `%s`" % self.jobs_domain_name)
+        assert job_items is not None
+        for jobItem in job_items:
            yield self._awsJobFromItem(jobItem)
 
-    def load_job(self, job_id):
-
+    def load_job(self, job_id: FileID) -> Job:
+        item_attributes = None
        for attempt in retry_sdb():
            with attempt:
-
-
+                item_attributes = self.db.get_attributes(DomainName=self.jobs_domain_name,
+                                                         ItemName=compat_bytes(job_id),
+                                                         ConsistentRead=True).get("Attributes", [])
+        if not item_attributes:
            raise NoSuchJobException(job_id)
-        job = self.
+        job = self._awsJobFromAttributes(item_attributes)
        if job is None:
            raise NoSuchJobException(job_id)
        logger.debug("Loaded job %s", job_id)
@@ -365,10 +412,12 @@ class AWSJobStore(AbstractJobStore):
    def update_job(self, job_description):
        logger.debug("Updating job %s", job_description.jobStoreID)
        job_description.pre_update_hook()
-
+        job_attributes = self._awsJobToAttributes(job_description)
+        update_attributes: List[ReplaceableAttributeTypeDef] = [{"Name": attribute["Name"], "Value": attribute["Value"], "Replace": True}
+                                                                for attribute in job_attributes]
        for attempt in retry_sdb():
            with attempt:
-
+                self.db.put_attributes(DomainName=self.jobs_domain_name, ItemName=compat_bytes(job_description.jobStoreID), Attributes=update_attributes)
 
    itemsPerBatchDelete = 25
 
@@ -377,49 +426,53 @@ class AWSJobStore(AbstractJobStore):
        logger.debug("Deleting job %s", job_id)
 
        # If the job is overlarge, delete its file from the filestore
-        item = None
        for attempt in retry_sdb():
            with attempt:
-
+                attributes = self.db.get_attributes(DomainName=self.jobs_domain_name,
                                                    ItemName=compat_bytes(job_id),
                                                    ConsistentRead=True).get("Attributes", [])
        # If the overlargeID has fallen off, maybe we partially deleted the
        # attributes of the item? Or raced on it? Or hit SimpleDB being merely
        # eventually consistent? We should still be able to get rid of it.
-        self.
-
+        self._checkAttributes(attributes, enforce=False)
+        overlarge_id_value = get_item_from_attributes(attributes=attributes, name="overlargeID")
+        if overlarge_id_value:
            logger.debug("Deleting job from filestore")
-            self.delete_file(
+            self.delete_file(overlarge_id_value)
        for attempt in retry_sdb():
            with attempt:
-                self.
-        items = None
+                self.db.delete_attributes(DomainName=self.jobs_domain_name, ItemName=compat_bytes(job_id))
+        items: Optional[List[ItemTypeDef]] = None
        for attempt in retry_sdb():
            with attempt:
-                items = list(self.
-
-
+                items = list(boto3_pager(self.db.select,
+                                         "Items",
+                                         ConsistentRead=True,
+                                         SelectExpression=f"select version from `{self.files_domain_name}` where ownerID='{job_id}'"))
        assert items is not None
        if items:
            logger.debug("Deleting %d file(s) associated with job %s", len(items), job_id)
            n = self.itemsPerBatchDelete
            batches = [items[i:i + n] for i in range(0, len(items), n)]
            for batch in batches:
-
+                delete_items: List[DeletableItemTypeDef] = [{"Name": item["Name"]} for item in batch]
                for attempt in retry_sdb():
                    with attempt:
-                        self.
+                        self.db.batch_delete_attributes(DomainName=self.files_domain_name, Items=delete_items)
            for item in items:
-
+                item: ItemTypeDef
+                version = get_item_from_attributes(attributes=item["Attributes"], name="version")
                for attempt in retry_s3():
                    with attempt:
                        if version:
-                            self.s3_client.delete_object(Bucket=self.
-                                                         Key=compat_bytes(item
+                            self.s3_client.delete_object(Bucket=self.files_bucket.name,
+                                                         Key=compat_bytes(item["Name"]),
                                                         VersionId=version)
                        else:
-                            self.s3_client.delete_object(Bucket=self.
-                                                         Key=compat_bytes(item
+                            self.s3_client.delete_object(Bucket=self.files_bucket.name,
+                                                         Key=compat_bytes(item["Name"]))
 
-    def get_empty_file_store_id(self, jobStoreID=None, cleanup=False, basename=None):
+    def get_empty_file_store_id(self, jobStoreID=None, cleanup=False, basename=None) -> FileID:
        info = self.FileInfo.create(jobStoreID if cleanup else None)
        with info.uploadStream() as _:
            # Empty
@@ -428,7 +481,8 @@ class AWSJobStore(AbstractJobStore):
        logger.debug("Created %r.", info)
        return info.fileID
 
-    def _import_file(self, otherCls, uri, shared_file_name=None,
+    def _import_file(self, otherCls, uri: ParseResult, shared_file_name: Optional[str] = None,
+                     hardlink: bool = False, symlink: bool = True) -> Optional[FileID]:
        try:
            if issubclass(otherCls, AWSJobStore):
                srcObj = get_object_for_url(uri, existing=True)
@@ -451,7 +505,7 @@ class AWSJobStore(AbstractJobStore):
            # copy if exception
            return super()._import_file(otherCls, uri, shared_file_name=shared_file_name)
 
-    def _export_file(self, otherCls, file_id, uri):
+    def _export_file(self, otherCls, file_id: FileID, uri: ParseResult) -> None:
        try:
            if issubclass(otherCls, AWSJobStore):
                dstObj = get_object_for_url(uri)
@@ -475,11 +529,11 @@ class AWSJobStore(AbstractJobStore):
        return cls._get_is_directory(url)
 
    @classmethod
-    def _get_size(cls, url):
+    def _get_size(cls, url: ParseResult) -> int:
        return get_object_for_url(url, existing=True).content_length
 
    @classmethod
-    def _read_from_url(cls, url, writable):
+    def _read_from_url(cls, url: ParseResult, writable):
        srcObj = get_object_for_url(url, existing=True)
        srcObj.download_fileobj(writable)
        return (
@@ -497,7 +551,7 @@ class AWSJobStore(AbstractJobStore):
        return response['Body']
 
    @classmethod
-    def _write_to_url(cls, readable, url, executable=False):
+    def _write_to_url(cls, readable, url: ParseResult, executable: bool = False) -> None:
        dstObj = get_object_for_url(url)
 
        logger.debug("Uploading %s", dstObj.key)
@@ -519,10 +573,10 @@ class AWSJobStore(AbstractJobStore):
        return len(list_objects_for_url(url)) > 0
 
    @classmethod
-    def _supports_url(cls, url, export=False):
+    def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
        return url.scheme.lower() == 's3'
 
-    def write_file(self, local_path, job_id=None, cleanup=False):
+    def write_file(self, local_path: FileID, job_id: Optional[FileID] = None, cleanup: bool = False) -> FileID:
        info = self.FileInfo.create(job_id if cleanup else None)
        info.upload(local_path, not self.config.disableJobStoreChecksumVerification)
        info.save()
@@ -530,7 +584,7 @@ class AWSJobStore(AbstractJobStore):
        return info.fileID
 
    @contextmanager
-    def write_file_stream(self, job_id=None, cleanup=False, basename=None, encoding=None, errors=None):
+    def write_file_stream(self, job_id: Optional[FileID] = None, cleanup: bool = False, basename=None, encoding=None, errors=None):
        info = self.FileInfo.create(job_id if cleanup else None)
        with info.uploadStream(encoding=encoding, errors=errors) as writable:
            yield writable, info.fileID
@@ -614,7 +668,7 @@ class AWSJobStore(AbstractJobStore):
        itemsProcessed = 0
 
        for info in self._read_logs(callback, self.statsFileOwnerID):
-            info._ownerID = self.readStatsFileOwnerID
+            info._ownerID = str(self.readStatsFileOwnerID)  # boto3 requires strings
            info.save()
            itemsProcessed += 1
 
@@ -628,10 +682,10 @@ class AWSJobStore(AbstractJobStore):
        items = None
        for attempt in retry_sdb():
            with attempt:
-                items =
-
-
-
+                items = boto3_pager(self.db.select,
+                                    "Items",
+                                    ConsistentRead=True,
+                                    SelectExpression="select * from `{}` where ownerID='{}'".format(self.files_domain_name, str(ownerId)))
        assert items is not None
        for item in items:
            info = self.FileInfo.fromItem(item)
@@ -648,10 +702,10 @@ class AWSJobStore(AbstractJobStore):
            with info.uploadStream(allowInlining=False) as f:
                f.write(info.content)
 
-        self.
+        self.files_bucket.Object(compat_bytes(jobStoreFileID)).Acl().put(ACL='public-read')
 
        url = self.s3_client.generate_presigned_url('get_object',
-                                                    Params={'Bucket': self.
+                                                    Params={'Bucket': self.files_bucket.name,
                                                            'Key': compat_bytes(jobStoreFileID),
                                                            'VersionId': info.version},
                                                    ExpiresIn=self.publicUrlExpiration.total_seconds())
@@ -676,16 +730,6 @@ class AWSJobStore(AbstractJobStore):
        self._requireValidSharedFileName(shared_file_name)
        return self.get_public_url(self._shared_file_id(shared_file_name))
 
-    def _connectSimpleDB(self):
-        """
-        :rtype: SDBConnection
-        """
-        db = boto.sdb.connect_to_region(self.region)
-        if db is None:
-            raise ValueError("Could not connect to SimpleDB. Make sure '%s' is a valid SimpleDB region." % self.region)
-        monkeyPatchSdbConnection(db)
-        return db
-
    def _bindBucket(self,
                    bucket_name: str,
                    create: bool = False,
@@ -717,7 +761,7 @@ class AWSJobStore(AbstractJobStore):
            """
 
            if (isinstance(error, ClientError) and
-
+                    get_error_status(error) in (404, 409)):
                # Handle cases where the bucket creation is in a weird state that might let us proceed.
                # https://github.com/BD2KGenomics/toil/issues/955
                # https://github.com/BD2KGenomics/toil/issues/995
@@ -760,7 +804,7 @@ class AWSJobStore(AbstractJobStore):
                    # NoSuchBucket. We let that kick us back up to the
                    # main retry loop.
                    assert (
-
+                        get_bucket_region(bucket_name) == self.region
                    ), f"bucket_name: {bucket_name}, {get_bucket_region(bucket_name)} != {self.region}"
 
                    tags = build_tag_dict_from_env()
@@ -815,8 +859,10 @@ class AWSJobStore(AbstractJobStore):
 
        return bucket
 
-    def _bindDomain(self, domain_name, create=False, block=True):
+    def _bindDomain(self, domain_name: str, create: bool = False, block: bool = True) -> None:
        """
+        Return the Boto3 domain name representing the SDB domain. When create=True, it will
+        create the domain if it does not exist.
        Return the Boto Domain object representing the SDB domain of the given name. If the
        domain does not exist and `create` is True, it will be created.
 
@@ -824,11 +870,11 @@ class AWSJobStore(AbstractJobStore):
 
        :param bool create: True if domain should be created if it doesn't exist
 
-        :param bool block: If False,
+        :param bool block: If False, raise DomainDoesNotExist if the domain doesn't exist. If True, wait until
               domain appears. This parameter is ignored if create is True.
 
-        :rtype:
-        :raises
+        :rtype: None
+        :raises ClientError: If `block` is True and the domain still doesn't exist after the
               retry timeout expires.
        """
        logger.debug("Binding to job store domain '%s'.", domain_name)
@@ -838,15 +884,17 @@ class AWSJobStore(AbstractJobStore):
        for attempt in retry_sdb(**retryargs):
            with attempt:
                try:
-
-
+                    self.db.domain_metadata(DomainName=domain_name)
+                    return
+                except ClientError as e:
                    if no_such_sdb_domain(e):
                        if create:
-
+                            self.db.create_domain(DomainName=domain_name)
+                            return
                        elif block:
                            raise
                        else:
-
+                            raise DomainDoesNotExist(domain_name)
                    else:
                        raise
 
@@ -958,7 +1006,7 @@ class AWSJobStore(AbstractJobStore):
            self.version = ''
 
        @classmethod
-        def create(cls, ownerID):
+        def create(cls, ownerID: str):
            return cls(str(uuid.uuid4()), ownerID, encrypted=cls.outer.sseKeyPath is not None)
 
        @classmethod
@@ -969,18 +1017,22 @@ class AWSJobStore(AbstractJobStore):
        def exists(cls, jobStoreFileID):
            for attempt in retry_sdb():
                with attempt:
-                    return bool(cls.outer.
-
-
-
+                    return bool(cls.outer.db.get_attributes(DomainName=cls.outer.files_domain_name,
+                                                            ItemName=compat_bytes(jobStoreFileID),
+                                                            AttributeNames=[cls.presenceIndicator()],
+                                                            ConsistentRead=True).get("Attributes", []))
 
        @classmethod
        def load(cls, jobStoreFileID):
            for attempt in retry_sdb():
                with attempt:
                    self = cls.fromItem(
-
-
+                        {
+                            "Name": compat_bytes(jobStoreFileID),
+                            "Attributes": cls.outer.db.get_attributes(DomainName=cls.outer.files_domain_name,
+                                                                      ItemName=compat_bytes(jobStoreFileID),
+                                                                      ConsistentRead=True).get("Attributes", [])
+                        })
                    return self
 
        @classmethod
@@ -1010,7 +1062,7 @@ class AWSJobStore(AbstractJobStore):
            return self
 
        @classmethod
-        def fromItem(cls, item):
+        def fromItem(cls, item: ItemTypeDef):
            """
            Convert an SDB item to an instance of this class.
 
@@ -1023,31 +1075,26 @@ class AWSJobStore(AbstractJobStore):
                return s if s is None else str(s)
 
            # ownerID and encrypted are the only mandatory attributes
-            ownerID =
-            encrypted = item.get('encrypted')
+            ownerID, encrypted, version, checksum = SDBHelper.get_attributes_from_item(item, ["ownerID", "encrypted", "version", "checksum"])
            if ownerID is None:
                assert encrypted is None
                return None
            else:
-                version = strOrNone(item['version'])
-                checksum = strOrNone(item.get('checksum'))
                encrypted = strict_bool(encrypted)
-                content, numContentChunks = cls.attributesToBinary(item)
+                content, numContentChunks = cls.attributesToBinary(item["Attributes"])
                if encrypted:
                    sseKeyPath = cls.outer.sseKeyPath
                    if sseKeyPath is None:
                        raise AssertionError('Content is encrypted but no key was provided.')
                    if content is not None:
                        content = encryption.decrypt(content, sseKeyPath)
-                self = cls(fileID=item
+                self = cls(fileID=item["Name"], ownerID=ownerID, encrypted=encrypted, version=version,
                           content=content, numContentChunks=numContentChunks, checksum=checksum)
                return self
 
-        def toItem(self):
+        def toItem(self) -> Tuple[Dict[str, str], int]:
            """
-            Convert this instance to
-
-            :rtype: (dict,int)
+            Convert this instance to a dictionary of attribute names to values
 
            :return: the attributes dict and an integer specifying the the number of chunk
                     attributes in the dictionary that are used for storing inlined content.
@@ -1061,9 +1108,9 @@ class AWSJobStore(AbstractJobStore):
                content = encryption.encrypt(content, sseKeyPath)
            assert content is None or isinstance(content, bytes)
            attributes = self.binaryToAttributes(content)
-            numChunks = attributes['numChunks']
-            attributes.update(dict(ownerID=self.ownerID,
-                                   encrypted=self.encrypted,
+            numChunks = int(attributes['numChunks'])
+            attributes.update(dict(ownerID=self.ownerID or '',
+                                   encrypted=str(self.encrypted),
                                   version=self.version or '',
                                   checksum=self.checksum or ''))
            return attributes, numChunks
@@ -1078,32 +1125,47 @@ class AWSJobStore(AbstractJobStore):
 
        def save(self):
            attributes, numNewContentChunks = self.toItem()
+            attributes_boto3 = SDBHelper.attributeDictToList(attributes)
            # False stands for absence
-
+            if self.previousVersion is None:
+                expected: UpdateConditionTypeDef = {"Name": 'version', "Exists": False}
+            else:
+                expected = {"Name": 'version', "Value": cast(str, self.previousVersion)}
            try:
                for attempt in retry_sdb():
                    with attempt:
-
-
-
+                        self.outer.db.put_attributes(DomainName=self.outer.files_domain_name,
+                                                     ItemName=compat_bytes(self.fileID),
+                                                     Attributes=[{"Name": attribute["Name"], "Value": attribute["Value"], "Replace": True}
+                                                                 for attribute in attributes_boto3],
+                                                     Expected=expected)
                # clean up the old version of the file if necessary and safe
                if self.previousVersion and (self.previousVersion != self.version):
                    for attempt in retry_s3():
                        with attempt:
-                            self.outer.s3_client.delete_object(Bucket=self.outer.
+                            self.outer.s3_client.delete_object(Bucket=self.outer.files_bucket.name,
                                                               Key=compat_bytes(self.fileID),
                                                               VersionId=self.previousVersion)
                self._previousVersion = self._version
                if numNewContentChunks < self._numContentChunks:
                    residualChunks = range(numNewContentChunks, self._numContentChunks)
-
+                    residual_chunk_names = [self._chunkName(i) for i in residualChunks]
+                    # boto3 requires providing the value as well as the name in the attribute, and we don't store it locally
+                    # the php sdk resolves this issue by not requiring the Value key https://github.com/aws/aws-sdk-php/issues/185
+                    # but this doesnt extend to boto3
+                    delete_attributes = self.outer.db.get_attributes(DomainName=self.outer.files_domain_name,
+                                                                     ItemName=compat_bytes(self.fileID),
+                                                                     AttributeNames=[chunk for chunk in residual_chunk_names]).get("Attributes")
                    for attempt in retry_sdb():
                        with attempt:
-                            self.outer.
-
+                            self.outer.db.delete_attributes(DomainName=self.outer.files_domain_name,
+                                                            ItemName=compat_bytes(self.fileID),
+                                                            Attributes=delete_attributes)
+                    self.outer.db.get_attributes(DomainName=self.outer.files_domain_name, ItemName=compat_bytes(self.fileID))
+
                self._numContentChunks = numNewContentChunks
-            except
-                if e
+            except ClientError as e:
+                if get_error_code(e) == 'ConditionalCheckFailed':
                    raise ConcurrentFileModificationException(self.fileID)
                else:
                    raise
@@ -1123,10 +1185,10 @@ class AWSJobStore(AbstractJobStore):
            self.checksum = self._get_file_checksum(localFilePath) if calculateChecksum else None
            self.version = uploadFromPath(localFilePath,
                                          resource=resource,
-                                          bucketName=self.outer.
+                                          bucketName=self.outer.files_bucket.name,
                                          fileID=compat_bytes(self.fileID),
                                          headerArgs=headerArgs,
-                                          partSize=self.outer.
+                                          partSize=self.outer.part_size)
 
        def _start_checksum(self, to_match=None, algorithm='sha1'):
            """
@@ -1173,7 +1235,7 @@ class AWSJobStore(AbstractJobStore):
                # We expected a particular hash
                if result_hash != checksum_in_progress[2]:
                    raise ChecksumError('Checksum mismatch. Expected: %s Actual: %s' %
-
+                                        (checksum_in_progress[2], result_hash))
 
            return '$'.join([checksum_in_progress[0], result_hash])
 
@@ -1204,7 +1266,7 @@ class AWSJobStore(AbstractJobStore):
            class MultiPartPipe(WritablePipe):
                def readFrom(self, readable):
                    # Get the first block of data we want to put
-                    buf = readable.read(store.
+                    buf = readable.read(store.part_size)
                    assert isinstance(buf, bytes)
 
                    if allowInlining and len(buf) <= info.maxInlinedSize():
@@ -1219,7 +1281,7 @@ class AWSJobStore(AbstractJobStore):
                        info._update_checksum(hasher, buf)
 
                        client = store.s3_client
-                        bucket_name = store.
+                        bucket_name = store.files_bucket.name
                        headerArgs = info._s3EncryptionArgs()
 
                        for attempt in retry_s3():
@@ -1233,7 +1295,6 @@ class AWSJobStore(AbstractJobStore):
                        parts = []
                        logger.debug('Multipart upload started as %s', uploadId)
 
-
                        for attempt in retry_s3():
                            with attempt:
                                for i in range(CONSISTENCY_TICKS):
@@ -1242,8 +1303,8 @@ class AWSJobStore(AbstractJobStore):
                                        MaxUploads=1,
                                        Prefix=compat_bytes(info.fileID))
                                    if ('Uploads' in response and
-
-
+                                            len(response['Uploads']) != 0 and
+                                            response['Uploads'][0]['UploadId'] == uploadId):
 
                                        logger.debug('Multipart upload visible as %s', uploadId)
                                        break
@@ -1268,7 +1329,7 @@ class AWSJobStore(AbstractJobStore):
                                parts.append({"PartNumber": part_num + 1, "ETag": part["ETag"]})
 
                            # Get the next block of data we want to put
-                            buf = readable.read(info.outer.
+                            buf = readable.read(info.outer.part_size)
                            assert isinstance(buf, bytes)
                            if len(buf) == 0:
                                # Don't allow any part other than the very first to be empty.
@@ -1284,7 +1345,7 @@ class AWSJobStore(AbstractJobStore):
 
                    else:
 
-                        while not store._getBucketVersioning(store.
+                        while not store._getBucketVersioning(store.files_bucket.name):
                            logger.warning('Versioning does not appear to be enabled yet. Deferring multipart '
                                           'upload completion...')
                            time.sleep(1)
@@ -1341,7 +1402,7 @@ class AWSJobStore(AbstractJobStore):
                    info._update_checksum(hasher, buf)
                    info.checksum = info._finish_checksum(hasher)
 
-                    bucket_name = store.
+                    bucket_name = store.files_bucket.name
                    headerArgs = info._s3EncryptionArgs()
                    client = store.s3_client
 
@@ -1422,7 +1483,7 @@ class AWSJobStore(AbstractJobStore):
                                         srcBucketName=compat_bytes(srcObj.bucket_name),
                                         srcKeyName=compat_bytes(srcObj.key),
                                         srcKeyVersion=compat_bytes(srcObj.version_id),
-                                         dstBucketName=compat_bytes(self.outer.
+                                         dstBucketName=compat_bytes(self.outer.files_bucket.name),
                                         dstKeyName=compat_bytes(self._fileID),
                                         sseAlgorithm='AES256',
                                         sseKey=self._getSSEKey())
@@ -1445,7 +1506,7 @@ class AWSJobStore(AbstractJobStore):
                    # encrypted = True if self.outer.sseKeyPath else False
                    with attempt:
                        copyKeyMultipart(resource,
-                                         srcBucketName=compat_bytes(self.outer.
+                                         srcBucketName=compat_bytes(self.outer.files_bucket.name),
                                         srcKeyName=compat_bytes(self.fileID),
                                         srcKeyVersion=compat_bytes(self.version),
                                         dstBucketName=compat_bytes(dstObj.bucket_name),
@@ -1462,7 +1523,7 @@ class AWSJobStore(AbstractJobStore):
                    f.write(self.content)
            elif self.version:
                headerArgs = self._s3EncryptionArgs()
-                obj = self.outer.
+                obj = self.outer.files_bucket.Object(compat_bytes(self.fileID))
 
                for attempt in retry_s3(predicate=lambda e: retryable_s3_errors(e) or isinstance(e, ChecksumError)):
                    with attempt:
@@ -1494,7 +1555,7 @@ class AWSJobStore(AbstractJobStore):
                        writable.write(info.content)
                elif info.version:
                    headerArgs = info._s3EncryptionArgs()
-                    obj = info.outer.
+                    obj = info.outer.files_bucket.Object(compat_bytes(info.fileID))
                    for attempt in retry_s3():
                        with attempt:
                            obj.download_fileobj(writable, ExtraArgs={'VersionId': info.version, **headerArgs})
@@ -1541,15 +1602,16 @@ class AWSJobStore(AbstractJobStore):
        def delete(self):
            store = self.outer
            if self.previousVersion is not None:
+                expected: UpdateConditionTypeDef = {"Name": 'version', "Value": cast(str, self.previousVersion)}
                for attempt in retry_sdb():
                    with attempt:
-                        store.
-
-
+                        store.db.delete_attributes(DomainName=store.files_domain_name,
+                                                   ItemName=compat_bytes(self.fileID),
+                                                   Expected=expected)
                if self.previousVersion:
                    for attempt in retry_s3():
                        with attempt:
-                            store.s3_client.delete_object(Bucket=store.
+                            store.s3_client.delete_object(Bucket=store.files_bucket.name,
                                                          Key=compat_bytes(self.fileID),
                                                          VersionId=self.previousVersion)
 
@@ -1562,7 +1624,7 @@ class AWSJobStore(AbstractJobStore):
            elif self.version:
                for attempt in retry_s3():
                    with attempt:
-                        obj = self.outer.
+                        obj = self.outer.files_bucket.Object(compat_bytes(self.fileID))
                        return obj.content_length
            else:
                return 0
@@ -1631,22 +1693,22 @@ class AWSJobStore(AbstractJobStore):
                pass
        # TODO: Add other failure cases to be ignored here.
        self._registered = None
-        if self.
-            self._delete_bucket(self.
-            self.
-        for name in '
-
-            if
-                self._delete_domain(
+        if self.files_bucket is not None:
+            self._delete_bucket(self.files_bucket)
+            self.files_bucket = None
+        for name in 'files_domain_name', 'jobs_domain_name':
+            domainName = getattr(self, name)
+            if domainName is not None:
+                self._delete_domain(domainName)
            setattr(self, name, None)
        self._registered = False
 
-    def _delete_domain(self,
+    def _delete_domain(self, domainName):
        for attempt in retry_sdb():
            with attempt:
                try:
-
-                except
+                    self.db.delete_domain(DomainName=domainName)
+                except ClientError as e:
                    if not no_such_sdb_domain(e):
                        raise
 
@@ -1680,8 +1742,8 @@ aRepr.maxstring = 38 # so UUIDs don't get truncated (36 for UUID plus 2 for quo
 custom_repr = aRepr.repr
 
 
-class BucketLocationConflictException(
+class BucketLocationConflictException(LocatorException):
    def __init__(self, bucketRegion):
        super().__init__(
            'A bucket with the same name as the jobstore was found in another region (%s). '
-            'Cannot proceed as the unique bucket name is already in use.'
+            'Cannot proceed as the unique bucket name is already in use.', locator=bucketRegion)
```