toil 7.0.0__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +121 -83
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +38 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +489 -137
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +630 -359
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1114 -532
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +988 -315
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +727 -403
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +193 -58
- toil/lib/aws/utils.py +238 -218
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +99 -11
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +65 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +115 -77
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/options/common.py +834 -401
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +148 -64
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +93 -47
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/cwlTest.py +271 -71
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +11 -11
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3513 -1052
- toil/worker.py +269 -128
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/jobStores/aws/jobStore.py
CHANGED
|
@@ -21,46 +21,53 @@ import reprlib
|
|
|
21
21
|
import stat
|
|
22
22
|
import time
|
|
23
23
|
import uuid
|
|
24
|
+
from collections.abc import Generator
|
|
24
25
|
from contextlib import contextmanager
|
|
25
26
|
from io import BytesIO
|
|
26
|
-
from typing import
|
|
27
|
+
from typing import IO, TYPE_CHECKING, Optional, Union, cast
|
|
27
28
|
from urllib.parse import ParseResult, parse_qs, urlencode, urlsplit, urlunsplit
|
|
28
29
|
|
|
29
30
|
from botocore.exceptions import ClientError
|
|
30
|
-
from mypy_boto3_sdb import SimpleDBClient
|
|
31
|
-
from mypy_boto3_sdb.type_defs import ReplaceableItemTypeDef, ReplaceableAttributeTypeDef, SelectResultTypeDef, ItemTypeDef, AttributeTypeDef, DeletableItemTypeDef, UpdateConditionTypeDef
|
|
32
31
|
|
|
33
32
|
import toil.lib.encryption as encryption
|
|
34
33
|
from toil.fileStores import FileID
|
|
35
34
|
from toil.job import Job, JobDescription
|
|
36
|
-
from toil.jobStores.abstractJobStore import (
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
35
|
+
from toil.jobStores.abstractJobStore import (
|
|
36
|
+
AbstractJobStore,
|
|
37
|
+
ConcurrentFileModificationException,
|
|
38
|
+
JobStoreExistsException,
|
|
39
|
+
LocatorException,
|
|
40
|
+
NoSuchFileException,
|
|
41
|
+
NoSuchJobException,
|
|
42
|
+
NoSuchJobStoreException,
|
|
43
|
+
)
|
|
44
|
+
from toil.jobStores.aws.utils import (
|
|
45
|
+
SDBHelper,
|
|
46
|
+
ServerSideCopyProhibitedError,
|
|
47
|
+
copyKeyMultipart,
|
|
48
|
+
fileSizeAndTime,
|
|
49
|
+
no_such_sdb_domain,
|
|
50
|
+
retry_sdb,
|
|
51
|
+
sdb_unavailable,
|
|
52
|
+
uploadFile,
|
|
53
|
+
uploadFromPath,
|
|
54
|
+
)
|
|
55
|
+
from toil.jobStores.utils import ReadablePipe, ReadableTransformingPipe, WritablePipe
|
|
54
56
|
from toil.lib.aws import build_tag_dict_from_env
|
|
55
57
|
from toil.lib.aws.session import establish_boto3_session
|
|
56
|
-
from toil.lib.aws.utils import (
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
58
|
+
from toil.lib.aws.utils import (
|
|
59
|
+
NoBucketLocationError,
|
|
60
|
+
boto3_pager,
|
|
61
|
+
create_s3_bucket,
|
|
62
|
+
enable_public_objects,
|
|
63
|
+
flatten_tags,
|
|
64
|
+
get_bucket_region,
|
|
65
|
+
get_item_from_attributes,
|
|
66
|
+
get_object_for_url,
|
|
67
|
+
list_objects_for_url,
|
|
68
|
+
retry_s3,
|
|
69
|
+
retryable_s3_errors,
|
|
70
|
+
)
|
|
64
71
|
from toil.lib.compatibility import compat_bytes
|
|
65
72
|
from toil.lib.ec2nodes import EC2Regions
|
|
66
73
|
from toil.lib.exceptions import panic
|
|
@@ -70,11 +77,20 @@ from toil.lib.objects import InnerClass
|
|
|
70
77
|
from toil.lib.retry import get_error_code, get_error_status, retry
|
|
71
78
|
|
|
72
79
|
if TYPE_CHECKING:
|
|
80
|
+
from mypy_boto3_sdb.type_defs import (
|
|
81
|
+
AttributeTypeDef,
|
|
82
|
+
DeletableItemTypeDef,
|
|
83
|
+
ItemTypeDef,
|
|
84
|
+
ReplaceableAttributeTypeDef,
|
|
85
|
+
ReplaceableItemTypeDef,
|
|
86
|
+
UpdateConditionTypeDef,
|
|
87
|
+
)
|
|
88
|
+
|
|
73
89
|
from toil import Config
|
|
74
90
|
|
|
75
91
|
boto3_session = establish_boto3_session()
|
|
76
|
-
s3_boto3_resource = boto3_session.resource(
|
|
77
|
-
s3_boto3_client = boto3_session.client(
|
|
92
|
+
s3_boto3_resource = boto3_session.resource("s3")
|
|
93
|
+
s3_boto3_client = boto3_session.client("s3")
|
|
78
94
|
logger = logging.getLogger(__name__)
|
|
79
95
|
|
|
80
96
|
# Sometimes we have to wait for multipart uploads to become real. How long
|
|
@@ -89,6 +105,7 @@ class ChecksumError(Exception):
|
|
|
89
105
|
|
|
90
106
|
class DomainDoesNotExist(Exception):
|
|
91
107
|
"""Raised when a domain that is expected to exist does not exist."""
|
|
108
|
+
|
|
92
109
|
def __init__(self, domain_name):
|
|
93
110
|
super().__init__(f"Expected domain {domain_name} to exist!")
|
|
94
111
|
|
|
@@ -106,14 +123,14 @@ class AWSJobStore(AbstractJobStore):
|
|
|
106
123
|
# URLs where the may interfere with the certificate common name. We use a double
|
|
107
124
|
# underscore as a separator instead.
|
|
108
125
|
#
|
|
109
|
-
bucketNameRe = re.compile(r
|
|
126
|
+
bucketNameRe = re.compile(r"^[a-z0-9][a-z0-9-]+[a-z0-9]$")
|
|
110
127
|
|
|
111
128
|
# See http://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html
|
|
112
129
|
#
|
|
113
130
|
minBucketNameLen = 3
|
|
114
131
|
maxBucketNameLen = 63
|
|
115
132
|
maxNameLen = 10
|
|
116
|
-
nameSeparator =
|
|
133
|
+
nameSeparator = "--"
|
|
117
134
|
|
|
118
135
|
def __init__(self, locator: str, partSize: int = 50 << 20) -> None:
|
|
119
136
|
"""
|
|
@@ -124,23 +141,35 @@ class AWSJobStore(AbstractJobStore):
|
|
|
124
141
|
whole file
|
|
125
142
|
"""
|
|
126
143
|
super().__init__(locator)
|
|
127
|
-
region, namePrefix = locator.split(
|
|
144
|
+
region, namePrefix = locator.split(":")
|
|
128
145
|
regions = EC2Regions.keys()
|
|
129
146
|
if region not in regions:
|
|
130
147
|
raise ValueError(f'Region "{region}" is not one of: {regions}')
|
|
131
148
|
if not self.bucketNameRe.match(namePrefix):
|
|
132
|
-
raise ValueError(
|
|
133
|
-
|
|
134
|
-
|
|
149
|
+
raise ValueError(
|
|
150
|
+
"Invalid name prefix '%s'. Name prefixes must contain only digits, "
|
|
151
|
+
"hyphens or lower-case letters and must not start or end in a "
|
|
152
|
+
"hyphen." % namePrefix
|
|
153
|
+
)
|
|
135
154
|
# Reserve 13 for separator and suffix
|
|
136
|
-
if len(namePrefix) > self.maxBucketNameLen - self.maxNameLen - len(
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
155
|
+
if len(namePrefix) > self.maxBucketNameLen - self.maxNameLen - len(
|
|
156
|
+
self.nameSeparator
|
|
157
|
+
):
|
|
158
|
+
raise ValueError(
|
|
159
|
+
"Invalid name prefix '%s'. Name prefixes may not be longer than 50 "
|
|
160
|
+
"characters." % namePrefix
|
|
161
|
+
)
|
|
162
|
+
if "--" in namePrefix:
|
|
163
|
+
raise ValueError(
|
|
164
|
+
"Invalid name prefix '%s'. Name prefixes may not contain "
|
|
165
|
+
"%s." % (namePrefix, self.nameSeparator)
|
|
166
|
+
)
|
|
167
|
+
logger.debug(
|
|
168
|
+
"Instantiating %s for region %s and name prefix '%s'",
|
|
169
|
+
self.__class__,
|
|
170
|
+
region,
|
|
171
|
+
namePrefix,
|
|
172
|
+
)
|
|
144
173
|
self.region = region
|
|
145
174
|
self.name_prefix = namePrefix
|
|
146
175
|
self.part_size = partSize
|
|
@@ -149,7 +178,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
149
178
|
self.files_bucket = None
|
|
150
179
|
self.db = boto3_session.client(service_name="sdb", region_name=region)
|
|
151
180
|
|
|
152
|
-
self.s3_resource = boto3_session.resource(
|
|
181
|
+
self.s3_resource = boto3_session.resource("s3", region_name=self.region)
|
|
153
182
|
self.s3_client = self.s3_resource.meta.client
|
|
154
183
|
|
|
155
184
|
def initialize(self, config: "Config") -> None:
|
|
@@ -176,7 +205,12 @@ class AWSJobStore(AbstractJobStore):
|
|
|
176
205
|
self._bind(create=False)
|
|
177
206
|
super().resume()
|
|
178
207
|
|
|
179
|
-
def _bind(
|
|
208
|
+
def _bind(
|
|
209
|
+
self,
|
|
210
|
+
create: bool = False,
|
|
211
|
+
block: bool = True,
|
|
212
|
+
check_versioning_consistency: bool = True,
|
|
213
|
+
) -> None:
|
|
180
214
|
def qualify(name):
|
|
181
215
|
assert len(name) <= self.maxNameLen
|
|
182
216
|
return self.name_prefix + self.nameSeparator + name
|
|
@@ -191,11 +225,13 @@ class AWSJobStore(AbstractJobStore):
|
|
|
191
225
|
self.files_domain_name = qualify("files")
|
|
192
226
|
self._bindDomain(self.files_domain_name, create=create, block=block)
|
|
193
227
|
if self.files_bucket is None:
|
|
194
|
-
self.files_bucket = self._bindBucket(
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
228
|
+
self.files_bucket = self._bindBucket(
|
|
229
|
+
qualify("files"),
|
|
230
|
+
create=create,
|
|
231
|
+
block=block,
|
|
232
|
+
versioning=True,
|
|
233
|
+
check_versioning_consistency=check_versioning_consistency,
|
|
234
|
+
)
|
|
199
235
|
|
|
200
236
|
@property
|
|
201
237
|
def _registered(self) -> Optional[bool]:
|
|
@@ -217,25 +253,31 @@ class AWSJobStore(AbstractJobStore):
|
|
|
217
253
|
# can't handle job stores that were partially created by 3.3.0, though.
|
|
218
254
|
registry_domain_name = "toil-registry"
|
|
219
255
|
try:
|
|
220
|
-
self._bindDomain(
|
|
221
|
-
|
|
222
|
-
|
|
256
|
+
self._bindDomain(
|
|
257
|
+
domain_name=registry_domain_name, create=False, block=False
|
|
258
|
+
)
|
|
223
259
|
except DomainDoesNotExist:
|
|
224
260
|
return False
|
|
225
261
|
|
|
226
262
|
for attempt in retry_sdb():
|
|
227
263
|
with attempt:
|
|
228
|
-
get_result = self.db.get_attributes(
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
264
|
+
get_result = self.db.get_attributes(
|
|
265
|
+
DomainName=registry_domain_name,
|
|
266
|
+
ItemName=self.name_prefix,
|
|
267
|
+
AttributeNames=["exists"],
|
|
268
|
+
ConsistentRead=True,
|
|
269
|
+
)
|
|
270
|
+
attributes: list["AttributeTypeDef"] = get_result.get(
|
|
271
|
+
"Attributes", []
|
|
272
|
+
) # the documentation says 'Attributes' should always exist, but this is not true
|
|
273
|
+
exists: Optional[str] = get_item_from_attributes(
|
|
274
|
+
attributes=attributes, name="exists"
|
|
275
|
+
)
|
|
234
276
|
if exists is None:
|
|
235
277
|
return False
|
|
236
|
-
elif exists ==
|
|
278
|
+
elif exists == "True":
|
|
237
279
|
return True
|
|
238
|
-
elif exists ==
|
|
280
|
+
elif exists == "False":
|
|
239
281
|
return None
|
|
240
282
|
else:
|
|
241
283
|
assert False
|
|
@@ -244,31 +286,40 @@ class AWSJobStore(AbstractJobStore):
|
|
|
244
286
|
def _registered(self, value: bool) -> None:
|
|
245
287
|
registry_domain_name = "toil-registry"
|
|
246
288
|
try:
|
|
247
|
-
self._bindDomain(
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
289
|
+
self._bindDomain(
|
|
290
|
+
domain_name=registry_domain_name,
|
|
291
|
+
# Only create registry domain when registering or
|
|
292
|
+
# transitioning a store
|
|
293
|
+
create=value is not False,
|
|
294
|
+
block=False,
|
|
295
|
+
)
|
|
252
296
|
except DomainDoesNotExist:
|
|
253
297
|
pass
|
|
254
298
|
else:
|
|
255
299
|
for attempt in retry_sdb():
|
|
256
300
|
with attempt:
|
|
257
301
|
if value is False:
|
|
258
|
-
self.db.delete_attributes(
|
|
259
|
-
|
|
302
|
+
self.db.delete_attributes(
|
|
303
|
+
DomainName=registry_domain_name, ItemName=self.name_prefix
|
|
304
|
+
)
|
|
260
305
|
else:
|
|
261
306
|
if value is True:
|
|
262
|
-
attributes:
|
|
307
|
+
attributes: list["ReplaceableAttributeTypeDef"] = [
|
|
308
|
+
{"Name": "exists", "Value": "True", "Replace": True}
|
|
309
|
+
]
|
|
263
310
|
elif value is None:
|
|
264
|
-
attributes = [
|
|
311
|
+
attributes = [
|
|
312
|
+
{"Name": "exists", "Value": "False", "Replace": True}
|
|
313
|
+
]
|
|
265
314
|
else:
|
|
266
315
|
assert False
|
|
267
|
-
self.db.put_attributes(
|
|
268
|
-
|
|
269
|
-
|
|
316
|
+
self.db.put_attributes(
|
|
317
|
+
DomainName=registry_domain_name,
|
|
318
|
+
ItemName=self.name_prefix,
|
|
319
|
+
Attributes=attributes,
|
|
320
|
+
)
|
|
270
321
|
|
|
271
|
-
def _checkItem(self, item: ItemTypeDef, enforce: bool = True) -> None:
|
|
322
|
+
def _checkItem(self, item: "ItemTypeDef", enforce: bool = True) -> None:
|
|
272
323
|
"""
|
|
273
324
|
Make sure that the given SimpleDB item actually has the attributes we think it should.
|
|
274
325
|
|
|
@@ -278,22 +329,31 @@ class AWSJobStore(AbstractJobStore):
|
|
|
278
329
|
"""
|
|
279
330
|
self._checkAttributes(item["Attributes"], enforce)
|
|
280
331
|
|
|
281
|
-
def _checkAttributes(
|
|
332
|
+
def _checkAttributes(
|
|
333
|
+
self, attributes: list["AttributeTypeDef"], enforce: bool = True
|
|
334
|
+
) -> None:
|
|
282
335
|
if get_item_from_attributes(attributes=attributes, name="overlargeID") is None:
|
|
283
|
-
logger.error(
|
|
284
|
-
|
|
336
|
+
logger.error(
|
|
337
|
+
"overlargeID attribute isn't present: either SimpleDB entry is "
|
|
338
|
+
"corrupt or jobstore is from an extremely old Toil: %s",
|
|
339
|
+
attributes,
|
|
340
|
+
)
|
|
285
341
|
if enforce:
|
|
286
|
-
raise RuntimeError(
|
|
287
|
-
|
|
342
|
+
raise RuntimeError(
|
|
343
|
+
"encountered SimpleDB entry missing required attribute "
|
|
344
|
+
"'overlargeID'; is your job store ancient?"
|
|
345
|
+
)
|
|
288
346
|
|
|
289
|
-
def _awsJobFromAttributes(self, attributes:
|
|
347
|
+
def _awsJobFromAttributes(self, attributes: list["AttributeTypeDef"]) -> Job:
|
|
290
348
|
"""
|
|
291
349
|
Get a Toil Job object from attributes that are defined in an item from the DB
|
|
292
350
|
:param attributes: List of attributes
|
|
293
351
|
:return: Toil job
|
|
294
352
|
"""
|
|
295
353
|
self._checkAttributes(attributes)
|
|
296
|
-
overlarge_id_value = get_item_from_attributes(
|
|
354
|
+
overlarge_id_value = get_item_from_attributes(
|
|
355
|
+
attributes=attributes, name="overlargeID"
|
|
356
|
+
)
|
|
297
357
|
if overlarge_id_value:
|
|
298
358
|
assert self.file_exists(overlarge_id_value)
|
|
299
359
|
# This is an overlarge job, download the actual attributes
|
|
@@ -309,15 +369,14 @@ class AWSJobStore(AbstractJobStore):
|
|
|
309
369
|
job.assignConfig(self.config)
|
|
310
370
|
return job
|
|
311
371
|
|
|
312
|
-
def _awsJobFromItem(self, item: ItemTypeDef) -> Job:
|
|
372
|
+
def _awsJobFromItem(self, item: "ItemTypeDef") -> Job:
|
|
313
373
|
"""
|
|
314
374
|
Get a Toil Job object from an item from the DB
|
|
315
|
-
:param item: ItemTypeDef
|
|
316
375
|
:return: Toil Job
|
|
317
376
|
"""
|
|
318
377
|
return self._awsJobFromAttributes(item["Attributes"])
|
|
319
378
|
|
|
320
|
-
def _awsJobToAttributes(self, job: JobDescription) ->
|
|
379
|
+
def _awsJobToAttributes(self, job: JobDescription) -> list["AttributeTypeDef"]:
|
|
321
380
|
binary = pickle.dumps(job, protocol=pickle.HIGHEST_PROTOCOL)
|
|
322
381
|
if len(binary) > SDBHelper.maxBinarySize(extraReservedChunks=1):
|
|
323
382
|
# Store as an overlarge job in S3
|
|
@@ -330,7 +389,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
330
389
|
item["overlargeID"] = ""
|
|
331
390
|
return SDBHelper.attributeDictToList(item)
|
|
332
391
|
|
|
333
|
-
def _awsJobToItem(self, job: JobDescription, name: str) -> ItemTypeDef:
|
|
392
|
+
def _awsJobToItem(self, job: JobDescription, name: str) -> "ItemTypeDef":
|
|
334
393
|
return {"Name": name, "Attributes": self._awsJobToAttributes(job)}
|
|
335
394
|
|
|
336
395
|
jobsPerBatchInsert = 25
|
|
@@ -339,27 +398,34 @@ class AWSJobStore(AbstractJobStore):
|
|
|
339
398
|
def batch(self) -> None:
|
|
340
399
|
self._batchedUpdates = []
|
|
341
400
|
yield
|
|
342
|
-
batches = [
|
|
343
|
-
|
|
401
|
+
batches = [
|
|
402
|
+
self._batchedUpdates[i : i + self.jobsPerBatchInsert]
|
|
403
|
+
for i in range(0, len(self._batchedUpdates), self.jobsPerBatchInsert)
|
|
404
|
+
]
|
|
344
405
|
|
|
345
406
|
for batch in batches:
|
|
346
|
-
items:
|
|
407
|
+
items: list["ReplaceableItemTypeDef"] = []
|
|
347
408
|
for jobDescription in batch:
|
|
348
|
-
item_attributes:
|
|
409
|
+
item_attributes: list["ReplaceableAttributeTypeDef"] = []
|
|
349
410
|
jobDescription.pre_update_hook()
|
|
350
411
|
item_name = compat_bytes(jobDescription.jobStoreID)
|
|
351
|
-
got_job_attributes:
|
|
412
|
+
got_job_attributes: list["AttributeTypeDef"] = self._awsJobToAttributes(
|
|
413
|
+
jobDescription
|
|
414
|
+
)
|
|
352
415
|
for each_attribute in got_job_attributes:
|
|
353
|
-
new_attribute: ReplaceableAttributeTypeDef = {
|
|
354
|
-
|
|
355
|
-
|
|
416
|
+
new_attribute: "ReplaceableAttributeTypeDef" = {
|
|
417
|
+
"Name": each_attribute["Name"],
|
|
418
|
+
"Value": each_attribute["Value"],
|
|
419
|
+
"Replace": True,
|
|
420
|
+
}
|
|
356
421
|
item_attributes.append(new_attribute)
|
|
357
|
-
items.append({"Name": item_name,
|
|
358
|
-
"Attributes": item_attributes})
|
|
422
|
+
items.append({"Name": item_name, "Attributes": item_attributes})
|
|
359
423
|
|
|
360
424
|
for attempt in retry_sdb():
|
|
361
425
|
with attempt:
|
|
362
|
-
self.db.batch_put_attributes(
|
|
426
|
+
self.db.batch_put_attributes(
|
|
427
|
+
DomainName=self.jobs_domain_name, Items=items
|
|
428
|
+
)
|
|
363
429
|
self._batchedUpdates = None
|
|
364
430
|
|
|
365
431
|
def assign_job_id(self, job_description: JobDescription) -> None:
|
|
@@ -377,19 +443,28 @@ class AWSJobStore(AbstractJobStore):
|
|
|
377
443
|
def job_exists(self, job_id: Union[bytes, str]) -> bool:
|
|
378
444
|
for attempt in retry_sdb():
|
|
379
445
|
with attempt:
|
|
380
|
-
return
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
446
|
+
return (
|
|
447
|
+
len(
|
|
448
|
+
self.db.get_attributes(
|
|
449
|
+
DomainName=self.jobs_domain_name,
|
|
450
|
+
ItemName=compat_bytes(job_id),
|
|
451
|
+
AttributeNames=[SDBHelper.presenceIndicator()],
|
|
452
|
+
ConsistentRead=True,
|
|
453
|
+
).get("Attributes", [])
|
|
454
|
+
)
|
|
455
|
+
> 0
|
|
456
|
+
)
|
|
384
457
|
|
|
385
458
|
def jobs(self) -> Generator[Job, None, None]:
|
|
386
|
-
job_items: Optional[
|
|
459
|
+
job_items: Optional[list["ItemTypeDef"]] = None
|
|
387
460
|
for attempt in retry_sdb():
|
|
388
461
|
with attempt:
|
|
389
|
-
job_items = boto3_pager(
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
462
|
+
job_items = boto3_pager(
|
|
463
|
+
self.db.select,
|
|
464
|
+
"Items",
|
|
465
|
+
ConsistentRead=True,
|
|
466
|
+
SelectExpression="select * from `%s`" % self.jobs_domain_name,
|
|
467
|
+
)
|
|
393
468
|
assert job_items is not None
|
|
394
469
|
for jobItem in job_items:
|
|
395
470
|
yield self._awsJobFromItem(jobItem)
|
|
@@ -398,9 +473,11 @@ class AWSJobStore(AbstractJobStore):
|
|
|
398
473
|
item_attributes = None
|
|
399
474
|
for attempt in retry_sdb():
|
|
400
475
|
with attempt:
|
|
401
|
-
item_attributes = self.db.get_attributes(
|
|
402
|
-
|
|
403
|
-
|
|
476
|
+
item_attributes = self.db.get_attributes(
|
|
477
|
+
DomainName=self.jobs_domain_name,
|
|
478
|
+
ItemName=compat_bytes(job_id),
|
|
479
|
+
ConsistentRead=True,
|
|
480
|
+
).get("Attributes", [])
|
|
404
481
|
if not item_attributes:
|
|
405
482
|
raise NoSuchJobException(job_id)
|
|
406
483
|
job = self._awsJobFromAttributes(item_attributes)
|
|
@@ -413,11 +490,17 @@ class AWSJobStore(AbstractJobStore):
|
|
|
413
490
|
logger.debug("Updating job %s", job_description.jobStoreID)
|
|
414
491
|
job_description.pre_update_hook()
|
|
415
492
|
job_attributes = self._awsJobToAttributes(job_description)
|
|
416
|
-
update_attributes:
|
|
417
|
-
|
|
493
|
+
update_attributes: list["ReplaceableAttributeTypeDef"] = [
|
|
494
|
+
{"Name": attribute["Name"], "Value": attribute["Value"], "Replace": True}
|
|
495
|
+
for attribute in job_attributes
|
|
496
|
+
]
|
|
418
497
|
for attempt in retry_sdb():
|
|
419
498
|
with attempt:
|
|
420
|
-
self.db.put_attributes(
|
|
499
|
+
self.db.put_attributes(
|
|
500
|
+
DomainName=self.jobs_domain_name,
|
|
501
|
+
ItemName=compat_bytes(job_description.jobStoreID),
|
|
502
|
+
Attributes=update_attributes,
|
|
503
|
+
)
|
|
421
504
|
|
|
422
505
|
itemsPerBatchDelete = 25
|
|
423
506
|
|
|
@@ -428,51 +511,75 @@ class AWSJobStore(AbstractJobStore):
|
|
|
428
511
|
# If the job is overlarge, delete its file from the filestore
|
|
429
512
|
for attempt in retry_sdb():
|
|
430
513
|
with attempt:
|
|
431
|
-
attributes = self.db.get_attributes(
|
|
432
|
-
|
|
433
|
-
|
|
514
|
+
attributes = self.db.get_attributes(
|
|
515
|
+
DomainName=self.jobs_domain_name,
|
|
516
|
+
ItemName=compat_bytes(job_id),
|
|
517
|
+
ConsistentRead=True,
|
|
518
|
+
).get("Attributes", [])
|
|
434
519
|
# If the overlargeID has fallen off, maybe we partially deleted the
|
|
435
520
|
# attributes of the item? Or raced on it? Or hit SimpleDB being merely
|
|
436
521
|
# eventually consistent? We should still be able to get rid of it.
|
|
437
522
|
self._checkAttributes(attributes, enforce=False)
|
|
438
|
-
overlarge_id_value = get_item_from_attributes(
|
|
523
|
+
overlarge_id_value = get_item_from_attributes(
|
|
524
|
+
attributes=attributes, name="overlargeID"
|
|
525
|
+
)
|
|
439
526
|
if overlarge_id_value:
|
|
440
527
|
logger.debug("Deleting job from filestore")
|
|
441
528
|
self.delete_file(overlarge_id_value)
|
|
442
529
|
for attempt in retry_sdb():
|
|
443
530
|
with attempt:
|
|
444
|
-
self.db.delete_attributes(
|
|
445
|
-
|
|
531
|
+
self.db.delete_attributes(
|
|
532
|
+
DomainName=self.jobs_domain_name, ItemName=compat_bytes(job_id)
|
|
533
|
+
)
|
|
534
|
+
items: Optional[list["ItemTypeDef"]] = None
|
|
446
535
|
for attempt in retry_sdb():
|
|
447
536
|
with attempt:
|
|
448
|
-
items = list(
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
537
|
+
items = list(
|
|
538
|
+
boto3_pager(
|
|
539
|
+
self.db.select,
|
|
540
|
+
"Items",
|
|
541
|
+
ConsistentRead=True,
|
|
542
|
+
SelectExpression=f"select version from `{self.files_domain_name}` where ownerID='{job_id}'",
|
|
543
|
+
)
|
|
544
|
+
)
|
|
452
545
|
assert items is not None
|
|
453
546
|
if items:
|
|
454
|
-
logger.debug(
|
|
547
|
+
logger.debug(
|
|
548
|
+
"Deleting %d file(s) associated with job %s", len(items), job_id
|
|
549
|
+
)
|
|
455
550
|
n = self.itemsPerBatchDelete
|
|
456
|
-
batches = [items[i:i + n] for i in range(0, len(items), n)]
|
|
551
|
+
batches = [items[i : i + n] for i in range(0, len(items), n)]
|
|
457
552
|
for batch in batches:
|
|
458
|
-
delete_items:
|
|
553
|
+
delete_items: list["DeletableItemTypeDef"] = [
|
|
554
|
+
{"Name": item["Name"]} for item in batch
|
|
555
|
+
]
|
|
459
556
|
for attempt in retry_sdb():
|
|
460
557
|
with attempt:
|
|
461
|
-
self.db.batch_delete_attributes(
|
|
558
|
+
self.db.batch_delete_attributes(
|
|
559
|
+
DomainName=self.files_domain_name, Items=delete_items
|
|
560
|
+
)
|
|
462
561
|
for item in items:
|
|
463
|
-
item: ItemTypeDef
|
|
464
|
-
version = get_item_from_attributes(
|
|
562
|
+
item: "ItemTypeDef"
|
|
563
|
+
version = get_item_from_attributes(
|
|
564
|
+
attributes=item["Attributes"], name="version"
|
|
565
|
+
)
|
|
465
566
|
for attempt in retry_s3():
|
|
466
567
|
with attempt:
|
|
467
568
|
if version:
|
|
468
|
-
self.s3_client.delete_object(
|
|
469
|
-
|
|
470
|
-
|
|
569
|
+
self.s3_client.delete_object(
|
|
570
|
+
Bucket=self.files_bucket.name,
|
|
571
|
+
Key=compat_bytes(item["Name"]),
|
|
572
|
+
VersionId=version,
|
|
573
|
+
)
|
|
471
574
|
else:
|
|
472
|
-
self.s3_client.delete_object(
|
|
473
|
-
|
|
575
|
+
self.s3_client.delete_object(
|
|
576
|
+
Bucket=self.files_bucket.name,
|
|
577
|
+
Key=compat_bytes(item["Name"]),
|
|
578
|
+
)
|
|
474
579
|
|
|
475
|
-
def get_empty_file_store_id(
|
|
580
|
+
def get_empty_file_store_id(
|
|
581
|
+
self, jobStoreID=None, cleanup=False, basename=None
|
|
582
|
+
) -> FileID:
|
|
476
583
|
info = self.FileInfo.create(jobStoreID if cleanup else None)
|
|
477
584
|
with info.uploadStream() as _:
|
|
478
585
|
# Empty
|
|
@@ -481,8 +588,14 @@ class AWSJobStore(AbstractJobStore):
|
|
|
481
588
|
logger.debug("Created %r.", info)
|
|
482
589
|
return info.fileID
|
|
483
590
|
|
|
484
|
-
def _import_file(
|
|
485
|
-
|
|
591
|
+
def _import_file(
|
|
592
|
+
self,
|
|
593
|
+
otherCls,
|
|
594
|
+
uri: ParseResult,
|
|
595
|
+
shared_file_name: Optional[str] = None,
|
|
596
|
+
hardlink: bool = False,
|
|
597
|
+
symlink: bool = True,
|
|
598
|
+
) -> Optional[FileID]:
|
|
486
599
|
try:
|
|
487
600
|
if issubclass(otherCls, AWSJobStore):
|
|
488
601
|
srcObj = get_object_for_url(uri, existing=True)
|
|
@@ -492,15 +605,19 @@ class AWSJobStore(AbstractJobStore):
|
|
|
492
605
|
else:
|
|
493
606
|
self._requireValidSharedFileName(shared_file_name)
|
|
494
607
|
jobStoreFileID = self._shared_file_id(shared_file_name)
|
|
495
|
-
info = self.FileInfo.loadOrCreate(
|
|
496
|
-
|
|
497
|
-
|
|
608
|
+
info = self.FileInfo.loadOrCreate(
|
|
609
|
+
jobStoreFileID=jobStoreFileID,
|
|
610
|
+
ownerID=str(self.sharedFileOwnerID),
|
|
611
|
+
encrypted=None,
|
|
612
|
+
)
|
|
498
613
|
info.copyFrom(srcObj)
|
|
499
614
|
info.save()
|
|
500
615
|
return FileID(info.fileID, size) if shared_file_name is None else None
|
|
501
|
-
except ServerSideCopyProhibitedError:
|
|
502
|
-
# AWS refuses to do this copy for us
|
|
503
|
-
logger.warning(
|
|
616
|
+
except (NoBucketLocationError, ServerSideCopyProhibitedError):
|
|
617
|
+
# AWS refuses to tell us where the bucket is or do this copy for us
|
|
618
|
+
logger.warning(
|
|
619
|
+
"Falling back to copying via the local machine. This could get expensive!"
|
|
620
|
+
)
|
|
504
621
|
|
|
505
622
|
# copy if exception
|
|
506
623
|
return super()._import_file(otherCls, uri, shared_file_name=shared_file_name)
|
|
@@ -512,9 +629,11 @@ class AWSJobStore(AbstractJobStore):
|
|
|
512
629
|
info = self.FileInfo.loadOrFail(file_id)
|
|
513
630
|
info.copyTo(dstObj)
|
|
514
631
|
return
|
|
515
|
-
except ServerSideCopyProhibitedError:
|
|
516
|
-
# AWS refuses to do this copy for us
|
|
517
|
-
logger.warning(
|
|
632
|
+
except (NoBucketLocationError, ServerSideCopyProhibitedError):
|
|
633
|
+
# AWS refuses to tell us where the bucket is or do this copy for us
|
|
634
|
+
logger.warning(
|
|
635
|
+
"Falling back to copying via the local machine. This could get expensive!"
|
|
636
|
+
)
|
|
518
637
|
else:
|
|
519
638
|
super()._default_export_file(otherCls, file_id, uri)
|
|
520
639
|
|
|
@@ -536,34 +655,35 @@ class AWSJobStore(AbstractJobStore):
|
|
|
536
655
|
def _read_from_url(cls, url: ParseResult, writable):
|
|
537
656
|
srcObj = get_object_for_url(url, existing=True)
|
|
538
657
|
srcObj.download_fileobj(writable)
|
|
539
|
-
return (
|
|
540
|
-
srcObj.content_length,
|
|
541
|
-
False # executable bit is always False
|
|
542
|
-
)
|
|
658
|
+
return (srcObj.content_length, False) # executable bit is always False
|
|
543
659
|
|
|
544
660
|
@classmethod
|
|
545
661
|
def _open_url(cls, url: ParseResult) -> IO[bytes]:
|
|
546
662
|
src_obj = get_object_for_url(url, existing=True)
|
|
547
663
|
response = src_obj.get()
|
|
548
664
|
# We should get back a response with a stream in 'Body'
|
|
549
|
-
if
|
|
665
|
+
if "Body" not in response:
|
|
550
666
|
raise RuntimeError(f"Could not fetch body stream for {url}")
|
|
551
|
-
return response[
|
|
667
|
+
return response["Body"]
|
|
552
668
|
|
|
553
669
|
@classmethod
|
|
554
|
-
def _write_to_url(
|
|
670
|
+
def _write_to_url(
|
|
671
|
+
cls, readable, url: ParseResult, executable: bool = False
|
|
672
|
+
) -> None:
|
|
555
673
|
dstObj = get_object_for_url(url)
|
|
556
674
|
|
|
557
675
|
logger.debug("Uploading %s", dstObj.key)
|
|
558
676
|
# uploadFile takes care of using multipart upload if the file is larger than partSize (default to 5MB)
|
|
559
|
-
uploadFile(
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
677
|
+
uploadFile(
|
|
678
|
+
readable=readable,
|
|
679
|
+
resource=s3_boto3_resource,
|
|
680
|
+
bucketName=dstObj.bucket_name,
|
|
681
|
+
fileID=dstObj.key,
|
|
682
|
+
partSize=5 * 1000 * 1000,
|
|
683
|
+
)
|
|
564
684
|
|
|
565
685
|
@classmethod
|
|
566
|
-
def _list_url(cls, url: ParseResult) ->
|
|
686
|
+
def _list_url(cls, url: ParseResult) -> list[str]:
|
|
567
687
|
return list_objects_for_url(url)
|
|
568
688
|
|
|
569
689
|
@classmethod
|
|
@@ -574,9 +694,11 @@ class AWSJobStore(AbstractJobStore):
|
|
|
574
694
|
|
|
575
695
|
@classmethod
|
|
576
696
|
def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
|
|
577
|
-
return url.scheme.lower() ==
|
|
697
|
+
return url.scheme.lower() == "s3"
|
|
578
698
|
|
|
579
|
-
def write_file(
|
|
699
|
+
def write_file(
|
|
700
|
+
self, local_path: FileID, job_id: Optional[FileID] = None, cleanup: bool = False
|
|
701
|
+
) -> FileID:
|
|
580
702
|
info = self.FileInfo.create(job_id if cleanup else None)
|
|
581
703
|
info.upload(local_path, not self.config.disableJobStoreChecksumVerification)
|
|
582
704
|
info.save()
|
|
@@ -584,7 +706,14 @@ class AWSJobStore(AbstractJobStore):
|
|
|
584
706
|
return info.fileID
|
|
585
707
|
|
|
586
708
|
@contextmanager
|
|
587
|
-
def write_file_stream(
|
|
709
|
+
def write_file_stream(
|
|
710
|
+
self,
|
|
711
|
+
job_id: Optional[FileID] = None,
|
|
712
|
+
cleanup: bool = False,
|
|
713
|
+
basename=None,
|
|
714
|
+
encoding=None,
|
|
715
|
+
errors=None,
|
|
716
|
+
):
|
|
588
717
|
info = self.FileInfo.create(job_id if cleanup else None)
|
|
589
718
|
with info.uploadStream(encoding=encoding, errors=errors) as writable:
|
|
590
719
|
yield writable, info.fileID
|
|
@@ -592,11 +721,15 @@ class AWSJobStore(AbstractJobStore):
|
|
|
592
721
|
logger.debug("Wrote %r.", info)
|
|
593
722
|
|
|
594
723
|
@contextmanager
|
|
595
|
-
def write_shared_file_stream(
|
|
724
|
+
def write_shared_file_stream(
|
|
725
|
+
self, shared_file_name, encrypted=None, encoding=None, errors=None
|
|
726
|
+
):
|
|
596
727
|
self._requireValidSharedFileName(shared_file_name)
|
|
597
|
-
info = self.FileInfo.loadOrCreate(
|
|
598
|
-
|
|
599
|
-
|
|
728
|
+
info = self.FileInfo.loadOrCreate(
|
|
729
|
+
jobStoreFileID=self._shared_file_id(shared_file_name),
|
|
730
|
+
ownerID=str(self.sharedFileOwnerID),
|
|
731
|
+
encrypted=encrypted,
|
|
732
|
+
)
|
|
600
733
|
with info.uploadStream(encoding=encoding, errors=errors) as writable:
|
|
601
734
|
yield writable
|
|
602
735
|
info.save()
|
|
@@ -629,7 +762,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
629
762
|
info = self.FileInfo.loadOrFail(file_id)
|
|
630
763
|
logger.debug("Reading %r into %r.", info, local_path)
|
|
631
764
|
info.download(local_path, not self.config.disableJobStoreChecksumVerification)
|
|
632
|
-
if getattr(file_id,
|
|
765
|
+
if getattr(file_id, "executable", False):
|
|
633
766
|
os.chmod(local_path, os.stat(local_path).st_mode | stat.S_IXUSR)
|
|
634
767
|
|
|
635
768
|
@contextmanager
|
|
@@ -644,7 +777,9 @@ class AWSJobStore(AbstractJobStore):
|
|
|
644
777
|
self._requireValidSharedFileName(shared_file_name)
|
|
645
778
|
jobStoreFileID = self._shared_file_id(shared_file_name)
|
|
646
779
|
info = self.FileInfo.loadOrFail(jobStoreFileID, customName=shared_file_name)
|
|
647
|
-
logger.debug(
|
|
780
|
+
logger.debug(
|
|
781
|
+
"Reading %r for shared file %r into stream.", info, shared_file_name
|
|
782
|
+
)
|
|
648
783
|
with info.downloadStream(encoding=encoding, errors=errors) as readable:
|
|
649
784
|
yield readable
|
|
650
785
|
|
|
@@ -660,7 +795,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
660
795
|
with info.uploadStream(multipart=False) as writeable:
|
|
661
796
|
if isinstance(msg, str):
|
|
662
797
|
# This stream is for binary data, so encode any non-encoded things
|
|
663
|
-
msg = msg.encode(
|
|
798
|
+
msg = msg.encode("utf-8", errors="ignore")
|
|
664
799
|
writeable.write(msg)
|
|
665
800
|
info.save()
|
|
666
801
|
|
|
@@ -682,10 +817,12 @@ class AWSJobStore(AbstractJobStore):
|
|
|
682
817
|
items = None
|
|
683
818
|
for attempt in retry_sdb():
|
|
684
819
|
with attempt:
|
|
685
|
-
items = boto3_pager(
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
820
|
+
items = boto3_pager(
|
|
821
|
+
self.db.select,
|
|
822
|
+
"Items",
|
|
823
|
+
ConsistentRead=True,
|
|
824
|
+
SelectExpression=f"select * from `{self.files_domain_name}` where ownerID='{str(ownerId)}'",
|
|
825
|
+
)
|
|
689
826
|
assert items is not None
|
|
690
827
|
for item in items:
|
|
691
828
|
info = self.FileInfo.fromItem(item)
|
|
@@ -702,13 +839,19 @@ class AWSJobStore(AbstractJobStore):
|
|
|
702
839
|
with info.uploadStream(allowInlining=False) as f:
|
|
703
840
|
f.write(info.content)
|
|
704
841
|
|
|
705
|
-
self.files_bucket.Object(compat_bytes(jobStoreFileID)).Acl().put(
|
|
842
|
+
self.files_bucket.Object(compat_bytes(jobStoreFileID)).Acl().put(
|
|
843
|
+
ACL="public-read"
|
|
844
|
+
)
|
|
706
845
|
|
|
707
|
-
url = self.s3_client.generate_presigned_url(
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
846
|
+
url = self.s3_client.generate_presigned_url(
|
|
847
|
+
"get_object",
|
|
848
|
+
Params={
|
|
849
|
+
"Bucket": self.files_bucket.name,
|
|
850
|
+
"Key": compat_bytes(jobStoreFileID),
|
|
851
|
+
"VersionId": info.version,
|
|
852
|
+
},
|
|
853
|
+
ExpiresIn=self.publicUrlExpiration.total_seconds(),
|
|
854
|
+
)
|
|
712
855
|
|
|
713
856
|
# boto doesn't properly remove the x-amz-security-token parameter when
|
|
714
857
|
# query_auth is False when using an IAM role (see issue #2043). Including the
|
|
@@ -716,12 +859,12 @@ class AWSJobStore(AbstractJobStore):
|
|
|
716
859
|
# even if the resource is public, so we need to remove it.
|
|
717
860
|
scheme, netloc, path, query, fragment = urlsplit(url)
|
|
718
861
|
params = parse_qs(query)
|
|
719
|
-
if
|
|
720
|
-
del params[
|
|
721
|
-
if
|
|
722
|
-
del params[
|
|
723
|
-
if
|
|
724
|
-
del params[
|
|
862
|
+
if "x-amz-security-token" in params:
|
|
863
|
+
del params["x-amz-security-token"]
|
|
864
|
+
if "AWSAccessKeyId" in params:
|
|
865
|
+
del params["AWSAccessKeyId"]
|
|
866
|
+
if "Signature" in params:
|
|
867
|
+
del params["Signature"]
|
|
725
868
|
query = urlencode(params, doseq=True)
|
|
726
869
|
url = urlunsplit((scheme, netloc, path, query, fragment))
|
|
727
870
|
return url
|
|
@@ -730,12 +873,14 @@ class AWSJobStore(AbstractJobStore):
|
|
|
730
873
|
self._requireValidSharedFileName(shared_file_name)
|
|
731
874
|
return self.get_public_url(self._shared_file_id(shared_file_name))
|
|
732
875
|
|
|
733
|
-
def _bindBucket(
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
876
|
+
def _bindBucket(
|
|
877
|
+
self,
|
|
878
|
+
bucket_name: str,
|
|
879
|
+
create: bool = False,
|
|
880
|
+
block: bool = True,
|
|
881
|
+
versioning: bool = False,
|
|
882
|
+
check_versioning_consistency: bool = True,
|
|
883
|
+
):
|
|
739
884
|
"""
|
|
740
885
|
Return the Boto Bucket object representing the S3 bucket with the given name. If the
|
|
741
886
|
bucket does not exist and `create` is True, it will be created.
|
|
@@ -760,8 +905,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
760
905
|
Decide, given an error, whether we should retry binding the bucket.
|
|
761
906
|
"""
|
|
762
907
|
|
|
763
|
-
if
|
|
764
|
-
get_error_status(error) in (404, 409)):
|
|
908
|
+
if isinstance(error, ClientError) and get_error_status(error) in (404, 409):
|
|
765
909
|
# Handle cases where the bucket creation is in a weird state that might let us proceed.
|
|
766
910
|
# https://github.com/BD2KGenomics/toil/issues/955
|
|
767
911
|
# https://github.com/BD2KGenomics/toil/issues/995
|
|
@@ -771,7 +915,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
771
915
|
# OperationAborted == 409
|
|
772
916
|
# NoSuchBucket == 404
|
|
773
917
|
return True
|
|
774
|
-
if get_error_code(error) ==
|
|
918
|
+
if get_error_code(error) == "SlowDown":
|
|
775
919
|
# We may get told to SlowDown by AWS when we try to create our
|
|
776
920
|
# bucket. In that case, we should retry and use the exponential
|
|
777
921
|
# backoff.
|
|
@@ -804,15 +948,17 @@ class AWSJobStore(AbstractJobStore):
|
|
|
804
948
|
# NoSuchBucket. We let that kick us back up to the
|
|
805
949
|
# main retry loop.
|
|
806
950
|
assert (
|
|
807
|
-
|
|
951
|
+
get_bucket_region(bucket_name) == self.region
|
|
808
952
|
), f"bucket_name: {bucket_name}, {get_bucket_region(bucket_name)} != {self.region}"
|
|
809
953
|
|
|
810
954
|
tags = build_tag_dict_from_env()
|
|
811
955
|
|
|
812
956
|
if tags:
|
|
813
957
|
flat_tags = flatten_tags(tags)
|
|
814
|
-
bucket_tagging = self.s3_resource.BucketTagging(
|
|
815
|
-
|
|
958
|
+
bucket_tagging = self.s3_resource.BucketTagging(
|
|
959
|
+
bucket_name
|
|
960
|
+
)
|
|
961
|
+
bucket_tagging.put(Tagging={"TagSet": flat_tags})
|
|
816
962
|
|
|
817
963
|
# Configure bucket so that we can make objects in
|
|
818
964
|
# it public, which was the historical default.
|
|
@@ -825,7 +971,9 @@ class AWSJobStore(AbstractJobStore):
|
|
|
825
971
|
# This is raised if the user attempts to get a bucket in a region outside
|
|
826
972
|
# the specified one, if the specified one is not `us-east-1`. The us-east-1
|
|
827
973
|
# server allows a user to use buckets from any region.
|
|
828
|
-
raise BucketLocationConflictException(
|
|
974
|
+
raise BucketLocationConflictException(
|
|
975
|
+
get_bucket_region(bucket_name)
|
|
976
|
+
)
|
|
829
977
|
else:
|
|
830
978
|
raise
|
|
831
979
|
else:
|
|
@@ -842,24 +990,32 @@ class AWSJobStore(AbstractJobStore):
|
|
|
842
990
|
# consistent?
|
|
843
991
|
time.sleep(1)
|
|
844
992
|
while not self._getBucketVersioning(bucket_name):
|
|
845
|
-
logger.warning(
|
|
993
|
+
logger.warning(
|
|
994
|
+
f"Waiting for versioning activation on bucket '{bucket_name}'..."
|
|
995
|
+
)
|
|
846
996
|
time.sleep(1)
|
|
847
997
|
elif check_versioning_consistency:
|
|
848
998
|
# now test for versioning consistency
|
|
849
999
|
# we should never see any of these errors since 'versioning' should always be true
|
|
850
1000
|
bucket_versioning = self._getBucketVersioning(bucket_name)
|
|
851
1001
|
if bucket_versioning != versioning:
|
|
852
|
-
assert False,
|
|
1002
|
+
assert False, "Cannot modify versioning on existing bucket"
|
|
853
1003
|
elif bucket_versioning is None:
|
|
854
|
-
assert False,
|
|
1004
|
+
assert False, "Cannot use a bucket with versioning suspended"
|
|
855
1005
|
if bucketExisted:
|
|
856
|
-
logger.debug(
|
|
1006
|
+
logger.debug(
|
|
1007
|
+
f"Using pre-existing job store bucket '{bucket_name}'."
|
|
1008
|
+
)
|
|
857
1009
|
else:
|
|
858
|
-
logger.debug(
|
|
1010
|
+
logger.debug(
|
|
1011
|
+
f"Created new job store bucket '{bucket_name}' with versioning state {versioning}."
|
|
1012
|
+
)
|
|
859
1013
|
|
|
860
1014
|
return bucket
|
|
861
1015
|
|
|
862
|
-
def _bindDomain(
|
|
1016
|
+
def _bindDomain(
|
|
1017
|
+
self, domain_name: str, create: bool = False, block: bool = True
|
|
1018
|
+
) -> None:
|
|
863
1019
|
"""
|
|
864
1020
|
Return the Boto3 domain name representing the SDB domain. When create=True, it will
|
|
865
1021
|
create the domain if it does not exist.
|
|
@@ -878,9 +1034,11 @@ class AWSJobStore(AbstractJobStore):
|
|
|
878
1034
|
retry timeout expires.
|
|
879
1035
|
"""
|
|
880
1036
|
logger.debug("Binding to job store domain '%s'.", domain_name)
|
|
881
|
-
retryargs = dict(
|
|
1037
|
+
retryargs = dict(
|
|
1038
|
+
predicate=lambda e: no_such_sdb_domain(e) or sdb_unavailable(e)
|
|
1039
|
+
)
|
|
882
1040
|
if not block:
|
|
883
|
-
retryargs[
|
|
1041
|
+
retryargs["timeout"] = 15
|
|
884
1042
|
for attempt in retry_sdb(**retryargs):
|
|
885
1043
|
with attempt:
|
|
886
1044
|
try:
|
|
@@ -902,13 +1060,13 @@ class AWSJobStore(AbstractJobStore):
|
|
|
902
1060
|
return str(uuid.uuid4())
|
|
903
1061
|
|
|
904
1062
|
# A dummy job ID under which all shared files are stored
|
|
905
|
-
sharedFileOwnerID = uuid.UUID(
|
|
1063
|
+
sharedFileOwnerID = uuid.UUID("891f7db6-e4d9-4221-a58e-ab6cc4395f94")
|
|
906
1064
|
|
|
907
1065
|
# A dummy job ID under which all unread stats files are stored
|
|
908
|
-
statsFileOwnerID = uuid.UUID(
|
|
1066
|
+
statsFileOwnerID = uuid.UUID("bfcf5286-4bc7-41ef-a85d-9ab415b69d53")
|
|
909
1067
|
|
|
910
1068
|
# A dummy job ID under which all read stats files are stored
|
|
911
|
-
readStatsFileOwnerID = uuid.UUID(
|
|
1069
|
+
readStatsFileOwnerID = uuid.UUID("e77fc3aa-d232-4255-ae04-f64ee8eb0bfa")
|
|
912
1070
|
|
|
913
1071
|
def _shared_file_id(self, shared_file_name):
|
|
914
1072
|
return str(uuid.uuid5(self.sharedFileOwnerID, shared_file_name))
|
|
@@ -918,13 +1076,22 @@ class AWSJobStore(AbstractJobStore):
|
|
|
918
1076
|
"""
|
|
919
1077
|
Represents a file in this job store.
|
|
920
1078
|
"""
|
|
1079
|
+
|
|
921
1080
|
outer = None
|
|
922
1081
|
"""
|
|
923
1082
|
:type: AWSJobStore
|
|
924
1083
|
"""
|
|
925
1084
|
|
|
926
|
-
def __init__(
|
|
927
|
-
|
|
1085
|
+
def __init__(
|
|
1086
|
+
self,
|
|
1087
|
+
fileID,
|
|
1088
|
+
ownerID,
|
|
1089
|
+
encrypted,
|
|
1090
|
+
version=None,
|
|
1091
|
+
content=None,
|
|
1092
|
+
numContentChunks=0,
|
|
1093
|
+
checksum=None,
|
|
1094
|
+
):
|
|
928
1095
|
"""
|
|
929
1096
|
:type fileID: str
|
|
930
1097
|
:param fileID: the file's ID
|
|
@@ -1003,24 +1170,30 @@ class AWSJobStore(AbstractJobStore):
|
|
|
1003
1170
|
assert content is None or isinstance(content, bytes)
|
|
1004
1171
|
self._content = content
|
|
1005
1172
|
if content is not None:
|
|
1006
|
-
self.version =
|
|
1173
|
+
self.version = ""
|
|
1007
1174
|
|
|
1008
1175
|
@classmethod
|
|
1009
1176
|
def create(cls, ownerID: str):
|
|
1010
|
-
return cls(
|
|
1177
|
+
return cls(
|
|
1178
|
+
str(uuid.uuid4()), ownerID, encrypted=cls.outer.sseKeyPath is not None
|
|
1179
|
+
)
|
|
1011
1180
|
|
|
1012
1181
|
@classmethod
|
|
1013
1182
|
def presenceIndicator(cls):
|
|
1014
|
-
return
|
|
1183
|
+
return "encrypted"
|
|
1015
1184
|
|
|
1016
1185
|
@classmethod
|
|
1017
1186
|
def exists(cls, jobStoreFileID):
|
|
1018
1187
|
for attempt in retry_sdb():
|
|
1019
1188
|
with attempt:
|
|
1020
|
-
return bool(
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1189
|
+
return bool(
|
|
1190
|
+
cls.outer.db.get_attributes(
|
|
1191
|
+
DomainName=cls.outer.files_domain_name,
|
|
1192
|
+
ItemName=compat_bytes(jobStoreFileID),
|
|
1193
|
+
AttributeNames=[cls.presenceIndicator()],
|
|
1194
|
+
ConsistentRead=True,
|
|
1195
|
+
).get("Attributes", [])
|
|
1196
|
+
)
|
|
1024
1197
|
|
|
1025
1198
|
@classmethod
|
|
1026
1199
|
def load(cls, jobStoreFileID):
|
|
@@ -1029,10 +1202,13 @@ class AWSJobStore(AbstractJobStore):
|
|
|
1029
1202
|
self = cls.fromItem(
|
|
1030
1203
|
{
|
|
1031
1204
|
"Name": compat_bytes(jobStoreFileID),
|
|
1032
|
-
"Attributes": cls.outer.db.get_attributes(
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1205
|
+
"Attributes": cls.outer.db.get_attributes(
|
|
1206
|
+
DomainName=cls.outer.files_domain_name,
|
|
1207
|
+
ItemName=compat_bytes(jobStoreFileID),
|
|
1208
|
+
ConsistentRead=True,
|
|
1209
|
+
).get("Attributes", []),
|
|
1210
|
+
}
|
|
1211
|
+
)
|
|
1036
1212
|
return self
|
|
1037
1213
|
|
|
1038
1214
|
@classmethod
|
|
@@ -1062,7 +1238,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
1062
1238
|
return self
|
|
1063
1239
|
|
|
1064
1240
|
@classmethod
|
|
1065
|
-
def fromItem(cls, item: ItemTypeDef):
|
|
1241
|
+
def fromItem(cls, item: "ItemTypeDef"):
|
|
1066
1242
|
"""
|
|
1067
1243
|
Convert an SDB item to an instance of this class.
|
|
1068
1244
|
|
|
@@ -1075,7 +1251,9 @@ class AWSJobStore(AbstractJobStore):
|
|
|
1075
1251
|
return s if s is None else str(s)
|
|
1076
1252
|
|
|
1077
1253
|
# ownerID and encrypted are the only mandatory attributes
|
|
1078
|
-
ownerID, encrypted, version, checksum = SDBHelper.get_attributes_from_item(
|
|
1254
|
+
ownerID, encrypted, version, checksum = SDBHelper.get_attributes_from_item(
|
|
1255
|
+
item, ["ownerID", "encrypted", "version", "checksum"]
|
|
1256
|
+
)
|
|
1079
1257
|
if ownerID is None:
|
|
1080
1258
|
assert encrypted is None
|
|
1081
1259
|
return None
|
|
@@ -1085,14 +1263,23 @@ class AWSJobStore(AbstractJobStore):
|
|
|
1085
1263
|
if encrypted:
|
|
1086
1264
|
sseKeyPath = cls.outer.sseKeyPath
|
|
1087
1265
|
if sseKeyPath is None:
|
|
1088
|
-
raise AssertionError(
|
|
1266
|
+
raise AssertionError(
|
|
1267
|
+
"Content is encrypted but no key was provided."
|
|
1268
|
+
)
|
|
1089
1269
|
if content is not None:
|
|
1090
1270
|
content = encryption.decrypt(content, sseKeyPath)
|
|
1091
|
-
self = cls(
|
|
1092
|
-
|
|
1271
|
+
self = cls(
|
|
1272
|
+
fileID=item["Name"],
|
|
1273
|
+
ownerID=ownerID,
|
|
1274
|
+
encrypted=encrypted,
|
|
1275
|
+
version=version,
|
|
1276
|
+
content=content,
|
|
1277
|
+
numContentChunks=numContentChunks,
|
|
1278
|
+
checksum=checksum,
|
|
1279
|
+
)
|
|
1093
1280
|
return self
|
|
1094
1281
|
|
|
1095
|
-
def toItem(self) ->
|
|
1282
|
+
def toItem(self) -> tuple[dict[str, str], int]:
|
|
1096
1283
|
"""
|
|
1097
1284
|
Convert this instance to a dictionary of attribute names to values
|
|
1098
1285
|
|
|
@@ -1104,15 +1291,21 @@ class AWSJobStore(AbstractJobStore):
|
|
|
1104
1291
|
if self.encrypted and content is not None:
|
|
1105
1292
|
sseKeyPath = self.outer.sseKeyPath
|
|
1106
1293
|
if sseKeyPath is None:
|
|
1107
|
-
raise AssertionError(
|
|
1294
|
+
raise AssertionError(
|
|
1295
|
+
"Encryption requested but no key was provided."
|
|
1296
|
+
)
|
|
1108
1297
|
content = encryption.encrypt(content, sseKeyPath)
|
|
1109
1298
|
assert content is None or isinstance(content, bytes)
|
|
1110
1299
|
attributes = self.binaryToAttributes(content)
|
|
1111
|
-
numChunks = int(attributes[
|
|
1112
|
-
attributes.update(
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1300
|
+
numChunks = int(attributes["numChunks"])
|
|
1301
|
+
attributes.update(
|
|
1302
|
+
dict(
|
|
1303
|
+
ownerID=self.ownerID or "",
|
|
1304
|
+
encrypted=str(self.encrypted),
|
|
1305
|
+
version=self.version or "",
|
|
1306
|
+
checksum=self.checksum or "",
|
|
1307
|
+
)
|
|
1308
|
+
)
|
|
1116
1309
|
return attributes, numChunks
|
|
1117
1310
|
|
|
1118
1311
|
@classmethod
|
|
@@ -1128,24 +1321,37 @@ class AWSJobStore(AbstractJobStore):
|
|
|
1128
1321
|
attributes_boto3 = SDBHelper.attributeDictToList(attributes)
|
|
1129
1322
|
# False stands for absence
|
|
1130
1323
|
if self.previousVersion is None:
|
|
1131
|
-
expected: UpdateConditionTypeDef = {
|
|
1324
|
+
expected: "UpdateConditionTypeDef" = {
|
|
1325
|
+
"Name": "version",
|
|
1326
|
+
"Exists": False,
|
|
1327
|
+
}
|
|
1132
1328
|
else:
|
|
1133
|
-
expected = {"Name":
|
|
1329
|
+
expected = {"Name": "version", "Value": cast(str, self.previousVersion)}
|
|
1134
1330
|
try:
|
|
1135
1331
|
for attempt in retry_sdb():
|
|
1136
1332
|
with attempt:
|
|
1137
|
-
self.outer.db.put_attributes(
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1333
|
+
self.outer.db.put_attributes(
|
|
1334
|
+
DomainName=self.outer.files_domain_name,
|
|
1335
|
+
ItemName=compat_bytes(self.fileID),
|
|
1336
|
+
Attributes=[
|
|
1337
|
+
{
|
|
1338
|
+
"Name": attribute["Name"],
|
|
1339
|
+
"Value": attribute["Value"],
|
|
1340
|
+
"Replace": True,
|
|
1341
|
+
}
|
|
1342
|
+
for attribute in attributes_boto3
|
|
1343
|
+
],
|
|
1344
|
+
Expected=expected,
|
|
1345
|
+
)
|
|
1142
1346
|
# clean up the old version of the file if necessary and safe
|
|
1143
1347
|
if self.previousVersion and (self.previousVersion != self.version):
|
|
1144
1348
|
for attempt in retry_s3():
|
|
1145
1349
|
with attempt:
|
|
1146
|
-
self.outer.s3_client.delete_object(
|
|
1147
|
-
|
|
1148
|
-
|
|
1350
|
+
self.outer.s3_client.delete_object(
|
|
1351
|
+
Bucket=self.outer.files_bucket.name,
|
|
1352
|
+
Key=compat_bytes(self.fileID),
|
|
1353
|
+
VersionId=self.previousVersion,
|
|
1354
|
+
)
|
|
1149
1355
|
self._previousVersion = self._version
|
|
1150
1356
|
if numNewContentChunks < self._numContentChunks:
|
|
1151
1357
|
residualChunks = range(numNewContentChunks, self._numContentChunks)
|
|
@@ -1153,19 +1359,26 @@ class AWSJobStore(AbstractJobStore):
|
|
|
1153
1359
|
# boto3 requires providing the value as well as the name in the attribute, and we don't store it locally
|
|
1154
1360
|
# the php sdk resolves this issue by not requiring the Value key https://github.com/aws/aws-sdk-php/issues/185
|
|
1155
1361
|
# but this doesnt extend to boto3
|
|
1156
|
-
delete_attributes = self.outer.db.get_attributes(
|
|
1157
|
-
|
|
1158
|
-
|
|
1362
|
+
delete_attributes = self.outer.db.get_attributes(
|
|
1363
|
+
DomainName=self.outer.files_domain_name,
|
|
1364
|
+
ItemName=compat_bytes(self.fileID),
|
|
1365
|
+
AttributeNames=[chunk for chunk in residual_chunk_names],
|
|
1366
|
+
).get("Attributes")
|
|
1159
1367
|
for attempt in retry_sdb():
|
|
1160
1368
|
with attempt:
|
|
1161
|
-
self.outer.db.delete_attributes(
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1369
|
+
self.outer.db.delete_attributes(
|
|
1370
|
+
DomainName=self.outer.files_domain_name,
|
|
1371
|
+
ItemName=compat_bytes(self.fileID),
|
|
1372
|
+
Attributes=delete_attributes,
|
|
1373
|
+
)
|
|
1374
|
+
self.outer.db.get_attributes(
|
|
1375
|
+
DomainName=self.outer.files_domain_name,
|
|
1376
|
+
ItemName=compat_bytes(self.fileID),
|
|
1377
|
+
)
|
|
1165
1378
|
|
|
1166
1379
|
self._numContentChunks = numNewContentChunks
|
|
1167
1380
|
except ClientError as e:
|
|
1168
|
-
if get_error_code(e) ==
|
|
1381
|
+
if get_error_code(e) == "ConditionalCheckFailed":
|
|
1169
1382
|
raise ConcurrentFileModificationException(self.fileID)
|
|
1170
1383
|
else:
|
|
1171
1384
|
raise
|
|
@@ -1173,24 +1386,30 @@ class AWSJobStore(AbstractJobStore):
 def upload(self, localFilePath, calculateChecksum=True):
 file_size, file_time = fileSizeAndTime(localFilePath)
 if file_size <= self.maxInlinedSize():
-with open(localFilePath,
+with open(localFilePath, "rb") as f:
 self.content = f.read()
 # Clear out any old checksum in case of overwrite
-self.checksum =
+self.checksum = ""
 else:
 headerArgs = self._s3EncryptionArgs()
 # Create a new Resource in case it needs to be on its own thread
-resource = boto3_session.resource(
-
-self.checksum =
-
-
-
-
-
-
-
+resource = boto3_session.resource("s3", region_name=self.outer.region)
+
+self.checksum = (
+self._get_file_checksum(localFilePath)
+if calculateChecksum
+else None
+)
+self.version = uploadFromPath(
+localFilePath,
+resource=resource,
+bucketName=self.outer.files_bucket.name,
+fileID=compat_bytes(self.fileID),
+headerArgs=headerArgs,
+partSize=self.outer.part_size,
+)
+
+def _start_checksum(self, to_match=None, algorithm="sha1"):
 """
 Get a hasher that can be used with _update_checksum and
 _finish_checksum.
@@ -1208,12 +1427,12 @@ class AWSJobStore(AbstractJobStore):
 expected = None

 if to_match is not None:
-parts = to_match.split(
+parts = to_match.split("$")
 algorithm = parts[0]
 expected = parts[1]

 wrapped = getattr(hashlib, algorithm)()
-logger.debug(f
+logger.debug(f"Starting {algorithm} checksum to match {expected}")
 return algorithm, wrapped, expected

 def _update_checksum(self, checksum_in_progress, data):
@@ -1230,26 +1449,32 @@ class AWSJobStore(AbstractJobStore):

 result_hash = checksum_in_progress[1].hexdigest()

-logger.debug(
+logger.debug(
+f"Completed checksum with hash {result_hash} vs. expected {checksum_in_progress[2]}"
+)
 if checksum_in_progress[2] is not None:
 # We expected a particular hash
 if result_hash != checksum_in_progress[2]:
-raise ChecksumError(
-
+raise ChecksumError(
+"Checksum mismatch. Expected: %s Actual: %s"
+% (checksum_in_progress[2], result_hash)
+)

-return
+return "$".join([checksum_in_progress[0], result_hash])

 def _get_file_checksum(self, localFilePath, to_match=None):
-with open(localFilePath,
+with open(localFilePath, "rb") as f:
 hasher = self._start_checksum(to_match=to_match)
 contents = f.read(1024 * 1024)
-while contents != b
+while contents != b"":
 self._update_checksum(hasher, contents)
 contents = f.read(1024 * 1024)
 return self._finish_checksum(hasher)

 @contextmanager
-def uploadStream(
+def uploadStream(
+self, multipart=True, allowInlining=True, encoding=None, errors=None
+):
 """
 Context manager that gives out a binary or text mode upload stream to upload data.
 """
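
The checksum helpers reformatted above record checksums as a single `<algorithm>$<hexdigest>` string (`_finish_checksum` joins on `$`, `_start_checksum` splits on it) and hash files in 1 MiB chunks. A standalone sketch of that convention, assuming only the string format; the helper names here are illustrative, not the package's:

```python
# Sketch of the "<algorithm>$<hexdigest>" checksum convention, hashing a file
# in 1 MiB chunks. Function names are illustrative, not the package's.
import hashlib


def file_checksum(path: str, algorithm: str = "sha1") -> str:
    hasher = hashlib.new(algorithm)
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            hasher.update(chunk)
    return f"{algorithm}${hasher.hexdigest()}"


def verify_checksum(path: str, recorded: str) -> None:
    algorithm, expected = recorded.split("$", 1)
    actual = file_checksum(path, algorithm).split("$", 1)[1]
    if actual != expected:
        raise ValueError(f"Checksum mismatch. Expected: {expected} Actual: {actual}")
```
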
@@ -1270,14 +1495,14 @@ class AWSJobStore(AbstractJobStore):
 assert isinstance(buf, bytes)

 if allowInlining and len(buf) <= info.maxInlinedSize():
-logger.debug(
+logger.debug("Inlining content of %d bytes", len(buf))
 info.content = buf
 # There will be no checksum
-info.checksum =
+info.checksum = ""
 else:
 # We will compute a checksum
 hasher = info._start_checksum()
-logger.debug(
+logger.debug("Updating checksum with %d bytes", len(buf))
 info._update_checksum(hasher, buf)

 client = store.s3_client
@@ -1286,47 +1511,72 @@ class AWSJobStore(AbstractJobStore):

 for attempt in retry_s3():
 with attempt:
-logger.debug(
+logger.debug("Starting multipart upload")
 # low-level clients are thread safe
-upload = client.create_multipart_upload(
-
-
-
+upload = client.create_multipart_upload(
+Bucket=bucket_name,
+Key=compat_bytes(info.fileID),
+**headerArgs,
+)
+uploadId = upload["UploadId"]
 parts = []
-logger.debug(
+logger.debug("Multipart upload started as %s", uploadId)

 for attempt in retry_s3():
 with attempt:
 for i in range(CONSISTENCY_TICKS):
 # Sometimes we can create a multipart upload and not see it. Wait around for it.
-response = client.list_multipart_uploads(
-
-
-
-
-
-
-
+response = client.list_multipart_uploads(
+Bucket=bucket_name,
+MaxUploads=1,
+Prefix=compat_bytes(info.fileID),
+)
+if (
+"Uploads" in response
+and len(response["Uploads"]) != 0
+and response["Uploads"][0]["UploadId"]
+== uploadId
+):
+
+logger.debug(
+"Multipart upload visible as %s", uploadId
+)
 break
 else:
-logger.debug(
-
+logger.debug(
+"Multipart upload %s is not visible; we see %s",
+uploadId,
+response.get("Uploads"),
+)
+time.sleep(CONSISTENCY_TIME * 2**i)

 try:
 for part_num in itertools.count():
 for attempt in retry_s3():
 with attempt:
-logger.debug(
+logger.debug(
+"Uploading part %d of %d bytes to %s",
+part_num + 1,
+len(buf),
+uploadId,
+)
 # TODO: include the Content-MD5 header:
 # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.complete_multipart_upload
-part = client.upload_part(
-
-
-
-
-
-
-
+part = client.upload_part(
+Bucket=bucket_name,
+Key=compat_bytes(info.fileID),
+PartNumber=part_num + 1,
+UploadId=uploadId,
+Body=BytesIO(buf),
+**headerArgs,
+)
+
+parts.append(
+{
+"PartNumber": part_num + 1,
+"ETag": part["ETag"],
+}
+)

 # Get the next block of data we want to put
 buf = readable.read(info.outer.part_size)
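
This hunk and the two that follow reformat the multipart upload path: `create_multipart_upload` yields an `UploadId`, each `upload_part` returns an `ETag` that is collected together with its part number, and the transfer is finished with `complete_multipart_upload` or torn down with `abort_multipart_upload` on failure. A minimal sketch of that boto3 flow under invented bucket and key names (S3 requires every part except the last to be at least 5 MiB):

```python
# Minimal sketch (not the package's implementation) of the boto3 multipart
# upload flow; the bucket and key are hypothetical, and every part except the
# last must be at least 5 MiB.
import boto3


def multipart_upload(readable, bucket: str = "example-bucket",
                     key: str = "example-key", part_size: int = 50 * 1024 * 1024) -> None:
    client = boto3.client("s3")
    upload_id = client.create_multipart_upload(Bucket=bucket, Key=key)["UploadId"]
    parts = []
    try:
        part_number = 1
        while True:
            chunk = readable.read(part_size)
            if not chunk and part_number > 1:
                break  # nothing left to send
            part = client.upload_part(
                Bucket=bucket,
                Key=key,
                PartNumber=part_number,
                UploadId=upload_id,
                Body=chunk,
            )
            parts.append({"PartNumber": part_number, "ETag": part["ETag"]})
            part_number += 1
            if not chunk:
                break  # an empty stream still gets one (empty) part
        client.complete_multipart_upload(
            Bucket=bucket,
            Key=key,
            UploadId=upload_id,
            MultipartUpload={"Parts": parts},
        )
    except Exception:
        # Abandoned multipart uploads keep accruing storage until aborted.
        client.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id)
        raise
```
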
@@ -1339,15 +1589,21 @@ class AWSJobStore(AbstractJobStore):
 with panic(log=logger):
 for attempt in retry_s3():
 with attempt:
-client.abort_multipart_upload(
-
-
+client.abort_multipart_upload(
+Bucket=bucket_name,
+Key=compat_bytes(info.fileID),
+UploadId=uploadId,
+)

 else:

-while not store._getBucketVersioning(
-
-
+while not store._getBucketVersioning(
+store.files_bucket.name
+):
+logger.warning(
+"Versioning does not appear to be enabled yet. Deferring multipart "
+"upload completion..."
+)
 time.sleep(1)

 # Save the checksum
@@ -1359,32 +1615,46 @@ class AWSJobStore(AbstractJobStore):
 # in tests
 # (https://github.com/DataBiosphere/toil/issues/3894)
 with attempt:
-logger.debug(
+logger.debug("Attempting to complete upload...")
 completed = client.complete_multipart_upload(
 Bucket=bucket_name,
 Key=compat_bytes(info.fileID),
 UploadId=uploadId,
-MultipartUpload={"Parts": parts}
-
-
-
-
-
+MultipartUpload={"Parts": parts},
+)
+
+logger.debug(
+"Completed upload object of type %s: %s",
+str(type(completed)),
+repr(completed),
+)
+info.version = completed.get("VersionId")
+logger.debug(
+"Completed upload with version %s",
+str(info.version),
+)

 if info.version is None:
 # Somehow we don't know the version. Try and get it.
-for attempt in retry_s3(
+for attempt in retry_s3(
+predicate=lambda e: retryable_s3_errors(e)
+or isinstance(e, AssertionError)
+):
 with attempt:
-version = client.head_object(
-
-
-
-
+version = client.head_object(
+Bucket=bucket_name,
+Key=compat_bytes(info.fileID),
+**headerArgs,
+).get("VersionId", None)
+logger.warning(
+"Loaded key for upload with no version and got version %s",
+str(version),
+)
 info.version = version
 assert info.version is not None

 # Make sure we actually wrote something, even if an empty file
-assert
+assert bool(info.version) or info.content is not None

 class SinglePartPipe(WritablePipe):
 def readFrom(self, readable):
@@ -1392,10 +1662,10 @@ class AWSJobStore(AbstractJobStore):
 assert isinstance(buf, bytes)
 dataLength = len(buf)
 if allowInlining and dataLength <= info.maxInlinedSize():
-logger.debug(
+logger.debug("Inlining content of %d bytes", len(buf))
 info.content = buf
 # There will be no checksum
-info.checksum =
+info.checksum = ""
 else:
 # We will compute a checksum
 hasher = info._start_checksum()
@@ -1409,39 +1679,57 @@ class AWSJobStore(AbstractJobStore):
 buf = BytesIO(buf)

 while not store._getBucketVersioning(bucket_name):
-logger.warning(
-
+logger.warning(
+"Versioning does not appear to be enabled yet. Deferring single part "
+"upload..."
+)
 time.sleep(1)

 for attempt in retry_s3():
 with attempt:
-logger.debug(
-
-
-
-
+logger.debug(
+"Uploading single part of %d bytes", dataLength
+)
+client.upload_fileobj(
+Bucket=bucket_name,
+Key=compat_bytes(info.fileID),
+Fileobj=buf,
+ExtraArgs=headerArgs,
+)

 # use head_object with the SSE headers to access versionId and content_length attributes
-headObj = client.head_object(
-
-
-
-
-
+headObj = client.head_object(
+Bucket=bucket_name,
+Key=compat_bytes(info.fileID),
+**headerArgs,
+)
+assert dataLength == headObj.get("ContentLength", None)
+info.version = headObj.get("VersionId", None)
+logger.debug(
+"Upload received version %s", str(info.version)
+)

 if info.version is None:
 # Somehow we don't know the version
-for attempt in retry_s3(
+for attempt in retry_s3(
+predicate=lambda e: retryable_s3_errors(e)
+or isinstance(e, AssertionError)
+):
 with attempt:
-headObj = client.head_object(
-
-
-
-
+headObj = client.head_object(
+Bucket=bucket_name,
+Key=compat_bytes(info.fileID),
+**headerArgs,
+)
+info.version = headObj.get("VersionId", None)
+logger.warning(
+"Reloaded key with no version and got version %s",
+str(info.version),
+)
 assert info.version is not None

 # Make sure we actually wrote something, even if an empty file
-assert
+assert bool(info.version) or info.content is not None

 if multipart:
 pipe = MultiPartPipe(encoding=encoding, errors=errors)
@@ -1452,20 +1740,22 @@ class AWSJobStore(AbstractJobStore):
 yield writable

 if not pipe.reader_done:
-logger.debug(f
-raise RuntimeError(
+logger.debug(f"Version: {self.version} Content: {self.content}")
+raise RuntimeError(
+"Escaped context manager without written data being read!"
+)

 # We check our work to make sure we have exactly one of embedded
 # content or a real object version.

 if self.content is None:
 if not bool(self.version):
-logger.debug(f
-raise RuntimeError(
+logger.debug(f"Version: {self.version} Content: {self.content}")
+raise RuntimeError("No content added and no version created")
 else:
 if bool(self.version):
-logger.debug(f
-raise RuntimeError(
+logger.debug(f"Version: {self.version} Content: {self.content}")
+raise RuntimeError("Content added and version created")

 def copyFrom(self, srcObj):
 """
@@ -1475,18 +1765,20 @@ class AWSJobStore(AbstractJobStore):
 """
 assert srcObj.content_length is not None
 if srcObj.content_length <= self.maxInlinedSize():
-self.content = srcObj.get().get(
+self.content = srcObj.get().get("Body").read()
 else:
 # Create a new Resource in case it needs to be on its own thread
-resource = boto3_session.resource(
-self.version = copyKeyMultipart(
-
-
-
-
-
-
-
+resource = boto3_session.resource("s3", region_name=self.outer.region)
+self.version = copyKeyMultipart(
+resource,
+srcBucketName=compat_bytes(srcObj.bucket_name),
+srcKeyName=compat_bytes(srcObj.key),
+srcKeyVersion=compat_bytes(srcObj.version_id),
+dstBucketName=compat_bytes(self.outer.files_bucket.name),
+dstKeyName=compat_bytes(self._fileID),
+sseAlgorithm="AES256",
+sseKey=self._getSSEKey(),
+)

 def copyTo(self, dstObj):
 """
@@ -1500,35 +1792,43 @@ class AWSJobStore(AbstractJobStore):
 dstObj.put(Body=self.content)
 elif self.version:
 # Create a new Resource in case it needs to be on its own thread
-resource = boto3_session.resource(
+resource = boto3_session.resource("s3", region_name=self.outer.region)

 for attempt in retry_s3():
 # encrypted = True if self.outer.sseKeyPath else False
 with attempt:
-copyKeyMultipart(
-
-
-
-
-
-
-
+copyKeyMultipart(
+resource,
+srcBucketName=compat_bytes(self.outer.files_bucket.name),
+srcKeyName=compat_bytes(self.fileID),
+srcKeyVersion=compat_bytes(self.version),
+dstBucketName=compat_bytes(dstObj.bucket_name),
+dstKeyName=compat_bytes(dstObj.key),
+copySourceSseAlgorithm="AES256",
+copySourceSseKey=self._getSSEKey(),
+)
 else:
 assert False

 def download(self, localFilePath, verifyChecksum=True):
 if self.content is not None:
 with AtomicFileCreate(localFilePath) as tmpPath:
-with open(tmpPath,
+with open(tmpPath, "wb") as f:
 f.write(self.content)
 elif self.version:
 headerArgs = self._s3EncryptionArgs()
 obj = self.outer.files_bucket.Object(compat_bytes(self.fileID))

-for attempt in retry_s3(
+for attempt in retry_s3(
+predicate=lambda e: retryable_s3_errors(e)
+or isinstance(e, ChecksumError)
+):
 with attempt:
 with AtomicFileCreate(localFilePath) as tmpPath:
-obj.download_file(
+obj.download_file(
+Filename=tmpPath,
+ExtraArgs={"VersionId": self.version, **headerArgs},
+)

 if verifyChecksum and self.checksum:
 try:
@@ -1536,7 +1836,10 @@ class AWSJobStore(AbstractJobStore):
 self._get_file_checksum(localFilePath, self.checksum)
 except ChecksumError as e:
 # Annotate checksum mismatches with file name
-raise ChecksumError(
+raise ChecksumError(
+"Checksums do not match for file %s."
+% localFilePath
+) from e
 # The error will get caught and result in a retry of the download until we run out of retries.
 # TODO: handle obviously truncated downloads by resuming instead.
 else:
@@ -1558,7 +1861,10 @@ class AWSJobStore(AbstractJobStore):
 obj = info.outer.files_bucket.Object(compat_bytes(info.fileID))
 for attempt in retry_s3():
 with attempt:
-obj.download_fileobj(
+obj.download_fileobj(
+writable,
+ExtraArgs={"VersionId": info.version, **headerArgs},
+)
 else:
 assert False

@@ -1574,7 +1880,7 @@ class AWSJobStore(AbstractJobStore):
 def transform(self, readable, writable):
 hasher = info._start_checksum(to_match=info.checksum)
 contents = readable.read(1024 * 1024)
-while contents != b
+while contents != b"":
 info._update_checksum(hasher, contents)
 try:
 writable.write(contents)
@@ -1591,7 +1897,9 @@ class AWSJobStore(AbstractJobStore):
 if verifyChecksum and self.checksum:
 with DownloadPipe() as readable:
 # Interpose a pipe to check the hash
-with HashingPipe(
+with HashingPipe(
+readable, encoding=encoding, errors=errors
+) as verified:
 yield verified
 else:
 # Readable end of pipe produces text mode output if encoding specified
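
The `HashingPipe` interposed above verifies the recorded checksum while the download streams through it, so corruption is caught without a second pass over the data. A small sketch of the same idea, assuming the `<algorithm>$<hexdigest>` format; the function name is illustrative, not the package's:

```python
# Sketch of checksum-while-streaming: copy a readable stream to a writable one
# and verify a "<algorithm>$<hexdigest>" checksum as the bytes pass through.
import hashlib


def copy_and_verify(readable, writable, recorded: str, chunk_size: int = 1024 * 1024) -> None:
    algorithm, expected = recorded.split("$", 1)
    hasher = hashlib.new(algorithm)
    while True:
        chunk = readable.read(chunk_size)
        if not chunk:
            break
        hasher.update(chunk)
        writable.write(chunk)
    actual = hasher.hexdigest()
    if actual != expected:
        raise ValueError(f"Checksum mismatch. Expected: {expected} Actual: {actual}")
```
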
@@ -1602,18 +1910,25 @@ class AWSJobStore(AbstractJobStore):
 def delete(self):
 store = self.outer
 if self.previousVersion is not None:
-expected: UpdateConditionTypeDef = {
+expected: "UpdateConditionTypeDef" = {
+"Name": "version",
+"Value": cast(str, self.previousVersion),
+}
 for attempt in retry_sdb():
 with attempt:
-store.db.delete_attributes(
-
-
+store.db.delete_attributes(
+DomainName=store.files_domain_name,
+ItemName=compat_bytes(self.fileID),
+Expected=expected,
+)
 if self.previousVersion:
 for attempt in retry_s3():
 with attempt:
-store.s3_client.delete_object(
-
-
+store.s3_client.delete_object(
+Bucket=store.files_bucket.name,
+Key=compat_bytes(self.fileID),
+VersionId=self.previousVersion,
+)

 def getSize(self):
 """
@@ -1632,7 +1947,7 @@ class AWSJobStore(AbstractJobStore):
 def _getSSEKey(self) -> Optional[bytes]:
 sseKeyPath = self.outer.sseKeyPath
 if sseKeyPath:
-with open(sseKeyPath,
+with open(sseKeyPath, "rb") as f:
 sseKey = f.read()
 return sseKey

@@ -1641,25 +1956,30 @@ class AWSJobStore(AbstractJobStore):
 # parameters and will be used to set the http headers
 if self.encrypted:
 sseKey = self._getSSEKey()
-assert
+assert (
+sseKey is not None
+), "Content is encrypted but no key was provided."
 assert len(sseKey) == 32
 # boto3 encodes the key and calculates the MD5 for us
-return {
+return {"SSECustomerAlgorithm": "AES256", "SSECustomerKey": sseKey}
 else:
 return {}

 def __repr__(self):
 r = custom_repr
-d = (
-
-
-
-
-
-
-
-
-
+d = (
+("fileID", r(self.fileID)),
+("ownerID", r(self.ownerID)),
+("encrypted", r(self.encrypted)),
+("version", r(self.version)),
+("previousVersion", r(self.previousVersion)),
+("content", r(self.content)),
+("checksum", r(self.checksum)),
+("_numContentChunks", r(self._numContentChunks)),
+)
+return "{}({})".format(
+type(self).__name__, ", ".join(f"{k}={v}" for k, v in d)
+)

 versionings = dict(Enabled=True, Disabled=False, Suspended=None)

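
`_s3EncryptionArgs`, reformatted above, returns the SSE-C (customer-provided key) parameters: boto3 only needs `SSECustomerAlgorithm` and the raw 32-byte `SSECustomerKey`, deriving the base64 encoding and key-MD5 header itself, and the same pair must accompany every later read of the object. A hedged sketch of that round trip with an invented bucket, key, and key file:

```python
# Sketch of an SSE-C round trip with boto3; the bucket, key, and key file are
# hypothetical. The same 32-byte key must accompany every later read.
import boto3

client = boto3.client("s3")
with open("sse-key.bin", "rb") as f:
    sse_key = f.read()
assert len(sse_key) == 32

sse_args = {"SSECustomerAlgorithm": "AES256", "SSECustomerKey": sse_key}

client.put_object(Bucket="example-bucket", Key="example-key", Body=b"hello", **sse_args)
obj = client.get_object(Bucket="example-bucket", Key="example-key", **sse_args)
print(obj["Body"].read())
```
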
@@ -1696,7 +2016,7 @@ class AWSJobStore(AbstractJobStore):
 if self.files_bucket is not None:
 self._delete_bucket(self.files_bucket)
 self.files_bucket = None
-for name in
+for name in "files_domain_name", "jobs_domain_name":
 domainName = getattr(self, name)
 if domainName is not None:
 self._delete_domain(domainName)
@@ -1720,12 +2040,14 @@ class AWSJobStore(AbstractJobStore):
 for attempt in retry_s3():
 with attempt:
 try:
-uploads = s3_boto3_client.list_multipart_uploads(
+uploads = s3_boto3_client.list_multipart_uploads(
+Bucket=bucket.name
+).get("Uploads")
 if uploads:
 for u in uploads:
-s3_boto3_client.abort_multipart_upload(
-
-
+s3_boto3_client.abort_multipart_upload(
+Bucket=bucket.name, Key=u["Key"], UploadId=u["UploadId"]
+)

 bucket.objects.all().delete()
 bucket.object_versions.delete()
@@ -1745,5 +2067,7 @@ custom_repr = aRepr.repr
 class BucketLocationConflictException(LocatorException):
 def __init__(self, bucketRegion):
 super().__init__(
-
-
+"A bucket with the same name as the jobstore was found in another region (%s). "
+"Cannot proceed as the unique bucket name is already in use.",
+locator=bucketRegion,
+)