toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- toil/__init__.py +124 -86
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +39 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +651 -155
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +784 -397
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1137 -534
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +1031 -349
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +772 -412
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +204 -58
- toil/lib/aws/utils.py +290 -213
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/dockstore.py +379 -0
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -105
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/history.py +1271 -0
- toil/lib/history_submission.py +681 -0
- toil/lib/humanize.py +6 -2
- toil/lib/io.py +121 -12
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +83 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +125 -87
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/lib/trs.py +390 -0
- toil/lib/web.py +38 -0
- toil/options/common.py +850 -402
- toil/options/cwl.py +185 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +283 -180
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +84 -55
- toil/server/utils.py +56 -31
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +183 -65
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +265 -49
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +375 -72
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_history.py +212 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/lib/test_trs.py +161 -0
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +6 -6
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3528 -1053
- toil/worker.py +370 -149
- toil-8.1.0b1.dist-info/METADATA +178 -0
- toil-8.1.0b1.dist-info/RECORD +259 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/jobStores/aws/jobStore.py
CHANGED
@@ -21,46 +21,53 @@ import reprlib
 import stat
 import time
 import uuid
+from collections.abc import Generator
 from contextlib import contextmanager
 from io import BytesIO
-from typing import
+from typing import IO, TYPE_CHECKING, Optional, Union, cast
 from urllib.parse import ParseResult, parse_qs, urlencode, urlsplit, urlunsplit

 from botocore.exceptions import ClientError
-from mypy_boto3_sdb import SimpleDBClient
-from mypy_boto3_sdb.type_defs import ReplaceableItemTypeDef, ReplaceableAttributeTypeDef, SelectResultTypeDef, ItemTypeDef, AttributeTypeDef, DeletableItemTypeDef, UpdateConditionTypeDef

 import toil.lib.encryption as encryption
 from toil.fileStores import FileID
 from toil.job import Job, JobDescription
-from toil.jobStores.abstractJobStore import (
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+from toil.jobStores.abstractJobStore import (
+    AbstractJobStore,
+    ConcurrentFileModificationException,
+    JobStoreExistsException,
+    LocatorException,
+    NoSuchFileException,
+    NoSuchJobException,
+    NoSuchJobStoreException,
+)
+from toil.jobStores.aws.utils import (
+    SDBHelper,
+    ServerSideCopyProhibitedError,
+    copyKeyMultipart,
+    fileSizeAndTime,
+    no_such_sdb_domain,
+    retry_sdb,
+    sdb_unavailable,
+    uploadFile,
+    uploadFromPath,
+)
+from toil.jobStores.utils import ReadablePipe, ReadableTransformingPipe, WritablePipe
 from toil.lib.aws import build_tag_dict_from_env
 from toil.lib.aws.session import establish_boto3_session
-from toil.lib.aws.utils import (
-
-
-
-
-
-
-
+from toil.lib.aws.utils import (
+    NoBucketLocationError,
+    boto3_pager,
+    create_s3_bucket,
+    enable_public_objects,
+    flatten_tags,
+    get_bucket_region,
+    get_item_from_attributes,
+    get_object_for_url,
+    list_objects_for_url,
+    retry_s3,
+    retryable_s3_errors,
+)
 from toil.lib.compatibility import compat_bytes
 from toil.lib.ec2nodes import EC2Regions
 from toil.lib.exceptions import panic
@@ -70,11 +77,20 @@ from toil.lib.objects import InnerClass
 from toil.lib.retry import get_error_code, get_error_status, retry

 if TYPE_CHECKING:
+    from mypy_boto3_sdb.type_defs import (
+        AttributeTypeDef,
+        DeletableItemTypeDef,
+        ItemTypeDef,
+        ReplaceableAttributeTypeDef,
+        ReplaceableItemTypeDef,
+        UpdateConditionTypeDef,
+    )
+
     from toil import Config

 boto3_session = establish_boto3_session()
-s3_boto3_resource = boto3_session.resource(
-s3_boto3_client = boto3_session.client(
+s3_boto3_resource = boto3_session.resource("s3")
+s3_boto3_client = boto3_session.client("s3")
 logger = logging.getLogger(__name__)

 # Sometimes we have to wait for multipart uploads to become real. How long
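The mypy_boto3_sdb typing imports now live under an `if TYPE_CHECKING:` guard, which is why annotations like `ItemTypeDef` become quoted strings throughout the rest of this diff: the names are only bound while a type checker runs, not at runtime. A minimal, self-contained sketch of that pattern (the module and type names here only mirror the diff; this is not code from the package):

import_sketch.py:
    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Imported only during static type checking, so the stub package
        # (mypy_boto3_sdb) is not a runtime dependency.
        from mypy_boto3_sdb.type_defs import ItemTypeDef


    def item_name(item: "ItemTypeDef") -> str:
        # The quoted annotation is never evaluated at runtime, so this works
        # even though ItemTypeDef is unbound outside type checking.
        return str(item["Name"])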
@@ -89,6 +105,7 @@ class ChecksumError(Exception):

 class DomainDoesNotExist(Exception):
     """Raised when a domain that is expected to exist does not exist."""
+
     def __init__(self, domain_name):
         super().__init__(f"Expected domain {domain_name} to exist!")

@@ -106,14 +123,14 @@ class AWSJobStore(AbstractJobStore):
     # URLs where the may interfere with the certificate common name. We use a double
     # underscore as a separator instead.
     #
-    bucketNameRe = re.compile(r
+    bucketNameRe = re.compile(r"^[a-z0-9][a-z0-9-]+[a-z0-9]$")

     # See http://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html
     #
     minBucketNameLen = 3
     maxBucketNameLen = 63
     maxNameLen = 10
-    nameSeparator =
+    nameSeparator = "--"

     def __init__(self, locator: str, partSize: int = 50 << 20) -> None:
         """
@@ -124,23 +141,35 @@ class AWSJobStore(AbstractJobStore):
         whole file
         """
         super().__init__(locator)
-        region, namePrefix = locator.split(
+        region, namePrefix = locator.split(":")
         regions = EC2Regions.keys()
         if region not in regions:
             raise ValueError(f'Region "{region}" is not one of: {regions}')
         if not self.bucketNameRe.match(namePrefix):
-            raise ValueError(
-
-
+            raise ValueError(
+                "Invalid name prefix '%s'. Name prefixes must contain only digits, "
+                "hyphens or lower-case letters and must not start or end in a "
+                "hyphen." % namePrefix
+            )
         # Reserve 13 for separator and suffix
-        if len(namePrefix) > self.maxBucketNameLen - self.maxNameLen - len(
-
-
-
-
-
-
-
+        if len(namePrefix) > self.maxBucketNameLen - self.maxNameLen - len(
+            self.nameSeparator
+        ):
+            raise ValueError(
+                "Invalid name prefix '%s'. Name prefixes may not be longer than 50 "
+                "characters." % namePrefix
+            )
+        if "--" in namePrefix:
+            raise ValueError(
+                "Invalid name prefix '%s'. Name prefixes may not contain "
+                "%s." % (namePrefix, self.nameSeparator)
+            )
+        logger.debug(
+            "Instantiating %s for region %s and name prefix '%s'",
+            self.__class__,
+            region,
+            namePrefix,
+        )
         self.region = region
         self.name_prefix = namePrefix
         self.part_size = partSize
@@ -149,7 +178,7 @@ class AWSJobStore(AbstractJobStore):
         self.files_bucket = None
         self.db = boto3_session.client(service_name="sdb", region_name=region)

-        self.s3_resource = boto3_session.resource(
+        self.s3_resource = boto3_session.resource("s3", region_name=self.region)
         self.s3_client = self.s3_resource.meta.client

     def initialize(self, config: "Config") -> None:
@@ -176,7 +205,12 @@ class AWSJobStore(AbstractJobStore):
         self._bind(create=False)
         super().resume()

-    def _bind(
+    def _bind(
+        self,
+        create: bool = False,
+        block: bool = True,
+        check_versioning_consistency: bool = True,
+    ) -> None:
         def qualify(name):
             assert len(name) <= self.maxNameLen
             return self.name_prefix + self.nameSeparator + name
@@ -191,11 +225,13 @@ class AWSJobStore(AbstractJobStore):
         self.files_domain_name = qualify("files")
         self._bindDomain(self.files_domain_name, create=create, block=block)
         if self.files_bucket is None:
-            self.files_bucket = self._bindBucket(
-
-
-
-
+            self.files_bucket = self._bindBucket(
+                qualify("files"),
+                create=create,
+                block=block,
+                versioning=True,
+                check_versioning_consistency=check_versioning_consistency,
+            )

     @property
     def _registered(self) -> Optional[bool]:
@@ -217,25 +253,31 @@ class AWSJobStore(AbstractJobStore):
         # can't handle job stores that were partially created by 3.3.0, though.
         registry_domain_name = "toil-registry"
         try:
-            self._bindDomain(
-
-
+            self._bindDomain(
+                domain_name=registry_domain_name, create=False, block=False
+            )
         except DomainDoesNotExist:
             return False

         for attempt in retry_sdb():
             with attempt:
-                get_result = self.db.get_attributes(
-
-
-
-
-
+                get_result = self.db.get_attributes(
+                    DomainName=registry_domain_name,
+                    ItemName=self.name_prefix,
+                    AttributeNames=["exists"],
+                    ConsistentRead=True,
+                )
+                attributes: list["AttributeTypeDef"] = get_result.get(
+                    "Attributes", []
+                )  # the documentation says 'Attributes' should always exist, but this is not true
+                exists: Optional[str] = get_item_from_attributes(
+                    attributes=attributes, name="exists"
+                )
                 if exists is None:
                     return False
-                elif exists ==
+                elif exists == "True":
                     return True
-                elif exists ==
+                elif exists == "False":
                     return None
                 else:
                     assert False
@@ -244,31 +286,40 @@ class AWSJobStore(AbstractJobStore):
     def _registered(self, value: bool) -> None:
         registry_domain_name = "toil-registry"
         try:
-            self._bindDomain(
-
-
-
-
+            self._bindDomain(
+                domain_name=registry_domain_name,
+                # Only create registry domain when registering or
+                # transitioning a store
+                create=value is not False,
+                block=False,
+            )
         except DomainDoesNotExist:
             pass
         else:
             for attempt in retry_sdb():
                 with attempt:
                     if value is False:
-                        self.db.delete_attributes(
-
+                        self.db.delete_attributes(
+                            DomainName=registry_domain_name, ItemName=self.name_prefix
+                        )
                     else:
                         if value is True:
-                            attributes:
+                            attributes: list["ReplaceableAttributeTypeDef"] = [
+                                {"Name": "exists", "Value": "True", "Replace": True}
+                            ]
                         elif value is None:
-                            attributes = [
+                            attributes = [
+                                {"Name": "exists", "Value": "False", "Replace": True}
+                            ]
                         else:
                             assert False
-                        self.db.put_attributes(
-
-
+                        self.db.put_attributes(
+                            DomainName=registry_domain_name,
+                            ItemName=self.name_prefix,
+                            Attributes=attributes,
+                        )

-    def _checkItem(self, item: ItemTypeDef, enforce: bool = True) -> None:
+    def _checkItem(self, item: "ItemTypeDef", enforce: bool = True) -> None:
         """
         Make sure that the given SimpleDB item actually has the attributes we think it should.

@@ -278,22 +329,31 @@ class AWSJobStore(AbstractJobStore):
         """
         self._checkAttributes(item["Attributes"], enforce)

-    def _checkAttributes(
+    def _checkAttributes(
+        self, attributes: list["AttributeTypeDef"], enforce: bool = True
+    ) -> None:
         if get_item_from_attributes(attributes=attributes, name="overlargeID") is None:
-            logger.error(
-
+            logger.error(
+                "overlargeID attribute isn't present: either SimpleDB entry is "
+                "corrupt or jobstore is from an extremely old Toil: %s",
+                attributes,
+            )
             if enforce:
-                raise RuntimeError(
-
+                raise RuntimeError(
+                    "encountered SimpleDB entry missing required attribute "
+                    "'overlargeID'; is your job store ancient?"
+                )

-    def _awsJobFromAttributes(self, attributes:
+    def _awsJobFromAttributes(self, attributes: list["AttributeTypeDef"]) -> Job:
         """
         Get a Toil Job object from attributes that are defined in an item from the DB
         :param attributes: List of attributes
         :return: Toil job
         """
         self._checkAttributes(attributes)
-        overlarge_id_value = get_item_from_attributes(
+        overlarge_id_value = get_item_from_attributes(
+            attributes=attributes, name="overlargeID"
+        )
         if overlarge_id_value:
             assert self.file_exists(overlarge_id_value)
             # This is an overlarge job, download the actual attributes
@@ -309,15 +369,14 @@ class AWSJobStore(AbstractJobStore):
             job.assignConfig(self.config)
         return job

-    def _awsJobFromItem(self, item: ItemTypeDef) -> Job:
+    def _awsJobFromItem(self, item: "ItemTypeDef") -> Job:
         """
         Get a Toil Job object from an item from the DB
-        :param item: ItemTypeDef
         :return: Toil Job
         """
         return self._awsJobFromAttributes(item["Attributes"])

-    def _awsJobToAttributes(self, job: JobDescription) ->
+    def _awsJobToAttributes(self, job: JobDescription) -> list["AttributeTypeDef"]:
         binary = pickle.dumps(job, protocol=pickle.HIGHEST_PROTOCOL)
         if len(binary) > SDBHelper.maxBinarySize(extraReservedChunks=1):
             # Store as an overlarge job in S3
@@ -330,7 +389,7 @@ class AWSJobStore(AbstractJobStore):
             item["overlargeID"] = ""
         return SDBHelper.attributeDictToList(item)

-    def _awsJobToItem(self, job: JobDescription, name: str) -> ItemTypeDef:
+    def _awsJobToItem(self, job: JobDescription, name: str) -> "ItemTypeDef":
         return {"Name": name, "Attributes": self._awsJobToAttributes(job)}

     jobsPerBatchInsert = 25
@@ -339,27 +398,34 @@ class AWSJobStore(AbstractJobStore):
     def batch(self) -> None:
         self._batchedUpdates = []
         yield
-        batches = [
-
+        batches = [
+            self._batchedUpdates[i : i + self.jobsPerBatchInsert]
+            for i in range(0, len(self._batchedUpdates), self.jobsPerBatchInsert)
+        ]

         for batch in batches:
-            items:
+            items: list["ReplaceableItemTypeDef"] = []
             for jobDescription in batch:
-                item_attributes:
+                item_attributes: list["ReplaceableAttributeTypeDef"] = []
                 jobDescription.pre_update_hook()
                 item_name = compat_bytes(jobDescription.jobStoreID)
-                got_job_attributes:
+                got_job_attributes: list["AttributeTypeDef"] = self._awsJobToAttributes(
+                    jobDescription
+                )
                 for each_attribute in got_job_attributes:
-                    new_attribute: ReplaceableAttributeTypeDef = {
-
-
+                    new_attribute: "ReplaceableAttributeTypeDef" = {
+                        "Name": each_attribute["Name"],
+                        "Value": each_attribute["Value"],
+                        "Replace": True,
+                    }
                     item_attributes.append(new_attribute)
-                items.append({"Name": item_name,
-                              "Attributes": item_attributes})
+                items.append({"Name": item_name, "Attributes": item_attributes})

             for attempt in retry_sdb():
                 with attempt:
-                    self.db.batch_put_attributes(
+                    self.db.batch_put_attributes(
+                        DomainName=self.jobs_domain_name, Items=items
+                    )
         self._batchedUpdates = None

     def assign_job_id(self, job_description: JobDescription) -> None:
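SimpleDB's BatchPutAttributes call caps a request at 25 items, which is why batch() slices _batchedUpdates into jobsPerBatchInsert-sized groups before writing. A standalone sketch of that slicing idiom (standard library only; the 25 mirrors jobsPerBatchInsert above):

chunking_sketch.py:
    from collections.abc import Iterator
    from typing import TypeVar

    T = TypeVar("T")


    def chunked(seq: list[T], size: int = 25) -> Iterator[list[T]]:
        # Yield consecutive slices of at most `size` items, the same shape as
        # self._batchedUpdates[i : i + self.jobsPerBatchInsert] in the diff.
        for i in range(0, len(seq), size):
            yield seq[i : i + size]


    assert [len(b) for b in chunked(list(range(60)))] == [25, 25, 10]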
@@ -377,19 +443,28 @@ class AWSJobStore(AbstractJobStore):
     def job_exists(self, job_id: Union[bytes, str]) -> bool:
         for attempt in retry_sdb():
             with attempt:
-                return
-
-
-
+                return (
+                    len(
+                        self.db.get_attributes(
+                            DomainName=self.jobs_domain_name,
+                            ItemName=compat_bytes(job_id),
+                            AttributeNames=[SDBHelper.presenceIndicator()],
+                            ConsistentRead=True,
+                        ).get("Attributes", [])
+                    )
+                    > 0
+                )

     def jobs(self) -> Generator[Job, None, None]:
-        job_items: Optional[
+        job_items: Optional[list["ItemTypeDef"]] = None
         for attempt in retry_sdb():
             with attempt:
-                job_items = boto3_pager(
-
-
-
+                job_items = boto3_pager(
+                    self.db.select,
+                    "Items",
+                    ConsistentRead=True,
+                    SelectExpression="select * from `%s`" % self.jobs_domain_name,
+                )
         assert job_items is not None
         for jobItem in job_items:
             yield self._awsJobFromItem(jobItem)
@@ -398,9 +473,11 @@ class AWSJobStore(AbstractJobStore):
         item_attributes = None
         for attempt in retry_sdb():
             with attempt:
-                item_attributes = self.db.get_attributes(
-
-
+                item_attributes = self.db.get_attributes(
+                    DomainName=self.jobs_domain_name,
+                    ItemName=compat_bytes(job_id),
+                    ConsistentRead=True,
+                ).get("Attributes", [])
         if not item_attributes:
             raise NoSuchJobException(job_id)
         job = self._awsJobFromAttributes(item_attributes)
@@ -413,11 +490,17 @@ class AWSJobStore(AbstractJobStore):
         logger.debug("Updating job %s", job_description.jobStoreID)
         job_description.pre_update_hook()
         job_attributes = self._awsJobToAttributes(job_description)
-        update_attributes:
-
+        update_attributes: list["ReplaceableAttributeTypeDef"] = [
+            {"Name": attribute["Name"], "Value": attribute["Value"], "Replace": True}
+            for attribute in job_attributes
+        ]
         for attempt in retry_sdb():
             with attempt:
-                self.db.put_attributes(
+                self.db.put_attributes(
+                    DomainName=self.jobs_domain_name,
+                    ItemName=compat_bytes(job_description.jobStoreID),
+                    Attributes=update_attributes,
+                )

     itemsPerBatchDelete = 25

@@ -428,51 +511,75 @@ class AWSJobStore(AbstractJobStore):
         # If the job is overlarge, delete its file from the filestore
         for attempt in retry_sdb():
             with attempt:
-                attributes = self.db.get_attributes(
-
-
+                attributes = self.db.get_attributes(
+                    DomainName=self.jobs_domain_name,
+                    ItemName=compat_bytes(job_id),
+                    ConsistentRead=True,
+                ).get("Attributes", [])
         # If the overlargeID has fallen off, maybe we partially deleted the
         # attributes of the item? Or raced on it? Or hit SimpleDB being merely
         # eventually consistent? We should still be able to get rid of it.
         self._checkAttributes(attributes, enforce=False)
-        overlarge_id_value = get_item_from_attributes(
+        overlarge_id_value = get_item_from_attributes(
+            attributes=attributes, name="overlargeID"
+        )
         if overlarge_id_value:
             logger.debug("Deleting job from filestore")
             self.delete_file(overlarge_id_value)
         for attempt in retry_sdb():
             with attempt:
-                self.db.delete_attributes(
-
+                self.db.delete_attributes(
+                    DomainName=self.jobs_domain_name, ItemName=compat_bytes(job_id)
+                )
+        items: Optional[list["ItemTypeDef"]] = None
         for attempt in retry_sdb():
             with attempt:
-                items = list(
-
-
-
+                items = list(
+                    boto3_pager(
+                        self.db.select,
+                        "Items",
+                        ConsistentRead=True,
+                        SelectExpression=f"select version from `{self.files_domain_name}` where ownerID='{job_id}'",
+                    )
+                )
         assert items is not None
         if items:
-            logger.debug(
+            logger.debug(
+                "Deleting %d file(s) associated with job %s", len(items), job_id
+            )
             n = self.itemsPerBatchDelete
-            batches = [items[i:i + n] for i in range(0, len(items), n)]
+            batches = [items[i : i + n] for i in range(0, len(items), n)]
             for batch in batches:
-                delete_items:
+                delete_items: list["DeletableItemTypeDef"] = [
+                    {"Name": item["Name"]} for item in batch
+                ]
                 for attempt in retry_sdb():
                     with attempt:
-                        self.db.batch_delete_attributes(
+                        self.db.batch_delete_attributes(
+                            DomainName=self.files_domain_name, Items=delete_items
+                        )
             for item in items:
-                item: ItemTypeDef
-                version = get_item_from_attributes(
+                item: "ItemTypeDef"
+                version = get_item_from_attributes(
+                    attributes=item["Attributes"], name="version"
+                )
                 for attempt in retry_s3():
                     with attempt:
                         if version:
-                            self.s3_client.delete_object(
-
-
+                            self.s3_client.delete_object(
+                                Bucket=self.files_bucket.name,
+                                Key=compat_bytes(item["Name"]),
+                                VersionId=version,
+                            )
                         else:
-                            self.s3_client.delete_object(
-
+                            self.s3_client.delete_object(
+                                Bucket=self.files_bucket.name,
+                                Key=compat_bytes(item["Name"]),
+                            )

-    def get_empty_file_store_id(
+    def get_empty_file_store_id(
+        self, jobStoreID=None, cleanup=False, basename=None
+    ) -> FileID:
         info = self.FileInfo.create(jobStoreID if cleanup else None)
         with info.uploadStream() as _:
             # Empty
@@ -481,8 +588,14 @@ class AWSJobStore(AbstractJobStore):
         logger.debug("Created %r.", info)
         return info.fileID

-    def _import_file(
-
+    def _import_file(
+        self,
+        otherCls,
+        uri: ParseResult,
+        shared_file_name: Optional[str] = None,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> Optional[FileID]:
         try:
             if issubclass(otherCls, AWSJobStore):
                 srcObj = get_object_for_url(uri, existing=True)
@@ -492,15 +605,19 @@ class AWSJobStore(AbstractJobStore):
                 else:
                     self._requireValidSharedFileName(shared_file_name)
                     jobStoreFileID = self._shared_file_id(shared_file_name)
-                    info = self.FileInfo.loadOrCreate(
-
-
+                    info = self.FileInfo.loadOrCreate(
+                        jobStoreFileID=jobStoreFileID,
+                        ownerID=str(self.sharedFileOwnerID),
+                        encrypted=None,
+                    )
                 info.copyFrom(srcObj)
                 info.save()
                 return FileID(info.fileID, size) if shared_file_name is None else None
-        except ServerSideCopyProhibitedError:
-            # AWS refuses to do this copy for us
-            logger.warning(
+        except (NoBucketLocationError, ServerSideCopyProhibitedError):
+            # AWS refuses to tell us where the bucket is or do this copy for us
+            logger.warning(
+                "Falling back to copying via the local machine. This could get expensive!"
+            )

         # copy if exception
         return super()._import_file(otherCls, uri, shared_file_name=shared_file_name)
@@ -512,71 +629,112 @@ class AWSJobStore(AbstractJobStore):
                 info = self.FileInfo.loadOrFail(file_id)
                 info.copyTo(dstObj)
                 return
-        except ServerSideCopyProhibitedError:
-            # AWS refuses to do this copy for us
-            logger.warning(
+        except (NoBucketLocationError, ServerSideCopyProhibitedError):
+            # AWS refuses to tell us where the bucket is or do this copy for us
+            logger.warning(
+                "Falling back to copying via the local machine. This could get expensive!"
+            )
         else:
             super()._default_export_file(otherCls, file_id, uri)

+    ###
+    # URL access implementation
+    ###
+
+    # URL access methods aren't used by the rest of the job store methods.
+
     @classmethod
     def _url_exists(cls, url: ParseResult) -> bool:
         try:
-
+            try:
+                get_object_for_url(url, existing=True, anonymous=True)
+            except PermissionError:
+                # If we can't look anonymously, log in
+                get_object_for_url(url, existing=True)
             return True
         except FileNotFoundError:
             # Not a file
-            # Might be a directory.
+            # Might be a directory. Or we might not have access to know.
+            # See if it's a directory.
             return cls._get_is_directory(url)

     @classmethod
     def _get_size(cls, url: ParseResult) -> int:
-
+        try:
+            src_obj = get_object_for_url(url, existing=True, anonymous=True)
+        except PermissionError:
+            src_obj = get_object_for_url(url, existing=True)
+        return src_obj.content_length

     @classmethod
     def _read_from_url(cls, url: ParseResult, writable):
-
-
-
-
-
-
+        try:
+            src_obj = get_object_for_url(url, existing=True, anonymous=True)
+            src_obj.download_fileobj(writable)
+        except Exception as e:
+            if isinstance(e, PermissionError) or (isinstance(e, ClientError) and get_error_status(e) == 403):
+                # The object setup or the download does not have permission. Try again with a login.
+                src_obj = get_object_for_url(url, existing=True)
+                src_obj.download_fileobj(writable)
+            else:
+                raise
+        return (src_obj.content_length, False)  # executable bit is always False

     @classmethod
     def _open_url(cls, url: ParseResult) -> IO[bytes]:
-
-
+        try:
+            src_obj = get_object_for_url(url, existing=True, anonymous=True)
+            response = src_obj.get()
+        except Exception as e:
+            if isinstance(e, PermissionError) or (isinstance(e, ClientError) and get_error_status(e) == 403):
+                # The object setup or the download does not have permission. Try again with a login.
+                src_obj = get_object_for_url(url, existing=True)
+                response = src_obj.get()
+            else:
+                raise
         # We should get back a response with a stream in 'Body'
-        if
+        if "Body" not in response:
             raise RuntimeError(f"Could not fetch body stream for {url}")
-        return response[
+        return response["Body"]

     @classmethod
-    def _write_to_url(
+    def _write_to_url(
+        cls, readable, url: ParseResult, executable: bool = False
+    ) -> None:
+        # Don't try to do anonympus writes.
         dstObj = get_object_for_url(url)

         logger.debug("Uploading %s", dstObj.key)
         # uploadFile takes care of using multipart upload if the file is larger than partSize (default to 5MB)
-        uploadFile(
-
-
-
-
+        uploadFile(
+            readable=readable,
+            resource=s3_boto3_resource,
+            bucketName=dstObj.bucket_name,
+            fileID=dstObj.key,
+            partSize=5 * 1000 * 1000,
+        )

     @classmethod
-    def _list_url(cls, url: ParseResult) ->
-
+    def _list_url(cls, url: ParseResult) -> list[str]:
+        try:
+            return list_objects_for_url(url, anonymous=True)
+        except PermissionError:
+            return list_objects_for_url(url)
+

     @classmethod
     def _get_is_directory(cls, url: ParseResult) -> bool:
         # We consider it a directory if anything is in it.
         # TODO: Can we just get the first item and not the whole list?
-        return len(
+        return len(cls._list_url(url)) > 0

     @classmethod
     def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
-        return url.scheme.lower() ==
+        return url.scheme.lower() == "s3"

-    def write_file(
+    def write_file(
+        self, local_path: FileID, job_id: Optional[FileID] = None, cleanup: bool = False
+    ) -> FileID:
         info = self.FileInfo.create(job_id if cleanup else None)
         info.upload(local_path, not self.config.disableJobStoreChecksumVerification)
         info.save()
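The URL access methods added above all share one shape: try the S3 operation anonymously first, and repeat it with credentials only when the anonymous attempt hits a permission failure. A sketch of that fallback using plain boto3 (get_object_for_url in the diff wraps comparable logic; the bucket and key are placeholders, and treating a 403/AccessDenied ClientError as the permission signal is an assumption about how the denial surfaces):

anonymous_fallback_sketch.py:
    import boto3
    from botocore import UNSIGNED
    from botocore.config import Config
    from botocore.exceptions import ClientError


    def read_object(bucket: str, key: str) -> bytes:
        # First attempt: an unsigned client, so public objects need no credentials.
        anon = boto3.client("s3", config=Config(signature_version=UNSIGNED))
        try:
            return anon.get_object(Bucket=bucket, Key=key)["Body"].read()
        except ClientError as e:
            # Anything other than a permission denial is a real error.
            if e.response["Error"]["Code"] not in ("403", "AccessDenied"):
                raise
        # Fallback: a default client that signs the request with real credentials.
        return boto3.client("s3").get_object(Bucket=bucket, Key=key)["Body"].read()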
@@ -584,7 +742,14 @@ class AWSJobStore(AbstractJobStore):
         return info.fileID

     @contextmanager
-    def write_file_stream(
+    def write_file_stream(
+        self,
+        job_id: Optional[FileID] = None,
+        cleanup: bool = False,
+        basename=None,
+        encoding=None,
+        errors=None,
+    ):
         info = self.FileInfo.create(job_id if cleanup else None)
         with info.uploadStream(encoding=encoding, errors=errors) as writable:
             yield writable, info.fileID
@@ -592,11 +757,15 @@ class AWSJobStore(AbstractJobStore):
         logger.debug("Wrote %r.", info)

     @contextmanager
-    def write_shared_file_stream(
+    def write_shared_file_stream(
+        self, shared_file_name, encrypted=None, encoding=None, errors=None
+    ):
         self._requireValidSharedFileName(shared_file_name)
-        info = self.FileInfo.loadOrCreate(
-
-
+        info = self.FileInfo.loadOrCreate(
+            jobStoreFileID=self._shared_file_id(shared_file_name),
+            ownerID=str(self.sharedFileOwnerID),
+            encrypted=encrypted,
+        )
         with info.uploadStream(encoding=encoding, errors=errors) as writable:
             yield writable
             info.save()
@@ -629,7 +798,7 @@ class AWSJobStore(AbstractJobStore):
         info = self.FileInfo.loadOrFail(file_id)
         logger.debug("Reading %r into %r.", info, local_path)
         info.download(local_path, not self.config.disableJobStoreChecksumVerification)
-        if getattr(file_id,
+        if getattr(file_id, "executable", False):
             os.chmod(local_path, os.stat(local_path).st_mode | stat.S_IXUSR)

     @contextmanager
@@ -644,7 +813,9 @@ class AWSJobStore(AbstractJobStore):
         self._requireValidSharedFileName(shared_file_name)
         jobStoreFileID = self._shared_file_id(shared_file_name)
         info = self.FileInfo.loadOrFail(jobStoreFileID, customName=shared_file_name)
-        logger.debug(
+        logger.debug(
+            "Reading %r for shared file %r into stream.", info, shared_file_name
+        )
         with info.downloadStream(encoding=encoding, errors=errors) as readable:
             yield readable

@@ -660,7 +831,7 @@ class AWSJobStore(AbstractJobStore):
         with info.uploadStream(multipart=False) as writeable:
             if isinstance(msg, str):
                 # This stream is for binary data, so encode any non-encoded things
-                msg = msg.encode(
+                msg = msg.encode("utf-8", errors="ignore")
             writeable.write(msg)
         info.save()

@@ -682,10 +853,12 @@ class AWSJobStore(AbstractJobStore):
         items = None
         for attempt in retry_sdb():
             with attempt:
-                items = boto3_pager(
-
-
-
+                items = boto3_pager(
+                    self.db.select,
+                    "Items",
+                    ConsistentRead=True,
+                    SelectExpression=f"select * from `{self.files_domain_name}` where ownerID='{str(ownerId)}'",
+                )
         assert items is not None
         for item in items:
             info = self.FileInfo.fromItem(item)
@@ -702,13 +875,19 @@ class AWSJobStore(AbstractJobStore):
             with info.uploadStream(allowInlining=False) as f:
                 f.write(info.content)

-        self.files_bucket.Object(compat_bytes(jobStoreFileID)).Acl().put(
+        self.files_bucket.Object(compat_bytes(jobStoreFileID)).Acl().put(
+            ACL="public-read"
+        )

-        url = self.s3_client.generate_presigned_url(
-
-
-
+        url = self.s3_client.generate_presigned_url(
+            "get_object",
+            Params={
+                "Bucket": self.files_bucket.name,
+                "Key": compat_bytes(jobStoreFileID),
+                "VersionId": info.version,
+            },
+            ExpiresIn=self.publicUrlExpiration.total_seconds(),
+        )

         # boto doesn't properly remove the x-amz-security-token parameter when
         # query_auth is False when using an IAM role (see issue #2043). Including the
@@ -716,12 +895,12 @@ class AWSJobStore(AbstractJobStore):
         # even if the resource is public, so we need to remove it.
         scheme, netloc, path, query, fragment = urlsplit(url)
         params = parse_qs(query)
-        if
-            del params[
-        if
-            del params[
-        if
-            del params[
+        if "x-amz-security-token" in params:
+            del params["x-amz-security-token"]
+        if "AWSAccessKeyId" in params:
+            del params["AWSAccessKeyId"]
+        if "Signature" in params:
+            del params["Signature"]
         query = urlencode(params, doseq=True)
         url = urlunsplit((scheme, netloc, path, query, fragment))
         return url
@@ -730,12 +909,14 @@ class AWSJobStore(AbstractJobStore):
         self._requireValidSharedFileName(shared_file_name)
         return self.get_public_url(self._shared_file_id(shared_file_name))

-    def _bindBucket(
-
-
-
-
-
+    def _bindBucket(
+        self,
+        bucket_name: str,
+        create: bool = False,
+        block: bool = True,
+        versioning: bool = False,
+        check_versioning_consistency: bool = True,
+    ):
         """
         Return the Boto Bucket object representing the S3 bucket with the given name. If the
         bucket does not exist and `create` is True, it will be created.
@@ -760,8 +941,7 @@ class AWSJobStore(AbstractJobStore):
             Decide, given an error, whether we should retry binding the bucket.
             """

-            if
-                get_error_status(error) in (404, 409)):
+            if isinstance(error, ClientError) and get_error_status(error) in (404, 409):
                # Handle cases where the bucket creation is in a weird state that might let us proceed.
                # https://github.com/BD2KGenomics/toil/issues/955
                # https://github.com/BD2KGenomics/toil/issues/995
@@ -771,7 +951,7 @@ class AWSJobStore(AbstractJobStore):
                 # OperationAborted == 409
                 # NoSuchBucket == 404
                 return True
-            if get_error_code(error) ==
+            if get_error_code(error) == "SlowDown":
                 # We may get told to SlowDown by AWS when we try to create our
                 # bucket. In that case, we should retry and use the exponential
                 # backoff.
@@ -804,15 +984,17 @@ class AWSJobStore(AbstractJobStore):
                                 # NoSuchBucket. We let that kick us back up to the
                                 # main retry loop.
                                 assert (
-
+                                    get_bucket_region(bucket_name) == self.region
                                 ), f"bucket_name: {bucket_name}, {get_bucket_region(bucket_name)} != {self.region}"

                                 tags = build_tag_dict_from_env()

                                 if tags:
                                     flat_tags = flatten_tags(tags)
-                                    bucket_tagging = self.s3_resource.BucketTagging(
-
+                                    bucket_tagging = self.s3_resource.BucketTagging(
+                                        bucket_name
+                                    )
+                                    bucket_tagging.put(Tagging={"TagSet": flat_tags})

                                 # Configure bucket so that we can make objects in
                                 # it public, which was the historical default.
@@ -825,7 +1007,9 @@ class AWSJobStore(AbstractJobStore):
                             # This is raised if the user attempts to get a bucket in a region outside
                             # the specified one, if the specified one is not `us-east-1`. The us-east-1
                             # server allows a user to use buckets from any region.
-                            raise BucketLocationConflictException(
+                            raise BucketLocationConflictException(
+                                get_bucket_region(bucket_name)
+                            )
                         else:
                             raise
                 else:
@@ -842,24 +1026,32 @@ class AWSJobStore(AbstractJobStore):
                 # consistent?
                 time.sleep(1)
                 while not self._getBucketVersioning(bucket_name):
-                    logger.warning(
+                    logger.warning(
+                        f"Waiting for versioning activation on bucket '{bucket_name}'..."
+                    )
                     time.sleep(1)
             elif check_versioning_consistency:
                 # now test for versioning consistency
                 # we should never see any of these errors since 'versioning' should always be true
                 bucket_versioning = self._getBucketVersioning(bucket_name)
                 if bucket_versioning != versioning:
-                    assert False,
+                    assert False, "Cannot modify versioning on existing bucket"
                 elif bucket_versioning is None:
-                    assert False,
+                    assert False, "Cannot use a bucket with versioning suspended"
             if bucketExisted:
-                logger.debug(
+                logger.debug(
+                    f"Using pre-existing job store bucket '{bucket_name}'."
+                )
             else:
-                logger.debug(
+                logger.debug(
+                    f"Created new job store bucket '{bucket_name}' with versioning state {versioning}."
+                )

             return bucket

-    def _bindDomain(
+    def _bindDomain(
+        self, domain_name: str, create: bool = False, block: bool = True
+    ) -> None:
         """
         Return the Boto3 domain name representing the SDB domain. When create=True, it will
         create the domain if it does not exist.
@@ -878,9 +1070,11 @@ class AWSJobStore(AbstractJobStore):
         retry timeout expires.
         """
         logger.debug("Binding to job store domain '%s'.", domain_name)
-        retryargs = dict(
+        retryargs = dict(
+            predicate=lambda e: no_such_sdb_domain(e) or sdb_unavailable(e)
+        )
         if not block:
-            retryargs[
+            retryargs["timeout"] = 15
         for attempt in retry_sdb(**retryargs):
             with attempt:
                 try:
@@ -902,13 +1096,13 @@ class AWSJobStore(AbstractJobStore):
         return str(uuid.uuid4())

     # A dummy job ID under which all shared files are stored
-    sharedFileOwnerID = uuid.UUID(
+    sharedFileOwnerID = uuid.UUID("891f7db6-e4d9-4221-a58e-ab6cc4395f94")

     # A dummy job ID under which all unread stats files are stored
-    statsFileOwnerID = uuid.UUID(
+    statsFileOwnerID = uuid.UUID("bfcf5286-4bc7-41ef-a85d-9ab415b69d53")

     # A dummy job ID under which all read stats files are stored
-    readStatsFileOwnerID = uuid.UUID(
+    readStatsFileOwnerID = uuid.UUID("e77fc3aa-d232-4255-ae04-f64ee8eb0bfa")

     def _shared_file_id(self, shared_file_name):
         return str(uuid.uuid5(self.sharedFileOwnerID, shared_file_name))
@@ -918,13 +1112,22 @@ class AWSJobStore(AbstractJobStore):
         """
         Represents a file in this job store.
         """
+
         outer = None
         """
         :type: AWSJobStore
         """

-        def __init__(
-
+        def __init__(
+            self,
+            fileID,
+            ownerID,
+            encrypted,
+            version=None,
+            content=None,
+            numContentChunks=0,
+            checksum=None,
+        ):
             """
             :type fileID: str
             :param fileID: the file's ID
@@ -1003,24 +1206,30 @@ class AWSJobStore(AbstractJobStore):
             assert content is None or isinstance(content, bytes)
             self._content = content
             if content is not None:
-                self.version =
+                self.version = ""

         @classmethod
         def create(cls, ownerID: str):
-            return cls(
+            return cls(
+                str(uuid.uuid4()), ownerID, encrypted=cls.outer.sseKeyPath is not None
+            )

         @classmethod
         def presenceIndicator(cls):
-            return
+            return "encrypted"

         @classmethod
         def exists(cls, jobStoreFileID):
             for attempt in retry_sdb():
                 with attempt:
-                    return bool(
-
-
-
+                    return bool(
+                        cls.outer.db.get_attributes(
+                            DomainName=cls.outer.files_domain_name,
+                            ItemName=compat_bytes(jobStoreFileID),
+                            AttributeNames=[cls.presenceIndicator()],
+                            ConsistentRead=True,
+                        ).get("Attributes", [])
+                    )

         @classmethod
         def load(cls, jobStoreFileID):
@@ -1029,10 +1238,13 @@ class AWSJobStore(AbstractJobStore):
                     self = cls.fromItem(
                         {
                             "Name": compat_bytes(jobStoreFileID),
-                            "Attributes": cls.outer.db.get_attributes(
-
-
-
+                            "Attributes": cls.outer.db.get_attributes(
+                                DomainName=cls.outer.files_domain_name,
+                                ItemName=compat_bytes(jobStoreFileID),
+                                ConsistentRead=True,
+                            ).get("Attributes", []),
+                        }
+                    )
                     return self

         @classmethod
@@ -1062,7 +1274,7 @@ class AWSJobStore(AbstractJobStore):
             return self

         @classmethod
-        def fromItem(cls, item: ItemTypeDef):
+        def fromItem(cls, item: "ItemTypeDef"):
             """
             Convert an SDB item to an instance of this class.

@@ -1075,7 +1287,9 @@ class AWSJobStore(AbstractJobStore):
                 return s if s is None else str(s)

             # ownerID and encrypted are the only mandatory attributes
-            ownerID, encrypted, version, checksum = SDBHelper.get_attributes_from_item(
+            ownerID, encrypted, version, checksum = SDBHelper.get_attributes_from_item(
+                item, ["ownerID", "encrypted", "version", "checksum"]
+            )
             if ownerID is None:
                 assert encrypted is None
                 return None
@@ -1085,14 +1299,23 @@ class AWSJobStore(AbstractJobStore):
             if encrypted:
                 sseKeyPath = cls.outer.sseKeyPath
                 if sseKeyPath is None:
-                    raise AssertionError(
+                    raise AssertionError(
+                        "Content is encrypted but no key was provided."
+                    )
                 if content is not None:
                     content = encryption.decrypt(content, sseKeyPath)
-            self = cls(
-
+            self = cls(
+                fileID=item["Name"],
+                ownerID=ownerID,
+                encrypted=encrypted,
+                version=version,
+                content=content,
+                numContentChunks=numContentChunks,
+                checksum=checksum,
+            )
             return self

-        def toItem(self) ->
+        def toItem(self) -> tuple[dict[str, str], int]:
             """
             Convert this instance to a dictionary of attribute names to values

@@ -1104,15 +1327,21 @@ class AWSJobStore(AbstractJobStore):
             if self.encrypted and content is not None:
                 sseKeyPath = self.outer.sseKeyPath
                 if sseKeyPath is None:
-                    raise AssertionError(
+                    raise AssertionError(
+                        "Encryption requested but no key was provided."
+                    )
                 content = encryption.encrypt(content, sseKeyPath)
             assert content is None or isinstance(content, bytes)
             attributes = self.binaryToAttributes(content)
-            numChunks = int(attributes[
-            attributes.update(
-
-
-
+            numChunks = int(attributes["numChunks"])
+            attributes.update(
+                dict(
+                    ownerID=self.ownerID or "",
+                    encrypted=str(self.encrypted),
+                    version=self.version or "",
+                    checksum=self.checksum or "",
+                )
+            )
             return attributes, numChunks

         @classmethod
@@ -1128,24 +1357,37 @@ class AWSJobStore(AbstractJobStore):
             attributes_boto3 = SDBHelper.attributeDictToList(attributes)
             # False stands for absence
             if self.previousVersion is None:
-                expected: UpdateConditionTypeDef = {
+                expected: "UpdateConditionTypeDef" = {
+                    "Name": "version",
+                    "Exists": False,
+                }
             else:
-                expected = {"Name":
+                expected = {"Name": "version", "Value": cast(str, self.previousVersion)}
             try:
                 for attempt in retry_sdb():
                     with attempt:
-                        self.outer.db.put_attributes(
-
-
-
-
+                        self.outer.db.put_attributes(
+                            DomainName=self.outer.files_domain_name,
+                            ItemName=compat_bytes(self.fileID),
+                            Attributes=[
+                                {
+                                    "Name": attribute["Name"],
+                                    "Value": attribute["Value"],
+                                    "Replace": True,
+                                }
+                                for attribute in attributes_boto3
+                            ],
+                            Expected=expected,
+                        )
                 # clean up the old version of the file if necessary and safe
                 if self.previousVersion and (self.previousVersion != self.version):
                     for attempt in retry_s3():
                         with attempt:
-                            self.outer.s3_client.delete_object(
-
-
+                            self.outer.s3_client.delete_object(
+                                Bucket=self.outer.files_bucket.name,
+                                Key=compat_bytes(self.fileID),
+                                VersionId=self.previousVersion,
+                            )
                 self._previousVersion = self._version
                 if numNewContentChunks < self._numContentChunks:
                     residualChunks = range(numNewContentChunks, self._numContentChunks)
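
Note: the `Expected` argument in the hunk above is what makes the SimpleDB write safe against concurrent writers — the put only succeeds if the stored "version" attribute is still in the last-observed state, and a failed condition surfaces as a ConditionalCheckFailed ClientError (mapped below to ConcurrentFileModificationException). A minimal standalone sketch of the same optimistic-concurrency pattern, not Toil's code; the domain, item, and region names are made up:

    import boto3
    from botocore.exceptions import ClientError

    sdb = boto3.client("sdb", region_name="us-west-2")  # hypothetical region

    def save_version(domain, item, new_version, previous_version=None):
        # First write: require the attribute to be absent. Update: require the old value.
        if previous_version is None:
            expected = {"Name": "version", "Exists": False}
        else:
            expected = {"Name": "version", "Value": previous_version}
        try:
            sdb.put_attributes(
                DomainName=domain,
                ItemName=item,
                Attributes=[{"Name": "version", "Value": new_version, "Replace": True}],
                Expected=expected,
            )
        except ClientError as e:
            if e.response["Error"]["Code"] == "ConditionalCheckFailed":
                raise RuntimeError(f"Concurrent modification of {item}") from e
            raise
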
@@ -1153,19 +1395,26 @@ class AWSJobStore(AbstractJobStore):
                     # boto3 requires providing the value as well as the name in the attribute, and we don't store it locally
                     # the php sdk resolves this issue by not requiring the Value key https://github.com/aws/aws-sdk-php/issues/185
                     # but this doesnt extend to boto3
-                    delete_attributes = self.outer.db.get_attributes(
-
-
+                    delete_attributes = self.outer.db.get_attributes(
+                        DomainName=self.outer.files_domain_name,
+                        ItemName=compat_bytes(self.fileID),
+                        AttributeNames=[chunk for chunk in residual_chunk_names],
+                    ).get("Attributes")
                     for attempt in retry_sdb():
                         with attempt:
-                            self.outer.db.delete_attributes(
-
-
-
+                            self.outer.db.delete_attributes(
+                                DomainName=self.outer.files_domain_name,
+                                ItemName=compat_bytes(self.fileID),
+                                Attributes=delete_attributes,
+                            )
+                            self.outer.db.get_attributes(
+                                DomainName=self.outer.files_domain_name,
+                                ItemName=compat_bytes(self.fileID),
+                            )

                 self._numContentChunks = numNewContentChunks
             except ClientError as e:
-                if get_error_code(e) ==
+                if get_error_code(e) == "ConditionalCheckFailed":
                     raise ConcurrentFileModificationException(self.fileID)
                 else:
                     raise
@@ -1173,24 +1422,30 @@ class AWSJobStore(AbstractJobStore):
         def upload(self, localFilePath, calculateChecksum=True):
             file_size, file_time = fileSizeAndTime(localFilePath)
             if file_size <= self.maxInlinedSize():
-                with open(localFilePath,
+                with open(localFilePath, "rb") as f:
                     self.content = f.read()
                 # Clear out any old checksum in case of overwrite
-                self.checksum =
+                self.checksum = ""
             else:
                 headerArgs = self._s3EncryptionArgs()
                 # Create a new Resource in case it needs to be on its own thread
-                resource = boto3_session.resource(
-
-                self.checksum =
-
-
-
-
-
-
-
-
+                resource = boto3_session.resource("s3", region_name=self.outer.region)
+
+                self.checksum = (
+                    self._get_file_checksum(localFilePath)
+                    if calculateChecksum
+                    else None
+                )
+                self.version = uploadFromPath(
+                    localFilePath,
+                    resource=resource,
+                    bucketName=self.outer.files_bucket.name,
+                    fileID=compat_bytes(self.fileID),
+                    headerArgs=headerArgs,
+                    partSize=self.outer.part_size,
+                )
+
+        def _start_checksum(self, to_match=None, algorithm="sha1"):
             """
             Get a hasher that can be used with _update_checksum and
             _finish_checksum.
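
Note: `upload()` above routes by size — files at or under the inline threshold are read into `self.content` and stored with the SimpleDB metadata record, while larger files go to S3 and are identified by the resulting object version. A reduced sketch of that routing; MAX_INLINED_SIZE and the upload_to_s3 callable are illustrative stand-ins, not Toil's API:

    import os
    from typing import Callable, Optional, Tuple

    MAX_INLINED_SIZE = 256  # hypothetical threshold, in bytes

    def store_file(path: str, upload_to_s3: Callable[[str], str]) -> Tuple[Optional[bytes], Optional[str]]:
        # Returns (inline_content, s3_version); exactly one of the two is set.
        if os.path.getsize(path) <= MAX_INLINED_SIZE:
            with open(path, "rb") as f:
                return f.read(), None  # small file: bytes live beside the metadata record
        return None, upload_to_s3(path)  # large file: the S3 object version is its identity
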
@@ -1208,12 +1463,12 @@ class AWSJobStore(AbstractJobStore):
             expected = None

             if to_match is not None:
-                parts = to_match.split(
+                parts = to_match.split("$")
                 algorithm = parts[0]
                 expected = parts[1]

             wrapped = getattr(hashlib, algorithm)()
-            logger.debug(f
+            logger.debug(f"Starting {algorithm} checksum to match {expected}")
             return algorithm, wrapped, expected

         def _update_checksum(self, checksum_in_progress, data):
@@ -1230,26 +1485,32 @@ class AWSJobStore(AbstractJobStore):

             result_hash = checksum_in_progress[1].hexdigest()

-            logger.debug(
+            logger.debug(
+                f"Completed checksum with hash {result_hash} vs. expected {checksum_in_progress[2]}"
+            )
             if checksum_in_progress[2] is not None:
                 # We expected a particular hash
                 if result_hash != checksum_in_progress[2]:
-                    raise ChecksumError(
-
+                    raise ChecksumError(
+                        "Checksum mismatch. Expected: %s Actual: %s"
+                        % (checksum_in_progress[2], result_hash)
+                    )

-            return
+            return "$".join([checksum_in_progress[0], result_hash])

         def _get_file_checksum(self, localFilePath, to_match=None):
-            with open(localFilePath,
+            with open(localFilePath, "rb") as f:
                 hasher = self._start_checksum(to_match=to_match)
                 contents = f.read(1024 * 1024)
-                while contents != b
+                while contents != b"":
                     self._update_checksum(hasher, contents)
                     contents = f.read(1024 * 1024)
                 return self._finish_checksum(hasher)

         @contextmanager
-        def uploadStream(
+        def uploadStream(
+            self, multipart=True, allowInlining=True, encoding=None, errors=None
+        ):
             """
             Context manager that gives out a binary or text mode upload stream to upload data.
             """
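
Note: the _start_checksum/_update_checksum/_finish_checksum trio above implements a streaming checksum whose serialized form is "<algorithm>$<hexdigest>", which is why _start_checksum splits an expected value on "$". Equivalent standalone logic as a sketch, using only the standard library:

    import hashlib

    def file_checksum(path, algorithm="sha1", chunk_size=1024 * 1024):
        hasher = hashlib.new(algorithm)
        with open(path, "rb") as f:
            while True:
                chunk = f.read(chunk_size)  # stream so large files never sit in memory
                if chunk == b"":
                    break
                hasher.update(chunk)
        return f"{algorithm}${hasher.hexdigest()}"

    def verify(path, expected):
        algorithm = expected.partition("$")[0]
        actual = file_checksum(path, algorithm)
        if actual != expected:
            raise ValueError(f"Checksum mismatch. Expected: {expected} Actual: {actual}")
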
@@ -1270,14 +1531,14 @@ class AWSJobStore(AbstractJobStore):
                     assert isinstance(buf, bytes)

                     if allowInlining and len(buf) <= info.maxInlinedSize():
-                        logger.debug(
+                        logger.debug("Inlining content of %d bytes", len(buf))
                         info.content = buf
                         # There will be no checksum
-                        info.checksum =
+                        info.checksum = ""
                     else:
                         # We will compute a checksum
                         hasher = info._start_checksum()
-                        logger.debug(
+                        logger.debug("Updating checksum with %d bytes", len(buf))
                         info._update_checksum(hasher, buf)

                         client = store.s3_client
@@ -1286,47 +1547,72 @@ class AWSJobStore(AbstractJobStore):

                         for attempt in retry_s3():
                             with attempt:
-                                logger.debug(
+                                logger.debug("Starting multipart upload")
                                 # low-level clients are thread safe
-                                upload = client.create_multipart_upload(
-
-
-
+                                upload = client.create_multipart_upload(
+                                    Bucket=bucket_name,
+                                    Key=compat_bytes(info.fileID),
+                                    **headerArgs,
+                                )
+                                uploadId = upload["UploadId"]
                                 parts = []
-                                logger.debug(
+                                logger.debug("Multipart upload started as %s", uploadId)

                         for attempt in retry_s3():
                             with attempt:
                                 for i in range(CONSISTENCY_TICKS):
                                     # Sometimes we can create a multipart upload and not see it. Wait around for it.
-                                    response = client.list_multipart_uploads(
-
-
-
-
-
-
-
+                                    response = client.list_multipart_uploads(
+                                        Bucket=bucket_name,
+                                        MaxUploads=1,
+                                        Prefix=compat_bytes(info.fileID),
+                                    )
+                                    if (
+                                        "Uploads" in response
+                                        and len(response["Uploads"]) != 0
+                                        and response["Uploads"][0]["UploadId"]
+                                        == uploadId
+                                    ):
+
+                                        logger.debug(
+                                            "Multipart upload visible as %s", uploadId
+                                        )
                                         break
                                     else:
-                                        logger.debug(
-
+                                        logger.debug(
+                                            "Multipart upload %s is not visible; we see %s",
+                                            uploadId,
+                                            response.get("Uploads"),
+                                        )
+                                        time.sleep(CONSISTENCY_TIME * 2**i)

                         try:
                             for part_num in itertools.count():
                                 for attempt in retry_s3():
                                     with attempt:
-                                        logger.debug(
+                                        logger.debug(
+                                            "Uploading part %d of %d bytes to %s",
+                                            part_num + 1,
+                                            len(buf),
+                                            uploadId,
+                                        )
                                         # TODO: include the Content-MD5 header:
                                         # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.complete_multipart_upload
-                                        part = client.upload_part(
-
-
-
-
-
-
-
+                                        part = client.upload_part(
+                                            Bucket=bucket_name,
+                                            Key=compat_bytes(info.fileID),
+                                            PartNumber=part_num + 1,
+                                            UploadId=uploadId,
+                                            Body=BytesIO(buf),
+                                            **headerArgs,
+                                        )
+
+                                        parts.append(
+                                            {
+                                                "PartNumber": part_num + 1,
+                                                "ETag": part["ETag"],
+                                            }
+                                        )

                                 # Get the next block of data we want to put
                                 buf = readable.read(info.outer.part_size)
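
Note: for readers unfamiliar with the S3 calls in this hunk, the bare-bones multipart lifecycle is create → upload parts → complete (or abort). A sketch with plain boto3 and none of Toil's retry or SSE-C machinery; `chunks` is any iterable of byte strings, all but the last at least 5 MiB:

    import boto3

    def multipart_upload(bucket, key, chunks):
        s3 = boto3.client("s3")
        upload = s3.create_multipart_upload(Bucket=bucket, Key=key)
        upload_id = upload["UploadId"]
        parts = []
        try:
            for number, chunk in enumerate(chunks, start=1):
                part = s3.upload_part(
                    Bucket=bucket, Key=key, PartNumber=number,
                    UploadId=upload_id, Body=chunk,
                )
                parts.append({"PartNumber": number, "ETag": part["ETag"]})
            s3.complete_multipart_upload(
                Bucket=bucket, Key=key, UploadId=upload_id,
                MultipartUpload={"Parts": parts},
            )
        except Exception:
            # Abandoned parts keep accruing storage charges until aborted.
            s3.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id)
            raise
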
@@ -1339,15 +1625,21 @@ class AWSJobStore(AbstractJobStore):
                             with panic(log=logger):
                                 for attempt in retry_s3():
                                     with attempt:
-                                        client.abort_multipart_upload(
-
-
+                                        client.abort_multipart_upload(
+                                            Bucket=bucket_name,
+                                            Key=compat_bytes(info.fileID),
+                                            UploadId=uploadId,
+                                        )

                         else:

-                            while not store._getBucketVersioning(
-
-
+                            while not store._getBucketVersioning(
+                                store.files_bucket.name
+                            ):
+                                logger.warning(
+                                    "Versioning does not appear to be enabled yet. Deferring multipart "
+                                    "upload completion..."
+                                )
                                 time.sleep(1)

                             # Save the checksum
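
Note: `_getBucketVersioning` is Toil's wrapper; the underlying fact is that a bucket's versioning status can lag briefly after being enabled, so the code above polls before completing the upload. A standalone sketch of the polling idea with the plain boto3 call:

    import time
    import boto3

    def wait_for_versioning(bucket, timeout=60.0):
        s3 = boto3.client("s3")
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            # "Status" is absent entirely until versioning has ever been configured.
            if s3.get_bucket_versioning(Bucket=bucket).get("Status") == "Enabled":
                return
            time.sleep(1)
        raise TimeoutError(f"Versioning on {bucket} never became Enabled")
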
@@ -1359,32 +1651,46 @@ class AWSJobStore(AbstractJobStore):
                                 # in tests
                                 # (https://github.com/DataBiosphere/toil/issues/3894)
                                 with attempt:
-                                    logger.debug(
+                                    logger.debug("Attempting to complete upload...")
                                     completed = client.complete_multipart_upload(
                                         Bucket=bucket_name,
                                         Key=compat_bytes(info.fileID),
                                         UploadId=uploadId,
-                                        MultipartUpload={"Parts": parts}
-
-
-
-
-
+                                        MultipartUpload={"Parts": parts},
+                                    )
+
+                                    logger.debug(
+                                        "Completed upload object of type %s: %s",
+                                        str(type(completed)),
+                                        repr(completed),
+                                    )
+                                    info.version = completed.get("VersionId")
+                                    logger.debug(
+                                        "Completed upload with version %s",
+                                        str(info.version),
+                                    )

                             if info.version is None:
                                 # Somehow we don't know the version. Try and get it.
-                                for attempt in retry_s3(
+                                for attempt in retry_s3(
+                                    predicate=lambda e: retryable_s3_errors(e)
+                                    or isinstance(e, AssertionError)
+                                ):
                                     with attempt:
-                                        version = client.head_object(
-
-
-
-
+                                        version = client.head_object(
+                                            Bucket=bucket_name,
+                                            Key=compat_bytes(info.fileID),
+                                            **headerArgs,
+                                        ).get("VersionId", None)
+                                        logger.warning(
+                                            "Loaded key for upload with no version and got version %s",
+                                            str(version),
+                                        )
                                 info.version = version
                             assert info.version is not None

                         # Make sure we actually wrote something, even if an empty file
-                        assert
+                        assert bool(info.version) or info.content is not None

                     class SinglePartPipe(WritablePipe):
                         def readFrom(self, readable):
@@ -1392,10 +1698,10 @@ class AWSJobStore(AbstractJobStore):
                             assert isinstance(buf, bytes)
                             dataLength = len(buf)
                             if allowInlining and dataLength <= info.maxInlinedSize():
-                                logger.debug(
+                                logger.debug("Inlining content of %d bytes", len(buf))
                                 info.content = buf
                                 # There will be no checksum
-                                info.checksum =
+                                info.checksum = ""
                             else:
                                 # We will compute a checksum
                                 hasher = info._start_checksum()
@@ -1409,39 +1715,57 @@ class AWSJobStore(AbstractJobStore):
                                 buf = BytesIO(buf)

                                 while not store._getBucketVersioning(bucket_name):
-                                    logger.warning(
-
+                                    logger.warning(
+                                        "Versioning does not appear to be enabled yet. Deferring single part "
+                                        "upload..."
+                                    )
                                     time.sleep(1)

                                 for attempt in retry_s3():
                                     with attempt:
-                                        logger.debug(
-
-
-
-
+                                        logger.debug(
+                                            "Uploading single part of %d bytes", dataLength
+                                        )
+                                        client.upload_fileobj(
+                                            Bucket=bucket_name,
+                                            Key=compat_bytes(info.fileID),
+                                            Fileobj=buf,
+                                            ExtraArgs=headerArgs,
+                                        )

                                         # use head_object with the SSE headers to access versionId and content_length attributes
-                                        headObj = client.head_object(
-
-
-
-
-
+                                        headObj = client.head_object(
+                                            Bucket=bucket_name,
+                                            Key=compat_bytes(info.fileID),
+                                            **headerArgs,
+                                        )
+                                        assert dataLength == headObj.get("ContentLength", None)
+                                        info.version = headObj.get("VersionId", None)
+                                        logger.debug(
+                                            "Upload received version %s", str(info.version)
+                                        )

                                 if info.version is None:
                                     # Somehow we don't know the version
-                                    for attempt in retry_s3(
+                                    for attempt in retry_s3(
+                                        predicate=lambda e: retryable_s3_errors(e)
+                                        or isinstance(e, AssertionError)
+                                    ):
                                         with attempt:
-                                            headObj = client.head_object(
-
-
-
-
+                                            headObj = client.head_object(
+                                                Bucket=bucket_name,
+                                                Key=compat_bytes(info.fileID),
+                                                **headerArgs,
+                                            )
+                                            info.version = headObj.get("VersionId", None)
+                                            logger.warning(
+                                                "Reloaded key with no version and got version %s",
+                                                str(info.version),
+                                            )
                                     assert info.version is not None

                                 # Make sure we actually wrote something, even if an empty file
-                                assert
+                                assert bool(info.version) or info.content is not None

                     if multipart:
                         pipe = MultiPartPipe(encoding=encoding, errors=errors)
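
Note: the single-part path above leans on `head_object` because `upload_fileobj` returns nothing — the round trip is the only way to learn the new object's version and to double-check its size. A reduced sketch with plain boto3 (no SSE-C headers or retries; "VersionId" is only present once bucket versioning is enabled):

    from io import BytesIO
    import boto3

    def upload_single_part(bucket, key, data):
        s3 = boto3.client("s3")
        s3.upload_fileobj(Fileobj=BytesIO(data), Bucket=bucket, Key=key)
        head = s3.head_object(Bucket=bucket, Key=key)
        assert head["ContentLength"] == len(data)  # sanity check on what landed
        return head["VersionId"]  # requires versioning on the bucket
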
@@ -1452,20 +1776,22 @@ class AWSJobStore(AbstractJobStore):
             yield writable

             if not pipe.reader_done:
-                logger.debug(f
-                raise RuntimeError(
+                logger.debug(f"Version: {self.version} Content: {self.content}")
+                raise RuntimeError(
+                    "Escaped context manager without written data being read!"
+                )

             # We check our work to make sure we have exactly one of embedded
             # content or a real object version.

             if self.content is None:
                 if not bool(self.version):
-                    logger.debug(f
-                    raise RuntimeError(
+                    logger.debug(f"Version: {self.version} Content: {self.content}")
+                    raise RuntimeError("No content added and no version created")
             else:
                 if bool(self.version):
-                    logger.debug(f
-                    raise RuntimeError(
+                    logger.debug(f"Version: {self.version} Content: {self.content}")
+                    raise RuntimeError("Content added and version created")

         def copyFrom(self, srcObj):
             """
@@ -1475,18 +1801,20 @@ class AWSJobStore(AbstractJobStore):
             """
             assert srcObj.content_length is not None
             if srcObj.content_length <= self.maxInlinedSize():
-                self.content = srcObj.get().get(
+                self.content = srcObj.get().get("Body").read()
             else:
                 # Create a new Resource in case it needs to be on its own thread
-                resource = boto3_session.resource(
-                self.version = copyKeyMultipart(
-
-
-
-
-
-
-
+                resource = boto3_session.resource("s3", region_name=self.outer.region)
+                self.version = copyKeyMultipart(
+                    resource,
+                    srcBucketName=compat_bytes(srcObj.bucket_name),
+                    srcKeyName=compat_bytes(srcObj.key),
+                    srcKeyVersion=compat_bytes(srcObj.version_id),
+                    dstBucketName=compat_bytes(self.outer.files_bucket.name),
+                    dstKeyName=compat_bytes(self._fileID),
+                    sseAlgorithm="AES256",
+                    sseKey=self._getSSEKey(),
+                )

         def copyTo(self, dstObj):
             """
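
Note: `copyKeyMultipart` is Toil's helper for server-side copies above the single-request size limit. For objects within that limit, the same SSE-C handling — presenting the source key to read and re-applying a key to the destination — can be seen with plain `copy_object`. A sketch; the bucket and key names are placeholders:

    import boto3

    def copy_encrypted(src_bucket, src_key, dst_bucket, dst_key, sse_key):
        # sse_key must be the same 256-bit customer key the source was written with.
        s3 = boto3.client("s3")
        s3.copy_object(
            Bucket=dst_bucket,
            Key=dst_key,
            CopySource={"Bucket": src_bucket, "Key": src_key},
            CopySourceSSECustomerAlgorithm="AES256",
            CopySourceSSECustomerKey=sse_key,  # boto3 base64-encodes and adds the MD5 header
            SSECustomerAlgorithm="AES256",
            SSECustomerKey=sse_key,
        )
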
@@ -1500,35 +1828,43 @@ class AWSJobStore(AbstractJobStore):
                 dstObj.put(Body=self.content)
             elif self.version:
                 # Create a new Resource in case it needs to be on its own thread
-                resource = boto3_session.resource(
+                resource = boto3_session.resource("s3", region_name=self.outer.region)

                 for attempt in retry_s3():
                     # encrypted = True if self.outer.sseKeyPath else False
                     with attempt:
-                        copyKeyMultipart(
-
-
-
-
-
-
-
+                        copyKeyMultipart(
+                            resource,
+                            srcBucketName=compat_bytes(self.outer.files_bucket.name),
+                            srcKeyName=compat_bytes(self.fileID),
+                            srcKeyVersion=compat_bytes(self.version),
+                            dstBucketName=compat_bytes(dstObj.bucket_name),
+                            dstKeyName=compat_bytes(dstObj.key),
+                            copySourceSseAlgorithm="AES256",
+                            copySourceSseKey=self._getSSEKey(),
+                        )
             else:
                 assert False

         def download(self, localFilePath, verifyChecksum=True):
             if self.content is not None:
                 with AtomicFileCreate(localFilePath) as tmpPath:
-                    with open(tmpPath,
+                    with open(tmpPath, "wb") as f:
                         f.write(self.content)
             elif self.version:
                 headerArgs = self._s3EncryptionArgs()
                 obj = self.outer.files_bucket.Object(compat_bytes(self.fileID))

-                for attempt in retry_s3(
+                for attempt in retry_s3(
+                    predicate=lambda e: retryable_s3_errors(e)
+                    or isinstance(e, ChecksumError)
+                ):
                     with attempt:
                         with AtomicFileCreate(localFilePath) as tmpPath:
-                            obj.download_file(
+                            obj.download_file(
+                                Filename=tmpPath,
+                                ExtraArgs={"VersionId": self.version, **headerArgs},
+                            )

                 if verifyChecksum and self.checksum:
                     try:
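
Note: the custom retry predicate above treats ChecksumError like a transient S3 failure, so a corrupted transfer is simply downloaded again inside the same retry budget. The shape of that loop, reduced to a sketch with stand-in callables rather than Toil's retry_s3:

    class ChecksumError(ValueError):
        pass

    def download_verified(download, verify, attempts=3):
        # download(): fetch to a temporary path; verify(): raise ChecksumError on mismatch.
        for attempt in range(attempts):
            download()
            try:
                verify()
                return
            except ChecksumError:
                if attempt == attempts - 1:
                    raise  # out of retries; surface the mismatch to the caller
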
@@ -1536,7 +1872,10 @@ class AWSJobStore(AbstractJobStore):
                         self._get_file_checksum(localFilePath, self.checksum)
                     except ChecksumError as e:
                         # Annotate checksum mismatches with file name
-                        raise ChecksumError(
+                        raise ChecksumError(
+                            "Checksums do not match for file %s."
+                            % localFilePath
+                        ) from e
                         # The error will get caught and result in a retry of the download until we run out of retries.
                         # TODO: handle obviously truncated downloads by resuming instead.
             else:
@@ -1558,7 +1897,10 @@ class AWSJobStore(AbstractJobStore):
                     obj = info.outer.files_bucket.Object(compat_bytes(info.fileID))
                     for attempt in retry_s3():
                         with attempt:
-                            obj.download_fileobj(
+                            obj.download_fileobj(
+                                writable,
+                                ExtraArgs={"VersionId": info.version, **headerArgs},
+                            )
                 else:
                     assert False

@@ -1574,7 +1916,7 @@ class AWSJobStore(AbstractJobStore):
                 def transform(self, readable, writable):
                     hasher = info._start_checksum(to_match=info.checksum)
                     contents = readable.read(1024 * 1024)
-                    while contents != b
+                    while contents != b"":
                         info._update_checksum(hasher, contents)
                         try:
                             writable.write(contents)
@@ -1591,7 +1933,9 @@ class AWSJobStore(AbstractJobStore):
             if verifyChecksum and self.checksum:
                 with DownloadPipe() as readable:
                     # Interpose a pipe to check the hash
-                    with HashingPipe(
+                    with HashingPipe(
+                        readable, encoding=encoding, errors=errors
+                    ) as verified:
                         yield verified
             else:
                 # Readable end of pipe produces text mode output if encoding specified
@@ -1602,18 +1946,25 @@ class AWSJobStore(AbstractJobStore):
         def delete(self):
             store = self.outer
             if self.previousVersion is not None:
-                expected: UpdateConditionTypeDef = {
+                expected: "UpdateConditionTypeDef" = {
+                    "Name": "version",
+                    "Value": cast(str, self.previousVersion),
+                }
                 for attempt in retry_sdb():
                     with attempt:
-                        store.db.delete_attributes(
-
-
+                        store.db.delete_attributes(
+                            DomainName=store.files_domain_name,
+                            ItemName=compat_bytes(self.fileID),
+                            Expected=expected,
+                        )
                 if self.previousVersion:
                     for attempt in retry_s3():
                         with attempt:
-                            store.s3_client.delete_object(
-
-
+                            store.s3_client.delete_object(
+                                Bucket=store.files_bucket.name,
+                                Key=compat_bytes(self.fileID),
+                                VersionId=self.previousVersion,
+                            )

         def getSize(self):
             """
@@ -1632,7 +1983,7 @@ class AWSJobStore(AbstractJobStore):
         def _getSSEKey(self) -> Optional[bytes]:
            sseKeyPath = self.outer.sseKeyPath
            if sseKeyPath:
-                with open(sseKeyPath,
+                with open(sseKeyPath, "rb") as f:
                    sseKey = f.read()
                return sseKey

@@ -1641,25 +1992,30 @@ class AWSJobStore(AbstractJobStore):
             # parameters and will be used to set the http headers
             if self.encrypted:
                 sseKey = self._getSSEKey()
-                assert
+                assert (
+                    sseKey is not None
+                ), "Content is encrypted but no key was provided."
                 assert len(sseKey) == 32
                 # boto3 encodes the key and calculates the MD5 for us
-                return {
+                return {"SSECustomerAlgorithm": "AES256", "SSECustomerKey": sseKey}
             else:
                 return {}

         def __repr__(self):
             r = custom_repr
-            d = (
-
-
-
-
-
-
-
-
-
+            d = (
+                ("fileID", r(self.fileID)),
+                ("ownerID", r(self.ownerID)),
+                ("encrypted", r(self.encrypted)),
+                ("version", r(self.version)),
+                ("previousVersion", r(self.previousVersion)),
+                ("content", r(self.content)),
+                ("checksum", r(self.checksum)),
+                ("_numContentChunks", r(self._numContentChunks)),
+            )
+            return "{}({})".format(
+                type(self).__name__, ", ".join(f"{k}={v}" for k, v in d)
+            )

         versionings = dict(Enabled=True, Disabled=False, Suspended=None)

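
Note: the dictionary returned by `_s3EncryptionArgs` above is standard boto3 SSE-C usage — pass the algorithm name and the raw 32-byte key, and boto3 derives the base64 and key-MD5 headers. A sketch with a throwaway key and a placeholder bucket name:

    import os
    import boto3

    key = os.urandom(32)  # SSE-C keys must be exactly 32 bytes
    sse_args = {"SSECustomerAlgorithm": "AES256", "SSECustomerKey": key}

    s3 = boto3.client("s3")
    s3.put_object(Bucket="example-bucket", Key="secret.bin", Body=b"hello", **sse_args)
    # Reads of an SSE-C object fail unless the same key is presented again:
    data = s3.get_object(Bucket="example-bucket", Key="secret.bin", **sse_args)["Body"].read()
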
@@ -1696,7 +2052,7 @@ class AWSJobStore(AbstractJobStore):
        if self.files_bucket is not None:
            self._delete_bucket(self.files_bucket)
            self.files_bucket = None
-        for name in
+        for name in "files_domain_name", "jobs_domain_name":
            domainName = getattr(self, name)
            if domainName is not None:
                self._delete_domain(domainName)
@@ -1720,12 +2076,14 @@ class AWSJobStore(AbstractJobStore):
        for attempt in retry_s3():
            with attempt:
                try:
-                    uploads = s3_boto3_client.list_multipart_uploads(
+                    uploads = s3_boto3_client.list_multipart_uploads(
+                        Bucket=bucket.name
+                    ).get("Uploads")
                    if uploads:
                        for u in uploads:
-                            s3_boto3_client.abort_multipart_upload(
-
-
+                            s3_boto3_client.abort_multipart_upload(
+                                Bucket=bucket.name, Key=u["Key"], UploadId=u["UploadId"]
+                            )

                    bucket.objects.all().delete()
                    bucket.object_versions.delete()
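
Note: the sweep above exists because pending multipart uploads are invisible to normal object listings and block bucket removal, so they must be aborted before the bucket can be deleted. A standalone sketch of the same cleanup using a paginator instead of a single truncated listing:

    import boto3

    def abort_all_multipart_uploads(bucket):
        s3 = boto3.client("s3")
        paginator = s3.get_paginator("list_multipart_uploads")
        for page in paginator.paginate(Bucket=bucket):
            for upload in page.get("Uploads", []):
                s3.abort_multipart_upload(
                    Bucket=bucket, Key=upload["Key"], UploadId=upload["UploadId"]
                )
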
@@ -1745,5 +2103,7 @@ custom_repr = aRepr.repr
 class BucketLocationConflictException(LocatorException):
     def __init__(self, bucketRegion):
         super().__init__(
-
-
+            "A bucket with the same name as the jobstore was found in another region (%s). "
+            "Cannot proceed as the unique bucket name is already in use.",
+            locator=bucketRegion,
+        )