toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff shows the publicly available content of the two package versions as published to their respective registries. It is provided for informational purposes only.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
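
The per-file report below (for toil/jobStores/aws/jobStore.py) is an ordinary unified diff between the two wheels. As a point of reference, a minimal sketch of how such a diff can be reproduced locally with only the standard library, assuming both wheel files have already been downloaded to the working directory (the file paths here are placeholders, not part of the published diff):

import difflib
import zipfile

# Hypothetical local paths to the two published wheels; wheels are plain zip archives.
OLD_WHEEL = "toil-6.1.0a1-py3-none-any.whl"
NEW_WHEEL = "toil-8.0.0-py3-none-any.whl"
MEMBER = "toil/jobStores/aws/jobStore.py"

def read_member(wheel_path: str, member: str) -> list[str]:
    """Read one file out of a wheel archive as a list of lines."""
    with zipfile.ZipFile(wheel_path) as wheel:
        return wheel.read(member).decode("utf-8").splitlines(keepends=True)

diff = difflib.unified_diff(
    read_member(OLD_WHEEL, MEMBER),
    read_member(NEW_WHEEL, MEMBER),
    fromfile=f"{OLD_WHEEL}/{MEMBER}",
    tofile=f"{NEW_WHEEL}/{MEMBER}",
)
print("".join(diff))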
toil/jobStores/aws/jobStore.py
CHANGED
|
@@ -21,47 +21,53 @@ import reprlib
|
|
|
21
21
|
import stat
|
|
22
22
|
import time
|
|
23
23
|
import uuid
|
|
24
|
+
from collections.abc import Generator
|
|
24
25
|
from contextlib import contextmanager
|
|
25
26
|
from io import BytesIO
|
|
26
|
-
from typing import
|
|
27
|
+
from typing import IO, TYPE_CHECKING, Optional, Union, cast
|
|
27
28
|
from urllib.parse import ParseResult, parse_qs, urlencode, urlsplit, urlunsplit
|
|
28
29
|
|
|
29
|
-
import boto.s3.connection
|
|
30
|
-
import boto.sdb
|
|
31
|
-
from boto.exception import SDBResponseError
|
|
32
30
|
from botocore.exceptions import ClientError
|
|
33
31
|
|
|
34
32
|
import toil.lib.encryption as encryption
|
|
35
33
|
from toil.fileStores import FileID
|
|
36
|
-
from toil.
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
34
|
+
from toil.job import Job, JobDescription
|
|
35
|
+
from toil.jobStores.abstractJobStore import (
|
|
36
|
+
AbstractJobStore,
|
|
37
|
+
ConcurrentFileModificationException,
|
|
38
|
+
JobStoreExistsException,
|
|
39
|
+
LocatorException,
|
|
40
|
+
NoSuchFileException,
|
|
41
|
+
NoSuchJobException,
|
|
42
|
+
NoSuchJobStoreException,
|
|
43
|
+
)
|
|
44
|
+
from toil.jobStores.aws.utils import (
|
|
45
|
+
SDBHelper,
|
|
46
|
+
ServerSideCopyProhibitedError,
|
|
47
|
+
copyKeyMultipart,
|
|
48
|
+
fileSizeAndTime,
|
|
49
|
+
no_such_sdb_domain,
|
|
50
|
+
retry_sdb,
|
|
51
|
+
sdb_unavailable,
|
|
52
|
+
uploadFile,
|
|
53
|
+
uploadFromPath,
|
|
54
|
+
)
|
|
55
|
+
from toil.jobStores.utils import ReadablePipe, ReadableTransformingPipe, WritablePipe
|
|
55
56
|
from toil.lib.aws import build_tag_dict_from_env
|
|
56
57
|
from toil.lib.aws.session import establish_boto3_session
|
|
57
|
-
from toil.lib.aws.utils import (
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
58
|
+
from toil.lib.aws.utils import (
|
|
59
|
+
NoBucketLocationError,
|
|
60
|
+
boto3_pager,
|
|
61
|
+
create_s3_bucket,
|
|
62
|
+
enable_public_objects,
|
|
63
|
+
flatten_tags,
|
|
64
|
+
get_bucket_region,
|
|
65
|
+
get_item_from_attributes,
|
|
66
|
+
get_object_for_url,
|
|
67
|
+
list_objects_for_url,
|
|
68
|
+
retry_s3,
|
|
69
|
+
retryable_s3_errors,
|
|
70
|
+
)
|
|
65
71
|
from toil.lib.compatibility import compat_bytes
|
|
66
72
|
from toil.lib.ec2nodes import EC2Regions
|
|
67
73
|
from toil.lib.exceptions import panic
|
|
@@ -70,9 +76,21 @@ from toil.lib.memoize import strict_bool
|
|
|
70
76
|
from toil.lib.objects import InnerClass
|
|
71
77
|
from toil.lib.retry import get_error_code, get_error_status, retry
|
|
72
78
|
|
|
79
|
+
if TYPE_CHECKING:
|
|
80
|
+
from mypy_boto3_sdb.type_defs import (
|
|
81
|
+
AttributeTypeDef,
|
|
82
|
+
DeletableItemTypeDef,
|
|
83
|
+
ItemTypeDef,
|
|
84
|
+
ReplaceableAttributeTypeDef,
|
|
85
|
+
ReplaceableItemTypeDef,
|
|
86
|
+
UpdateConditionTypeDef,
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
from toil import Config
|
|
90
|
+
|
|
73
91
|
boto3_session = establish_boto3_session()
|
|
74
|
-
s3_boto3_resource = boto3_session.resource(
|
|
75
|
-
s3_boto3_client = boto3_session.client(
|
|
92
|
+
s3_boto3_resource = boto3_session.resource("s3")
|
|
93
|
+
s3_boto3_client = boto3_session.client("s3")
|
|
76
94
|
logger = logging.getLogger(__name__)
|
|
77
95
|
|
|
78
96
|
# Sometimes we have to wait for multipart uploads to become real. How long
|
|
@@ -85,6 +103,13 @@ class ChecksumError(Exception):
|
|
|
85
103
|
"""Raised when a download from AWS does not contain the correct data."""
|
|
86
104
|
|
|
87
105
|
|
|
106
|
+
class DomainDoesNotExist(Exception):
|
|
107
|
+
"""Raised when a domain that is expected to exist does not exist."""
|
|
108
|
+
|
|
109
|
+
def __init__(self, domain_name):
|
|
110
|
+
super().__init__(f"Expected domain {domain_name} to exist!")
|
|
111
|
+
|
|
112
|
+
|
|
88
113
|
class AWSJobStore(AbstractJobStore):
|
|
89
114
|
"""
|
|
90
115
|
A job store that uses Amazon's S3 for file storage and SimpleDB for storing job info and
|
|
@@ -98,14 +123,14 @@ class AWSJobStore(AbstractJobStore):
|
|
|
98
123
|
# URLs where the may interfere with the certificate common name. We use a double
|
|
99
124
|
# underscore as a separator instead.
|
|
100
125
|
#
|
|
101
|
-
bucketNameRe = re.compile(r
|
|
126
|
+
bucketNameRe = re.compile(r"^[a-z0-9][a-z0-9-]+[a-z0-9]$")
|
|
102
127
|
|
|
103
128
|
# See http://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html
|
|
104
129
|
#
|
|
105
130
|
minBucketNameLen = 3
|
|
106
131
|
maxBucketNameLen = 63
|
|
107
132
|
maxNameLen = 10
|
|
108
|
-
nameSeparator =
|
|
133
|
+
nameSeparator = "--"
|
|
109
134
|
|
|
110
135
|
def __init__(self, locator: str, partSize: int = 50 << 20) -> None:
|
|
111
136
|
"""
|
|
@@ -116,37 +141,49 @@ class AWSJobStore(AbstractJobStore):
|
|
|
116
141
|
whole file
|
|
117
142
|
"""
|
|
118
143
|
super().__init__(locator)
|
|
119
|
-
region, namePrefix = locator.split(
|
|
144
|
+
region, namePrefix = locator.split(":")
|
|
120
145
|
regions = EC2Regions.keys()
|
|
121
146
|
if region not in regions:
|
|
122
147
|
raise ValueError(f'Region "{region}" is not one of: {regions}')
|
|
123
148
|
if not self.bucketNameRe.match(namePrefix):
|
|
124
|
-
raise ValueError(
|
|
125
|
-
|
|
126
|
-
|
|
149
|
+
raise ValueError(
|
|
150
|
+
"Invalid name prefix '%s'. Name prefixes must contain only digits, "
|
|
151
|
+
"hyphens or lower-case letters and must not start or end in a "
|
|
152
|
+
"hyphen." % namePrefix
|
|
153
|
+
)
|
|
127
154
|
# Reserve 13 for separator and suffix
|
|
128
|
-
if len(namePrefix) > self.maxBucketNameLen - self.maxNameLen - len(
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
155
|
+
if len(namePrefix) > self.maxBucketNameLen - self.maxNameLen - len(
|
|
156
|
+
self.nameSeparator
|
|
157
|
+
):
|
|
158
|
+
raise ValueError(
|
|
159
|
+
"Invalid name prefix '%s'. Name prefixes may not be longer than 50 "
|
|
160
|
+
"characters." % namePrefix
|
|
161
|
+
)
|
|
162
|
+
if "--" in namePrefix:
|
|
163
|
+
raise ValueError(
|
|
164
|
+
"Invalid name prefix '%s'. Name prefixes may not contain "
|
|
165
|
+
"%s." % (namePrefix, self.nameSeparator)
|
|
166
|
+
)
|
|
167
|
+
logger.debug(
|
|
168
|
+
"Instantiating %s for region %s and name prefix '%s'",
|
|
169
|
+
self.__class__,
|
|
170
|
+
region,
|
|
171
|
+
namePrefix,
|
|
172
|
+
)
|
|
136
173
|
self.region = region
|
|
137
|
-
self.
|
|
138
|
-
self.
|
|
139
|
-
self.
|
|
140
|
-
self.
|
|
141
|
-
self.
|
|
142
|
-
self.db =
|
|
143
|
-
|
|
144
|
-
self.s3_resource = boto3_session.resource(
|
|
174
|
+
self.name_prefix = namePrefix
|
|
175
|
+
self.part_size = partSize
|
|
176
|
+
self.jobs_domain_name: Optional[str] = None
|
|
177
|
+
self.files_domain_name: Optional[str] = None
|
|
178
|
+
self.files_bucket = None
|
|
179
|
+
self.db = boto3_session.client(service_name="sdb", region_name=region)
|
|
180
|
+
|
|
181
|
+
self.s3_resource = boto3_session.resource("s3", region_name=self.region)
|
|
145
182
|
self.s3_client = self.s3_resource.meta.client
|
|
146
183
|
|
|
147
|
-
def initialize(self, config):
|
|
184
|
+
def initialize(self, config: "Config") -> None:
|
|
148
185
|
if self._registered:
|
|
149
|
-
raise JobStoreExistsException(self.locator)
|
|
186
|
+
raise JobStoreExistsException(self.locator, "aws")
|
|
150
187
|
self._registered = None
|
|
151
188
|
try:
|
|
152
189
|
self._bind(create=True)
|
|
@@ -159,36 +196,45 @@ class AWSJobStore(AbstractJobStore):
|
|
|
159
196
|
self._registered = True
|
|
160
197
|
|
|
161
198
|
@property
|
|
162
|
-
def sseKeyPath(self):
|
|
199
|
+
def sseKeyPath(self) -> Optional[str]:
|
|
163
200
|
return self.config.sseKey
|
|
164
201
|
|
|
165
|
-
def resume(self):
|
|
202
|
+
def resume(self) -> None:
|
|
166
203
|
if not self._registered:
|
|
167
|
-
raise NoSuchJobStoreException(self.locator)
|
|
204
|
+
raise NoSuchJobStoreException(self.locator, "aws")
|
|
168
205
|
self._bind(create=False)
|
|
169
206
|
super().resume()
|
|
170
207
|
|
|
171
|
-
def _bind(
|
|
208
|
+
def _bind(
|
|
209
|
+
self,
|
|
210
|
+
create: bool = False,
|
|
211
|
+
block: bool = True,
|
|
212
|
+
check_versioning_consistency: bool = True,
|
|
213
|
+
) -> None:
|
|
172
214
|
def qualify(name):
|
|
173
215
|
assert len(name) <= self.maxNameLen
|
|
174
|
-
return self.
|
|
216
|
+
return self.name_prefix + self.nameSeparator + name
|
|
175
217
|
|
|
176
218
|
# The order in which this sequence of events happens is important. We can easily handle the
|
|
177
219
|
# inability to bind a domain, but it is a little harder to handle some cases of binding the
|
|
178
220
|
# jobstore bucket. Maintaining this order allows for an easier `destroy` method.
|
|
179
|
-
if self.
|
|
180
|
-
self.
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
self.
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
221
|
+
if self.jobs_domain_name is None:
|
|
222
|
+
self.jobs_domain_name = qualify("jobs")
|
|
223
|
+
self._bindDomain(self.jobs_domain_name, create=create, block=block)
|
|
224
|
+
if self.files_domain_name is None:
|
|
225
|
+
self.files_domain_name = qualify("files")
|
|
226
|
+
self._bindDomain(self.files_domain_name, create=create, block=block)
|
|
227
|
+
if self.files_bucket is None:
|
|
228
|
+
self.files_bucket = self._bindBucket(
|
|
229
|
+
qualify("files"),
|
|
230
|
+
create=create,
|
|
231
|
+
block=block,
|
|
232
|
+
versioning=True,
|
|
233
|
+
check_versioning_consistency=check_versioning_consistency,
|
|
234
|
+
)
|
|
189
235
|
|
|
190
236
|
@property
|
|
191
|
-
def _registered(self):
|
|
237
|
+
def _registered(self) -> Optional[bool]:
|
|
192
238
|
"""
|
|
193
239
|
A optional boolean property indicating whether this job store is registered. The
|
|
194
240
|
registry is the authority on deciding if a job store exists or not. If True, this job
|
|
@@ -205,55 +251,75 @@ class AWSJobStore(AbstractJobStore):
|
|
|
205
251
|
# store destruction, indicates a job store in transition, reflecting the fact that 3.3.0
|
|
206
252
|
# may leak buckets or domains even though the registry reports 'False' for them. We
|
|
207
253
|
# can't handle job stores that were partially created by 3.3.0, though.
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
254
|
+
registry_domain_name = "toil-registry"
|
|
255
|
+
try:
|
|
256
|
+
self._bindDomain(
|
|
257
|
+
domain_name=registry_domain_name, create=False, block=False
|
|
258
|
+
)
|
|
259
|
+
except DomainDoesNotExist:
|
|
212
260
|
return False
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
261
|
+
|
|
262
|
+
for attempt in retry_sdb():
|
|
263
|
+
with attempt:
|
|
264
|
+
get_result = self.db.get_attributes(
|
|
265
|
+
DomainName=registry_domain_name,
|
|
266
|
+
ItemName=self.name_prefix,
|
|
267
|
+
AttributeNames=["exists"],
|
|
268
|
+
ConsistentRead=True,
|
|
269
|
+
)
|
|
270
|
+
attributes: list["AttributeTypeDef"] = get_result.get(
|
|
271
|
+
"Attributes", []
|
|
272
|
+
) # the documentation says 'Attributes' should always exist, but this is not true
|
|
273
|
+
exists: Optional[str] = get_item_from_attributes(
|
|
274
|
+
attributes=attributes, name="exists"
|
|
275
|
+
)
|
|
276
|
+
if exists is None:
|
|
277
|
+
return False
|
|
278
|
+
elif exists == "True":
|
|
279
|
+
return True
|
|
280
|
+
elif exists == "False":
|
|
281
|
+
return None
|
|
282
|
+
else:
|
|
283
|
+
assert False
|
|
230
284
|
|
|
231
285
|
@_registered.setter
|
|
232
|
-
def _registered(self, value):
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
286
|
+
def _registered(self, value: bool) -> None:
|
|
287
|
+
registry_domain_name = "toil-registry"
|
|
288
|
+
try:
|
|
289
|
+
self._bindDomain(
|
|
290
|
+
domain_name=registry_domain_name,
|
|
291
|
+
# Only create registry domain when registering or
|
|
292
|
+
# transitioning a store
|
|
293
|
+
create=value is not False,
|
|
294
|
+
block=False,
|
|
295
|
+
)
|
|
296
|
+
except DomainDoesNotExist:
|
|
240
297
|
pass
|
|
241
298
|
else:
|
|
242
299
|
for attempt in retry_sdb():
|
|
243
300
|
with attempt:
|
|
244
301
|
if value is False:
|
|
245
|
-
|
|
302
|
+
self.db.delete_attributes(
|
|
303
|
+
DomainName=registry_domain_name, ItemName=self.name_prefix
|
|
304
|
+
)
|
|
246
305
|
else:
|
|
247
306
|
if value is True:
|
|
248
|
-
attributes =
|
|
307
|
+
attributes: list["ReplaceableAttributeTypeDef"] = [
|
|
308
|
+
{"Name": "exists", "Value": "True", "Replace": True}
|
|
309
|
+
]
|
|
249
310
|
elif value is None:
|
|
250
|
-
attributes =
|
|
311
|
+
attributes = [
|
|
312
|
+
{"Name": "exists", "Value": "False", "Replace": True}
|
|
313
|
+
]
|
|
251
314
|
else:
|
|
252
315
|
assert False
|
|
253
|
-
|
|
254
|
-
|
|
316
|
+
self.db.put_attributes(
|
|
317
|
+
DomainName=registry_domain_name,
|
|
318
|
+
ItemName=self.name_prefix,
|
|
319
|
+
Attributes=attributes,
|
|
320
|
+
)
|
|
255
321
|
|
|
256
|
-
def _checkItem(self, item, enforce: bool = True):
|
|
322
|
+
def _checkItem(self, item: "ItemTypeDef", enforce: bool = True) -> None:
|
|
257
323
|
"""
|
|
258
324
|
Make sure that the given SimpleDB item actually has the attributes we think it should.
|
|
259
325
|
|
|
@@ -261,32 +327,56 @@ class AWSJobStore(AbstractJobStore):
|
|
|
261
327
|
|
|
262
328
|
If enforce is false, log but don't throw.
|
|
263
329
|
"""
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
330
|
+
self._checkAttributes(item["Attributes"], enforce)
|
|
331
|
+
|
|
332
|
+
def _checkAttributes(
|
|
333
|
+
self, attributes: list["AttributeTypeDef"], enforce: bool = True
|
|
334
|
+
) -> None:
|
|
335
|
+
if get_item_from_attributes(attributes=attributes, name="overlargeID") is None:
|
|
336
|
+
logger.error(
|
|
337
|
+
"overlargeID attribute isn't present: either SimpleDB entry is "
|
|
338
|
+
"corrupt or jobstore is from an extremely old Toil: %s",
|
|
339
|
+
attributes,
|
|
340
|
+
)
|
|
268
341
|
if enforce:
|
|
269
|
-
raise RuntimeError(
|
|
270
|
-
|
|
342
|
+
raise RuntimeError(
|
|
343
|
+
"encountered SimpleDB entry missing required attribute "
|
|
344
|
+
"'overlargeID'; is your job store ancient?"
|
|
345
|
+
)
|
|
271
346
|
|
|
272
|
-
def
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
347
|
+
def _awsJobFromAttributes(self, attributes: list["AttributeTypeDef"]) -> Job:
|
|
348
|
+
"""
|
|
349
|
+
Get a Toil Job object from attributes that are defined in an item from the DB
|
|
350
|
+
:param attributes: List of attributes
|
|
351
|
+
:return: Toil job
|
|
352
|
+
"""
|
|
353
|
+
self._checkAttributes(attributes)
|
|
354
|
+
overlarge_id_value = get_item_from_attributes(
|
|
355
|
+
attributes=attributes, name="overlargeID"
|
|
356
|
+
)
|
|
357
|
+
if overlarge_id_value:
|
|
358
|
+
assert self.file_exists(overlarge_id_value)
|
|
276
359
|
# This is an overlarge job, download the actual attributes
|
|
277
360
|
# from the file store
|
|
278
361
|
logger.debug("Loading overlarge job from S3.")
|
|
279
|
-
with self.read_file_stream(
|
|
362
|
+
with self.read_file_stream(overlarge_id_value) as fh:
|
|
280
363
|
binary = fh.read()
|
|
281
364
|
else:
|
|
282
|
-
binary, _ = SDBHelper.attributesToBinary(
|
|
365
|
+
binary, _ = SDBHelper.attributesToBinary(attributes)
|
|
283
366
|
assert binary is not None
|
|
284
367
|
job = pickle.loads(binary)
|
|
285
368
|
if job is not None:
|
|
286
369
|
job.assignConfig(self.config)
|
|
287
370
|
return job
|
|
288
371
|
|
|
289
|
-
def
|
|
372
|
+
def _awsJobFromItem(self, item: "ItemTypeDef") -> Job:
|
|
373
|
+
"""
|
|
374
|
+
Get a Toil Job object from an item from the DB
|
|
375
|
+
:return: Toil Job
|
|
376
|
+
"""
|
|
377
|
+
return self._awsJobFromAttributes(item["Attributes"])
|
|
378
|
+
|
|
379
|
+
def _awsJobToAttributes(self, job: JobDescription) -> list["AttributeTypeDef"]:
|
|
290
380
|
binary = pickle.dumps(job, protocol=pickle.HIGHEST_PROTOCOL)
|
|
291
381
|
if len(binary) > SDBHelper.maxBinarySize(extraReservedChunks=1):
|
|
292
382
|
# Store as an overlarge job in S3
|
|
@@ -297,66 +387,100 @@ class AWSJobStore(AbstractJobStore):
|
|
|
297
387
|
else:
|
|
298
388
|
item = SDBHelper.binaryToAttributes(binary)
|
|
299
389
|
item["overlargeID"] = ""
|
|
300
|
-
return item
|
|
390
|
+
return SDBHelper.attributeDictToList(item)
|
|
391
|
+
|
|
392
|
+
def _awsJobToItem(self, job: JobDescription, name: str) -> "ItemTypeDef":
|
|
393
|
+
return {"Name": name, "Attributes": self._awsJobToAttributes(job)}
|
|
301
394
|
|
|
302
395
|
jobsPerBatchInsert = 25
|
|
303
396
|
|
|
304
397
|
@contextmanager
|
|
305
|
-
def batch(self):
|
|
398
|
+
def batch(self) -> None:
|
|
306
399
|
self._batchedUpdates = []
|
|
307
400
|
yield
|
|
308
|
-
batches = [
|
|
309
|
-
|
|
401
|
+
batches = [
|
|
402
|
+
self._batchedUpdates[i : i + self.jobsPerBatchInsert]
|
|
403
|
+
for i in range(0, len(self._batchedUpdates), self.jobsPerBatchInsert)
|
|
404
|
+
]
|
|
310
405
|
|
|
311
406
|
for batch in batches:
|
|
407
|
+
items: list["ReplaceableItemTypeDef"] = []
|
|
312
408
|
for jobDescription in batch:
|
|
409
|
+
item_attributes: list["ReplaceableAttributeTypeDef"] = []
|
|
313
410
|
jobDescription.pre_update_hook()
|
|
314
|
-
|
|
411
|
+
item_name = compat_bytes(jobDescription.jobStoreID)
|
|
412
|
+
got_job_attributes: list["AttributeTypeDef"] = self._awsJobToAttributes(
|
|
413
|
+
jobDescription
|
|
414
|
+
)
|
|
415
|
+
for each_attribute in got_job_attributes:
|
|
416
|
+
new_attribute: "ReplaceableAttributeTypeDef" = {
|
|
417
|
+
"Name": each_attribute["Name"],
|
|
418
|
+
"Value": each_attribute["Value"],
|
|
419
|
+
"Replace": True,
|
|
420
|
+
}
|
|
421
|
+
item_attributes.append(new_attribute)
|
|
422
|
+
items.append({"Name": item_name, "Attributes": item_attributes})
|
|
423
|
+
|
|
315
424
|
for attempt in retry_sdb():
|
|
316
425
|
with attempt:
|
|
317
|
-
|
|
426
|
+
self.db.batch_put_attributes(
|
|
427
|
+
DomainName=self.jobs_domain_name, Items=items
|
|
428
|
+
)
|
|
318
429
|
self._batchedUpdates = None
|
|
319
430
|
|
|
320
|
-
def assign_job_id(self, job_description):
|
|
431
|
+
def assign_job_id(self, job_description: JobDescription) -> None:
|
|
321
432
|
jobStoreID = self._new_job_id()
|
|
322
|
-
logger.debug("Assigning ID to job %s
|
|
323
|
-
jobStoreID, '<no command>' if job_description.command is None else job_description.command)
|
|
433
|
+
logger.debug("Assigning ID to job %s", jobStoreID)
|
|
324
434
|
job_description.jobStoreID = jobStoreID
|
|
325
435
|
|
|
326
|
-
def create_job(self, job_description):
|
|
436
|
+
def create_job(self, job_description: JobDescription) -> JobDescription:
|
|
327
437
|
if hasattr(self, "_batchedUpdates") and self._batchedUpdates is not None:
|
|
328
438
|
self._batchedUpdates.append(job_description)
|
|
329
439
|
else:
|
|
330
440
|
self.update_job(job_description)
|
|
331
441
|
return job_description
|
|
332
442
|
|
|
333
|
-
def job_exists(self, job_id):
|
|
443
|
+
def job_exists(self, job_id: Union[bytes, str]) -> bool:
|
|
334
444
|
for attempt in retry_sdb():
|
|
335
445
|
with attempt:
|
|
336
|
-
return
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
446
|
+
return (
|
|
447
|
+
len(
|
|
448
|
+
self.db.get_attributes(
|
|
449
|
+
DomainName=self.jobs_domain_name,
|
|
450
|
+
ItemName=compat_bytes(job_id),
|
|
451
|
+
AttributeNames=[SDBHelper.presenceIndicator()],
|
|
452
|
+
ConsistentRead=True,
|
|
453
|
+
).get("Attributes", [])
|
|
454
|
+
)
|
|
455
|
+
> 0
|
|
456
|
+
)
|
|
457
|
+
|
|
458
|
+
def jobs(self) -> Generator[Job, None, None]:
|
|
459
|
+
job_items: Optional[list["ItemTypeDef"]] = None
|
|
343
460
|
for attempt in retry_sdb():
|
|
344
461
|
with attempt:
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
462
|
+
job_items = boto3_pager(
|
|
463
|
+
self.db.select,
|
|
464
|
+
"Items",
|
|
465
|
+
ConsistentRead=True,
|
|
466
|
+
SelectExpression="select * from `%s`" % self.jobs_domain_name,
|
|
467
|
+
)
|
|
468
|
+
assert job_items is not None
|
|
469
|
+
for jobItem in job_items:
|
|
350
470
|
yield self._awsJobFromItem(jobItem)
|
|
351
471
|
|
|
352
|
-
def load_job(self, job_id):
|
|
353
|
-
|
|
472
|
+
def load_job(self, job_id: FileID) -> Job:
|
|
473
|
+
item_attributes = None
|
|
354
474
|
for attempt in retry_sdb():
|
|
355
475
|
with attempt:
|
|
356
|
-
|
|
357
|
-
|
|
476
|
+
item_attributes = self.db.get_attributes(
|
|
477
|
+
DomainName=self.jobs_domain_name,
|
|
478
|
+
ItemName=compat_bytes(job_id),
|
|
479
|
+
ConsistentRead=True,
|
|
480
|
+
).get("Attributes", [])
|
|
481
|
+
if not item_attributes:
|
|
358
482
|
raise NoSuchJobException(job_id)
|
|
359
|
-
job = self.
|
|
483
|
+
job = self._awsJobFromAttributes(item_attributes)
|
|
360
484
|
if job is None:
|
|
361
485
|
raise NoSuchJobException(job_id)
|
|
362
486
|
logger.debug("Loaded job %s", job_id)
|
|
@@ -365,10 +489,18 @@ class AWSJobStore(AbstractJobStore):
|
|
|
365
489
|
def update_job(self, job_description):
|
|
366
490
|
logger.debug("Updating job %s", job_description.jobStoreID)
|
|
367
491
|
job_description.pre_update_hook()
|
|
368
|
-
|
|
492
|
+
job_attributes = self._awsJobToAttributes(job_description)
|
|
493
|
+
update_attributes: list["ReplaceableAttributeTypeDef"] = [
|
|
494
|
+
{"Name": attribute["Name"], "Value": attribute["Value"], "Replace": True}
|
|
495
|
+
for attribute in job_attributes
|
|
496
|
+
]
|
|
369
497
|
for attempt in retry_sdb():
|
|
370
498
|
with attempt:
|
|
371
|
-
|
|
499
|
+
self.db.put_attributes(
|
|
500
|
+
DomainName=self.jobs_domain_name,
|
|
501
|
+
ItemName=compat_bytes(job_description.jobStoreID),
|
|
502
|
+
Attributes=update_attributes,
|
|
503
|
+
)
|
|
372
504
|
|
|
373
505
|
itemsPerBatchDelete = 25
|
|
374
506
|
|
|
@@ -377,49 +509,77 @@ class AWSJobStore(AbstractJobStore):
|
|
|
377
509
|
logger.debug("Deleting job %s", job_id)
|
|
378
510
|
|
|
379
511
|
# If the job is overlarge, delete its file from the filestore
|
|
380
|
-
item = None
|
|
381
512
|
for attempt in retry_sdb():
|
|
382
513
|
with attempt:
|
|
383
|
-
|
|
514
|
+
attributes = self.db.get_attributes(
|
|
515
|
+
DomainName=self.jobs_domain_name,
|
|
516
|
+
ItemName=compat_bytes(job_id),
|
|
517
|
+
ConsistentRead=True,
|
|
518
|
+
).get("Attributes", [])
|
|
384
519
|
# If the overlargeID has fallen off, maybe we partially deleted the
|
|
385
520
|
# attributes of the item? Or raced on it? Or hit SimpleDB being merely
|
|
386
521
|
# eventually consistent? We should still be able to get rid of it.
|
|
387
|
-
self.
|
|
388
|
-
|
|
522
|
+
self._checkAttributes(attributes, enforce=False)
|
|
523
|
+
overlarge_id_value = get_item_from_attributes(
|
|
524
|
+
attributes=attributes, name="overlargeID"
|
|
525
|
+
)
|
|
526
|
+
if overlarge_id_value:
|
|
389
527
|
logger.debug("Deleting job from filestore")
|
|
390
|
-
self.delete_file(
|
|
528
|
+
self.delete_file(overlarge_id_value)
|
|
391
529
|
for attempt in retry_sdb():
|
|
392
530
|
with attempt:
|
|
393
|
-
self.
|
|
394
|
-
|
|
531
|
+
self.db.delete_attributes(
|
|
532
|
+
DomainName=self.jobs_domain_name, ItemName=compat_bytes(job_id)
|
|
533
|
+
)
|
|
534
|
+
items: Optional[list["ItemTypeDef"]] = None
|
|
395
535
|
for attempt in retry_sdb():
|
|
396
536
|
with attempt:
|
|
397
|
-
items = list(
|
|
398
|
-
|
|
399
|
-
|
|
537
|
+
items = list(
|
|
538
|
+
boto3_pager(
|
|
539
|
+
self.db.select,
|
|
540
|
+
"Items",
|
|
541
|
+
ConsistentRead=True,
|
|
542
|
+
SelectExpression=f"select version from `{self.files_domain_name}` where ownerID='{job_id}'",
|
|
543
|
+
)
|
|
544
|
+
)
|
|
400
545
|
assert items is not None
|
|
401
546
|
if items:
|
|
402
|
-
logger.debug(
|
|
547
|
+
logger.debug(
|
|
548
|
+
"Deleting %d file(s) associated with job %s", len(items), job_id
|
|
549
|
+
)
|
|
403
550
|
n = self.itemsPerBatchDelete
|
|
404
|
-
batches = [items[i:i + n] for i in range(0, len(items), n)]
|
|
551
|
+
batches = [items[i : i + n] for i in range(0, len(items), n)]
|
|
405
552
|
for batch in batches:
|
|
406
|
-
|
|
553
|
+
delete_items: list["DeletableItemTypeDef"] = [
|
|
554
|
+
{"Name": item["Name"]} for item in batch
|
|
555
|
+
]
|
|
407
556
|
for attempt in retry_sdb():
|
|
408
557
|
with attempt:
|
|
409
|
-
self.
|
|
558
|
+
self.db.batch_delete_attributes(
|
|
559
|
+
DomainName=self.files_domain_name, Items=delete_items
|
|
560
|
+
)
|
|
410
561
|
for item in items:
|
|
411
|
-
|
|
562
|
+
item: "ItemTypeDef"
|
|
563
|
+
version = get_item_from_attributes(
|
|
564
|
+
attributes=item["Attributes"], name="version"
|
|
565
|
+
)
|
|
412
566
|
for attempt in retry_s3():
|
|
413
567
|
with attempt:
|
|
414
568
|
if version:
|
|
415
|
-
self.s3_client.delete_object(
|
|
416
|
-
|
|
417
|
-
|
|
569
|
+
self.s3_client.delete_object(
|
|
570
|
+
Bucket=self.files_bucket.name,
|
|
571
|
+
Key=compat_bytes(item["Name"]),
|
|
572
|
+
VersionId=version,
|
|
573
|
+
)
|
|
418
574
|
else:
|
|
419
|
-
self.s3_client.delete_object(
|
|
420
|
-
|
|
575
|
+
self.s3_client.delete_object(
|
|
576
|
+
Bucket=self.files_bucket.name,
|
|
577
|
+
Key=compat_bytes(item["Name"]),
|
|
578
|
+
)
|
|
421
579
|
|
|
422
|
-
def get_empty_file_store_id(
|
|
580
|
+
def get_empty_file_store_id(
|
|
581
|
+
self, jobStoreID=None, cleanup=False, basename=None
|
|
582
|
+
) -> FileID:
|
|
423
583
|
info = self.FileInfo.create(jobStoreID if cleanup else None)
|
|
424
584
|
with info.uploadStream() as _:
|
|
425
585
|
# Empty
|
|
@@ -428,7 +588,14 @@ class AWSJobStore(AbstractJobStore):
|
|
|
428
588
|
logger.debug("Created %r.", info)
|
|
429
589
|
return info.fileID
|
|
430
590
|
|
|
431
|
-
def _import_file(
|
|
591
|
+
def _import_file(
|
|
592
|
+
self,
|
|
593
|
+
otherCls,
|
|
594
|
+
uri: ParseResult,
|
|
595
|
+
shared_file_name: Optional[str] = None,
|
|
596
|
+
hardlink: bool = False,
|
|
597
|
+
symlink: bool = True,
|
|
598
|
+
) -> Optional[FileID]:
|
|
432
599
|
try:
|
|
433
600
|
if issubclass(otherCls, AWSJobStore):
|
|
434
601
|
srcObj = get_object_for_url(uri, existing=True)
|
|
@@ -438,29 +605,35 @@ class AWSJobStore(AbstractJobStore):
|
|
|
438
605
|
else:
|
|
439
606
|
self._requireValidSharedFileName(shared_file_name)
|
|
440
607
|
jobStoreFileID = self._shared_file_id(shared_file_name)
|
|
441
|
-
info = self.FileInfo.loadOrCreate(
|
|
442
|
-
|
|
443
|
-
|
|
608
|
+
info = self.FileInfo.loadOrCreate(
|
|
609
|
+
jobStoreFileID=jobStoreFileID,
|
|
610
|
+
ownerID=str(self.sharedFileOwnerID),
|
|
611
|
+
encrypted=None,
|
|
612
|
+
)
|
|
444
613
|
info.copyFrom(srcObj)
|
|
445
614
|
info.save()
|
|
446
615
|
return FileID(info.fileID, size) if shared_file_name is None else None
|
|
447
|
-
except ServerSideCopyProhibitedError:
|
|
448
|
-
# AWS refuses to do this copy for us
|
|
449
|
-
logger.warning(
|
|
616
|
+
except (NoBucketLocationError, ServerSideCopyProhibitedError):
|
|
617
|
+
# AWS refuses to tell us where the bucket is or do this copy for us
|
|
618
|
+
logger.warning(
|
|
619
|
+
"Falling back to copying via the local machine. This could get expensive!"
|
|
620
|
+
)
|
|
450
621
|
|
|
451
622
|
# copy if exception
|
|
452
623
|
return super()._import_file(otherCls, uri, shared_file_name=shared_file_name)
|
|
453
624
|
|
|
454
|
-
def _export_file(self, otherCls, file_id, uri):
|
|
625
|
+
def _export_file(self, otherCls, file_id: FileID, uri: ParseResult) -> None:
|
|
455
626
|
try:
|
|
456
627
|
if issubclass(otherCls, AWSJobStore):
|
|
457
628
|
dstObj = get_object_for_url(uri)
|
|
458
629
|
info = self.FileInfo.loadOrFail(file_id)
|
|
459
630
|
info.copyTo(dstObj)
|
|
460
631
|
return
|
|
461
|
-
except ServerSideCopyProhibitedError:
|
|
462
|
-
# AWS refuses to do this copy for us
|
|
463
|
-
logger.warning(
|
|
632
|
+
except (NoBucketLocationError, ServerSideCopyProhibitedError):
|
|
633
|
+
# AWS refuses to tell us where the bucket is or do this copy for us
|
|
634
|
+
logger.warning(
|
|
635
|
+
"Falling back to copying via the local machine. This could get expensive!"
|
|
636
|
+
)
|
|
464
637
|
else:
|
|
465
638
|
super()._default_export_file(otherCls, file_id, uri)
|
|
466
639
|
|
|
@@ -475,41 +648,42 @@ class AWSJobStore(AbstractJobStore):
|
|
|
475
648
|
return cls._get_is_directory(url)
|
|
476
649
|
|
|
477
650
|
@classmethod
|
|
478
|
-
def _get_size(cls, url):
|
|
651
|
+
def _get_size(cls, url: ParseResult) -> int:
|
|
479
652
|
return get_object_for_url(url, existing=True).content_length
|
|
480
653
|
|
|
481
654
|
@classmethod
|
|
482
|
-
def _read_from_url(cls, url, writable):
|
|
655
|
+
def _read_from_url(cls, url: ParseResult, writable):
|
|
483
656
|
srcObj = get_object_for_url(url, existing=True)
|
|
484
657
|
srcObj.download_fileobj(writable)
|
|
485
|
-
return (
|
|
486
|
-
srcObj.content_length,
|
|
487
|
-
False # executable bit is always False
|
|
488
|
-
)
|
|
658
|
+
return (srcObj.content_length, False) # executable bit is always False
|
|
489
659
|
|
|
490
660
|
@classmethod
|
|
491
661
|
def _open_url(cls, url: ParseResult) -> IO[bytes]:
|
|
492
662
|
src_obj = get_object_for_url(url, existing=True)
|
|
493
663
|
response = src_obj.get()
|
|
494
664
|
# We should get back a response with a stream in 'Body'
|
|
495
|
-
if
|
|
665
|
+
if "Body" not in response:
|
|
496
666
|
raise RuntimeError(f"Could not fetch body stream for {url}")
|
|
497
|
-
return response[
|
|
667
|
+
return response["Body"]
|
|
498
668
|
|
|
499
669
|
@classmethod
|
|
500
|
-
def _write_to_url(
|
|
670
|
+
def _write_to_url(
|
|
671
|
+
cls, readable, url: ParseResult, executable: bool = False
|
|
672
|
+
) -> None:
|
|
501
673
|
dstObj = get_object_for_url(url)
|
|
502
674
|
|
|
503
675
|
logger.debug("Uploading %s", dstObj.key)
|
|
504
676
|
# uploadFile takes care of using multipart upload if the file is larger than partSize (default to 5MB)
|
|
505
|
-
uploadFile(
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
677
|
+
uploadFile(
|
|
678
|
+
readable=readable,
|
|
679
|
+
resource=s3_boto3_resource,
|
|
680
|
+
bucketName=dstObj.bucket_name,
|
|
681
|
+
fileID=dstObj.key,
|
|
682
|
+
partSize=5 * 1000 * 1000,
|
|
683
|
+
)
|
|
510
684
|
|
|
511
685
|
@classmethod
|
|
512
|
-
def _list_url(cls, url: ParseResult) ->
|
|
686
|
+
def _list_url(cls, url: ParseResult) -> list[str]:
|
|
513
687
|
return list_objects_for_url(url)
|
|
514
688
|
|
|
515
689
|
@classmethod
|
|
@@ -519,10 +693,12 @@ class AWSJobStore(AbstractJobStore):
|
|
|
519
693
|
return len(list_objects_for_url(url)) > 0
|
|
520
694
|
|
|
521
695
|
@classmethod
|
|
522
|
-
def _supports_url(cls, url, export=False):
|
|
523
|
-
return url.scheme.lower() ==
|
|
696
|
+
def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
|
|
697
|
+
return url.scheme.lower() == "s3"
|
|
524
698
|
|
|
525
|
-
def write_file(
|
|
699
|
+
def write_file(
|
|
700
|
+
self, local_path: FileID, job_id: Optional[FileID] = None, cleanup: bool = False
|
|
701
|
+
) -> FileID:
|
|
526
702
|
info = self.FileInfo.create(job_id if cleanup else None)
|
|
527
703
|
info.upload(local_path, not self.config.disableJobStoreChecksumVerification)
|
|
528
704
|
info.save()
|
|
@@ -530,7 +706,14 @@ class AWSJobStore(AbstractJobStore):
|
|
|
530
706
|
return info.fileID
|
|
531
707
|
|
|
532
708
|
@contextmanager
|
|
533
|
-
def write_file_stream(
|
|
709
|
+
def write_file_stream(
|
|
710
|
+
self,
|
|
711
|
+
job_id: Optional[FileID] = None,
|
|
712
|
+
cleanup: bool = False,
|
|
713
|
+
basename=None,
|
|
714
|
+
encoding=None,
|
|
715
|
+
errors=None,
|
|
716
|
+
):
|
|
534
717
|
info = self.FileInfo.create(job_id if cleanup else None)
|
|
535
718
|
with info.uploadStream(encoding=encoding, errors=errors) as writable:
|
|
536
719
|
yield writable, info.fileID
|
|
@@ -538,11 +721,15 @@ class AWSJobStore(AbstractJobStore):
|
|
|
538
721
|
logger.debug("Wrote %r.", info)
|
|
539
722
|
|
|
540
723
|
@contextmanager
|
|
541
|
-
def write_shared_file_stream(
|
|
724
|
+
def write_shared_file_stream(
|
|
725
|
+
self, shared_file_name, encrypted=None, encoding=None, errors=None
|
|
726
|
+
):
|
|
542
727
|
self._requireValidSharedFileName(shared_file_name)
|
|
543
|
-
info = self.FileInfo.loadOrCreate(
|
|
544
|
-
|
|
545
|
-
|
|
728
|
+
info = self.FileInfo.loadOrCreate(
|
|
729
|
+
jobStoreFileID=self._shared_file_id(shared_file_name),
|
|
730
|
+
ownerID=str(self.sharedFileOwnerID),
|
|
731
|
+
encrypted=encrypted,
|
|
732
|
+
)
|
|
546
733
|
with info.uploadStream(encoding=encoding, errors=errors) as writable:
|
|
547
734
|
yield writable
|
|
548
735
|
info.save()
|
|
@@ -575,7 +762,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
575
762
|
info = self.FileInfo.loadOrFail(file_id)
|
|
576
763
|
logger.debug("Reading %r into %r.", info, local_path)
|
|
577
764
|
info.download(local_path, not self.config.disableJobStoreChecksumVerification)
|
|
578
|
-
if getattr(file_id,
|
|
765
|
+
if getattr(file_id, "executable", False):
|
|
579
766
|
os.chmod(local_path, os.stat(local_path).st_mode | stat.S_IXUSR)
|
|
580
767
|
|
|
581
768
|
@contextmanager
|
|
@@ -590,7 +777,9 @@ class AWSJobStore(AbstractJobStore):
|
|
|
590
777
|
self._requireValidSharedFileName(shared_file_name)
|
|
591
778
|
jobStoreFileID = self._shared_file_id(shared_file_name)
|
|
592
779
|
info = self.FileInfo.loadOrFail(jobStoreFileID, customName=shared_file_name)
|
|
593
|
-
logger.debug(
|
|
780
|
+
logger.debug(
|
|
781
|
+
"Reading %r for shared file %r into stream.", info, shared_file_name
|
|
782
|
+
)
|
|
594
783
|
with info.downloadStream(encoding=encoding, errors=errors) as readable:
|
|
595
784
|
yield readable
|
|
596
785
|
|
|
@@ -606,7 +795,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
606
795
|
with info.uploadStream(multipart=False) as writeable:
|
|
607
796
|
if isinstance(msg, str):
|
|
608
797
|
# This stream is for binary data, so encode any non-encoded things
|
|
609
|
-
msg = msg.encode(
|
|
798
|
+
msg = msg.encode("utf-8", errors="ignore")
|
|
610
799
|
writeable.write(msg)
|
|
611
800
|
info.save()
|
|
612
801
|
|
|
@@ -614,7 +803,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
614
803
|
itemsProcessed = 0
|
|
615
804
|
|
|
616
805
|
for info in self._read_logs(callback, self.statsFileOwnerID):
|
|
617
|
-
info._ownerID = self.readStatsFileOwnerID
|
|
806
|
+
info._ownerID = str(self.readStatsFileOwnerID) # boto3 requires strings
|
|
618
807
|
info.save()
|
|
619
808
|
itemsProcessed += 1
|
|
620
809
|
|
|
@@ -628,10 +817,12 @@ class AWSJobStore(AbstractJobStore):
|
|
|
628
817
|
items = None
|
|
629
818
|
for attempt in retry_sdb():
|
|
630
819
|
with attempt:
|
|
631
|
-
items =
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
820
|
+
items = boto3_pager(
|
|
821
|
+
self.db.select,
|
|
822
|
+
"Items",
|
|
823
|
+
ConsistentRead=True,
|
|
824
|
+
SelectExpression=f"select * from `{self.files_domain_name}` where ownerID='{str(ownerId)}'",
|
|
825
|
+
)
|
|
635
826
|
assert items is not None
|
|
636
827
|
for item in items:
|
|
637
828
|
info = self.FileInfo.fromItem(item)
|
|
@@ -648,13 +839,19 @@ class AWSJobStore(AbstractJobStore):
|
|
|
648
839
|
with info.uploadStream(allowInlining=False) as f:
|
|
649
840
|
f.write(info.content)
|
|
650
841
|
|
|
651
|
-
self.
|
|
842
|
+
self.files_bucket.Object(compat_bytes(jobStoreFileID)).Acl().put(
|
|
843
|
+
ACL="public-read"
|
|
844
|
+
)
|
|
652
845
|
|
|
653
|
-
url = self.s3_client.generate_presigned_url(
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
846
|
+
url = self.s3_client.generate_presigned_url(
|
|
847
|
+
"get_object",
|
|
848
|
+
Params={
|
|
849
|
+
"Bucket": self.files_bucket.name,
|
|
850
|
+
"Key": compat_bytes(jobStoreFileID),
|
|
851
|
+
"VersionId": info.version,
|
|
852
|
+
},
|
|
853
|
+
ExpiresIn=self.publicUrlExpiration.total_seconds(),
|
|
854
|
+
)
|
|
658
855
|
|
|
659
856
|
# boto doesn't properly remove the x-amz-security-token parameter when
|
|
660
857
|
# query_auth is False when using an IAM role (see issue #2043). Including the
|
|
@@ -662,12 +859,12 @@ class AWSJobStore(AbstractJobStore):
|
|
|
662
859
|
# even if the resource is public, so we need to remove it.
|
|
663
860
|
scheme, netloc, path, query, fragment = urlsplit(url)
|
|
664
861
|
params = parse_qs(query)
|
|
665
|
-
if
|
|
666
|
-
del params[
|
|
667
|
-
if
|
|
668
|
-
del params[
|
|
669
|
-
if
|
|
670
|
-
del params[
|
|
862
|
+
if "x-amz-security-token" in params:
|
|
863
|
+
del params["x-amz-security-token"]
|
|
864
|
+
if "AWSAccessKeyId" in params:
|
|
865
|
+
del params["AWSAccessKeyId"]
|
|
866
|
+
if "Signature" in params:
|
|
867
|
+
del params["Signature"]
|
|
671
868
|
query = urlencode(params, doseq=True)
|
|
672
869
|
url = urlunsplit((scheme, netloc, path, query, fragment))
|
|
673
870
|
return url
|
|
@@ -676,22 +873,14 @@ class AWSJobStore(AbstractJobStore):
|
|
|
676
873
|
self._requireValidSharedFileName(shared_file_name)
|
|
677
874
|
return self.get_public_url(self._shared_file_id(shared_file_name))
|
|
678
875
|
|
|
679
|
-
def
|
|
680
|
-
|
|
681
|
-
:
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
return db
|
|
688
|
-
|
|
689
|
-
def _bindBucket(self,
|
|
690
|
-
bucket_name: str,
|
|
691
|
-
create: bool = False,
|
|
692
|
-
block: bool = True,
|
|
693
|
-
versioning: bool = False,
|
|
694
|
-
check_versioning_consistency: bool = True):
|
|
876
|
+
def _bindBucket(
|
|
877
|
+
self,
|
|
878
|
+
bucket_name: str,
|
|
879
|
+
create: bool = False,
|
|
880
|
+
block: bool = True,
|
|
881
|
+
versioning: bool = False,
|
|
882
|
+
check_versioning_consistency: bool = True,
|
|
883
|
+
):
|
|
695
884
|
"""
|
|
696
885
|
Return the Boto Bucket object representing the S3 bucket with the given name. If the
|
|
697
886
|
bucket does not exist and `create` is True, it will be created.
|
|
@@ -716,8 +905,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
716
905
|
Decide, given an error, whether we should retry binding the bucket.
|
|
717
906
|
"""
|
|
718
907
|
|
|
719
|
-
if
|
|
720
|
-
get_error_status(error) in (404, 409)):
|
|
908
|
+
if isinstance(error, ClientError) and get_error_status(error) in (404, 409):
|
|
721
909
|
# Handle cases where the bucket creation is in a weird state that might let us proceed.
|
|
722
910
|
# https://github.com/BD2KGenomics/toil/issues/955
|
|
723
911
|
# https://github.com/BD2KGenomics/toil/issues/995
|
|
@@ -727,7 +915,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
727
915
|
# OperationAborted == 409
|
|
728
916
|
# NoSuchBucket == 404
|
|
729
917
|
return True
|
|
730
|
-
if get_error_code(error) ==
|
|
918
|
+
if get_error_code(error) == "SlowDown":
|
|
731
919
|
# We may get told to SlowDown by AWS when we try to create our
|
|
732
920
|
# bucket. In that case, we should retry and use the exponential
|
|
733
921
|
# backoff.
|
|
@@ -767,8 +955,10 @@ class AWSJobStore(AbstractJobStore):
|
|
|
767
955
|
|
|
768
956
|
if tags:
|
|
769
957
|
flat_tags = flatten_tags(tags)
|
|
770
|
-
bucket_tagging = self.s3_resource.BucketTagging(
|
|
771
|
-
|
|
958
|
+
bucket_tagging = self.s3_resource.BucketTagging(
|
|
959
|
+
bucket_name
|
|
960
|
+
)
|
|
961
|
+
bucket_tagging.put(Tagging={"TagSet": flat_tags})
|
|
772
962
|
|
|
773
963
|
# Configure bucket so that we can make objects in
|
|
774
964
|
# it public, which was the historical default.
|
|
@@ -781,7 +971,9 @@ class AWSJobStore(AbstractJobStore):
|
|
|
781
971
|
# This is raised if the user attempts to get a bucket in a region outside
|
|
782
972
|
# the specified one, if the specified one is not `us-east-1`. The us-east-1
|
|
783
973
|
# server allows a user to use buckets from any region.
|
|
784
|
-
raise BucketLocationConflictException(
|
|
974
|
+
raise BucketLocationConflictException(
|
|
975
|
+
get_bucket_region(bucket_name)
|
|
976
|
+
)
|
|
785
977
|
else:
|
|
786
978
|
raise
|
|
787
979
|
else:
|
|
@@ -798,25 +990,35 @@ class AWSJobStore(AbstractJobStore):
|
|
|
798
990
|
# consistent?
|
|
799
991
|
time.sleep(1)
|
|
800
992
|
while not self._getBucketVersioning(bucket_name):
|
|
801
|
-
logger.warning(
|
|
993
|
+
logger.warning(
|
|
994
|
+
f"Waiting for versioning activation on bucket '{bucket_name}'..."
|
|
995
|
+
)
|
|
802
996
|
time.sleep(1)
|
|
803
997
|
elif check_versioning_consistency:
|
|
804
998
|
# now test for versioning consistency
|
|
805
999
|
# we should never see any of these errors since 'versioning' should always be true
|
|
806
1000
|
bucket_versioning = self._getBucketVersioning(bucket_name)
|
|
807
1001
|
if bucket_versioning != versioning:
|
|
808
|
-
assert False,
|
|
1002
|
+
assert False, "Cannot modify versioning on existing bucket"
|
|
809
1003
|
elif bucket_versioning is None:
|
|
810
|
-
assert False,
|
|
1004
|
+
assert False, "Cannot use a bucket with versioning suspended"
|
|
811
1005
|
if bucketExisted:
|
|
812
|
-
logger.debug(
|
|
1006
|
+
logger.debug(
|
|
1007
|
+
f"Using pre-existing job store bucket '{bucket_name}'."
|
|
1008
|
+
)
|
|
813
1009
|
else:
|
|
814
|
-
logger.debug(
|
|
1010
|
+
logger.debug(
|
|
1011
|
+
f"Created new job store bucket '{bucket_name}' with versioning state {versioning}."
|
|
1012
|
+
)
|
|
815
1013
|
|
|
816
1014
|
return bucket
|
|
817
1015
|
|
|
818
|
-
def _bindDomain(
|
|
1016
|
+
def _bindDomain(
|
|
1017
|
+
self, domain_name: str, create: bool = False, block: bool = True
|
|
1018
|
+
) -> None:
|
|
819
1019
|
"""
|
|
1020
|
+
Return the Boto3 domain name representing the SDB domain. When create=True, it will
|
|
1021
|
+
create the domain if it does not exist.
|
|
820
1022
|
Return the Boto Domain object representing the SDB domain of the given name. If the
|
|
821
1023
|
domain does not exist and `create` is True, it will be created.
|
|
822
1024
|
|
|
@@ -824,29 +1026,33 @@ class AWSJobStore(AbstractJobStore):
|
|
|
824
1026
|
|
|
825
1027
|
:param bool create: True if domain should be created if it doesn't exist
|
|
826
1028
|
|
|
827
|
-
:param bool block: If False,
|
|
1029
|
+
:param bool block: If False, raise DomainDoesNotExist if the domain doesn't exist. If True, wait until
|
|
828
1030
|
domain appears. This parameter is ignored if create is True.
|
|
829
1031
|
|
|
830
|
-
:rtype:
|
|
831
|
-
:raises
|
|
1032
|
+
:rtype: None
|
|
1033
|
+
:raises ClientError: If `block` is True and the domain still doesn't exist after the
|
|
832
1034
|
retry timeout expires.
|
|
833
1035
|
"""
|
|
834
1036
|
logger.debug("Binding to job store domain '%s'.", domain_name)
|
|
835
|
-
retryargs = dict(
|
|
1037
|
+
retryargs = dict(
|
|
1038
|
+
predicate=lambda e: no_such_sdb_domain(e) or sdb_unavailable(e)
|
|
1039
|
+
)
|
|
836
1040
|
if not block:
|
|
837
|
-
retryargs[
|
|
1041
|
+
retryargs["timeout"] = 15
|
|
838
1042
|
for attempt in retry_sdb(**retryargs):
|
|
839
1043
|
with attempt:
|
|
840
1044
|
try:
|
|
841
|
-
|
|
842
|
-
|
|
1045
|
+
self.db.domain_metadata(DomainName=domain_name)
|
|
1046
|
+
return
|
|
1047
|
+
except ClientError as e:
|
|
843
1048
|
if no_such_sdb_domain(e):
|
|
844
1049
|
if create:
|
|
845
|
-
|
|
1050
|
+
self.db.create_domain(DomainName=domain_name)
|
|
1051
|
+
return
|
|
846
1052
|
elif block:
|
|
847
1053
|
raise
|
|
848
1054
|
else:
|
|
849
|
-
|
|
1055
|
+
raise DomainDoesNotExist(domain_name)
|
|
850
1056
|
else:
|
|
851
1057
|
raise
|
|
852
1058
|
|
|
@@ -854,13 +1060,13 @@ class AWSJobStore(AbstractJobStore):
|
|
|
854
1060
|
return str(uuid.uuid4())
|
|
855
1061
|
|
|
856
1062
|
# A dummy job ID under which all shared files are stored
|
|
857
|
-
sharedFileOwnerID = uuid.UUID(
|
|
1063
|
+
sharedFileOwnerID = uuid.UUID("891f7db6-e4d9-4221-a58e-ab6cc4395f94")
|
|
858
1064
|
|
|
859
1065
|
# A dummy job ID under which all unread stats files are stored
|
|
860
|
-
statsFileOwnerID = uuid.UUID(
|
|
1066
|
+
statsFileOwnerID = uuid.UUID("bfcf5286-4bc7-41ef-a85d-9ab415b69d53")
|
|
861
1067
|
|
|
862
1068
|
# A dummy job ID under which all read stats files are stored
|
|
863
|
-
readStatsFileOwnerID = uuid.UUID(
|
|
1069
|
+
readStatsFileOwnerID = uuid.UUID("e77fc3aa-d232-4255-ae04-f64ee8eb0bfa")
|
|
864
1070
|
|
|
865
1071
|
def _shared_file_id(self, shared_file_name):
|
|
866
1072
|
return str(uuid.uuid5(self.sharedFileOwnerID, shared_file_name))
|
|
@@ -870,13 +1076,22 @@ class AWSJobStore(AbstractJobStore):
|
|
|
870
1076
|
"""
|
|
871
1077
|
Represents a file in this job store.
|
|
872
1078
|
"""
|
|
1079
|
+
|
|
873
1080
|
outer = None
|
|
874
1081
|
"""
|
|
875
1082
|
:type: AWSJobStore
|
|
876
1083
|
"""
|
|
877
1084
|
|
|
878
|
-
def __init__(
|
|
879
|
-
|
|
1085
|
+
def __init__(
|
|
1086
|
+
self,
|
|
1087
|
+
fileID,
|
|
1088
|
+
ownerID,
|
|
1089
|
+
encrypted,
|
|
1090
|
+
version=None,
|
|
1091
|
+
content=None,
|
|
1092
|
+
numContentChunks=0,
|
|
1093
|
+
checksum=None,
|
|
1094
|
+
):
|
|
880
1095
|
"""
|
|
881
1096
|
:type fileID: str
|
|
882
1097
|
:param fileID: the file's ID
|
|
@@ -955,32 +1170,45 @@ class AWSJobStore(AbstractJobStore):
|
|
|
955
1170
|
assert content is None or isinstance(content, bytes)
|
|
956
1171
|
self._content = content
|
|
957
1172
|
if content is not None:
|
|
958
|
-
self.version =
|
|
1173
|
+
self.version = ""
|
|
959
1174
|
|
|
960
1175
|
@classmethod
|
|
961
|
-
def create(cls, ownerID):
|
|
962
|
-
return cls(
|
|
1176
|
+
def create(cls, ownerID: str):
|
|
1177
|
+
return cls(
|
|
1178
|
+
str(uuid.uuid4()), ownerID, encrypted=cls.outer.sseKeyPath is not None
|
|
1179
|
+
)
|
|
963
1180
|
|
|
964
1181
|
@classmethod
|
|
965
1182
|
def presenceIndicator(cls):
|
|
966
|
-
return
|
|
1183
|
+
return "encrypted"
|
|
967
1184
|
|
|
968
1185
|
@classmethod
|
|
969
1186
|
def exists(cls, jobStoreFileID):
|
|
970
1187
|
for attempt in retry_sdb():
|
|
971
1188
|
with attempt:
|
|
972
|
-
return bool(
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
1189
|
+
return bool(
|
|
1190
|
+
cls.outer.db.get_attributes(
|
|
1191
|
+
DomainName=cls.outer.files_domain_name,
|
|
1192
|
+
ItemName=compat_bytes(jobStoreFileID),
|
|
1193
|
+
AttributeNames=[cls.presenceIndicator()],
|
|
1194
|
+
ConsistentRead=True,
|
|
1195
|
+
).get("Attributes", [])
|
|
1196
|
+
)
|
|
976
1197
|
|
|
977
1198
|
@classmethod
|
|
978
1199
|
def load(cls, jobStoreFileID):
|
|
979
1200
|
for attempt in retry_sdb():
|
|
980
1201
|
with attempt:
|
|
981
1202
|
self = cls.fromItem(
|
|
982
|
-
|
|
983
|
-
|
|
1203
|
+
{
|
|
1204
|
+
"Name": compat_bytes(jobStoreFileID),
|
|
1205
|
+
"Attributes": cls.outer.db.get_attributes(
|
|
1206
|
+
DomainName=cls.outer.files_domain_name,
|
|
1207
|
+
ItemName=compat_bytes(jobStoreFileID),
|
|
1208
|
+
ConsistentRead=True,
|
|
1209
|
+
).get("Attributes", []),
|
|
1210
|
+
}
|
|
1211
|
+
)
|
|
984
1212
|
return self
|
|
985
1213
|
|
|
986
1214
|
@classmethod
|
|
@@ -1010,7 +1238,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
1010
1238
|
return self
|
|
1011
1239
|
|
|
1012
1240
|
@classmethod
|
|
1013
|
-
def fromItem(cls, item):
|
|
1241
|
+
def fromItem(cls, item: "ItemTypeDef"):
|
|
1014
1242
|
"""
|
|
1015
1243
|
Convert an SDB item to an instance of this class.
|
|
1016
1244
|
|
|
@@ -1023,31 +1251,37 @@ class AWSJobStore(AbstractJobStore):
             return s if s is None else str(s)

         # ownerID and encrypted are the only mandatory attributes
-        ownerID =
-
+        ownerID, encrypted, version, checksum = SDBHelper.get_attributes_from_item(
+            item, ["ownerID", "encrypted", "version", "checksum"]
+        )
         if ownerID is None:
             assert encrypted is None
             return None
         else:
-            version = strOrNone(item['version'])
-            checksum = strOrNone(item.get('checksum'))
             encrypted = strict_bool(encrypted)
-            content, numContentChunks = cls.attributesToBinary(item)
+            content, numContentChunks = cls.attributesToBinary(item["Attributes"])
             if encrypted:
                 sseKeyPath = cls.outer.sseKeyPath
                 if sseKeyPath is None:
-                    raise AssertionError(
+                    raise AssertionError(
+                        "Content is encrypted but no key was provided."
+                    )
                 if content is not None:
                     content = encryption.decrypt(content, sseKeyPath)
-            self = cls(
-
+            self = cls(
+                fileID=item["Name"],
+                ownerID=ownerID,
+                encrypted=encrypted,
+                version=version,
+                content=content,
+                numContentChunks=numContentChunks,
+                checksum=checksum,
+            )
             return self

-    def toItem(self):
+    def toItem(self) -> tuple[dict[str, str], int]:
         """
-        Convert this instance to
-
-        :rtype: (dict,int)
+        Convert this instance to a dictionary of attribute names to values

         :return: the attributes dict and an integer specifying the the number of chunk
         attributes in the dictionary that are used for storing inlined content.
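The reworked fromItem() above pulls the named attributes out of a boto3 SimpleDB item via toil's SDBHelper.get_attributes_from_item (presumably defined with the other SDB helpers in toil/jobStores/aws/utils.py). As a rough illustration only, and not the real helper, a hypothetical standalone equivalent consistent with how it is used here could look like this:

    # Hypothetical sketch of what a helper like SDBHelper.get_attributes_from_item
    # is assumed to do: extract named attributes from a boto3 SimpleDB item, which
    # arrives as {"Name": ..., "Attributes": [{"Name": ..., "Value": ...}, ...]}.
    from typing import Optional

    def get_attributes_from_item(item: dict, names: list[str]) -> list[Optional[str]]:
        # Index the attribute list by name; missing attributes come back as None,
        # mirroring the "ownerID is None" check in the diff above.
        by_name = {a["Name"]: a["Value"] for a in item.get("Attributes", [])}
        return [by_name.get(n) for n in names]

    # Example:
    # item = {"Name": "abc", "Attributes": [{"Name": "ownerID", "Value": "o1"},
    #                                       {"Name": "encrypted", "Value": "True"}]}
    # ownerID, encrypted, version, checksum = get_attributes_from_item(
    #     item, ["ownerID", "encrypted", "version", "checksum"])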
@@ -1057,15 +1291,21 @@ class AWSJobStore(AbstractJobStore):
         if self.encrypted and content is not None:
             sseKeyPath = self.outer.sseKeyPath
             if sseKeyPath is None:
-                raise AssertionError(
+                raise AssertionError(
+                    "Encryption requested but no key was provided."
+                )
             content = encryption.encrypt(content, sseKeyPath)
         assert content is None or isinstance(content, bytes)
         attributes = self.binaryToAttributes(content)
-        numChunks = attributes[
-        attributes.update(
-
-
-
+        numChunks = int(attributes["numChunks"])
+        attributes.update(
+            dict(
+                ownerID=self.ownerID or "",
+                encrypted=str(self.encrypted),
+                version=self.version or "",
+                checksum=self.checksum or "",
+            )
+        )
         return attributes, numChunks

     @classmethod
@@ -1078,32 +1318,67 @@ class AWSJobStore(AbstractJobStore):

     def save(self):
         attributes, numNewContentChunks = self.toItem()
+        attributes_boto3 = SDBHelper.attributeDictToList(attributes)
         # False stands for absence
-
+        if self.previousVersion is None:
+            expected: "UpdateConditionTypeDef" = {
+                "Name": "version",
+                "Exists": False,
+            }
+        else:
+            expected = {"Name": "version", "Value": cast(str, self.previousVersion)}
         try:
             for attempt in retry_sdb():
                 with attempt:
-
-
-
+                    self.outer.db.put_attributes(
+                        DomainName=self.outer.files_domain_name,
+                        ItemName=compat_bytes(self.fileID),
+                        Attributes=[
+                            {
+                                "Name": attribute["Name"],
+                                "Value": attribute["Value"],
+                                "Replace": True,
+                            }
+                            for attribute in attributes_boto3
+                        ],
+                        Expected=expected,
+                    )
             # clean up the old version of the file if necessary and safe
             if self.previousVersion and (self.previousVersion != self.version):
                 for attempt in retry_s3():
                     with attempt:
-                        self.outer.s3_client.delete_object(
-
-
+                        self.outer.s3_client.delete_object(
+                            Bucket=self.outer.files_bucket.name,
+                            Key=compat_bytes(self.fileID),
+                            VersionId=self.previousVersion,
+                        )
             self._previousVersion = self._version
             if numNewContentChunks < self._numContentChunks:
                 residualChunks = range(numNewContentChunks, self._numContentChunks)
-
+                residual_chunk_names = [self._chunkName(i) for i in residualChunks]
+                # boto3 requires providing the value as well as the name in the attribute, and we don't store it locally
+                # the php sdk resolves this issue by not requiring the Value key https://github.com/aws/aws-sdk-php/issues/185
+                # but this doesnt extend to boto3
+                delete_attributes = self.outer.db.get_attributes(
+                    DomainName=self.outer.files_domain_name,
+                    ItemName=compat_bytes(self.fileID),
+                    AttributeNames=[chunk for chunk in residual_chunk_names],
+                ).get("Attributes")
                 for attempt in retry_sdb():
                     with attempt:
-                        self.outer.
-
+                        self.outer.db.delete_attributes(
+                            DomainName=self.outer.files_domain_name,
+                            ItemName=compat_bytes(self.fileID),
+                            Attributes=delete_attributes,
+                        )
+                        self.outer.db.get_attributes(
+                            DomainName=self.outer.files_domain_name,
+                            ItemName=compat_bytes(self.fileID),
+                        )
+
                 self._numContentChunks = numNewContentChunks
-        except
-            if e
+        except ClientError as e:
+            if get_error_code(e) == "ConditionalCheckFailed":
                 raise ConcurrentFileModificationException(self.fileID)
             else:
                 raise
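The new save() turns the previously observed version into a SimpleDB Expected condition, so a concurrent writer trips ConditionalCheckFailed instead of silently overwriting the item. A minimal sketch of that optimistic-concurrency pattern with boto3's SimpleDB client, using placeholder domain, item, and region names (get_error_code in the diff is toil's own helper; this sketch reads the error code directly):

    # Sketch only: optimistic concurrency via a conditional SimpleDB write.
    from typing import Optional
    import boto3
    from botocore.exceptions import ClientError

    sdb = boto3.client("sdb", region_name="us-west-2")  # placeholder region

    def conditional_save(domain: str, item: str, attrs: dict, previous_version: Optional[str]):
        # Require "no version stored yet" on first save, otherwise require the
        # stored version to still match what we last read.
        expected = (
            {"Name": "version", "Exists": False}
            if previous_version is None
            else {"Name": "version", "Value": previous_version}
        )
        try:
            sdb.put_attributes(
                DomainName=domain,
                ItemName=item,
                Attributes=[{"Name": k, "Value": v, "Replace": True} for k, v in attrs.items()],
                Expected=expected,
            )
        except ClientError as e:
            if e.response["Error"]["Code"] == "ConditionalCheckFailed":
                raise RuntimeError(f"Item {item} was modified concurrently") from e
            raise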
@@ -1111,24 +1386,30 @@ class AWSJobStore(AbstractJobStore):
     def upload(self, localFilePath, calculateChecksum=True):
         file_size, file_time = fileSizeAndTime(localFilePath)
         if file_size <= self.maxInlinedSize():
-            with open(localFilePath,
+            with open(localFilePath, "rb") as f:
                 self.content = f.read()
             # Clear out any old checksum in case of overwrite
-            self.checksum =
+            self.checksum = ""
         else:
             headerArgs = self._s3EncryptionArgs()
             # Create a new Resource in case it needs to be on its own thread
-            resource = boto3_session.resource(
-
-            self.checksum =
-
-
-
-
-
-
-
-
+            resource = boto3_session.resource("s3", region_name=self.outer.region)
+
+            self.checksum = (
+                self._get_file_checksum(localFilePath)
+                if calculateChecksum
+                else None
+            )
+            self.version = uploadFromPath(
+                localFilePath,
+                resource=resource,
+                bucketName=self.outer.files_bucket.name,
+                fileID=compat_bytes(self.fileID),
+                headerArgs=headerArgs,
+                partSize=self.outer.part_size,
+            )
+
+    def _start_checksum(self, to_match=None, algorithm="sha1"):
         """
         Get a hasher that can be used with _update_checksum and
         _finish_checksum.
@@ -1146,12 +1427,12 @@ class AWSJobStore(AbstractJobStore):
         expected = None

         if to_match is not None:
-            parts = to_match.split(
+            parts = to_match.split("$")
             algorithm = parts[0]
             expected = parts[1]

         wrapped = getattr(hashlib, algorithm)()
-        logger.debug(f
+        logger.debug(f"Starting {algorithm} checksum to match {expected}")
         return algorithm, wrapped, expected

     def _update_checksum(self, checksum_in_progress, data):
@@ -1168,26 +1449,32 @@ class AWSJobStore(AbstractJobStore):

         result_hash = checksum_in_progress[1].hexdigest()

-        logger.debug(
+        logger.debug(
+            f"Completed checksum with hash {result_hash} vs. expected {checksum_in_progress[2]}"
+        )
         if checksum_in_progress[2] is not None:
             # We expected a particular hash
             if result_hash != checksum_in_progress[2]:
-                raise ChecksumError(
-
+                raise ChecksumError(
+                    "Checksum mismatch. Expected: %s Actual: %s"
+                    % (checksum_in_progress[2], result_hash)
+                )

-        return
+        return "$".join([checksum_in_progress[0], result_hash])

     def _get_file_checksum(self, localFilePath, to_match=None):
-        with open(localFilePath,
+        with open(localFilePath, "rb") as f:
             hasher = self._start_checksum(to_match=to_match)
             contents = f.read(1024 * 1024)
-            while contents != b
+            while contents != b"":
                 self._update_checksum(hasher, contents)
                 contents = f.read(1024 * 1024)
             return self._finish_checksum(hasher)

     @contextmanager
-    def uploadStream(
+    def uploadStream(
+        self, multipart=True, allowInlining=True, encoding=None, errors=None
+    ):
         """
         Context manager that gives out a binary or text mode upload stream to upload data.
         """
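The checksum helpers above serialize a digest as "algorithm$hexdigest", so _finish_checksum can both record a new checksum and compare against an expected one carried in the same string. A small self-contained sketch of that convention, with hypothetical helper names:

    # Sketch of the "<algorithm>$<hexdigest>" checksum convention used above.
    import hashlib

    def file_checksum(path: str, algorithm: str = "sha1", chunk_size: int = 1024 * 1024) -> str:
        hasher = hashlib.new(algorithm)
        with open(path, "rb") as f:
            while True:
                chunk = f.read(chunk_size)
                if not chunk:
                    break
                hasher.update(chunk)
        return f"{algorithm}${hasher.hexdigest()}"

    def verify_checksum(path: str, expected: str) -> None:
        # The algorithm travels with the digest, so verification re-hashes with it.
        algorithm, digest = expected.split("$", 1)
        actual = file_checksum(path, algorithm)
        if actual != expected:
            raise ValueError(f"Checksum mismatch. Expected: {digest} Actual: {actual.split('$', 1)[1]}")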
@@ -1204,71 +1491,95 @@ class AWSJobStore(AbstractJobStore):
         class MultiPartPipe(WritablePipe):
             def readFrom(self, readable):
                 # Get the first block of data we want to put
-                buf = readable.read(store.
+                buf = readable.read(store.part_size)
                 assert isinstance(buf, bytes)

                 if allowInlining and len(buf) <= info.maxInlinedSize():
-                    logger.debug(
+                    logger.debug("Inlining content of %d bytes", len(buf))
                     info.content = buf
                     # There will be no checksum
-                    info.checksum =
+                    info.checksum = ""
                 else:
                     # We will compute a checksum
                     hasher = info._start_checksum()
-                    logger.debug(
+                    logger.debug("Updating checksum with %d bytes", len(buf))
                     info._update_checksum(hasher, buf)

                     client = store.s3_client
-                    bucket_name = store.
+                    bucket_name = store.files_bucket.name
                     headerArgs = info._s3EncryptionArgs()

                     for attempt in retry_s3():
                         with attempt:
-                            logger.debug(
+                            logger.debug("Starting multipart upload")
                             # low-level clients are thread safe
-                            upload = client.create_multipart_upload(
-
-
-
+                            upload = client.create_multipart_upload(
+                                Bucket=bucket_name,
+                                Key=compat_bytes(info.fileID),
+                                **headerArgs,
+                            )
+                            uploadId = upload["UploadId"]
                             parts = []
-                            logger.debug(
-
+                            logger.debug("Multipart upload started as %s", uploadId)

                     for attempt in retry_s3():
                         with attempt:
                             for i in range(CONSISTENCY_TICKS):
                                 # Sometimes we can create a multipart upload and not see it. Wait around for it.
-                                response = client.list_multipart_uploads(
-
-
-
-
-
-
-
+                                response = client.list_multipart_uploads(
+                                    Bucket=bucket_name,
+                                    MaxUploads=1,
+                                    Prefix=compat_bytes(info.fileID),
+                                )
+                                if (
+                                    "Uploads" in response
+                                    and len(response["Uploads"]) != 0
+                                    and response["Uploads"][0]["UploadId"]
+                                    == uploadId
+                                ):
+
+                                    logger.debug(
+                                        "Multipart upload visible as %s", uploadId
+                                    )
                                     break
                                 else:
-                                    logger.debug(
-
+                                    logger.debug(
+                                        "Multipart upload %s is not visible; we see %s",
+                                        uploadId,
+                                        response.get("Uploads"),
+                                    )
+                                    time.sleep(CONSISTENCY_TIME * 2**i)

                     try:
                         for part_num in itertools.count():
                             for attempt in retry_s3():
                                 with attempt:
-                                    logger.debug(
+                                    logger.debug(
+                                        "Uploading part %d of %d bytes to %s",
+                                        part_num + 1,
+                                        len(buf),
+                                        uploadId,
+                                    )
                                     # TODO: include the Content-MD5 header:
                                     # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.complete_multipart_upload
-                                    part = client.upload_part(
-
-
-
-
-
-
-
+                                    part = client.upload_part(
+                                        Bucket=bucket_name,
+                                        Key=compat_bytes(info.fileID),
+                                        PartNumber=part_num + 1,
+                                        UploadId=uploadId,
+                                        Body=BytesIO(buf),
+                                        **headerArgs,
+                                    )
+
+                                    parts.append(
+                                        {
+                                            "PartNumber": part_num + 1,
+                                            "ETag": part["ETag"],
+                                        }
+                                    )

                             # Get the next block of data we want to put
-                            buf = readable.read(info.outer.
+                            buf = readable.read(info.outer.part_size)
                             assert isinstance(buf, bytes)
                             if len(buf) == 0:
                                 # Don't allow any part other than the very first to be empty.
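MultiPartPipe drives the standard boto3 multipart-upload lifecycle: create_multipart_upload, one numbered upload_part per chunk, then complete_multipart_upload (or abort_multipart_upload on failure). A minimal sketch of that lifecycle outside of Toil, with placeholder bucket, key, and file names:

    # Sketch only: the boto3 multipart-upload lifecycle with placeholder names.
    import boto3

    s3 = boto3.client("s3")
    bucket, key = "example-bucket", "example-key"   # placeholders
    part_size = 64 * 1024 * 1024                    # non-final parts must be >= 5 MiB

    upload_id = s3.create_multipart_upload(Bucket=bucket, Key=key)["UploadId"]
    parts = []
    try:
        with open("payload.bin", "rb") as f:        # placeholder local file
            part_number = 1
            while True:
                chunk = f.read(part_size)
                if not chunk and parts:
                    break                           # done; an empty *first* part is still allowed
                part = s3.upload_part(Bucket=bucket, Key=key, PartNumber=part_number,
                                      UploadId=upload_id, Body=chunk)
                parts.append({"PartNumber": part_number, "ETag": part["ETag"]})
                if not chunk:
                    break
                part_number += 1
        s3.complete_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id,
                                     MultipartUpload={"Parts": parts})
    except Exception:
        # Abort so incomplete parts don't linger and accrue storage charges.
        s3.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id)
        raise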
@@ -1278,15 +1589,21 @@ class AWSJobStore(AbstractJobStore):
                         with panic(log=logger):
                             for attempt in retry_s3():
                                 with attempt:
-                                    client.abort_multipart_upload(
-
-
+                                    client.abort_multipart_upload(
+                                        Bucket=bucket_name,
+                                        Key=compat_bytes(info.fileID),
+                                        UploadId=uploadId,
+                                    )

                     else:

-                        while not store._getBucketVersioning(
-
-
+                        while not store._getBucketVersioning(
+                            store.files_bucket.name
+                        ):
+                            logger.warning(
+                                "Versioning does not appear to be enabled yet. Deferring multipart "
+                                "upload completion..."
+                            )
                             time.sleep(1)

                         # Save the checksum
@@ -1298,32 +1615,46 @@ class AWSJobStore(AbstractJobStore):
                             # in tests
                             # (https://github.com/DataBiosphere/toil/issues/3894)
                             with attempt:
-                                logger.debug(
+                                logger.debug("Attempting to complete upload...")
                                 completed = client.complete_multipart_upload(
                                     Bucket=bucket_name,
                                     Key=compat_bytes(info.fileID),
                                     UploadId=uploadId,
-                                    MultipartUpload={"Parts": parts}
-
-
-
-
-
+                                    MultipartUpload={"Parts": parts},
+                                )
+
+                                logger.debug(
+                                    "Completed upload object of type %s: %s",
+                                    str(type(completed)),
+                                    repr(completed),
+                                )
+                                info.version = completed.get("VersionId")
+                                logger.debug(
+                                    "Completed upload with version %s",
+                                    str(info.version),
+                                )

                         if info.version is None:
                             # Somehow we don't know the version. Try and get it.
-                            for attempt in retry_s3(
+                            for attempt in retry_s3(
+                                predicate=lambda e: retryable_s3_errors(e)
+                                or isinstance(e, AssertionError)
+                            ):
                                 with attempt:
-                                    version = client.head_object(
-
-
-
-
+                                    version = client.head_object(
+                                        Bucket=bucket_name,
+                                        Key=compat_bytes(info.fileID),
+                                        **headerArgs,
+                                    ).get("VersionId", None)
+                                    logger.warning(
+                                        "Loaded key for upload with no version and got version %s",
+                                        str(version),
+                                    )
                                     info.version = version
                         assert info.version is not None

                 # Make sure we actually wrote something, even if an empty file
-                assert
+                assert bool(info.version) or info.content is not None

         class SinglePartPipe(WritablePipe):
             def readFrom(self, readable):
@@ -1331,56 +1662,74 @@ class AWSJobStore(AbstractJobStore):
                 assert isinstance(buf, bytes)
                 dataLength = len(buf)
                 if allowInlining and dataLength <= info.maxInlinedSize():
-                    logger.debug(
+                    logger.debug("Inlining content of %d bytes", len(buf))
                     info.content = buf
                     # There will be no checksum
-                    info.checksum =
+                    info.checksum = ""
                 else:
                     # We will compute a checksum
                     hasher = info._start_checksum()
                     info._update_checksum(hasher, buf)
                     info.checksum = info._finish_checksum(hasher)

-                    bucket_name = store.
+                    bucket_name = store.files_bucket.name
                     headerArgs = info._s3EncryptionArgs()
                     client = store.s3_client

                     buf = BytesIO(buf)

                     while not store._getBucketVersioning(bucket_name):
-                        logger.warning(
-
+                        logger.warning(
+                            "Versioning does not appear to be enabled yet. Deferring single part "
+                            "upload..."
+                        )
                         time.sleep(1)

                     for attempt in retry_s3():
                         with attempt:
-                            logger.debug(
-
-
-
-
+                            logger.debug(
+                                "Uploading single part of %d bytes", dataLength
+                            )
+                            client.upload_fileobj(
+                                Bucket=bucket_name,
+                                Key=compat_bytes(info.fileID),
+                                Fileobj=buf,
+                                ExtraArgs=headerArgs,
+                            )

                             # use head_object with the SSE headers to access versionId and content_length attributes
-                            headObj = client.head_object(
-
-
-
-
-
+                            headObj = client.head_object(
+                                Bucket=bucket_name,
+                                Key=compat_bytes(info.fileID),
+                                **headerArgs,
+                            )
+                            assert dataLength == headObj.get("ContentLength", None)
+                            info.version = headObj.get("VersionId", None)
+                            logger.debug(
+                                "Upload received version %s", str(info.version)
+                            )

                     if info.version is None:
                         # Somehow we don't know the version
-                        for attempt in retry_s3(
+                        for attempt in retry_s3(
+                            predicate=lambda e: retryable_s3_errors(e)
+                            or isinstance(e, AssertionError)
+                        ):
                             with attempt:
-                                headObj = client.head_object(
-
-
-
-
+                                headObj = client.head_object(
+                                    Bucket=bucket_name,
+                                    Key=compat_bytes(info.fileID),
+                                    **headerArgs,
+                                )
+                                info.version = headObj.get("VersionId", None)
+                                logger.warning(
+                                    "Reloaded key with no version and got version %s",
+                                    str(info.version),
+                                )
                         assert info.version is not None

                 # Make sure we actually wrote something, even if an empty file
-                assert
+                assert bool(info.version) or info.content is not None

         if multipart:
             pipe = MultiPartPipe(encoding=encoding, errors=errors)
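SinglePartPipe instead streams the whole buffer with upload_fileobj and then calls head_object to confirm the ContentLength and pick up the VersionId that a versioning-enabled bucket assigns. A stripped-down sketch of that round trip, with placeholder names:

    # Sketch only: single-part upload plus head_object readback, placeholder names.
    import io
    import boto3

    s3 = boto3.client("s3")
    bucket, key = "example-versioned-bucket", "example-key"
    data = b"hello world"

    s3.upload_fileobj(Fileobj=io.BytesIO(data), Bucket=bucket, Key=key)

    head = s3.head_object(Bucket=bucket, Key=key)
    assert head["ContentLength"] == len(data)
    version_id = head.get("VersionId")   # None unless bucket versioning is enabled
    print("stored as version", version_id)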
@@ -1391,20 +1740,22 @@ class AWSJobStore(AbstractJobStore):
             yield writable

         if not pipe.reader_done:
-            logger.debug(f
-            raise RuntimeError(
+            logger.debug(f"Version: {self.version} Content: {self.content}")
+            raise RuntimeError(
+                "Escaped context manager without written data being read!"
+            )

         # We check our work to make sure we have exactly one of embedded
         # content or a real object version.

         if self.content is None:
             if not bool(self.version):
-                logger.debug(f
-                raise RuntimeError(
+                logger.debug(f"Version: {self.version} Content: {self.content}")
+                raise RuntimeError("No content added and no version created")
         else:
             if bool(self.version):
-                logger.debug(f
-                raise RuntimeError(
+                logger.debug(f"Version: {self.version} Content: {self.content}")
+                raise RuntimeError("Content added and version created")

     def copyFrom(self, srcObj):
         """
@@ -1414,18 +1765,20 @@ class AWSJobStore(AbstractJobStore):
         """
         assert srcObj.content_length is not None
         if srcObj.content_length <= self.maxInlinedSize():
-            self.content = srcObj.get().get(
+            self.content = srcObj.get().get("Body").read()
         else:
             # Create a new Resource in case it needs to be on its own thread
-            resource = boto3_session.resource(
-            self.version = copyKeyMultipart(
-
-
-
-
-
-
-
+            resource = boto3_session.resource("s3", region_name=self.outer.region)
+            self.version = copyKeyMultipart(
+                resource,
+                srcBucketName=compat_bytes(srcObj.bucket_name),
+                srcKeyName=compat_bytes(srcObj.key),
+                srcKeyVersion=compat_bytes(srcObj.version_id),
+                dstBucketName=compat_bytes(self.outer.files_bucket.name),
+                dstKeyName=compat_bytes(self._fileID),
+                sseAlgorithm="AES256",
+                sseKey=self._getSSEKey(),
+            )

     def copyTo(self, dstObj):
         """
@@ -1439,35 +1792,43 @@ class AWSJobStore(AbstractJobStore):
             dstObj.put(Body=self.content)
         elif self.version:
             # Create a new Resource in case it needs to be on its own thread
-            resource = boto3_session.resource(
+            resource = boto3_session.resource("s3", region_name=self.outer.region)

             for attempt in retry_s3():
                 # encrypted = True if self.outer.sseKeyPath else False
                 with attempt:
-                    copyKeyMultipart(
-
-
-
-
-
-
-
+                    copyKeyMultipart(
+                        resource,
+                        srcBucketName=compat_bytes(self.outer.files_bucket.name),
+                        srcKeyName=compat_bytes(self.fileID),
+                        srcKeyVersion=compat_bytes(self.version),
+                        dstBucketName=compat_bytes(dstObj.bucket_name),
+                        dstKeyName=compat_bytes(dstObj.key),
+                        copySourceSseAlgorithm="AES256",
+                        copySourceSseKey=self._getSSEKey(),
+                    )
         else:
             assert False

     def download(self, localFilePath, verifyChecksum=True):
         if self.content is not None:
             with AtomicFileCreate(localFilePath) as tmpPath:
-                with open(tmpPath,
+                with open(tmpPath, "wb") as f:
                     f.write(self.content)
         elif self.version:
             headerArgs = self._s3EncryptionArgs()
-            obj = self.outer.
+            obj = self.outer.files_bucket.Object(compat_bytes(self.fileID))

-            for attempt in retry_s3(
+            for attempt in retry_s3(
+                predicate=lambda e: retryable_s3_errors(e)
+                or isinstance(e, ChecksumError)
+            ):
                 with attempt:
                     with AtomicFileCreate(localFilePath) as tmpPath:
-                        obj.download_file(
+                        obj.download_file(
+                            Filename=tmpPath,
+                            ExtraArgs={"VersionId": self.version, **headerArgs},
+                        )

         if verifyChecksum and self.checksum:
             try:
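download() fetches one specific object version and, when the file was written with SSE-C, must resend the same customer key in ExtraArgs. A brief sketch of that call with placeholder bucket, key, version, and key material:

    # Sketch only: versioned download with SSE-C ExtraArgs, placeholder values throughout.
    import boto3

    s3 = boto3.resource("s3")
    obj = s3.Object("example-bucket", "example-key")

    sse_args = {
        "SSECustomerAlgorithm": "AES256",
        "SSECustomerKey": b"0" * 32,     # placeholder 32-byte customer key
    }
    obj.download_file(
        Filename="/tmp/example-key",
        ExtraArgs={"VersionId": "example-version-id", **sse_args},
    )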
@@ -1475,7 +1836,10 @@ class AWSJobStore(AbstractJobStore):
                 self._get_file_checksum(localFilePath, self.checksum)
             except ChecksumError as e:
                 # Annotate checksum mismatches with file name
-                raise ChecksumError(
+                raise ChecksumError(
+                    "Checksums do not match for file %s."
+                    % localFilePath
+                ) from e
             # The error will get caught and result in a retry of the download until we run out of retries.
             # TODO: handle obviously truncated downloads by resuming instead.
         else:
@@ -1494,10 +1858,13 @@ class AWSJobStore(AbstractJobStore):
                     writable.write(info.content)
                 elif info.version:
                     headerArgs = info._s3EncryptionArgs()
-                    obj = info.outer.
+                    obj = info.outer.files_bucket.Object(compat_bytes(info.fileID))
                     for attempt in retry_s3():
                         with attempt:
-                            obj.download_fileobj(
+                            obj.download_fileobj(
+                                writable,
+                                ExtraArgs={"VersionId": info.version, **headerArgs},
+                            )
                 else:
                     assert False

@@ -1513,7 +1880,7 @@ class AWSJobStore(AbstractJobStore):
             def transform(self, readable, writable):
                 hasher = info._start_checksum(to_match=info.checksum)
                 contents = readable.read(1024 * 1024)
-                while contents != b
+                while contents != b"":
                     info._update_checksum(hasher, contents)
                     try:
                         writable.write(contents)
@@ -1530,7 +1897,9 @@ class AWSJobStore(AbstractJobStore):
         if verifyChecksum and self.checksum:
             with DownloadPipe() as readable:
                 # Interpose a pipe to check the hash
-                with HashingPipe(
+                with HashingPipe(
+                    readable, encoding=encoding, errors=errors
+                ) as verified:
                     yield verified
         else:
             # Readable end of pipe produces text mode output if encoding specified
@@ -1541,17 +1910,25 @@ class AWSJobStore(AbstractJobStore):
     def delete(self):
         store = self.outer
         if self.previousVersion is not None:
+            expected: "UpdateConditionTypeDef" = {
+                "Name": "version",
+                "Value": cast(str, self.previousVersion),
+            }
             for attempt in retry_sdb():
                 with attempt:
-                    store.
-
-
+                    store.db.delete_attributes(
+                        DomainName=store.files_domain_name,
+                        ItemName=compat_bytes(self.fileID),
+                        Expected=expected,
+                    )
             if self.previousVersion:
                 for attempt in retry_s3():
                     with attempt:
-                        store.s3_client.delete_object(
-
-
+                        store.s3_client.delete_object(
+                            Bucket=store.files_bucket.name,
+                            Key=compat_bytes(self.fileID),
+                            VersionId=self.previousVersion,
+                        )

     def getSize(self):
         """
@@ -1562,7 +1939,7 @@ class AWSJobStore(AbstractJobStore):
         elif self.version:
             for attempt in retry_s3():
                 with attempt:
-                    obj = self.outer.
+                    obj = self.outer.files_bucket.Object(compat_bytes(self.fileID))
                     return obj.content_length
         else:
             return 0
@@ -1570,7 +1947,7 @@ class AWSJobStore(AbstractJobStore):
     def _getSSEKey(self) -> Optional[bytes]:
         sseKeyPath = self.outer.sseKeyPath
         if sseKeyPath:
-            with open(sseKeyPath,
+            with open(sseKeyPath, "rb") as f:
                 sseKey = f.read()
             return sseKey

@@ -1579,25 +1956,30 @@ class AWSJobStore(AbstractJobStore):
         # parameters and will be used to set the http headers
         if self.encrypted:
             sseKey = self._getSSEKey()
-            assert
+            assert (
+                sseKey is not None
+            ), "Content is encrypted but no key was provided."
             assert len(sseKey) == 32
             # boto3 encodes the key and calculates the MD5 for us
-            return {
+            return {"SSECustomerAlgorithm": "AES256", "SSECustomerKey": sseKey}
         else:
             return {}

     def __repr__(self):
         r = custom_repr
-        d = (
-
-
-
-
-
-
-
-
-
+        d = (
+            ("fileID", r(self.fileID)),
+            ("ownerID", r(self.ownerID)),
+            ("encrypted", r(self.encrypted)),
+            ("version", r(self.version)),
+            ("previousVersion", r(self.previousVersion)),
+            ("content", r(self.content)),
+            ("checksum", r(self.checksum)),
+            ("_numContentChunks", r(self._numContentChunks)),
+        )
+        return "{}({})".format(
+            type(self).__name__, ", ".join(f"{k}={v}" for k, v in d)
+        )

     versionings = dict(Enabled=True, Disabled=False, Suspended=None)

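_s3EncryptionArgs() returns the SSE-C parameter pair that boto3 expands into the x-amz-server-side-encryption-customer-* headers (boto3 base64-encodes the key and adds the key MD5 itself). A short sketch of how such a dict is consumed on both the write and the read side, with placeholder names and key material:

    # Sketch only: using SSE-C parameters with put_object/get_object, placeholder values.
    import boto3

    s3 = boto3.client("s3")
    sse_args = {"SSECustomerAlgorithm": "AES256", "SSECustomerKey": b"k" * 32}

    s3.put_object(Bucket="example-bucket", Key="example-key", Body=b"secret", **sse_args)

    # Reads fail with HTTP 400 unless the identical customer key is supplied again:
    resp = s3.get_object(Bucket="example-bucket", Key="example-key", **sse_args)
    print(resp["Body"].read())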
@@ -1631,22 +2013,22 @@ class AWSJobStore(AbstractJobStore):
             pass
         # TODO: Add other failure cases to be ignored here.
         self._registered = None
-        if self.
-            self._delete_bucket(self.
-            self.
-        for name in
-
-            if
-                self._delete_domain(
+        if self.files_bucket is not None:
+            self._delete_bucket(self.files_bucket)
+            self.files_bucket = None
+        for name in "files_domain_name", "jobs_domain_name":
+            domainName = getattr(self, name)
+            if domainName is not None:
+                self._delete_domain(domainName)
             setattr(self, name, None)
         self._registered = False

-    def _delete_domain(self,
+    def _delete_domain(self, domainName):
        for attempt in retry_sdb():
            with attempt:
                try:
-
-                except
+                    self.db.delete_domain(DomainName=domainName)
+                except ClientError as e:
                    if not no_such_sdb_domain(e):
                        raise

@@ -1658,12 +2040,14 @@ class AWSJobStore(AbstractJobStore):
        for attempt in retry_s3():
            with attempt:
                try:
-                    uploads = s3_boto3_client.list_multipart_uploads(
+                    uploads = s3_boto3_client.list_multipart_uploads(
+                        Bucket=bucket.name
+                    ).get("Uploads")
                    if uploads:
                        for u in uploads:
-                            s3_boto3_client.abort_multipart_upload(
-
-
+                            s3_boto3_client.abort_multipart_upload(
+                                Bucket=bucket.name, Key=u["Key"], UploadId=u["UploadId"]
+                            )

                    bucket.objects.all().delete()
                    bucket.object_versions.delete()
@@ -1680,8 +2064,10 @@ aRepr.maxstring = 38  # so UUIDs don't get truncated (36 for UUID plus 2 for quo
custom_repr = aRepr.repr


-class BucketLocationConflictException(
+class BucketLocationConflictException(LocatorException):
    def __init__(self, bucketRegion):
        super().__init__(
-
-
+            "A bucket with the same name as the jobstore was found in another region (%s). "
+            "Cannot proceed as the unique bucket name is already in use.",
+            locator=bucketRegion,
+        )