toil 9.1.2__py3-none-any.whl → 9.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +5 -9
- toil/batchSystems/abstractBatchSystem.py +23 -22
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -12
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/cleanup_support.py +4 -4
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/gridengine.py +3 -4
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +65 -63
- toil/batchSystems/local_support.py +2 -3
- toil/batchSystems/lsf.py +6 -7
- toil/batchSystems/mesos/batchSystem.py +11 -7
- toil/batchSystems/mesos/test/__init__.py +1 -2
- toil/batchSystems/options.py +9 -10
- toil/batchSystems/registry.py +3 -7
- toil/batchSystems/singleMachine.py +8 -11
- toil/batchSystems/slurm.py +49 -38
- toil/batchSystems/torque.py +3 -4
- toil/bus.py +36 -34
- toil/common.py +129 -89
- toil/cwl/cwltoil.py +857 -729
- toil/cwl/utils.py +44 -35
- toil/fileStores/__init__.py +3 -1
- toil/fileStores/abstractFileStore.py +28 -30
- toil/fileStores/cachingFileStore.py +8 -8
- toil/fileStores/nonCachingFileStore.py +10 -21
- toil/job.py +159 -158
- toil/jobStores/abstractJobStore.py +68 -69
- toil/jobStores/aws/jobStore.py +249 -213
- toil/jobStores/aws/utils.py +13 -24
- toil/jobStores/fileJobStore.py +28 -22
- toil/jobStores/googleJobStore.py +21 -17
- toil/jobStores/utils.py +3 -7
- toil/leader.py +14 -14
- toil/lib/accelerators.py +6 -4
- toil/lib/aws/__init__.py +9 -10
- toil/lib/aws/ami.py +33 -19
- toil/lib/aws/iam.py +6 -6
- toil/lib/aws/s3.py +259 -157
- toil/lib/aws/session.py +76 -76
- toil/lib/aws/utils.py +51 -43
- toil/lib/checksum.py +19 -15
- toil/lib/compatibility.py +3 -2
- toil/lib/conversions.py +45 -18
- toil/lib/directory.py +29 -26
- toil/lib/docker.py +93 -99
- toil/lib/dockstore.py +77 -50
- toil/lib/ec2.py +39 -38
- toil/lib/ec2nodes.py +11 -4
- toil/lib/exceptions.py +8 -5
- toil/lib/ftp_utils.py +9 -14
- toil/lib/generatedEC2Lists.py +161 -20
- toil/lib/history.py +141 -97
- toil/lib/history_submission.py +163 -72
- toil/lib/io.py +27 -17
- toil/lib/memoize.py +2 -1
- toil/lib/misc.py +15 -11
- toil/lib/pipes.py +40 -25
- toil/lib/plugins.py +12 -8
- toil/lib/resources.py +1 -0
- toil/lib/retry.py +32 -38
- toil/lib/threading.py +12 -12
- toil/lib/throttle.py +1 -2
- toil/lib/trs.py +113 -51
- toil/lib/url.py +14 -23
- toil/lib/web.py +7 -2
- toil/options/common.py +18 -15
- toil/options/cwl.py +2 -2
- toil/options/runner.py +9 -5
- toil/options/wdl.py +1 -3
- toil/provisioners/__init__.py +9 -9
- toil/provisioners/abstractProvisioner.py +22 -20
- toil/provisioners/aws/__init__.py +20 -14
- toil/provisioners/aws/awsProvisioner.py +10 -8
- toil/provisioners/clusterScaler.py +19 -18
- toil/provisioners/gceProvisioner.py +2 -3
- toil/provisioners/node.py +11 -13
- toil/realtimeLogger.py +4 -4
- toil/resource.py +5 -5
- toil/server/app.py +2 -2
- toil/server/cli/wes_cwl_runner.py +11 -11
- toil/server/utils.py +18 -21
- toil/server/wes/abstract_backend.py +9 -8
- toil/server/wes/amazon_wes_utils.py +3 -3
- toil/server/wes/tasks.py +3 -5
- toil/server/wes/toil_backend.py +17 -21
- toil/server/wsgi_app.py +3 -3
- toil/serviceManager.py +3 -4
- toil/statsAndLogging.py +12 -13
- toil/test/__init__.py +33 -24
- toil/test/batchSystems/batchSystemTest.py +12 -11
- toil/test/batchSystems/batch_system_plugin_test.py +3 -5
- toil/test/batchSystems/test_slurm.py +38 -24
- toil/test/cwl/conftest.py +5 -6
- toil/test/cwl/cwlTest.py +194 -78
- toil/test/cwl/download_file_uri.json +6 -0
- toil/test/cwl/download_file_uri_no_hostname.json +6 -0
- toil/test/docs/scripts/tutorial_staging.py +1 -0
- toil/test/jobStores/jobStoreTest.py +9 -7
- toil/test/lib/aws/test_iam.py +1 -3
- toil/test/lib/aws/test_s3.py +1 -1
- toil/test/lib/dockerTest.py +9 -9
- toil/test/lib/test_ec2.py +12 -11
- toil/test/lib/test_history.py +4 -4
- toil/test/lib/test_trs.py +16 -14
- toil/test/lib/test_url.py +7 -6
- toil/test/lib/url_plugin_test.py +12 -18
- toil/test/provisioners/aws/awsProvisionerTest.py +10 -8
- toil/test/provisioners/clusterScalerTest.py +2 -5
- toil/test/provisioners/clusterTest.py +1 -3
- toil/test/server/serverTest.py +13 -4
- toil/test/sort/restart_sort.py +2 -6
- toil/test/sort/sort.py +3 -8
- toil/test/src/deferredFunctionTest.py +7 -7
- toil/test/src/environmentTest.py +1 -2
- toil/test/src/fileStoreTest.py +5 -5
- toil/test/src/importExportFileTest.py +5 -6
- toil/test/src/jobServiceTest.py +22 -14
- toil/test/src/jobTest.py +121 -25
- toil/test/src/miscTests.py +5 -7
- toil/test/src/promisedRequirementTest.py +8 -7
- toil/test/src/regularLogTest.py +2 -3
- toil/test/src/resourceTest.py +5 -8
- toil/test/src/restartDAGTest.py +5 -6
- toil/test/src/resumabilityTest.py +2 -2
- toil/test/src/retainTempDirTest.py +3 -3
- toil/test/src/systemTest.py +3 -3
- toil/test/src/threadingTest.py +1 -1
- toil/test/src/workerTest.py +1 -2
- toil/test/utils/toilDebugTest.py +6 -4
- toil/test/utils/toilKillTest.py +1 -1
- toil/test/utils/utilsTest.py +15 -14
- toil/test/wdl/wdltoil_test.py +247 -124
- toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
- toil/toilState.py +2 -3
- toil/utils/toilDebugFile.py +3 -8
- toil/utils/toilDebugJob.py +1 -2
- toil/utils/toilLaunchCluster.py +1 -2
- toil/utils/toilSshCluster.py +2 -0
- toil/utils/toilStats.py +19 -24
- toil/utils/toilStatus.py +11 -14
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +313 -209
- toil/worker.py +18 -12
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/METADATA +11 -14
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/RECORD +150 -153
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/WHEEL +1 -1
- toil/test/cwl/staging_cat.cwl +0 -27
- toil/test/cwl/staging_make_file.cwl +0 -25
- toil/test/cwl/staging_workflow.cwl +0 -43
- toil/test/cwl/zero_default.cwl +0 -61
- toil/test/utils/ABCWorkflowDebug/ABC.txt +0 -1
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/entry_points.txt +0 -0
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/licenses/LICENSE +0 -0
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/top_level.txt +0 -0
toil/jobStores/aws/jobStore.py  CHANGED

@@ -26,72 +26,58 @@ Reasons for this

 Variables defining part size, parallelization, and other constants should live in toil.lib.aws.config.
 """
-import
+import datetime
 import json
 import logging
+import os
 import pickle
 import re
 import stat
 import uuid
-import
-
-from io import BytesIO
+from collections.abc import Callable, Iterator
 from contextlib import contextmanager
+from io import BytesIO
+from typing import IO, Any, ContextManager, Literal, overload
 from urllib.parse import ParseResult, urlparse
-from typing import (
-    ContextManager,
-    IO,
-    TYPE_CHECKING,
-    Optional,
-    Union,
-    cast,
-    Tuple,
-    Callable,
-    Dict,
-    Any,
-    Iterator,
-    Literal,
-    overload
-)

 # This file can't be imported if the AWS modules are not available.
 from botocore.exceptions import ClientError

+from toil.common import Config
 from toil.fileStores import FileID
-from toil.
-
-
-
+from toil.job import JobDescription
+from toil.jobStores.abstractJobStore import (
+    AbstractJobStore,
+    JobStoreExistsException,
+    NoSuchFileException,
+    NoSuchJobException,
+    NoSuchJobStoreException,
+)
 from toil.lib.aws.s3 import (
-
-
+    AWSKeyNotFoundError,
+    MultiPartPipe,
     bucket_exists,
-    copy_s3_to_s3,
     copy_local_to_s3,
     copy_s3_to_local,
-
-
-
-
+    copy_s3_to_s3,
+    create_public_url,
+    create_s3_bucket,
+    delete_s3_bucket,
     download_stream,
-    s3_key_exists,
-    head_s3_object,
     get_s3_object,
+    head_s3_object,
+    list_s3_items,
+    parse_s3_uri,
     put_s3_object,
-
-
+    s3_key_exists,
+    upload_to_s3,
 )
+from toil.lib.aws.session import establish_boto3_session
 from toil.lib.aws.utils import get_object_for_url, list_objects_for_url
-from toil.common import Config
-from toil.jobStores.abstractJobStore import NoSuchFileException
 from toil.lib.ec2nodes import EC2Regions
 from toil.lib.retry import get_error_status
-from toil.version import version
-from toil.lib.aws.session import establish_boto3_session
-from toil.job import JobDescription, Job
 from toil.lib.url import URLAccess

-
 DEFAULT_AWS_PART_SIZE = 52428800
 logger = logging.getLogger(__name__)

@@ -159,8 +145,9 @@ class AWSJobStore(AbstractJobStore, URLAccess):
     initialized with and warn the user if restarting with a different
     version.
     """
+
     def __init__(self, locator: str, partSize: int = DEFAULT_AWS_PART_SIZE) -> None:
-        super(
+        super().__init__(locator)
         # TODO: parsing of user options seems like it should be done outside of this class;
         # pass in only the bucket name and region?
         self.region, self.bucket_name = parse_jobstore_identifier(locator)
@@ -174,19 +161,19 @@ class AWSJobStore(AbstractJobStore, URLAccess):
         self.bucket = None

         # pickled job files named with uuid4
-        self.job_key_prefix =
+        self.job_key_prefix = "jobs/"
         # job-file associations; these are empty files mimicking a db w/naming convention: job_uuid4.file_uuid4
         #
         # TODO: a many-to-many system is implemented, but a simpler one-to-many
         # system could be used, because each file should belong to at most one
         # job. This should be changed to a hierarchical layout.
-        self.job_associations_key_prefix =
+        self.job_associations_key_prefix = "job-associations/"
         # input/output files named with uuid4
-        self.content_key_prefix =
+        self.content_key_prefix = "files/"
         # these are special files, like 'environment.pickle'; place them in root
-        self.shared_key_prefix =
+        self.shared_key_prefix = ""
         # read and unread; named with uuid4
-        self.logs_key_prefix =
+        self.logs_key_prefix = "logs/"

    ###################################### CREATE/DESTROY JOBSTORE ######################################

@@ -197,11 +184,13 @@ class AWSJobStore(AbstractJobStore, URLAccess):
         Create bucket, raise if it already exists.
         Set options from config.
         """
-        logger.debug(
+        logger.debug(
+            f"Instantiating {self.__class__} for region {self.region} with bucket: '{self.bucket_name}'"
+        )
         if bucket_exists(self.s3_resource, self.bucket_name):
-            raise JobStoreExistsException(self.locator,
+            raise JobStoreExistsException(self.locator, "aws")
         self.bucket = create_s3_bucket(self.s3_resource, self.bucket_name, region=self.region)  # type: ignore
-        super(
+        super().initialize(config)

     def resume(self) -> None:
         """
@@ -210,11 +199,11 @@ class AWSJobStore(AbstractJobStore, URLAccess):
         :raise NoSuchJobStoreException: if the bucket doesn't exist.
         """
         if not bucket_exists(self.s3_resource, self.bucket_name):
-            raise NoSuchJobStoreException(self.locator,
+            raise NoSuchJobStoreException(self.locator, "aws")
         # This sets self.config to not be None and loads the encryption key
         # path from the unencrypted config. So it needs the bucket to exist to
         # read from.
-        super(
+        super().resume()

     def destroy(self) -> None:
         delete_s3_bucket(self.s3_resource, self.bucket_name)
@@ -233,13 +222,13 @@ class AWSJobStore(AbstractJobStore, URLAccess):
         pasting together of prefixes and identifiers, so it never ahs to be
         mixed with the identifier=/prefix= calling convention.
         """
-        return f
+        return f"{prefix}{identifier}"

     def is_in_bucket(
         self,
         identifier: str,
         prefix: str,
-        bucket:
+        bucket: str | None = None,
     ) -> bool:
         """
         Check if the key for the given identifier and prefix is in the bucket.
@@ -250,17 +239,16 @@ class AWSJobStore(AbstractJobStore, URLAccess):
             s3_resource=self.s3_resource,
             bucket=bucket,
             key=self._key_in_bucket(identifier=identifier, prefix=prefix),
-            extra_args=self._get_encryption_args()
+            extra_args=self._get_encryption_args(),
         )

-
     def write_to_bucket(
-
-
-
-
-
-
+        self,
+        identifier: str,
+        prefix: str,
+        data: bytes | str | dict[str, Any] | None,
+        bucket: str | None = None,
+        encrypted: bool | None = None,
     ) -> None:
         """
         Write something directly to a bucket.
@@ -274,11 +262,11 @@ class AWSJobStore(AbstractJobStore, URLAccess):
         bucket = bucket or self.bucket_name

         if isinstance(data, dict):
-            data = json.dumps(data).encode(
+            data = json.dumps(data).encode("utf-8")
         elif isinstance(data, str):
-            data = data.encode(
+            data = data.encode("utf-8")
         elif data is None:
-            data = b
+            data = b""

         assert isinstance(data, bytes)
         put_s3_object(
@@ -293,7 +281,7 @@ class AWSJobStore(AbstractJobStore, URLAccess):
         self,
         identifier: str,
         prefix: str,
-        bucket:
+        bucket: str | None = None,
     ) -> bytes:
         """
         Read something directly from a bucket.
@@ -315,7 +303,7 @@ class AWSJobStore(AbstractJobStore, URLAccess):
                 bucket=bucket,
                 key=self._key_in_bucket(identifier=identifier, prefix=prefix),
                 extra_args=self._get_encryption_args(),
-            )[
+            )["Body"].read()
         except self.s3_client.exceptions.NoSuchKey:
             if prefix == self.job_key_prefix:
                 raise NoSuchJobException(identifier)
@@ -324,7 +312,7 @@ class AWSJobStore(AbstractJobStore, URLAccess):
             else:
                 raise
         except ClientError as e:
-            if e.response.get(
+            if e.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 404:
                 if prefix == self.job_key_prefix:
                     raise NoSuchJobException(identifier)
                 elif prefix == self.content_key_prefix:
@@ -350,9 +338,11 @@ class AWSJobStore(AbstractJobStore, URLAccess):

         jobDescription.pre_update_hook()

-        self.write_to_bucket(
-
-
+        self.write_to_bucket(
+            identifier=str(jobDescription.jobStoreID),
+            prefix=self.job_key_prefix,
+            data=pickle.dumps(jobDescription, protocol=pickle.HIGHEST_PROTOCOL),
+        )
         return jobDescription

     def job_exists(self, job_id: str, check: bool = False) -> bool:
@@ -369,11 +359,11 @@ class AWSJobStore(AbstractJobStore, URLAccess):
                     identifier=job_id,
                     prefix=self.job_key_prefix,
                 ),
-                **self._get_encryption_args()
+                **self._get_encryption_args(),
             )
             return True
         except ClientError as e:
-            if e.response.get(
+            if e.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 404:
                 if check:
                     raise NoSuchJobException(job_id)
                 else:
@@ -386,9 +376,13 @@ class AWSJobStore(AbstractJobStore, URLAccess):
             return False

     def jobs(self) -> Iterator[JobDescription]:
-        for result in list_s3_items(
+        for result in list_s3_items(
+            self.s3_resource, bucket=self.bucket_name, prefix=self.job_key_prefix
+        ):
             try:
-                job_id = result[
+                job_id = result["Key"][
+                    len(self.job_key_prefix) :
+                ]  # strip self.job_key_prefix
                 yield self.load_job(job_id)
             except NoSuchJobException:
                 # job may have been deleted between showing up in the list and getting loaded
@@ -397,7 +391,9 @@ class AWSJobStore(AbstractJobStore, URLAccess):
     def load_job(self, job_id: str) -> JobDescription:
         """Use a job_id to get a job from the jobstore's s3 bucket, unpickle, and return it."""
         try:
-            job = pickle.loads(
+            job = pickle.loads(
+                self.read_from_bucket(identifier=job_id, prefix=self.job_key_prefix)
+            )
         except NoSuchJobException:
             raise

@@ -422,7 +418,7 @@ class AWSJobStore(AbstractJobStore, URLAccess):
             Key=self._key_in_bucket(
                 identifier=job_id,
                 prefix=self.job_key_prefix,
-            )
+            ),
         )

         # delete any files marked as associated with the job
@@ -431,24 +427,32 @@ class AWSJobStore(AbstractJobStore, URLAccess):
             identifier=job_id,
             prefix=self.job_associations_key_prefix,
         )
-        for associated_job_file in list_s3_items(
-
-
-            job_file_associations_to_delete.append(associated_job_file[
-            file_id = associated_job_file[
+        for associated_job_file in list_s3_items(
+            self.s3_resource, bucket=self.bucket_name, prefix=root_key
+        ):
+            job_file_associations_to_delete.append(associated_job_file["Key"])
+            file_id = associated_job_file["Key"].split(".")[-1]
             self.delete_file(file_id)

         # delete the job-file association references (these are empty files the simply connect jobs to files)
         for job_file_association in job_file_associations_to_delete:
-            self.s3_client.delete_object(
+            self.s3_client.delete_object(
+                Bucket=self.bucket_name, Key=f"{job_file_association}"
+            )

     def associate_job_with_file(self, job_id: str, file_id: str) -> None:
         # associate this job with this file; the file will be deleted when the job is
-        self.write_to_bucket(
+        self.write_to_bucket(
+            identifier=f"{job_id}.{file_id}",
+            prefix=self.job_associations_key_prefix,
+            data=None,
+        )

    ###################################### FILES API ######################################

-    def write_file(
+    def write_file(
+        self, local_path: str, job_id: str | None = None, cleanup: bool = False
+    ) -> FileID:
         """
         Write a local file into the jobstore and return a file_id referencing it.

@@ -473,67 +477,64 @@ class AWSJobStore(AbstractJobStore, URLAccess):

         # Each file gets a prefix under which we put exactly one key, to hide
         # metadata in the key.
-        prefix = self._key_in_bucket(
-            identifier=file_id,
-            prefix=self.content_key_prefix
-        )
+        prefix = self._key_in_bucket(identifier=file_id, prefix=self.content_key_prefix)

         copy_local_to_s3(
             s3_resource=self.s3_resource,
             local_file_path=local_path,
             dst_bucket=self.bucket_name,
-            dst_key=f
-            extra_args=self._get_encryption_args()
+            dst_key=f"{prefix}/{os.path.basename(local_path)}",
+            extra_args=self._get_encryption_args(),
         )
         return FileID(file_id, size, executable)

     def find_s3_key_from_file_id(self, file_id: str) -> str:
         """This finds an s3 key for which file_id is the prefix, and which already exists."""
-        prefix = self._key_in_bucket(
-
-
-
-
+        prefix = self._key_in_bucket(identifier=file_id, prefix=self.content_key_prefix)
+        s3_keys = [
+            s3_item
+            for s3_item in list_s3_items(
+                self.s3_resource, bucket=self.bucket_name, prefix=prefix
+            )
+        ]
         if len(s3_keys) == 0:
             raise NoSuchFileException(file_id)
         if len(s3_keys) > 1:
             # There can be only one.
-            raise RuntimeError(
-
+            raise RuntimeError(
+                f"File ID: {file_id} should be unique, but includes: {s3_keys}"
+            )
+        return s3_keys[0]["Key"]

     @contextmanager
     def write_file_stream(
         self,
-        job_id:
+        job_id: str | None = None,
         cleanup: bool = False,
-        basename:
-        encoding:
-        errors:
+        basename: str | None = None,
+        encoding: str | None = None,
+        errors: str | None = None,
     ) -> Iterator[tuple[IO[bytes], str]]:
         file_id = str(uuid.uuid4())
         if job_id and cleanup:
             self.associate_job_with_file(job_id, file_id)
-        prefix = self._key_in_bucket(
-            identifier=file_id,
-            prefix=self.content_key_prefix
-        )
+        prefix = self._key_in_bucket(identifier=file_id, prefix=self.content_key_prefix)

-        pipe = MultiPartPipe(
-
-
-
-
-
-
+        pipe = MultiPartPipe(
+            part_size=self.part_size,
+            s3_resource=self.s3_resource,
+            bucket_name=self.bucket_name,
+            file_id=f"{prefix}/{str(basename)}",
+            encryption_args=self._get_encryption_args(),
+            encoding=encoding,
+            errors=errors,
+        )
         with pipe as writable:
             yield writable, file_id

     @contextmanager
     def update_file_stream(
-
-        file_id: str,
-        encoding: Optional[str] = None,
-        errors: Optional[str] = None
+        self, file_id: str, encoding: str | None = None, errors: str | None = None
     ) -> Iterator[IO[Any]]:
         logger.debug("Replacing file %s via multipart upload", file_id)
         pipe = MultiPartPipe(
@@ -552,9 +553,9 @@ class AWSJobStore(AbstractJobStore, URLAccess):
     def write_shared_file_stream(
         self,
         shared_file_name: str,
-        encrypted:
-        encoding:
-        errors:
+        encrypted: bool | None = None,
+        encoding: str | None = None,
+        errors: str | None = None,
     ) -> Iterator[IO[bytes]]:
         encryption_args = {} if encrypted is False else self._get_encryption_args()
         pipe = MultiPartPipe(
@@ -578,7 +579,7 @@ class AWSJobStore(AbstractJobStore, URLAccess):
             local_file_path=local_path,
             dst_bucket=self.bucket_name,
             dst_key=self.find_s3_key_from_file_id(file_id),
-            extra_args=self._get_encryption_args()
+            extra_args=self._get_encryption_args(),
         )

     def file_exists(self, file_id: str) -> bool:
@@ -593,10 +594,12 @@ class AWSJobStore(AbstractJobStore, URLAccess):
     def get_file_size(self, file_id: str) -> int:
         """Do we need both get_file_size and _get_size???"""
         full_s3_key = self.find_s3_key_from_file_id(file_id)
-        return
+        return (
+            self._get_size(url=urlparse(f"s3://{self.bucket_name}/{full_s3_key}")) or 0
+        )

     @classmethod
-    def _get_size(cls, url: ParseResult) ->
+    def _get_size(cls, url: ParseResult) -> int | None:
         """Do we need both get_file_size and _get_size???"""
         try:
             return get_object_for_url(url, existing=True).content_length
@@ -612,14 +615,14 @@ class AWSJobStore(AbstractJobStore, URLAccess):
                 local_file_path=local_path,
                 src_bucket=self.bucket_name,
                 src_key=full_s3_key,
-                extra_args=self._get_encryption_args()
+                extra_args=self._get_encryption_args(),
             )
             if executable:
                 os.chmod(local_path, os.stat(local_path).st_mode | stat.S_IXUSR)
         except self.s3_client.exceptions.NoSuchKey:
             raise NoSuchFileException(file_id)
         except ClientError as e:
-            if e.response.get(
+            if e.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 404:
                 raise NoSuchFileException(file_id)
             else:
                 raise
@@ -627,23 +630,25 @@ class AWSJobStore(AbstractJobStore, URLAccess):
     @contextmanager  # type: ignore
     def read_file_stream(  # type: ignore
         self,
-        file_id:
-        encoding:
-        errors:
-    ) ->
+        file_id: FileID | str,
+        encoding: str | None = None,
+        errors: str | None = None,
+    ) -> ContextManager[IO[bytes]] | ContextManager[IO[str]]:
         full_s3_key = self.find_s3_key_from_file_id(file_id)
         try:
-            with download_stream(
-
-
-
-
-
+            with download_stream(
+                self.s3_resource,
+                bucket=self.bucket_name,
+                key=full_s3_key,
+                extra_args=self._get_encryption_args(),
+                encoding=encoding,
+                errors=errors,
+            ) as readable:
                 yield readable
         except self.s3_client.exceptions.NoSuchKey:
             raise NoSuchFileException(file_id)
         except ClientError as e:
-            if e.response.get(
+            if e.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 404:
                 raise NoSuchFileException(file_id)
             else:
                 raise
@@ -654,7 +659,7 @@ class AWSJobStore(AbstractJobStore, URLAccess):
         self,
         shared_file_name: str,
         encoding: str,
-        errors:
+        errors: str | None = None,
     ) -> Iterator[IO[str]]: ...

     @overload
@@ -663,38 +668,42 @@ class AWSJobStore(AbstractJobStore, URLAccess):
         self,
         shared_file_name: str,
         encoding: Literal[None] = None,
-        errors:
+        errors: str | None = None,
     ) -> Iterator[IO[bytes]]: ...

     @contextmanager
     def read_shared_file_stream(
         self,
         shared_file_name: str,
-        encoding:
-        errors:
-    ) -> Iterator[
+        encoding: str | None = None,
+        errors: str | None = None,
+    ) -> Iterator[IO[bytes] | IO[str]]:
         self._requireValidSharedFileName(shared_file_name)
-        key = self._key_in_bucket(
+        key = self._key_in_bucket(
+            identifier=shared_file_name, prefix=self.shared_key_prefix
+        )
         if not self.is_in_bucket(
             identifier=shared_file_name,
             prefix=self.shared_key_prefix,
         ):
             # TRAVIS=true TOIL_OWNER_TAG="shared" /home/quokka/git/toil/v3nv/bin/python -m pytest --durations=0 --log-level DEBUG --log-cli-level INFO -r s /home/quokka/git/toil/src/toil/test/jobStores/jobStoreTest.py::EncryptedAWSJobStoreTest::testJobDeletions
             # throw NoSuchFileException in download_stream
-            raise NoSuchFileException(f
+            raise NoSuchFileException(f"s3://{self.bucket_name}/{key}")

         try:
-            with download_stream(
-
-
-
-
-
+            with download_stream(
+                self.s3_resource,
+                bucket=self.bucket_name,
+                key=key,
+                encoding=encoding,
+                errors=errors,
+                extra_args=self._get_encryption_args(),
+            ) as readable:
                 yield readable
         except self.s3_client.exceptions.NoSuchKey:
             raise NoSuchFileException(shared_file_name)
         except ClientError as e:
-            if e.response.get(
+            if e.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 404:
                 raise NoSuchFileException(shared_file_name)
             else:
                 raise
@@ -713,10 +722,10 @@ class AWSJobStore(AbstractJobStore, URLAccess):
         self,
         otherCls: type[URLAccess],
         uri: ParseResult,
-        shared_file_name:
+        shared_file_name: str | None = None,
         hardlink: bool = False,
         symlink: bool = True,
-    ) ->
+    ) -> FileID | None:
         """
         Upload a file into the s3 bucket jobstore from the source uri.

@@ -726,18 +735,26 @@ class AWSJobStore(AbstractJobStore, URLAccess):
         # we are copying from s3 to s3
         if isinstance(otherCls, AWSJobStore):
             src_bucket_name, src_key_name = parse_s3_uri(uri.geturl())
-            response = head_s3_object(
-
+            response = head_s3_object(
+                self.s3_resource, bucket=src_bucket_name, key=src_key_name, check=True
+            )
+            content_length = response["ContentLength"]  # e.g. 65536

             file_id = str(uuid.uuid4())
             if shared_file_name:
-                dst_key = self._key_in_bucket(
+                dst_key = self._key_in_bucket(
+                    identifier=shared_file_name, prefix=self.shared_key_prefix
+                )
             else:
                 # cannot determine exec bit from foreign s3 so default to False
-                dst_key = "/".join(
-
-
-
+                dst_key = "/".join(
+                    [
+                        self._key_in_bucket(
+                            identifier=file_id, prefix=self.content_key_prefix
+                        ),
+                        src_key_name.split("/")[-1],
+                    ]
+                )

             copy_s3_to_s3(
                 s3_resource=self.s3_resource,
@@ -745,18 +762,18 @@ class AWSJobStore(AbstractJobStore, URLAccess):
                 src_key=src_key_name,
                 dst_bucket=self.bucket_name,
                 dst_key=dst_key,
-                extra_args=self._get_encryption_args()
+                extra_args=self._get_encryption_args(),
             )
             # TODO: verify etag after copying here?

             return FileID(file_id, content_length) if not shared_file_name else None
         else:
-            return super(
+            return super()._import_file(
                 otherCls=otherCls,
                 uri=uri,
                 shared_file_name=shared_file_name,
                 hardlink=hardlink,
-                symlink=symlink
+                symlink=symlink,
             )

     def _export_file(
@@ -772,14 +789,14 @@ class AWSJobStore(AbstractJobStore, URLAccess):
                 src_key=src_full_s3_key,
                 dst_bucket=dst_bucket_name,
                 dst_key=dst_key_name,
-                extra_args=self._get_encryption_args()
+                extra_args=self._get_encryption_args(),
             )
         else:
-            super(
+            super()._default_export_file(otherCls, jobStoreFileID, url)

     @classmethod
     def _read_from_url(
-        cls, url: ParseResult, writable:
+        cls, url: ParseResult, writable: IO[bytes] | IO[str]
     ) -> tuple[int, bool]:
         src_obj = get_object_for_url(url, existing=True)
         src_obj.download_fileobj(writable)
@@ -789,15 +806,17 @@ class AWSJobStore(AbstractJobStore, URLAccess):
     @classmethod
     def _write_to_url(
         cls,
-        readable:
+        readable: IO[bytes] | IO[str],
         url: ParseResult,
         executable: bool = False,
     ) -> None:
         dst_obj = get_object_for_url(url)
-        upload_to_s3(
-
-
-
+        upload_to_s3(
+            readable=readable,
+            s3_resource=establish_boto3_session().resource("s3"),
+            bucket=dst_obj.bucket_name,
+            key=dst_obj.key,
+        )

     @classmethod
     def _url_exists(cls, url: ParseResult) -> bool:
@@ -815,7 +834,9 @@ class AWSJobStore(AbstractJobStore, URLAccess):
             src_obj = get_object_for_url(url, existing=True, anonymous=True)
             response = src_obj.get()
         except Exception as e:
-            if isinstance(e, PermissionError) or (
+            if isinstance(e, PermissionError) or (
+                isinstance(e, ClientError) and get_error_status(e) == 403
+            ):
                 # The object setup or the download does not have permission. Try again with a login.
                 src_obj = get_object_for_url(url, existing=True)
                 response = src_obj.get()
@@ -833,9 +854,9 @@ class AWSJobStore(AbstractJobStore, URLAccess):
     @classmethod
     def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
         # TODO: export seems unused
-        return url.scheme.lower() ==
+        return url.scheme.lower() == "s3"

-    def get_public_url(self, file_id: str) -> str:
+    def get_public_url(self, file_id: str) -> str:
         """Turn s3:// into http:// and put a public-read ACL on it."""
         try:
             return create_public_url(
@@ -849,7 +870,7 @@ class AWSJobStore(AbstractJobStore, URLAccess):
         except self.s3_client.exceptions.NoSuchKey:
             raise NoSuchFileException(file_id)
         except ClientError as e:
-            if e.response.get(
+            if e.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 404:
                 raise NoSuchFileException(file_id)
             else:
                 raise
@@ -870,7 +891,7 @@ class AWSJobStore(AbstractJobStore, URLAccess):
         except self.s3_client.exceptions.NoSuchKey:
             raise NoSuchFileException(file_id)
         except ClientError as e:
-            if e.response.get(
+            if e.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 404:
                 raise NoSuchFileException(file_id)
             else:
                 raise
@@ -883,17 +904,17 @@ class AWSJobStore(AbstractJobStore, URLAccess):

     def get_empty_file_store_id(
         self,
-        job_id:
+        job_id: str | None = None,
         cleanup: bool = False,
-        basename:
+        basename: str | None = None,
     ) -> str:
         """Create an empty file in s3 and return a bare string file ID."""
         file_id = str(uuid.uuid4())
         self.write_to_bucket(
-            identifier=f
+            identifier=f"{file_id}/0/{basename}",
             prefix=self.content_key_prefix,
             data=None,
-            bucket=self.bucket_name
+            bucket=self.bucket_name,
         )
         if job_id and cleanup:
             self.associate_job_with_file(job_id, file_id)
@@ -901,21 +922,23 @@ class AWSJobStore(AbstractJobStore, URLAccess):

    ###################################### LOGGING API ######################################

-    def write_logs(self, log_msg:
+    def write_logs(self, log_msg: bytes | str) -> None:
         if isinstance(log_msg, str):
-            log_msg = log_msg.encode(
+            log_msg = log_msg.encode("utf-8", errors="ignore")
         file_obj = BytesIO(log_msg)

         key_name = self._key_in_bucket(
-            identifier=f
-
+            identifier=f"{datetime.datetime.now()}{str(uuid.uuid4())}".replace(
+                " ", "_"
             ),
             prefix=self.logs_key_prefix,
         )
-        self.s3_client.upload_fileobj(
-
-
-
+        self.s3_client.upload_fileobj(
+            Bucket=self.bucket_name,
+            Key=key_name,
+            ExtraArgs=self._get_encryption_args(),
+            Fileobj=file_obj,
+        )

     def read_logs(self, callback: Callable[..., Any], read_all: bool = False) -> int:
         """
@@ -929,30 +952,38 @@ class AWSJobStore(AbstractJobStore, URLAccess):
         # We want to pick up reading where we left off
         try:
             read_log_marker = self.read_from_bucket(
-                identifier=LOG_MARKER,
-
-            ).decode('utf-8')
+                identifier=LOG_MARKER, prefix=self.shared_key_prefix
+            ).decode("utf-8")
         except self.s3_client.exceptions.NoSuchKey:
             # We haven't recorded that we've read anything yet.
             # Leave read_log_marker at "0"
             pass

         startafter = None if read_log_marker == "0" else read_log_marker
-        for result in list_s3_items(
-
-
-
-
-
-
+        for result in list_s3_items(
+            self.s3_resource,
+            bucket=self.bucket_name,
+            prefix=self.logs_key_prefix,
+            startafter=startafter,
+        ):
+            if result["Key"] > read_log_marker or read_all:
+                read_log_marker = result["Key"]
+                with download_stream(
+                    self.s3_resource,
+                    bucket=self.bucket_name,
+                    key=result["Key"],
+                    extra_args=self._get_encryption_args(),
+                ) as readable:
                     callback(readable)
                     items_processed += 1

         if items_processed > 0:
             # We processed something, so we need to update the marker.
-            self.write_to_bucket(
-
-
+            self.write_to_bucket(
+                identifier=LOG_MARKER,
+                prefix=self.shared_key_prefix,
+                data=read_log_marker,
+            )
         return items_processed

     def _get_encryption_args(self) -> dict[str, Any]:
@@ -976,34 +1007,39 @@ class AWSJobStore(AbstractJobStore, URLAccess):
             return {}

         if config is not None and config.sseKey:
-            with open(config.sseKey
+            with open(config.sseKey) as f:
                 sse_key = f.read()
             if not len(sse_key) == 32:  # TODO: regex
                 raise ValueError(
-                    f
-                    f
-                    f
+                    f"Check that {self.config.sseKey} "
+                    f"is the path to a real SSE key. "
+                    f"(Key length {len(sse_key)} != 32)"
                 )
-            return {
+            return {"SSECustomerAlgorithm": "AES256", "SSECustomerKey": sse_key}
         else:
             return {}

-
-
-
+
+def parse_jobstore_identifier(jobstore_identifier: str) -> tuple[str, str]:
+    region, jobstore_name = jobstore_identifier.split(":")
+    bucket_name = f"{jobstore_name}--toil"

     regions = EC2Regions.keys()
     if region not in regions:
         raise ValueError(f'AWS Region "{region}" is not one of: {regions}')

     if not 3 <= len(jobstore_name) <= 56:
-        raise ValueError(
-
+        raise ValueError(
+            f"AWS jobstore name must be between 3 and 56 chars: "
+            f"{jobstore_name} (len: {len(jobstore_name)})"
+        )

-    if not re.compile(r
-    raise ValueError(
-
+    if not re.compile(r"^[a-z0-9][a-z0-9-]+[a-z0-9]$").match(jobstore_name):
+        raise ValueError(
+            f"Invalid AWS jobstore name: '{jobstore_name}'. Must contain only digits, "
+            f"lower-case letters, and hyphens. Must also not start or end in a hyphen."
+        )

-    if
+    if "--" in jobstore_name:
         raise ValueError(f"AWS jobstore names may not contain '--': {jobstore_name}")
     return region, bucket_name