toil 9.1.1__py3-none-any.whl → 9.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +5 -9
- toil/batchSystems/abstractBatchSystem.py +23 -22
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -12
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/cleanup_support.py +4 -4
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/gridengine.py +3 -4
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +65 -63
- toil/batchSystems/local_support.py +2 -3
- toil/batchSystems/lsf.py +6 -7
- toil/batchSystems/mesos/batchSystem.py +11 -7
- toil/batchSystems/mesos/test/__init__.py +1 -2
- toil/batchSystems/options.py +9 -10
- toil/batchSystems/registry.py +3 -7
- toil/batchSystems/singleMachine.py +8 -11
- toil/batchSystems/slurm.py +49 -38
- toil/batchSystems/torque.py +3 -4
- toil/bus.py +36 -34
- toil/common.py +129 -89
- toil/cwl/cwltoil.py +857 -729
- toil/cwl/utils.py +44 -35
- toil/fileStores/__init__.py +3 -1
- toil/fileStores/abstractFileStore.py +28 -30
- toil/fileStores/cachingFileStore.py +8 -8
- toil/fileStores/nonCachingFileStore.py +10 -21
- toil/job.py +159 -158
- toil/jobStores/abstractJobStore.py +68 -69
- toil/jobStores/aws/jobStore.py +249 -213
- toil/jobStores/aws/utils.py +13 -24
- toil/jobStores/fileJobStore.py +28 -22
- toil/jobStores/googleJobStore.py +21 -17
- toil/jobStores/utils.py +3 -7
- toil/leader.py +17 -22
- toil/lib/accelerators.py +6 -4
- toil/lib/aws/__init__.py +9 -10
- toil/lib/aws/ami.py +33 -19
- toil/lib/aws/iam.py +6 -6
- toil/lib/aws/s3.py +259 -157
- toil/lib/aws/session.py +76 -76
- toil/lib/aws/utils.py +51 -43
- toil/lib/checksum.py +19 -15
- toil/lib/compatibility.py +3 -2
- toil/lib/conversions.py +45 -18
- toil/lib/directory.py +29 -26
- toil/lib/docker.py +93 -99
- toil/lib/dockstore.py +77 -50
- toil/lib/ec2.py +39 -38
- toil/lib/ec2nodes.py +11 -4
- toil/lib/exceptions.py +8 -5
- toil/lib/ftp_utils.py +9 -14
- toil/lib/generatedEC2Lists.py +161 -20
- toil/lib/history.py +141 -97
- toil/lib/history_submission.py +163 -72
- toil/lib/io.py +27 -17
- toil/lib/memoize.py +2 -1
- toil/lib/misc.py +15 -11
- toil/lib/pipes.py +40 -25
- toil/lib/plugins.py +12 -8
- toil/lib/resources.py +1 -0
- toil/lib/retry.py +32 -38
- toil/lib/threading.py +12 -12
- toil/lib/throttle.py +1 -2
- toil/lib/trs.py +113 -51
- toil/lib/url.py +14 -23
- toil/lib/web.py +7 -2
- toil/options/common.py +18 -15
- toil/options/cwl.py +2 -2
- toil/options/runner.py +9 -5
- toil/options/wdl.py +1 -3
- toil/provisioners/__init__.py +9 -9
- toil/provisioners/abstractProvisioner.py +22 -20
- toil/provisioners/aws/__init__.py +20 -14
- toil/provisioners/aws/awsProvisioner.py +10 -8
- toil/provisioners/clusterScaler.py +19 -18
- toil/provisioners/gceProvisioner.py +2 -3
- toil/provisioners/node.py +11 -13
- toil/realtimeLogger.py +4 -4
- toil/resource.py +5 -5
- toil/server/app.py +2 -2
- toil/server/cli/wes_cwl_runner.py +11 -11
- toil/server/utils.py +18 -21
- toil/server/wes/abstract_backend.py +9 -8
- toil/server/wes/amazon_wes_utils.py +3 -3
- toil/server/wes/tasks.py +3 -5
- toil/server/wes/toil_backend.py +17 -21
- toil/server/wsgi_app.py +3 -3
- toil/serviceManager.py +3 -4
- toil/statsAndLogging.py +12 -13
- toil/test/__init__.py +33 -24
- toil/test/batchSystems/batchSystemTest.py +12 -11
- toil/test/batchSystems/batch_system_plugin_test.py +3 -5
- toil/test/batchSystems/test_slurm.py +38 -24
- toil/test/cwl/conftest.py +5 -6
- toil/test/cwl/cwlTest.py +194 -78
- toil/test/cwl/download_file_uri.json +6 -0
- toil/test/cwl/download_file_uri_no_hostname.json +6 -0
- toil/test/docs/scripts/tutorial_staging.py +1 -0
- toil/test/jobStores/jobStoreTest.py +9 -7
- toil/test/lib/aws/test_iam.py +1 -3
- toil/test/lib/aws/test_s3.py +1 -1
- toil/test/lib/dockerTest.py +9 -9
- toil/test/lib/test_ec2.py +12 -11
- toil/test/lib/test_history.py +4 -4
- toil/test/lib/test_trs.py +16 -14
- toil/test/lib/test_url.py +7 -6
- toil/test/lib/url_plugin_test.py +12 -18
- toil/test/provisioners/aws/awsProvisionerTest.py +10 -8
- toil/test/provisioners/clusterScalerTest.py +2 -5
- toil/test/provisioners/clusterTest.py +1 -3
- toil/test/server/serverTest.py +13 -4
- toil/test/sort/restart_sort.py +2 -6
- toil/test/sort/sort.py +3 -8
- toil/test/src/deferredFunctionTest.py +7 -7
- toil/test/src/environmentTest.py +1 -2
- toil/test/src/fileStoreTest.py +5 -5
- toil/test/src/importExportFileTest.py +5 -6
- toil/test/src/jobServiceTest.py +22 -14
- toil/test/src/jobTest.py +121 -25
- toil/test/src/miscTests.py +5 -7
- toil/test/src/promisedRequirementTest.py +8 -7
- toil/test/src/regularLogTest.py +2 -3
- toil/test/src/resourceTest.py +5 -8
- toil/test/src/restartDAGTest.py +5 -6
- toil/test/src/resumabilityTest.py +2 -2
- toil/test/src/retainTempDirTest.py +3 -3
- toil/test/src/systemTest.py +3 -3
- toil/test/src/threadingTest.py +1 -1
- toil/test/src/workerTest.py +1 -2
- toil/test/utils/toilDebugTest.py +6 -4
- toil/test/utils/toilKillTest.py +1 -1
- toil/test/utils/utilsTest.py +15 -14
- toil/test/wdl/wdltoil_test.py +247 -124
- toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
- toil/toilState.py +2 -3
- toil/utils/toilDebugFile.py +3 -8
- toil/utils/toilDebugJob.py +1 -2
- toil/utils/toilLaunchCluster.py +1 -2
- toil/utils/toilSshCluster.py +2 -0
- toil/utils/toilStats.py +19 -24
- toil/utils/toilStatus.py +11 -14
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +313 -209
- toil/worker.py +18 -12
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/METADATA +11 -14
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/RECORD +150 -153
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/WHEEL +1 -1
- toil/test/cwl/staging_cat.cwl +0 -27
- toil/test/cwl/staging_make_file.cwl +0 -25
- toil/test/cwl/staging_workflow.cwl +0 -43
- toil/test/cwl/zero_default.cwl +0 -61
- toil/test/utils/ABCWorkflowDebug/ABC.txt +0 -1
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/entry_points.txt +0 -0
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/licenses/LICENSE +0 -0
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/top_level.txt +0 -0
toil/lib/aws/session.py
CHANGED

The changes in this module modernize type annotations from typing.Optional[...] to PEP 604 union syntax and apply black-style reflowing; only annotations and formatting change in the hunks shown. The typing import now also pulls in cast and overload:

    from typing import TYPE_CHECKING, Literal, cast, overload

The session factory picks up the union annotation:

    def _new_boto3_session(region_name: str | None = None) -> Session:

In AWSConnectionManager.__init__, the per-thread caches (sessions keyed by region; resources and clients keyed by (region, service name, endpoint URL) tuples; Boto 2 connections keyed by (region, service name) tuples) are re-annotated, and session() follows suit:

    self.sessions_by_region: dict[str | None, threading.local] = (
        collections.defaultdict(threading.local)
    )
    self.resource_cache: dict[
        tuple[str | None, str, str | None], threading.local
    ] = collections.defaultdict(threading.local)
    self.client_cache: dict[tuple[str | None, str, str | None], threading.local] = (
        collections.defaultdict(threading.local)
    )
    self.boto2_cache: dict[tuple[str | None, str], threading.local] = (
        collections.defaultdict(threading.local)
    )

    def session(self, region: str | None) -> boto3.session.Session:

The resource() overloads (for the "s3", "iam", and "ec2" services) and the client() overloads (for "ec2", "iam", "s3", "sts", "sdb", and "autoscaling"), along with their concrete implementations, now all declare the same parameter annotations:

    region: str | None,
    endpoint_url: str | None = None,
    config: Config | None = None,

Below the module-level _global_manager = AWSConnectionManager(), the free functions get the same treatment:

    def establish_boto3_session(region_name: str | None = None) -> Session:

The module-level client() overloads (for "ec2", "iam", "s3", "sts", "sdb", and "autoscaling") and resource() overloads (for "s3", "iam", and "ec2"), plus the concrete implementations returning botocore.client.BaseClient and boto3.resources.base.ServiceResource, now declare:

    region_name: str | None = None,
    endpoint_url: str | None = None,
    config: Config | None = None,
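The cache layout above, a defaultdict of threading.local holders keyed by region, gives each thread its own lazily created Session while reusing it within the thread. A minimal sketch of that pattern, assuming plain boto3 (the class and method names here are illustrative, not Toil's; the _init_lock visible in the surrounding context suggests the real module additionally serializes session construction):

    import collections
    import threading

    import boto3


    class SessionCache:
        """Hand out one boto3 Session per (thread, region) pair.

        boto3 Sessions are not guaranteed to be thread-safe, so each thread gets
        its own Session, but within a thread the same Session is reused per region.
        """

        def __init__(self) -> None:
            # Maps a region name (or None for the default region) to a thread-local
            # holder; each thread sees its own .item attribute on that holder.
            self._by_region: dict[str | None, threading.local] = collections.defaultdict(
                threading.local
            )

        def get_session(self, region: str | None) -> boto3.session.Session:
            holder = self._by_region[region]
            if not hasattr(holder, "item"):
                # First use in this thread for this region: create and stash it.
                holder.item = boto3.session.Session(region_name=region)
            return holder.item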
toil/lib/aws/utils.py
CHANGED

The import block at the top of the module is consolidated: Callable now comes from collections.abc, Optional and Union drop out of the typing import, and the unused urlparse import is removed:

    from collections.abc import Callable, Iterable, Iterator
    from typing import TYPE_CHECKING, Any, ContextManager, Literal, cast
    from urllib.parse import ParseResult

    # To import toil.lib.aws.session, the AWS libraries must be installed
    from toil.lib.aws import AWSRegionName, AWSServerErrors, session
    from toil.lib.conversions import strtobool
    from toil.lib.memoize import memoize

The remaining hunks switch signatures to union syntax and reflow several calls to black's line length:

    @retry(errors=[AWSServerErrors])
    def delete_sdb_domain(
        sdb_domain_name: str, region: str | None = None, quiet: bool = True
    ) -> None:

    # In delete_s3_bucket(), after the Bucket(bucket).delete() call:
    printq(
        f"\n * S3 bucket successfully scheduled for deletion: {bucket}\n\n", quiet
    )

    def get_bucket_region(
        bucket_name: str,
        endpoint_url: str | None = None,
        only_strategies: set[int] | None = None,
        anonymous: bool | None = None,
    ) -> str:

Inside get_bucket_region(), the strategy helpers attempt_get_bucket_location(), attempt_get_bucket_location_from_us_east_1(), and attempt_head_bucket() are each annotated to return str | None, the list they populate becomes

    strategies: list[Callable[[], str | None]] = []

and the per-strategy failure handling is reflowed:

    logger.debug(
        "Strategy %d to get bucket location did not work: %s", i + 1, e
    )

    error_messages.append(
        f"Strategy {rank} failed to get bucket location because: {message}"
    )
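The strategy list exists because no single S3 API call reveals a bucket's region under every permission and endpoint configuration. A minimal sketch of the idea using plain boto3 (the function names below are illustrative, not Toil's; the real implementation also retries and queries us-east-1 explicitly):

    from collections.abc import Callable

    import boto3


    def find_bucket_region(bucket_name: str) -> str | None:
        """Try progressively less direct ways of learning an S3 bucket's region."""
        s3 = boto3.client("s3")

        def from_get_bucket_location() -> str | None:
            # GetBucketLocation reports an empty/None LocationConstraint for us-east-1.
            return s3.get_bucket_location(Bucket=bucket_name).get("LocationConstraint")

        def from_head_bucket() -> str | None:
            # HeadBucket exposes the region in the x-amz-bucket-region response header.
            response = s3.head_bucket(Bucket=bucket_name)
            return response["ResponseMetadata"]["HTTPHeaders"].get("x-amz-bucket-region")

        strategies: list[Callable[[], str | None]] = [
            from_get_bucket_location,
            from_head_bucket,
        ]
        for strategy in strategies:
            try:
                # Map an empty/None location back to its region name.
                return strategy() or "us-east-1"
            except Exception:
                continue  # This strategy failed; fall through to the next one.
        return None

The empty-string-to-"us-east-1" mapping mirrors bucket_location_to_region() shown below.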
The memoized wrapper is annotated the same way, and its exception filter is reflowed:

    @memoize
    def get_bucket_region_if_available(
        bucket_name: str,
        endpoint_url: str | None = None,
        only_strategies: set[int] | None = None,
        anonymous: bool | None = None,
    ) -> str | None:
        ...
        try:
            return get_bucket_region(bucket_name, endpoint_url, only_strategies, anonymous)
        except Exception as e:
            if isinstance(e, NoBucketLocationError) or (
                isinstance(e, ClientError) and get_error_status(e) == 403
            ):
                # We can't know
                return None
            else:
                raise

The location/region helpers and the URL-oriented functions get the same union annotations:

    def region_to_bucket_location(region: str) -> str:
        return "" if region == "us-east-1" else region

    def bucket_location_to_region(location: str | None) -> str:
        return "us-east-1" if location == "" or location is None else location

    def get_object_for_url(
        url: ParseResult, existing: bool | None = None, anonymous: bool | None = None
    ) -> "S3Object":

Inside get_object_for_url(), the endpoint override and region lookup read:

    # Decide if we need to override Boto's built-in URL here.
    endpoint_url: str | None = None
    host = os.environ.get("TOIL_S3_HOST", None)
    port = os.environ.get("TOIL_S3_PORT", None)
    protocol = "https"
    if strtobool(os.environ.get("TOIL_S3_USE_SSL", "True")) is False:
        protocol = "http"
    if host:
        endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""

    config = session.ANONYMOUS_CONFIG if anonymous else None
    # Get the bucket's region to avoid a redirect per request.
    # Cache the result
    region = get_bucket_region_if_available(
        bucket_name, endpoint_url=endpoint_url, anonymous=anonymous
    )
    if region is not None:
        s3 = session.resource(
            "s3", region_name=region, endpoint_url=endpoint_url, config=config
        )

list_objects_for_url() repeats the same endpoint-override block and is annotated as:

    @retry(errors=[AWSServerErrors])
    def list_objects_for_url(url: ParseResult, anonymous: bool | None = None) -> list[str]:

Finally, flatten_tags() spells out its return type:

    def flatten_tags(
        tags: dict[str, str],
    ) -> list[dict[Literal["Key"] | Literal["Value"], str]]:
        """
        Convert tags from a key to value dict into a list of 'Key': xxx, 'Value': xxx dicts.
        """
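The TOIL_S3_HOST / TOIL_S3_PORT / TOIL_S3_USE_SSL handling above decides whether to point boto3 at a custom S3-compatible endpoint instead of AWS. A minimal standalone sketch of that decision, with explicit parenthesization and a simplified flag check in place of Toil's strtobool (the helper name is illustrative):

    import os


    def s3_endpoint_from_env() -> str | None:
        """Build a custom S3 endpoint URL from the environment, or return None.

        TOIL_S3_HOST selects a non-AWS, S3-compatible endpoint, TOIL_S3_PORT
        optionally adds a port, and TOIL_S3_USE_SSL picks http vs. https.
        """
        host = os.environ.get("TOIL_S3_HOST")
        if not host:
            # No override requested; callers fall back to the default AWS endpoint.
            return None
        port = os.environ.get("TOIL_S3_PORT")
        use_ssl = os.environ.get("TOIL_S3_USE_SSL", "True").lower() not in ("0", "false", "no")
        protocol = "https" if use_ssl else "http"
        endpoint = f"{protocol}://{host}"
        if port:
            endpoint += f":{port}"
        return endpoint

The resulting URL is what get_object_for_url() and list_objects_for_url() pass as endpoint_url to session.resource() and session.client().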
toil/lib/checksum.py
CHANGED

The import block is reordered and the typing import tightened:

    import hashlib
    import logging
    from io import BytesIO
    from typing import TYPE_CHECKING, BinaryIO

    from toil.lib.aws.config import S3_PART_SIZE

In the Etag class ("A hasher for s3 etags."), the annotations and slicing pick up union syntax and black spacing, and the multipart digest is returned as an f-string:

    def __init__(self, chunk_size: int) -> None:
        self.etag_bytes: int = 0
        self.etag_parts: list[bytes] = []
        self.etag_hasher: "_Hash" = hashlib.md5()
        self.chunk_size: int = chunk_size

    def update(self, chunk: bytes) -> None:
        if self.etag_bytes + len(chunk) > self.chunk_size:
            chunk_head = chunk[: self.chunk_size - self.etag_bytes]
            chunk_tail = chunk[self.chunk_size - self.etag_bytes :]
            self.etag_hasher.update(chunk_head)
            self.etag_parts.append(self.etag_hasher.digest())
            self.etag_hasher = hashlib.md5()

and, in the method that produces the final digest:

        if len(self.etag_parts) > 1:
            etag = hashlib.md5(b"".join(self.etag_parts)).hexdigest()
            return f"{etag}-{len(self.etag_parts)}"
        else:
            return self.etag_hasher.hexdigest()

The module-level hasher table and the checksum helpers are reformatted and re-annotated:

    hashers = {
        "sha1": hashlib.sha1(),
        "sha256": hashlib.sha256(),
        "etag": Etag(chunk_size=S3_PART_SIZE),
    }


    def compute_checksum_for_file(local_file_path: str, algorithm: str = "sha1") -> str:
        with open(local_file_path, "rb") as fh:
            checksum_result = compute_checksum_for_content(fh, algorithm=algorithm)
        return checksum_result


    def compute_checksum_for_content(
        fh: BinaryIO | BytesIO, algorithm: str = "sha1"
    ) -> str:
        """
        Note: Chunk size matters for s3 etags, and must be the same to get the same hash from the same object.
        Therefore this buffer is not modifiable throughout Toil.
        """
        hasher: "_Hash" = hashers[algorithm]  # type: ignore
        contents = fh.read(S3_PART_SIZE)
        while contents != b"":
            hasher.update(contents)
            contents = fh.read(S3_PART_SIZE)

        return f"{algorithm}${hasher.hexdigest()}"
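The Etag class reproduces the way S3 etags are commonly computed for multipart uploads: each part is MD5-hashed, the binary part digests are concatenated and MD5-hashed again, and a -<part count> suffix is appended, while a single-part object just gets the plain MD5. A standalone sketch of that calculation, assuming a caller-supplied part size (the function name is illustrative):

    import hashlib


    def s3_style_etag(data: bytes, part_size: int) -> str:
        """Compute an S3-style etag for data uploaded in parts of part_size bytes."""
        parts = [data[i : i + part_size] for i in range(0, len(data), part_size)]
        if len(parts) <= 1:
            # Single-part uploads get a plain MD5 hex digest.
            return hashlib.md5(data).hexdigest()
        # Multipart uploads: MD5 each part, MD5 the concatenated binary digests,
        # then append the number of parts.
        combined = b"".join(hashlib.md5(part).digest() for part in parts)
        return f"{hashlib.md5(combined).hexdigest()}-{len(parts)}"


    # A 12 MiB object only verifies if the local part size matches the one used
    # for the upload; that is why S3_PART_SIZE is fixed throughout Toil.
    print(s3_style_etag(b"x" * (12 * 1024 * 1024), 5 * 1024 * 1024))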
toil/lib/compatibility.py
CHANGED

Callable now comes from collections.abc rather than typing, and compat_bytes() gets the union annotation; the deprecated() decorator is unchanged in the hunks shown:

    import functools
    import warnings
    from collections.abc import Callable
    from typing import Any


    def deprecated(new_function_name: str) -> Callable[..., Any]:


    def compat_bytes(s: bytes | str) -> str:
        return s.decode("utf-8") if isinstance(s, bytes) else s
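compat_bytes() normalizes values that may arrive as either bytes or str into str, decoding bytes as UTF-8. A quick usage sketch:

    from toil.lib.compatibility import compat_bytes

    # Both calls return the str "job-123"; bytes are decoded, str passes through.
    assert compat_bytes(b"job-123") == "job-123"
    assert compat_bytes("job-123") == "job-123"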