toil 8.1.0b1__py3-none-any.whl → 8.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +0 -35
- toil/batchSystems/abstractBatchSystem.py +1 -1
- toil/batchSystems/abstractGridEngineBatchSystem.py +1 -1
- toil/batchSystems/awsBatch.py +1 -1
- toil/batchSystems/cleanup_support.py +1 -1
- toil/batchSystems/kubernetes.py +53 -7
- toil/batchSystems/local_support.py +1 -1
- toil/batchSystems/mesos/batchSystem.py +13 -8
- toil/batchSystems/mesos/test/__init__.py +3 -2
- toil/batchSystems/singleMachine.py +1 -1
- toil/batchSystems/slurm.py +27 -26
- toil/bus.py +5 -3
- toil/common.py +39 -11
- toil/cwl/cwltoil.py +1 -1
- toil/job.py +64 -49
- toil/jobStores/abstractJobStore.py +24 -3
- toil/jobStores/fileJobStore.py +25 -1
- toil/jobStores/googleJobStore.py +104 -30
- toil/leader.py +9 -0
- toil/lib/accelerators.py +3 -1
- toil/lib/aws/utils.py.orig +504 -0
- toil/lib/bioio.py +1 -1
- toil/lib/docker.py +252 -91
- toil/lib/dockstore.py +11 -3
- toil/lib/exceptions.py +5 -3
- toil/lib/history.py +87 -13
- toil/lib/history_submission.py +23 -9
- toil/lib/io.py +34 -22
- toil/lib/misc.py +7 -1
- toil/lib/resources.py +2 -1
- toil/lib/threading.py +11 -10
- toil/options/common.py +8 -0
- toil/options/wdl.py +11 -0
- toil/server/api_spec/LICENSE +201 -0
- toil/server/api_spec/README.rst +5 -0
- toil/server/cli/wes_cwl_runner.py +2 -1
- toil/test/__init__.py +275 -115
- toil/test/batchSystems/batchSystemTest.py +227 -205
- toil/test/batchSystems/test_slurm.py +27 -0
- toil/test/cactus/pestis.tar.gz +0 -0
- toil/test/conftest.py +7 -0
- toil/test/cwl/2.fasta +11 -0
- toil/test/cwl/2.fastq +12 -0
- toil/test/cwl/conftest.py +1 -1
- toil/test/cwl/cwlTest.py +999 -867
- toil/test/cwl/directory/directory/file.txt +15 -0
- toil/test/cwl/download_directory_file.json +4 -0
- toil/test/cwl/download_directory_s3.json +4 -0
- toil/test/cwl/download_file.json +6 -0
- toil/test/cwl/download_http.json +6 -0
- toil/test/cwl/download_https.json +6 -0
- toil/test/cwl/download_s3.json +6 -0
- toil/test/cwl/download_subdirectory_file.json +5 -0
- toil/test/cwl/download_subdirectory_s3.json +5 -0
- toil/test/cwl/empty.json +1 -0
- toil/test/cwl/mock_mpi/fake_mpi.yml +8 -0
- toil/test/cwl/mock_mpi/fake_mpi_run.py +42 -0
- toil/test/cwl/optional-file-exists.json +6 -0
- toil/test/cwl/optional-file-missing.json +6 -0
- toil/test/cwl/preemptible_expression.json +1 -0
- toil/test/cwl/revsort-job-missing.json +6 -0
- toil/test/cwl/revsort-job.json +6 -0
- toil/test/cwl/s3_secondary_file.json +16 -0
- toil/test/cwl/seqtk_seq_job.json +6 -0
- toil/test/cwl/stream.json +6 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.dat +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f1 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f1i +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f2 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f2_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f3 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f3_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f4 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f4_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f5 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.info +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.lock +0 -0
- toil/test/cwl/whale.txt +16 -0
- toil/test/docs/scripts/example_alwaysfail.py +38 -0
- toil/test/docs/scripts/example_alwaysfail_with_files.wdl +33 -0
- toil/test/docs/scripts/example_cachingbenchmark.py +117 -0
- toil/test/docs/scripts/stagingExampleFiles/in.txt +1 -0
- toil/test/docs/scripts/stagingExampleFiles/out.txt +2 -0
- toil/test/docs/scripts/tutorial_arguments.py +23 -0
- toil/test/docs/scripts/tutorial_debugging.patch +12 -0
- toil/test/docs/scripts/tutorial_debugging_hangs.wdl +126 -0
- toil/test/docs/scripts/tutorial_debugging_works.wdl +129 -0
- toil/test/docs/scripts/tutorial_docker.py +20 -0
- toil/test/docs/scripts/tutorial_dynamic.py +24 -0
- toil/test/docs/scripts/tutorial_encapsulation.py +28 -0
- toil/test/docs/scripts/tutorial_encapsulation2.py +29 -0
- toil/test/docs/scripts/tutorial_helloworld.py +15 -0
- toil/test/docs/scripts/tutorial_invokeworkflow.py +27 -0
- toil/test/docs/scripts/tutorial_invokeworkflow2.py +30 -0
- toil/test/docs/scripts/tutorial_jobfunctions.py +22 -0
- toil/test/docs/scripts/tutorial_managing.py +29 -0
- toil/test/docs/scripts/tutorial_managing2.py +56 -0
- toil/test/docs/scripts/tutorial_multiplejobs.py +25 -0
- toil/test/docs/scripts/tutorial_multiplejobs2.py +21 -0
- toil/test/docs/scripts/tutorial_multiplejobs3.py +22 -0
- toil/test/docs/scripts/tutorial_promises.py +25 -0
- toil/test/docs/scripts/tutorial_promises2.py +30 -0
- toil/test/docs/scripts/tutorial_quickstart.py +22 -0
- toil/test/docs/scripts/tutorial_requirements.py +44 -0
- toil/test/docs/scripts/tutorial_services.py +45 -0
- toil/test/docs/scripts/tutorial_staging.py +45 -0
- toil/test/docs/scripts/tutorial_stats.py +64 -0
- toil/test/lib/aws/test_iam.py +3 -1
- toil/test/lib/dockerTest.py +205 -122
- toil/test/lib/test_history.py +101 -77
- toil/test/provisioners/aws/awsProvisionerTest.py +12 -9
- toil/test/provisioners/clusterTest.py +4 -4
- toil/test/provisioners/gceProvisionerTest.py +16 -14
- toil/test/sort/sort.py +4 -1
- toil/test/src/busTest.py +17 -17
- toil/test/src/deferredFunctionTest.py +145 -132
- toil/test/src/importExportFileTest.py +71 -63
- toil/test/src/jobEncapsulationTest.py +27 -28
- toil/test/src/jobServiceTest.py +149 -133
- toil/test/src/jobTest.py +219 -211
- toil/test/src/miscTests.py +66 -60
- toil/test/src/promisedRequirementTest.py +163 -169
- toil/test/src/regularLogTest.py +24 -24
- toil/test/src/resourceTest.py +82 -76
- toil/test/src/restartDAGTest.py +51 -47
- toil/test/src/resumabilityTest.py +24 -19
- toil/test/src/retainTempDirTest.py +60 -57
- toil/test/src/systemTest.py +17 -13
- toil/test/src/threadingTest.py +29 -32
- toil/test/utils/ABCWorkflowDebug/B_file.txt +1 -0
- toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +204 -0
- toil/test/utils/ABCWorkflowDebug/mkFile.py +16 -0
- toil/test/utils/ABCWorkflowDebug/sleep.cwl +12 -0
- toil/test/utils/ABCWorkflowDebug/sleep.yaml +1 -0
- toil/test/utils/toilDebugTest.py +117 -102
- toil/test/utils/toilKillTest.py +54 -53
- toil/test/utils/utilsTest.py +303 -229
- toil/test/wdl/lint_error.wdl +9 -0
- toil/test/wdl/md5sum/empty_file.json +1 -0
- toil/test/wdl/md5sum/md5sum-gs.json +1 -0
- toil/test/wdl/md5sum/md5sum.1.0.wdl +32 -0
- toil/test/wdl/md5sum/md5sum.input +1 -0
- toil/test/wdl/md5sum/md5sum.json +1 -0
- toil/test/wdl/md5sum/md5sum.wdl +25 -0
- toil/test/wdl/miniwdl_self_test/inputs-namespaced.json +1 -0
- toil/test/wdl/miniwdl_self_test/inputs.json +1 -0
- toil/test/wdl/miniwdl_self_test/self_test.wdl +40 -0
- toil/test/wdl/standard_library/as_map.json +16 -0
- toil/test/wdl/standard_library/as_map_as_input.wdl +23 -0
- toil/test/wdl/standard_library/as_pairs.json +7 -0
- toil/test/wdl/standard_library/as_pairs_as_input.wdl +23 -0
- toil/test/wdl/standard_library/ceil.json +3 -0
- toil/test/wdl/standard_library/ceil_as_command.wdl +16 -0
- toil/test/wdl/standard_library/ceil_as_input.wdl +16 -0
- toil/test/wdl/standard_library/collect_by_key.json +1 -0
- toil/test/wdl/standard_library/collect_by_key_as_input.wdl +23 -0
- toil/test/wdl/standard_library/cross.json +11 -0
- toil/test/wdl/standard_library/cross_as_input.wdl +19 -0
- toil/test/wdl/standard_library/flatten.json +7 -0
- toil/test/wdl/standard_library/flatten_as_input.wdl +18 -0
- toil/test/wdl/standard_library/floor.json +3 -0
- toil/test/wdl/standard_library/floor_as_command.wdl +16 -0
- toil/test/wdl/standard_library/floor_as_input.wdl +16 -0
- toil/test/wdl/standard_library/keys.json +8 -0
- toil/test/wdl/standard_library/keys_as_input.wdl +24 -0
- toil/test/wdl/standard_library/length.json +7 -0
- toil/test/wdl/standard_library/length_as_input.wdl +16 -0
- toil/test/wdl/standard_library/length_as_input_with_map.json +7 -0
- toil/test/wdl/standard_library/length_as_input_with_map.wdl +17 -0
- toil/test/wdl/standard_library/length_invalid.json +3 -0
- toil/test/wdl/standard_library/range.json +3 -0
- toil/test/wdl/standard_library/range_0.json +3 -0
- toil/test/wdl/standard_library/range_as_input.wdl +17 -0
- toil/test/wdl/standard_library/range_invalid.json +3 -0
- toil/test/wdl/standard_library/read_boolean.json +3 -0
- toil/test/wdl/standard_library/read_boolean_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_float.json +3 -0
- toil/test/wdl/standard_library/read_float_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_int.json +3 -0
- toil/test/wdl/standard_library/read_int_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_json.json +3 -0
- toil/test/wdl/standard_library/read_json_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_lines.json +3 -0
- toil/test/wdl/standard_library/read_lines_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_map.json +3 -0
- toil/test/wdl/standard_library/read_map_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_string.json +3 -0
- toil/test/wdl/standard_library/read_string_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_tsv.json +3 -0
- toil/test/wdl/standard_library/read_tsv_as_output.wdl +31 -0
- toil/test/wdl/standard_library/round.json +3 -0
- toil/test/wdl/standard_library/round_as_command.wdl +16 -0
- toil/test/wdl/standard_library/round_as_input.wdl +16 -0
- toil/test/wdl/standard_library/size.json +3 -0
- toil/test/wdl/standard_library/size_as_command.wdl +17 -0
- toil/test/wdl/standard_library/size_as_output.wdl +36 -0
- toil/test/wdl/standard_library/stderr.json +3 -0
- toil/test/wdl/standard_library/stderr_as_output.wdl +30 -0
- toil/test/wdl/standard_library/stdout.json +3 -0
- toil/test/wdl/standard_library/stdout_as_output.wdl +30 -0
- toil/test/wdl/standard_library/sub.json +3 -0
- toil/test/wdl/standard_library/sub_as_input.wdl +17 -0
- toil/test/wdl/standard_library/sub_as_input_with_file.wdl +17 -0
- toil/test/wdl/standard_library/transpose.json +6 -0
- toil/test/wdl/standard_library/transpose_as_input.wdl +18 -0
- toil/test/wdl/standard_library/write_json.json +6 -0
- toil/test/wdl/standard_library/write_json_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_lines.json +7 -0
- toil/test/wdl/standard_library/write_lines_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_map.json +6 -0
- toil/test/wdl/standard_library/write_map_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_tsv.json +6 -0
- toil/test/wdl/standard_library/write_tsv_as_command.wdl +17 -0
- toil/test/wdl/standard_library/zip.json +12 -0
- toil/test/wdl/standard_library/zip_as_input.wdl +19 -0
- toil/test/wdl/test.csv +3 -0
- toil/test/wdl/test.tsv +3 -0
- toil/test/wdl/testfiles/croo.wdl +38 -0
- toil/test/wdl/testfiles/drop_files.wdl +62 -0
- toil/test/wdl/testfiles/drop_files_subworkflow.wdl +13 -0
- toil/test/wdl/testfiles/empty.txt +0 -0
- toil/test/wdl/testfiles/not_enough_outputs.wdl +33 -0
- toil/test/wdl/testfiles/random.wdl +66 -0
- toil/test/wdl/testfiles/string_file_coercion.json +1 -0
- toil/test/wdl/testfiles/string_file_coercion.wdl +35 -0
- toil/test/wdl/testfiles/test.json +4 -0
- toil/test/wdl/testfiles/test_boolean.txt +1 -0
- toil/test/wdl/testfiles/test_float.txt +1 -0
- toil/test/wdl/testfiles/test_int.txt +1 -0
- toil/test/wdl/testfiles/test_lines.txt +5 -0
- toil/test/wdl/testfiles/test_map.txt +2 -0
- toil/test/wdl/testfiles/test_string.txt +1 -0
- toil/test/wdl/testfiles/url_to_file.wdl +13 -0
- toil/test/wdl/testfiles/url_to_optional_file.wdl +13 -0
- toil/test/wdl/testfiles/vocab.json +1 -0
- toil/test/wdl/testfiles/vocab.wdl +66 -0
- toil/test/wdl/testfiles/wait.wdl +34 -0
- toil/test/wdl/wdl_specification/type_pair.json +23 -0
- toil/test/wdl/wdl_specification/type_pair_basic.wdl +36 -0
- toil/test/wdl/wdl_specification/type_pair_with_files.wdl +36 -0
- toil/test/wdl/wdl_specification/v1_spec.json +1 -0
- toil/test/wdl/wdl_specification/v1_spec_declaration.wdl +39 -0
- toil/test/wdl/wdltoil_test.py +680 -407
- toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
- toil/version.py +9 -9
- toil/wdl/wdltoil.py +336 -123
- {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/METADATA +5 -4
- toil-8.2.0.dist-info/RECORD +439 -0
- {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/WHEEL +1 -1
- toil-8.1.0b1.dist-info/RECORD +0 -259
- {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/entry_points.txt +0 -0
- {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info/licenses}/LICENSE +0 -0
- {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,504 @@
|
|
|
1
|
+
# Copyright (C) 2015-2021 Regents of the University of California
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import errno
|
|
15
|
+
import logging
|
|
16
|
+
import os
|
|
17
|
+
import socket
|
|
18
|
+
from typing import (Any,
|
|
19
|
+
Callable,
|
|
20
|
+
ContextManager,
|
|
21
|
+
Dict,
|
|
22
|
+
Iterable,
|
|
23
|
+
Iterator,
|
|
24
|
+
List,
|
|
25
|
+
Optional,
|
|
26
|
+
Set,
|
|
27
|
+
<<<<<<< HEAD
|
|
28
|
+
Tuple,
|
|
29
|
+
cast)
|
|
30
|
+
=======
|
|
31
|
+
cast,
|
|
32
|
+
TYPE_CHECKING)
|
|
33
|
+
>>>>>>> ce9c91c31 (Allow for not installing the mypy_boto3_* packages)
|
|
34
|
+
from urllib.parse import ParseResult
|
|
35
|
+
|
|
36
|
+
from toil.lib.aws import session, AWSRegionName, AWSServerErrors
|
|
37
|
+
from toil.lib.misc import printq
|
|
38
|
+
from toil.lib.retry import (DEFAULT_DELAYS,
|
|
39
|
+
DEFAULT_TIMEOUT,
|
|
40
|
+
get_error_code,
|
|
41
|
+
get_error_status,
|
|
42
|
+
old_retry,
|
|
43
|
+
retry, ErrorCondition)
|
|
44
|
+
|
|
45
|
+
if TYPE_CHECKING:
|
|
46
|
+
from mypy_boto3_sdb.type_defs import AttributeTypeDef
|
|
47
|
+
from mypy_boto3_s3.service_resource import Bucket, Object as S3Object
|
|
48
|
+
|
|
49
|
+
try:
|
|
50
|
+
from botocore.exceptions import ClientError, EndpointConnectionError
|
|
51
|
+
except ImportError:
|
|
52
|
+
ClientError = None # type: ignore
|
|
53
|
+
EndpointConnectionError = None # type: ignore
|
|
54
|
+
# AWS/boto extra is not installed
|
|
55
|
+
|
|
56
|
+
logger = logging.getLogger(__name__)
|
|
57
|
+
|
|
58
|
+
# Error codes we expect AWS to send back when we are making requests too fast.
# See:
# https://github.com/boto/botocore/blob/49f87350d54f55b687969ec8bf204df785975077/botocore/retries/standard.py#L316
THROTTLED_ERROR_CODES = [
    "Throttling",
    "ThrottlingException",
    "ThrottledException",
    "RequestThrottledException",
    "TooManyRequestsException",
    "ProvisionedThroughputExceededException",
    "TransactionInProgressException",
    "RequestLimitExceeded",
    "BandwidthLimitExceeded",
    "LimitExceededException",
    "RequestThrottled",
    "SlowDown",
    "PriorRequestNotComplete",
    "EC2ThrottledException",
]
|
|
76
|
+
|
|
77
|
+
@retry(errors=[AWSServerErrors])
def delete_iam_role(
    role_name: str, region: Optional[str] = None, quiet: bool = True
) -> None:
    """
    Delete an IAM role, removing all of its policies first.

    Every attached policy is detached and every inline policy is deleted
    before the role itself is deleted.

    :param role_name: Name of the IAM role to delete.
    :param region: Region name handed to the IAM client and resource.
    :param quiet: Passed along to printq() with each progress message.
    """
    # TODO: the Boto3 type hints are a bit overzealous here; they want hundreds
    # of overloads of the client-getting methods to exist based on the literal
    # string passed in, to return exactly the right kind of client or resource.
    # So we end up having to wrap all the calls in casts, which kind of defeats
    # the point of a nice fluent method you can call with the name of the thing
    # you want; we should have been calling iam_client() and so on all along if
    # we wanted MyPy to be able to understand us. So at some point we should
    # consider revising our API here to be less annoying to explain to the type
    # checker.
    client = session.client('iam', region_name=region)
    target = session.resource('iam', region_name=region).Role(role_name)

    # Attached ("normal") policies only need detaching from the role.
    for managed in target.attached_policies.all():
        printq(f'Now dissociating policy: {managed.policy_name} from role {target.name}', quiet)
        target.detach_policy(PolicyArn=managed.arn)

    # Inline policies are deleted through the client.
    for embedded in target.policies.all():
        printq(f'Deleting inline policy: {embedded.policy_name} from role {target.name}', quiet)
        client.delete_role_policy(RoleName=target.name, PolicyName=embedded.policy_name)

    client.delete_role(RoleName=role_name)
    printq(f'Role {role_name} successfully deleted.', quiet)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@retry(errors=[AWSServerErrors])
def delete_iam_instance_profile(
    instance_profile_name: str, region: Optional[str] = None, quiet: bool = True
) -> None:
    """
    Delete an IAM instance profile, removing any roles from it first.

    :param instance_profile_name: Name of the instance profile to delete.
    :param region: Region name handed to the IAM resource.
    :param quiet: Passed along to printq() with each progress message.
    """
    profile = session.resource("iam", region_name=region).InstanceProfile(instance_profile_name)

    # Remove each role from the profile before deleting the profile itself.
    if profile.roles is not None:
        for member in profile.roles:
            printq(f'Now dissociating role: {member.name} from instance profile {instance_profile_name}', quiet)
            profile.remove_role(RoleName=member.name)

    profile.delete()
    printq(f'Instance profile "{instance_profile_name}" successfully deleted.', quiet)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@retry(errors=[AWSServerErrors])
def delete_sdb_domain(
    sdb_domain_name: str, region: Optional[str] = None, quiet: bool = True
) -> None:
    """
    Delete the SimpleDB domain with the given name.

    :param sdb_domain_name: Name of the SDB domain to delete.
    :param region: Region name handed to the SDB client.
    :param quiet: Passed along to printq() with the success message.
    """
    sdb_client = session.client("sdb", region_name=region)
    sdb_client.delete_domain(DomainName=sdb_domain_name)
    # The message previously read "SBD Domain", a transposition of "SDB".
    printq(f'SDB Domain: "{sdb_domain_name}" successfully deleted.', quiet)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def connection_reset(e: Exception) -> bool:
    """
    Return True if the given error is a socket-level connection reset.

    :param e: The exception to classify.
    """
    if not isinstance(e, socket.error):
        return False
    # We have seen 'error: [Errno 104] Connection reset by peer' where the
    # English description suggests ECONNRESET (listed as 54) while the actual
    # errno is 104. To be safe, accept both the platform's ECONNRESET value
    # and a literal 104.
    reset_codes = (errno.ECONNRESET, 104)
    return e.errno in reset_codes
|
|
136
|
+
|
|
137
|
+
def connection_error(e: Exception) -> bool:
    """
    Return True if an error represents a failure to make a network connection.

    That is either a connection reset (see connection_reset()) or a botocore
    EndpointConnectionError.

    :param e: The exception to classify.
    """
    if connection_reset(e):
        return True
    # When the AWS/boto extra is not installed, the import fallback leaves
    # EndpointConnectionError set to None, and isinstance() against None
    # raises TypeError. In that case no error can be of that type.
    if EndpointConnectionError is None:
        return False
    return isinstance(e, EndpointConnectionError)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# TODO: Replace with: @retry and ErrorCondition
|
|
146
|
+
def retryable_s3_errors(e: Exception) -> bool:
    """
    Return True if this is an error from S3 that looks like we ought to retry our request.

    Retryable errors are connection failures, plus any ClientError that:

    * has an error status of 429 or 500,
    * has an error code listed in THROTTLED_ERROR_CODES,
    * mentions 'BucketNotEmpty' in its message,
    * is an HTTP 409 whose message contains 'try again', or
    * has HTTP status 404, 429, 500, 502, 503, or 504.

    :param e: The exception to classify.
    """
    if connection_error(e):
        return True

    # ClientError is None when the AWS/boto extra is not installed; guard so
    # that isinstance() is never called with None as the type.
    if ClientError is None or not isinstance(e, ClientError):
        return False

    if get_error_status(e) in (429, 500):
        return True
    if get_error_code(e) in THROTTLED_ERROR_CODES:
        return True

    message = str(e)
    if 'BucketNotEmpty' in message:
        return True

    http_status = e.response.get('ResponseMetadata', {}).get('HTTPStatusCode')
    if http_status == 409 and 'try again' in message:
        return True
    return http_status in (404, 429, 500, 502, 503, 504)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def retry_s3(
    delays: Iterable[float] = DEFAULT_DELAYS,
    timeout: float = DEFAULT_TIMEOUT,
    predicate: Callable[[Exception], bool] = retryable_s3_errors,
) -> Iterator[ContextManager[None]]:
    """
    Produce retry attempts (an iterator of context managers) for S3 operations.

    This is old_retry() with S3-appropriate defaults.

    :param delays: Delays forwarded to old_retry().
    :param timeout: Timeout forwarded to old_retry().
    :param predicate: Decides whether a raised exception should be retried;
        defaults to retryable_s3_errors.
    """
    s3_attempts = old_retry(delays=delays, timeout=timeout, predicate=predicate)
    return s3_attempts
|
|
165
|
+
|
|
166
|
+
@retry(errors=[AWSServerErrors])
def delete_s3_bucket(
    s3_resource: "S3ServiceResource",
    bucket: str,
    quiet: bool = True
) -> None:
    """
    Delete the given S3 bucket, along with every object version and delete
    marker in it.

    If the bucket turns out not to exist, that is reported and not treated as
    an error.

    :param s3_resource: Boto3 S3 resource used to reach the bucket.
    :param bucket: Name of the bucket to delete.
    :param quiet: Passed along to printq() with each progress message.
    """
    printq(f'Deleting s3 bucket: {bucket}', quiet)

    client = s3_resource.meta.client
    version_pages = client.get_paginator('list_object_versions')
    try:
        for page in version_pages.paginate(Bucket=bucket):
            # Versions and delete markers can both be deleted the same way.
            # They both have Key and VersionId, but there's no shared base
            # type defined for them in the stubs to express that. See
            # <https://github.com/vemel/mypy_boto3_builder/issues/123>. So
            # each list is cast before the two are joined.
            versions = cast(List[Dict[str, Any]], page.get('Versions', []))
            markers = cast(List[Dict[str, Any]], page.get('DeleteMarkers', []))
            doomed: List[Dict[str, Any]] = versions + markers
            for item in doomed:
                printq(f" Deleting {item['Key']} version {item['VersionId']}", quiet)
                client.delete_object(Bucket=bucket, Key=item['Key'], VersionId=item['VersionId'])
        s3_resource.Bucket(bucket).delete()
        printq(f'\n * Deleted s3 bucket successfully: {bucket}\n\n', quiet)
    except client.exceptions.NoSuchBucket:
        printq(f'\n * S3 bucket no longer exists: {bucket}\n\n', quiet)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def create_s3_bucket(
    s3_resource: "S3ServiceResource",
    bucket_name: str,
    region: AWSRegionName,
) -> "Bucket":
    """
    Create an AWS S3 bucket with the given name in the given region, using
    the given Boto3 S3 resource.

    Handles the us-east-1 region, where bucket creation is special.

    *ALL* S3 bucket creation should use this function.

    :param s3_resource: Boto3 S3 resource to create the bucket through.
    :param bucket_name: Name for the new bucket.
    :param region: Region to create the bucket in.
    :return: The bucket returned by create_bucket().
    """
    logger.debug("Creating bucket '%s' in region %s.", bucket_name, region)

    if region != "us-east-1":
        return s3_resource.create_bucket(
            Bucket=bucket_name,
            CreateBucketConfiguration={"LocationConstraint": region},
        )

    # For us-east-1 no location constraint is sent; see
    # https://github.com/boto/boto3/issues/125
    return s3_resource.create_bucket(Bucket=bucket_name)
|
|
218
|
+
|
|
219
|
+
@retry(errors=[ClientError])
def enable_public_objects(bucket_name: str) -> None:
    """
    Allow the given bucket to hold objects which are public.

    This removes the bucket's Public Access Block setting, so that not all
    public access is blocked, and removes the bucket's Object Ownership
    controls, so that object ACLs are enabled.

    The *account*'s Public Access Block setting is *not* touched, although it
    can also interfere here. That is probably best left to the account
    administrator.

    This configuration used to be the default, and is what most of Toil's code
    is written to expect, but new buckets now default to the more restrictive
    setting
    <https://aws.amazon.com/about-aws/whats-new/2022/12/amazon-s3-automatically-enable-block-public-access-disable-access-control-lists-buckets-april-2023/>,
    with the expectation that people would write IAM policies for the buckets
    to allow public access if needed. Toil expects to be able to make arbitrary
    objects in arbitrary places public, and naming them all in an IAM policy
    would be a very awkward way to do it. So we restore the old behavior.

    :param bucket_name: Name of the bucket to adjust.
    """
    client = session.client('s3')

    # The new public-access-prohibited default is implemented as extra
    # settings attached to the bucket. Removing those settings makes the
    # bucket fall back to the old defaults. See
    # <https://aws.amazon.com/blogs/aws/heads-up-amazon-s3-security-changes-are-coming-in-april-of-2023/>.

    # Stop blocking public access.
    client.delete_public_access_block(Bucket=bucket_name)

    # Stop using an ownership controls setting that prohibits ACLs.
    client.delete_bucket_ownership_controls(Bucket=bucket_name)
|
|
254
|
+
|
|
255
|
+
class NoBucketLocationError(Exception):
    """
    Raised when no location could be determined for a bucket.
    """
|
|
260
|
+
|
|
261
|
+
def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only_strategies: Optional[Set[int]] = None) -> str:
    """
    Get the AWS region name associated with the given S3 bucket, or raise NoBucketLocationError.

    Does not log at info level or above when this does not work; failures are expected in some contexts.

    Several strategies are tried in order: GetBucketLocation against the
    default client, GetBucketLocation against us-east-1 (only when no custom
    endpoint URL is given), and HeadBucket. A strategy that fails with
    AccessDenied (when no custom endpoint is in use) or with a KeyError is
    skipped in favor of the next one; any other ClientError propagates to the
    retry_s3() attempt it is running under.

    :param bucket_name: Name of the bucket to locate.
    :param endpoint_url: Optional S3 API URL override.
    :param only_strategies: For testing, use only strategies with 1-based numbers in this set.
    :return: The region name for the bucket.
    :raises NoBucketLocationError: If no strategy that ran produced a location.
    """

    s3_client = session.client('s3', endpoint_url=endpoint_url)

    def attempt_get_bucket_location() -> Optional[str]:
        """
        Try and get the bucket location from the normal API call.
        """
        return s3_client.get_bucket_location(Bucket=bucket_name).get('LocationConstraint', None)

    def attempt_get_bucket_location_from_us_east_1() -> Optional[str]:
        """
        Try and get the bucket location from the normal API call, but against us-east-1
        """
        # Sometimes we aren't allowed to GetBucketLocation. At least some of
        # the time, that's only true when we talk to whatever S3 API servers we
        # usually use, and we can get around this lack of permission by talking
        # to us-east-1 instead. We've been told that this is because us-east-1
        # is special and will answer the question when other regions won't.
        # See:
        # <https://ucsc-gi.slack.com/archives/C027D41M6UA/p1652819831740169?thread_ts=1652817377.594539&cid=C027D41M6UA>
        # It could also be because AWS open data buckets (which we tend to
        # encounter this problem for) tend to actually themselves be in
        # us-east-1.
        backup_s3_client = session.client('s3', region_name='us-east-1')
        return backup_s3_client.get_bucket_location(Bucket=bucket_name).get('LocationConstraint', None)

    def attempt_head_bucket() -> Optional[str]:
        """
        Try and get the bucket location from calling HeadBucket and inspecting
        the headers.
        """
        # If that also doesn't work, we can try HEAD-ing the bucket and looking
        # for an 'x-amz-bucket-region' header on the response, which can tell
        # us where the bucket is. See
        # <https://github.com/aws/aws-sdk-cpp/issues/844#issuecomment-383747871>
        info = s3_client.head_bucket(Bucket=bucket_name)
        return info['ResponseMetadata']['HTTPHeaders']['x-amz-bucket-region']

    # Compose a list of strategies we want to try in order, which may work.
    # None is an acceptable return type that actually means something.
    strategies: List[Callable[[], Optional[str]]] = []
    strategies.append(attempt_get_bucket_location)
    if not endpoint_url:
        # We should only try to talk to us-east-1 if we don't have a custom
        # URL.
        strategies.append(attempt_get_bucket_location_from_us_east_1)
    strategies.append(attempt_head_bucket)

    error_logs: List[Tuple[int, str]] = []
    # Bound up front so the final raise works even when no strategy ran or
    # failed (for example when only_strategies filters all of them out).
    # Previously that case hit an UnboundLocalError instead of raising
    # NoBucketLocationError.
    last_error: Optional[Exception] = None
    for attempt in retry_s3():
        with attempt:
            for rank, strategy in enumerate(strategies, start=1):
                if only_strategies is not None and rank not in only_strategies:
                    # We want to test running without this strategy.
                    continue
                try:
                    location = bucket_location_to_region(strategy())
                    logger.debug('Got bucket location from strategy %d', rank)
                    return location
                except ClientError as e:
                    if get_error_code(e) == 'AccessDenied' and not endpoint_url:
                        logger.debug('Strategy %d to get bucket location did not work: %s', rank, e)
                        error_logs.append((rank, str(e)))
                        last_error = e
                        # We were blocked with this strategy. Move on to the
                        # next strategy which might work.
                        continue
                    else:
                        raise
                except KeyError as e:
                    # If we get a weird head response we will have a KeyError
                    logger.debug('Strategy %d to get bucket location did not work: %s', rank, e)
                    error_logs.append((rank, str(e)))
                    last_error = e

    error_messages = [
        f"Strategy {failed_rank} failed to get bucket location because: {message}"
        for failed_rank, message in error_logs
    ]
    # If we get here, no strategy produced a location.
    raise NoBucketLocationError("Could not get bucket location: " + "\n".join(error_messages)) from last_error
|
|
351
|
+
|
|
352
|
+
def region_to_bucket_location(region: str) -> str:
    """
    Convert a region name into a bucket location string.

    'us-east-1' maps to the empty string; every other region maps to itself.
    """
    if region == 'us-east-1':
        return ''
    return region
|
|
354
|
+
|
|
355
|
+
def bucket_location_to_region(location: Optional[str]) -> str:
    """
    Convert a bucket location string into a region name.

    A missing (None) or empty location maps to "us-east-1"; any other
    location is returned unchanged.
    """
    if location is None or location == "":
        return "us-east-1"
    return location
|
|
357
|
+
|
|
358
|
+
def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3Object":
    """
    Get the S3 object that a parsed s3:// URL refers to.

    The bucket name is taken from the URL's network location and the key name
    from its path, without the leading slash. If the object does not exist
    (and that is allowed by ``existing``), an empty object is written under
    that key before it is returned.

    The TOIL_S3_HOST, TOIL_S3_PORT and TOIL_S3_USE_SSL environment variables
    are consulted to build a custom endpoint URL.

    :param url: Parsed s3:// URL naming the bucket and key.
    :param existing: If True, the key is expected to exist. If False, the key
        is expected not to exist and it will be created. If None, the key
        will be created if it doesn't exist.
    :raises FileNotFoundError: If existing is True and the object does not
        exist.
    :raises RuntimeError: If existing is False and the object already exists.
    :return: The S3 object for the bucket and key.
    """

    key_name = url.path[1:]
    bucket_name = url.netloc

    # Decide if we need to override Boto's built-in URL here.
    endpoint_url: Optional[str] = None
    host = os.environ.get('TOIL_S3_HOST', None)
    port = os.environ.get('TOIL_S3_PORT', None)
    # Only the exact string 'False' turns SSL off.
    protocol = 'http' if os.environ.get('TOIL_S3_USE_SSL') == 'False' else 'https'
    if host:
        # The port suffix must be parenthesized: without the parentheses the
        # conditional expression covers the whole concatenation, and the
        # endpoint collapses to '' whenever no port is configured.
        endpoint_url = f'{protocol}://{host}' + (f':{port}' if port else '')

    # TODO: OrdinaryCallingFormat equivalent in boto3?
    # if botoargs:
    #     botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()

    try:
        # Get the bucket's region to avoid a redirect per request
        region = get_bucket_region(bucket_name, endpoint_url=endpoint_url)
        s3 = session.resource('s3', region_name=region, endpoint_url=endpoint_url)
    except NoBucketLocationError as e:
        # Probably don't have permission.
        # TODO: check if it is that
        logger.debug("Couldn't get bucket location: %s", e)
        logger.debug("Fall back to not specifying location")
        s3 = session.resource('s3', endpoint_url=endpoint_url)

    obj = s3.Object(bucket_name, key_name)
    objExists = True

    try:
        obj.load()
    except ClientError as e:
        # Only a 404 means "no such object"; anything else is a real error.
        if get_error_status(e) == 404:
            objExists = False
        else:
            raise
    if existing is True and not objExists:
        raise FileNotFoundError(f"Key '{key_name}' does not exist in bucket '{bucket_name}'.")
    elif existing is False and objExists:
        raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")

    if not objExists:
        obj.put()  # write an empty file
    return obj
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
@retry(errors=[AWSServerErrors])
def list_objects_for_url(url: ParseResult) -> List[str]:
    """
    List the entries directly under the prefix named by a parsed s3:// URL.

    The URL's path is used as the key prefix and will be supplemented with a
    trailing slash if it is missing. Listing uses '/' as the delimiter, so
    both common prefixes and object keys under the prefix are reported. Each
    returned name has the prefix stripped from its front, and the key equal
    to the prefix itself is left out.

    The TOIL_S3_HOST, TOIL_S3_PORT and TOIL_S3_USE_SSL environment variables
    are consulted to build a custom endpoint URL.

    :param url: Parsed s3:// URL naming the bucket and prefix.
    :return: Names found under the prefix, relative to the prefix.
    """
    key_name = url.path[1:]
    bucket_name = url.netloc

    if key_name != '' and not key_name.endswith('/'):
        # Make sure to put the trailing slash on the key, or else we'll see
        # a prefix of just it.
        key_name = key_name + '/'

    # Decide if we need to override Boto's built-in URL here.
    # TODO: Deduplicate with get_object_for_url, or push down into session module
    endpoint_url: Optional[str] = None
    host = os.environ.get('TOIL_S3_HOST', None)
    port = os.environ.get('TOIL_S3_PORT', None)
    # Only the exact string 'False' turns SSL off.
    protocol = 'http' if os.environ.get('TOIL_S3_USE_SSL') == 'False' else 'https'
    if host:
        # The port suffix must be parenthesized: without the parentheses the
        # conditional expression covers the whole concatenation, and the
        # endpoint collapses to '' whenever no port is configured.
        endpoint_url = f'{protocol}://{host}' + (f':{port}' if port else '')

    client = session.client('s3', endpoint_url=endpoint_url)

    listing = []
    prefix_length = len(key_name)

    paginator = client.get_paginator('list_objects_v2')
    result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter='/')
    for page in result:
        if 'CommonPrefixes' in page:
            for prefix_item in page['CommonPrefixes']:
                listing.append(prefix_item['Prefix'][prefix_length:])
        if 'Contents' in page:
            for content_item in page['Contents']:
                if content_item['Key'] == key_name:
                    # Ignore folder name itself
                    continue
                listing.append(content_item['Key'][prefix_length:])

    logger.debug('Found in %s items: %s', url, listing)
    return listing
|
|
460
|
+
|
|
461
|
+
def flatten_tags(tags: Dict[str, str]) -> List[Dict[str, str]]:
    """
    Turn a mapping of tag names to tag values into a list of dicts, one per
    tag, each holding the name under 'Key' and the value under 'Value'.
    """
    flattened: List[Dict[str, str]] = []
    for tag_name, tag_value in tags.items():
        flattened.append({'Key': tag_name, 'Value': tag_value})
    return flattened
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def boto3_pager(requestor_callable: Callable[..., Any], result_attribute_name: str,
                **kwargs: Any) -> Iterable[Any]:
    """
    Generate every result produced by a paginated Boto 3 operation.

    The client and operation name are recovered from the given bound client
    method, a paginator for that operation is requested from the client, and
    the keyword arguments are passed to it. From each page, the items stored
    under the given attribute name are yielded in order; a page without that
    attribute contributes nothing.
    """

    # The bound method carries both the client and the operation's name.
    bound_client = requestor_callable.__self__  # type: ignore[attr-defined]
    operation = requestor_callable.__name__

    # Use a Boto 3 built-in paginator. See
    # <https://boto3.amazonaws.com/v1/documentation/api/latest/guide/paginators.html>
    pages = bound_client.get_paginator(operation).paginate(**kwargs)

    for current_page in pages:
        # Hand out this page's items one at a time.
        for item in current_page.get(result_attribute_name, []):
            yield item
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def get_item_from_attributes(attributes: List["AttributeTypeDef"], name: str) -> Any:
    """
    Look up an attribute by name in a list of attributes and return its value.

    Each entry of `attributes` is a dict-like item with a "Name" and a
    "Value" key. The "Value" of the first entry whose "Name" equals `name`
    is returned.

    If no entry has the given name, the function returns None.

    :param attributes: list of attributes
    :param name: name of the attribute
    :return: value of the attribute
    """
    for entry in attributes:
        if entry["Name"] == name:
            return entry["Value"]
    return None
|
toil/lib/bioio.py
CHANGED
|
@@ -20,7 +20,7 @@ from toil.test import get_temp_file
|
|
|
20
20
|
|
|
21
21
|
# used by cactus
|
|
22
22
|
# TODO: only used in utilsTest.py; move this there once out of cactus
|
|
23
|
-
def system(command):
|
|
23
|
+
def system(command: list[str]) -> None:
|
|
24
24
|
"""
|
|
25
25
|
A convenience wrapper around subprocess.check_call that logs the command before passing it
|
|
26
26
|
on. The command can be either a string or a sequence of strings. If it is a string shell=True
|