toil 8.1.0b1__py3-none-any.whl → 8.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. toil/__init__.py +0 -35
  2. toil/batchSystems/abstractBatchSystem.py +1 -1
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +1 -1
  4. toil/batchSystems/awsBatch.py +1 -1
  5. toil/batchSystems/cleanup_support.py +1 -1
  6. toil/batchSystems/kubernetes.py +53 -7
  7. toil/batchSystems/local_support.py +1 -1
  8. toil/batchSystems/mesos/batchSystem.py +13 -8
  9. toil/batchSystems/mesos/test/__init__.py +3 -2
  10. toil/batchSystems/singleMachine.py +1 -1
  11. toil/batchSystems/slurm.py +27 -26
  12. toil/bus.py +5 -3
  13. toil/common.py +39 -11
  14. toil/cwl/cwltoil.py +1 -1
  15. toil/job.py +64 -49
  16. toil/jobStores/abstractJobStore.py +24 -3
  17. toil/jobStores/fileJobStore.py +25 -1
  18. toil/jobStores/googleJobStore.py +104 -30
  19. toil/leader.py +9 -0
  20. toil/lib/accelerators.py +3 -1
  21. toil/lib/aws/utils.py.orig +504 -0
  22. toil/lib/bioio.py +1 -1
  23. toil/lib/docker.py +252 -91
  24. toil/lib/dockstore.py +11 -3
  25. toil/lib/exceptions.py +5 -3
  26. toil/lib/history.py +87 -13
  27. toil/lib/history_submission.py +23 -9
  28. toil/lib/io.py +34 -22
  29. toil/lib/misc.py +7 -1
  30. toil/lib/resources.py +2 -1
  31. toil/lib/threading.py +11 -10
  32. toil/options/common.py +8 -0
  33. toil/options/wdl.py +11 -0
  34. toil/server/api_spec/LICENSE +201 -0
  35. toil/server/api_spec/README.rst +5 -0
  36. toil/server/cli/wes_cwl_runner.py +2 -1
  37. toil/test/__init__.py +275 -115
  38. toil/test/batchSystems/batchSystemTest.py +227 -205
  39. toil/test/batchSystems/test_slurm.py +27 -0
  40. toil/test/cactus/pestis.tar.gz +0 -0
  41. toil/test/conftest.py +7 -0
  42. toil/test/cwl/2.fasta +11 -0
  43. toil/test/cwl/2.fastq +12 -0
  44. toil/test/cwl/conftest.py +1 -1
  45. toil/test/cwl/cwlTest.py +999 -867
  46. toil/test/cwl/directory/directory/file.txt +15 -0
  47. toil/test/cwl/download_directory_file.json +4 -0
  48. toil/test/cwl/download_directory_s3.json +4 -0
  49. toil/test/cwl/download_file.json +6 -0
  50. toil/test/cwl/download_http.json +6 -0
  51. toil/test/cwl/download_https.json +6 -0
  52. toil/test/cwl/download_s3.json +6 -0
  53. toil/test/cwl/download_subdirectory_file.json +5 -0
  54. toil/test/cwl/download_subdirectory_s3.json +5 -0
  55. toil/test/cwl/empty.json +1 -0
  56. toil/test/cwl/mock_mpi/fake_mpi.yml +8 -0
  57. toil/test/cwl/mock_mpi/fake_mpi_run.py +42 -0
  58. toil/test/cwl/optional-file-exists.json +6 -0
  59. toil/test/cwl/optional-file-missing.json +6 -0
  60. toil/test/cwl/preemptible_expression.json +1 -0
  61. toil/test/cwl/revsort-job-missing.json +6 -0
  62. toil/test/cwl/revsort-job.json +6 -0
  63. toil/test/cwl/s3_secondary_file.json +16 -0
  64. toil/test/cwl/seqtk_seq_job.json +6 -0
  65. toil/test/cwl/stream.json +6 -0
  66. toil/test/cwl/test_filename_conflict_resolution.ms/table.dat +0 -0
  67. toil/test/cwl/test_filename_conflict_resolution.ms/table.f0 +0 -0
  68. toil/test/cwl/test_filename_conflict_resolution.ms/table.f1 +0 -0
  69. toil/test/cwl/test_filename_conflict_resolution.ms/table.f1i +0 -0
  70. toil/test/cwl/test_filename_conflict_resolution.ms/table.f2 +0 -0
  71. toil/test/cwl/test_filename_conflict_resolution.ms/table.f2_TSM0 +0 -0
  72. toil/test/cwl/test_filename_conflict_resolution.ms/table.f3 +0 -0
  73. toil/test/cwl/test_filename_conflict_resolution.ms/table.f3_TSM0 +0 -0
  74. toil/test/cwl/test_filename_conflict_resolution.ms/table.f4 +0 -0
  75. toil/test/cwl/test_filename_conflict_resolution.ms/table.f4_TSM0 +0 -0
  76. toil/test/cwl/test_filename_conflict_resolution.ms/table.f5 +0 -0
  77. toil/test/cwl/test_filename_conflict_resolution.ms/table.info +0 -0
  78. toil/test/cwl/test_filename_conflict_resolution.ms/table.lock +0 -0
  79. toil/test/cwl/whale.txt +16 -0
  80. toil/test/docs/scripts/example_alwaysfail.py +38 -0
  81. toil/test/docs/scripts/example_alwaysfail_with_files.wdl +33 -0
  82. toil/test/docs/scripts/example_cachingbenchmark.py +117 -0
  83. toil/test/docs/scripts/stagingExampleFiles/in.txt +1 -0
  84. toil/test/docs/scripts/stagingExampleFiles/out.txt +2 -0
  85. toil/test/docs/scripts/tutorial_arguments.py +23 -0
  86. toil/test/docs/scripts/tutorial_debugging.patch +12 -0
  87. toil/test/docs/scripts/tutorial_debugging_hangs.wdl +126 -0
  88. toil/test/docs/scripts/tutorial_debugging_works.wdl +129 -0
  89. toil/test/docs/scripts/tutorial_docker.py +20 -0
  90. toil/test/docs/scripts/tutorial_dynamic.py +24 -0
  91. toil/test/docs/scripts/tutorial_encapsulation.py +28 -0
  92. toil/test/docs/scripts/tutorial_encapsulation2.py +29 -0
  93. toil/test/docs/scripts/tutorial_helloworld.py +15 -0
  94. toil/test/docs/scripts/tutorial_invokeworkflow.py +27 -0
  95. toil/test/docs/scripts/tutorial_invokeworkflow2.py +30 -0
  96. toil/test/docs/scripts/tutorial_jobfunctions.py +22 -0
  97. toil/test/docs/scripts/tutorial_managing.py +29 -0
  98. toil/test/docs/scripts/tutorial_managing2.py +56 -0
  99. toil/test/docs/scripts/tutorial_multiplejobs.py +25 -0
  100. toil/test/docs/scripts/tutorial_multiplejobs2.py +21 -0
  101. toil/test/docs/scripts/tutorial_multiplejobs3.py +22 -0
  102. toil/test/docs/scripts/tutorial_promises.py +25 -0
  103. toil/test/docs/scripts/tutorial_promises2.py +30 -0
  104. toil/test/docs/scripts/tutorial_quickstart.py +22 -0
  105. toil/test/docs/scripts/tutorial_requirements.py +44 -0
  106. toil/test/docs/scripts/tutorial_services.py +45 -0
  107. toil/test/docs/scripts/tutorial_staging.py +45 -0
  108. toil/test/docs/scripts/tutorial_stats.py +64 -0
  109. toil/test/lib/aws/test_iam.py +3 -1
  110. toil/test/lib/dockerTest.py +205 -122
  111. toil/test/lib/test_history.py +101 -77
  112. toil/test/provisioners/aws/awsProvisionerTest.py +12 -9
  113. toil/test/provisioners/clusterTest.py +4 -4
  114. toil/test/provisioners/gceProvisionerTest.py +16 -14
  115. toil/test/sort/sort.py +4 -1
  116. toil/test/src/busTest.py +17 -17
  117. toil/test/src/deferredFunctionTest.py +145 -132
  118. toil/test/src/importExportFileTest.py +71 -63
  119. toil/test/src/jobEncapsulationTest.py +27 -28
  120. toil/test/src/jobServiceTest.py +149 -133
  121. toil/test/src/jobTest.py +219 -211
  122. toil/test/src/miscTests.py +66 -60
  123. toil/test/src/promisedRequirementTest.py +163 -169
  124. toil/test/src/regularLogTest.py +24 -24
  125. toil/test/src/resourceTest.py +82 -76
  126. toil/test/src/restartDAGTest.py +51 -47
  127. toil/test/src/resumabilityTest.py +24 -19
  128. toil/test/src/retainTempDirTest.py +60 -57
  129. toil/test/src/systemTest.py +17 -13
  130. toil/test/src/threadingTest.py +29 -32
  131. toil/test/utils/ABCWorkflowDebug/B_file.txt +1 -0
  132. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +204 -0
  133. toil/test/utils/ABCWorkflowDebug/mkFile.py +16 -0
  134. toil/test/utils/ABCWorkflowDebug/sleep.cwl +12 -0
  135. toil/test/utils/ABCWorkflowDebug/sleep.yaml +1 -0
  136. toil/test/utils/toilDebugTest.py +117 -102
  137. toil/test/utils/toilKillTest.py +54 -53
  138. toil/test/utils/utilsTest.py +303 -229
  139. toil/test/wdl/lint_error.wdl +9 -0
  140. toil/test/wdl/md5sum/empty_file.json +1 -0
  141. toil/test/wdl/md5sum/md5sum-gs.json +1 -0
  142. toil/test/wdl/md5sum/md5sum.1.0.wdl +32 -0
  143. toil/test/wdl/md5sum/md5sum.input +1 -0
  144. toil/test/wdl/md5sum/md5sum.json +1 -0
  145. toil/test/wdl/md5sum/md5sum.wdl +25 -0
  146. toil/test/wdl/miniwdl_self_test/inputs-namespaced.json +1 -0
  147. toil/test/wdl/miniwdl_self_test/inputs.json +1 -0
  148. toil/test/wdl/miniwdl_self_test/self_test.wdl +40 -0
  149. toil/test/wdl/standard_library/as_map.json +16 -0
  150. toil/test/wdl/standard_library/as_map_as_input.wdl +23 -0
  151. toil/test/wdl/standard_library/as_pairs.json +7 -0
  152. toil/test/wdl/standard_library/as_pairs_as_input.wdl +23 -0
  153. toil/test/wdl/standard_library/ceil.json +3 -0
  154. toil/test/wdl/standard_library/ceil_as_command.wdl +16 -0
  155. toil/test/wdl/standard_library/ceil_as_input.wdl +16 -0
  156. toil/test/wdl/standard_library/collect_by_key.json +1 -0
  157. toil/test/wdl/standard_library/collect_by_key_as_input.wdl +23 -0
  158. toil/test/wdl/standard_library/cross.json +11 -0
  159. toil/test/wdl/standard_library/cross_as_input.wdl +19 -0
  160. toil/test/wdl/standard_library/flatten.json +7 -0
  161. toil/test/wdl/standard_library/flatten_as_input.wdl +18 -0
  162. toil/test/wdl/standard_library/floor.json +3 -0
  163. toil/test/wdl/standard_library/floor_as_command.wdl +16 -0
  164. toil/test/wdl/standard_library/floor_as_input.wdl +16 -0
  165. toil/test/wdl/standard_library/keys.json +8 -0
  166. toil/test/wdl/standard_library/keys_as_input.wdl +24 -0
  167. toil/test/wdl/standard_library/length.json +7 -0
  168. toil/test/wdl/standard_library/length_as_input.wdl +16 -0
  169. toil/test/wdl/standard_library/length_as_input_with_map.json +7 -0
  170. toil/test/wdl/standard_library/length_as_input_with_map.wdl +17 -0
  171. toil/test/wdl/standard_library/length_invalid.json +3 -0
  172. toil/test/wdl/standard_library/range.json +3 -0
  173. toil/test/wdl/standard_library/range_0.json +3 -0
  174. toil/test/wdl/standard_library/range_as_input.wdl +17 -0
  175. toil/test/wdl/standard_library/range_invalid.json +3 -0
  176. toil/test/wdl/standard_library/read_boolean.json +3 -0
  177. toil/test/wdl/standard_library/read_boolean_as_command.wdl +17 -0
  178. toil/test/wdl/standard_library/read_float.json +3 -0
  179. toil/test/wdl/standard_library/read_float_as_command.wdl +17 -0
  180. toil/test/wdl/standard_library/read_int.json +3 -0
  181. toil/test/wdl/standard_library/read_int_as_command.wdl +17 -0
  182. toil/test/wdl/standard_library/read_json.json +3 -0
  183. toil/test/wdl/standard_library/read_json_as_output.wdl +31 -0
  184. toil/test/wdl/standard_library/read_lines.json +3 -0
  185. toil/test/wdl/standard_library/read_lines_as_output.wdl +31 -0
  186. toil/test/wdl/standard_library/read_map.json +3 -0
  187. toil/test/wdl/standard_library/read_map_as_output.wdl +31 -0
  188. toil/test/wdl/standard_library/read_string.json +3 -0
  189. toil/test/wdl/standard_library/read_string_as_command.wdl +17 -0
  190. toil/test/wdl/standard_library/read_tsv.json +3 -0
  191. toil/test/wdl/standard_library/read_tsv_as_output.wdl +31 -0
  192. toil/test/wdl/standard_library/round.json +3 -0
  193. toil/test/wdl/standard_library/round_as_command.wdl +16 -0
  194. toil/test/wdl/standard_library/round_as_input.wdl +16 -0
  195. toil/test/wdl/standard_library/size.json +3 -0
  196. toil/test/wdl/standard_library/size_as_command.wdl +17 -0
  197. toil/test/wdl/standard_library/size_as_output.wdl +36 -0
  198. toil/test/wdl/standard_library/stderr.json +3 -0
  199. toil/test/wdl/standard_library/stderr_as_output.wdl +30 -0
  200. toil/test/wdl/standard_library/stdout.json +3 -0
  201. toil/test/wdl/standard_library/stdout_as_output.wdl +30 -0
  202. toil/test/wdl/standard_library/sub.json +3 -0
  203. toil/test/wdl/standard_library/sub_as_input.wdl +17 -0
  204. toil/test/wdl/standard_library/sub_as_input_with_file.wdl +17 -0
  205. toil/test/wdl/standard_library/transpose.json +6 -0
  206. toil/test/wdl/standard_library/transpose_as_input.wdl +18 -0
  207. toil/test/wdl/standard_library/write_json.json +6 -0
  208. toil/test/wdl/standard_library/write_json_as_command.wdl +17 -0
  209. toil/test/wdl/standard_library/write_lines.json +7 -0
  210. toil/test/wdl/standard_library/write_lines_as_command.wdl +17 -0
  211. toil/test/wdl/standard_library/write_map.json +6 -0
  212. toil/test/wdl/standard_library/write_map_as_command.wdl +17 -0
  213. toil/test/wdl/standard_library/write_tsv.json +6 -0
  214. toil/test/wdl/standard_library/write_tsv_as_command.wdl +17 -0
  215. toil/test/wdl/standard_library/zip.json +12 -0
  216. toil/test/wdl/standard_library/zip_as_input.wdl +19 -0
  217. toil/test/wdl/test.csv +3 -0
  218. toil/test/wdl/test.tsv +3 -0
  219. toil/test/wdl/testfiles/croo.wdl +38 -0
  220. toil/test/wdl/testfiles/drop_files.wdl +62 -0
  221. toil/test/wdl/testfiles/drop_files_subworkflow.wdl +13 -0
  222. toil/test/wdl/testfiles/empty.txt +0 -0
  223. toil/test/wdl/testfiles/not_enough_outputs.wdl +33 -0
  224. toil/test/wdl/testfiles/random.wdl +66 -0
  225. toil/test/wdl/testfiles/string_file_coercion.json +1 -0
  226. toil/test/wdl/testfiles/string_file_coercion.wdl +35 -0
  227. toil/test/wdl/testfiles/test.json +4 -0
  228. toil/test/wdl/testfiles/test_boolean.txt +1 -0
  229. toil/test/wdl/testfiles/test_float.txt +1 -0
  230. toil/test/wdl/testfiles/test_int.txt +1 -0
  231. toil/test/wdl/testfiles/test_lines.txt +5 -0
  232. toil/test/wdl/testfiles/test_map.txt +2 -0
  233. toil/test/wdl/testfiles/test_string.txt +1 -0
  234. toil/test/wdl/testfiles/url_to_file.wdl +13 -0
  235. toil/test/wdl/testfiles/url_to_optional_file.wdl +13 -0
  236. toil/test/wdl/testfiles/vocab.json +1 -0
  237. toil/test/wdl/testfiles/vocab.wdl +66 -0
  238. toil/test/wdl/testfiles/wait.wdl +34 -0
  239. toil/test/wdl/wdl_specification/type_pair.json +23 -0
  240. toil/test/wdl/wdl_specification/type_pair_basic.wdl +36 -0
  241. toil/test/wdl/wdl_specification/type_pair_with_files.wdl +36 -0
  242. toil/test/wdl/wdl_specification/v1_spec.json +1 -0
  243. toil/test/wdl/wdl_specification/v1_spec_declaration.wdl +39 -0
  244. toil/test/wdl/wdltoil_test.py +680 -407
  245. toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
  246. toil/version.py +9 -9
  247. toil/wdl/wdltoil.py +336 -123
  248. {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/METADATA +5 -4
  249. toil-8.2.0.dist-info/RECORD +439 -0
  250. {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/WHEEL +1 -1
  251. toil-8.1.0b1.dist-info/RECORD +0 -259
  252. {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/entry_points.txt +0 -0
  253. {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info/licenses}/LICENSE +0 -0
  254. {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,504 @@
1
+ # Copyright (C) 2015-2021 Regents of the University of California
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
import errno
import logging
import os
import socket
from typing import (Any,
                    Callable,
                    ContextManager,
                    Dict,
                    Iterable,
                    Iterator,
                    List,
                    Optional,
                    Set,
                    Tuple,
                    cast,
                    TYPE_CHECKING)
from urllib.parse import ParseResult

from toil.lib.aws import session, AWSRegionName, AWSServerErrors
from toil.lib.misc import printq
from toil.lib.retry import (DEFAULT_DELAYS,
                            DEFAULT_TIMEOUT,
                            get_error_code,
                            get_error_status,
                            old_retry,
                            retry, ErrorCondition)

if TYPE_CHECKING:
    from mypy_boto3_sdb.type_defs import AttributeTypeDef
    from mypy_boto3_s3 import S3ServiceResource
    from mypy_boto3_s3.service_resource import Bucket, Object as S3Object
48
+
49
+ try:
50
+ from botocore.exceptions import ClientError, EndpointConnectionError
51
+ except ImportError:
52
+ ClientError = None # type: ignore
53
+ EndpointConnectionError = None # type: ignore
54
+ # AWS/boto extra is not installed
55
+
56
+ logger = logging.getLogger(__name__)
57
+
58
# Error codes we expect AWS to hand back when we are sending requests too
# quickly. See:
# https://github.com/boto/botocore/blob/49f87350d54f55b687969ec8bf204df785975077/botocore/retries/standard.py#L316
THROTTLED_ERROR_CODES = [
    "Throttling",
    "ThrottlingException",
    "ThrottledException",
    "RequestThrottledException",
    "TooManyRequestsException",
    "ProvisionedThroughputExceededException",
    "TransactionInProgressException",
    "RequestLimitExceeded",
    "BandwidthLimitExceeded",
    "LimitExceededException",
    "RequestThrottled",
    "SlowDown",
    "PriorRequestNotComplete",
    "EC2ThrottledException",
]
76
+
77
@retry(errors=[AWSServerErrors])
def delete_iam_role(
    role_name: str, region: Optional[str] = None, quiet: bool = True
) -> None:
    """
    Delete the named IAM role, removing all of its policies first.

    Managed policies attached to the role are detached and inline policies
    are deleted, then the role itself is deleted.

    :param role_name: Name of the IAM role to delete.
    :param region: Region name handed to the IAM client and resource.
    :param quiet: Passed through to printq() with every progress message.
    """
    # TODO: the Boto3 type hints are a bit overzealous here; they want hundreds
    # of overloads of the client-getting methods to exist based on the literal
    # string passed in, to return exactly the right kind of client or resource.
    # So we end up having to wrap all the calls in casts, which kind of defeats
    # the point of a nice fluent method you can call with the name of the thing
    # you want; we should have been calling iam_client() and so on all along if
    # we wanted MyPy to be able to understand us. So at some point we should
    # consider revising our API here to be less annoying to explain to the type
    # checker.
    iam_client = session.client('iam', region_name=region)
    iam_resource = session.resource('iam', region_name=region)
    target = iam_resource.Role(role_name)

    # Managed ("normal") policies are only detached from the role.
    for managed in target.attached_policies.all():
        printq(f'Now dissociating policy: {managed.policy_name} from role {target.name}', quiet)
        target.detach_policy(PolicyArn=managed.arn)

    # Inline policies are deleted outright.
    for inline in target.policies.all():
        printq(f'Deleting inline policy: {inline.policy_name} from role {target.name}', quiet)
        iam_client.delete_role_policy(RoleName=target.name, PolicyName=inline.policy_name)

    iam_client.delete_role(RoleName=role_name)
    printq(f'Role {role_name} successfully deleted.', quiet)
103
+
104
+
105
@retry(errors=[AWSServerErrors])
def delete_iam_instance_profile(
    instance_profile_name: str, region: Optional[str] = None, quiet: bool = True
) -> None:
    """
    Delete the named IAM instance profile after removing its roles.

    :param instance_profile_name: Name of the instance profile to delete.
    :param region: Region name handed to the IAM resource.
    :param quiet: Passed through to printq() with every progress message.
    """
    iam = session.resource("iam", region_name=region)
    profile = iam.InstanceProfile(instance_profile_name)

    # Every role has to be taken off the profile before the profile is deleted.
    if profile.roles is not None:
        for member in profile.roles:
            printq(f'Now dissociating role: {member.name} from instance profile {instance_profile_name}', quiet)
            profile.remove_role(RoleName=member.name)

    profile.delete()
    printq(f'Instance profile "{instance_profile_name}" successfully deleted.', quiet)
117
+
118
+
119
@retry(errors=[AWSServerErrors])
def delete_sdb_domain(
    sdb_domain_name: str, region: Optional[str] = None, quiet: bool = True
) -> None:
    """
    Delete the named SimpleDB (SDB) domain.

    :param sdb_domain_name: Name of the SDB domain to delete.
    :param region: Region name handed to the SDB client.
    :param quiet: Passed through to printq() with the confirmation message.
    """
    sdb_client = session.client("sdb", region_name=region)
    sdb_client.delete_domain(DomainName=sdb_domain_name)
    # The service is "SDB"; the message previously misspelled it as "SBD".
    printq(f'SDB Domain: "{sdb_domain_name}" successfully deleted.', quiet)
126
+
127
+
128
def connection_reset(e: Exception) -> bool:
    """
    Tell whether the given error is a connection reset error.

    :param e: The exception to classify.
    :return: True only for socket errors whose errno marks a connection reset.
    """
    # socket.error is an alias of OSError, so one isinstance check covers both.
    if not isinstance(e, OSError):
        return False
    # We have seen 'error: [Errno 104] Connection reset by peer' in cases where
    # the description suggested ECONNRESET was 54, so the literal 104 is
    # accepted alongside whatever value this platform gives errno.ECONNRESET.
    reset_codes = (errno.ECONNRESET, 104)
    return e.errno in reset_codes
136
+
137
def connection_error(e: Exception) -> bool:
    """
    Return True if an error represents a failure to make a network connection.

    That means either a connection reset (see connection_reset()) or a
    botocore EndpointConnectionError.

    :param e: The exception to classify.
    """
    if connection_reset(e):
        return True
    # EndpointConnectionError is set to None at import time when the AWS/boto
    # extra is not installed; isinstance() against None would raise TypeError.
    return EndpointConnectionError is not None and isinstance(e, EndpointConnectionError)
143
+
144
+
145
+ # TODO: Replace with: @retry and ErrorCondition
146
def retryable_s3_errors(e: Exception) -> bool:
    """
    Return true if this is an error from S3 that looks like we ought to retry our request.

    An error is retryable when it is a connection error, or when it is a
    botocore ClientError that matches any of these:

    - an error status of 429 or 500,
    - an error code listed in THROTTLED_ERROR_CODES,
    - the text 'BucketNotEmpty' in its message,
    - an HTTP 409 response whose message contains 'try again',
    - an HTTP 404, 429, 500, 502, 503 or 504 response.

    :param e: The exception to classify.
    """
    if connection_error(e):
        return True

    # ClientError is None when the AWS/boto extra is not installed, in which
    # case nothing can be a ClientError (and isinstance() would raise).
    if ClientError is None or not isinstance(e, ClientError):
        return False

    if get_error_status(e) in (429, 500):
        return True
    if get_error_code(e) in THROTTLED_ERROR_CODES:
        return True

    message = str(e)
    if 'BucketNotEmpty' in message:
        return True

    http_status = e.response.get('ResponseMetadata', {}).get('HTTPStatusCode')
    if http_status == 409 and 'try again' in message:
        return True
    return http_status in (404, 429, 500, 502, 503, 504)
158
+
159
+
160
def retry_s3(
    delays: Iterable[float] = DEFAULT_DELAYS,
    timeout: float = DEFAULT_TIMEOUT,
    predicate: Callable[[Exception], bool] = retryable_s3_errors,
) -> Iterator[ContextManager[None]]:
    """
    Build the old_retry() iterator of context managers, tuned for S3 calls.

    All arguments are forwarded to old_retry() unchanged; by default the
    predicate deciding what to retry is retryable_s3_errors().
    """
    attempts = old_retry(delays=delays, timeout=timeout, predicate=predicate)
    return attempts
165
+
166
@retry(errors=[AWSServerErrors])
def delete_s3_bucket(
    s3_resource: "S3ServiceResource",
    bucket: str,
    quiet: bool = True
) -> None:
    """
    Empty and then delete the given S3 bucket.

    Every object version and delete marker is deleted first. A bucket that
    turns out not to exist is reported and otherwise ignored.

    :param s3_resource: Boto3 S3 resource whose client performs the calls.
    :param bucket: Name of the bucket to delete.
    :param quiet: Passed through to printq() with every progress message.
    """
    printq(f'Deleting s3 bucket: {bucket}', quiet)

    version_pages = s3_resource.meta.client.get_paginator('list_object_versions')
    try:
        for page in version_pages.paginate(Bucket=bucket):
            # Versions and delete markers can both be deleted the same way.
            # They both have Key and VersionId, but there's no shared base type
            # defined for them in the stubs to express that. See
            # <https://github.com/vemel/mypy_boto3_builder/issues/123>. So
            # each list is cast separately before the two are joined.
            versions = cast(List[Dict[str, Any]], page.get('Versions', []))
            markers = cast(List[Dict[str, Any]], page.get('DeleteMarkers', []))
            doomed: List[Dict[str, Any]] = versions + markers
            for item in doomed:
                printq(f" Deleting {item['Key']} version {item['VersionId']}", quiet)
                s3_resource.meta.client.delete_object(Bucket=bucket, Key=item['Key'], VersionId=item['VersionId'])
        s3_resource.Bucket(bucket).delete()
        printq(f'\n * Deleted s3 bucket successfully: {bucket}\n\n', quiet)
    except s3_resource.meta.client.exceptions.NoSuchBucket:
        printq(f'\n * S3 bucket no longer exists: {bucket}\n\n', quiet)
194
+
195
+
196
def create_s3_bucket(
    s3_resource: "S3ServiceResource",
    bucket_name: str,
    region: AWSRegionName,
) -> "Bucket":
    """
    Create an S3 bucket with the given name in the given region.

    The us-east-1 region is handled specially: there the bucket is created
    without a location constraint.

    *ALL* S3 bucket creation should use this function.

    :param s3_resource: Boto3 S3 resource used to create the bucket.
    :param bucket_name: Name for the new bucket.
    :param region: Region the bucket should live in.
    :return: The result of the create_bucket() call.
    """
    logger.debug("Creating bucket '%s' in region %s.", bucket_name, region)

    # see https://github.com/boto/boto3/issues/125
    if region == "us-east-1":
        return s3_resource.create_bucket(Bucket=bucket_name)

    return s3_resource.create_bucket(
        Bucket=bucket_name,
        CreateBucketConfiguration={"LocationConstraint": region},
    )
218
+
219
@retry(errors=[ClientError])
def enable_public_objects(bucket_name: str) -> None:
    """
    Allow the given bucket to hold objects that are public.

    Two bucket-level settings are removed: the Public Access Block, so that
    public access is no longer blocked outright, and the Object Ownership
    controls, so that object ACLs can be used.

    The *account*-level Public Access Block is *not* touched, although it can
    also get in the way. That is probably best left to the account
    administrator.

    Buckets used to be created this way by default, and most of Toil's code
    expects it, but new buckets now default to the more restrictive setting
    <https://aws.amazon.com/about-aws/whats-new/2022/12/amazon-s3-automatically-enable-block-public-access-disable-access-control-lists-buckets-april-2023/>,
    with the expectation that people would write IAM policies for the buckets
    to allow public access if needed. Toil expects to be able to make
    arbitrary objects in arbitrary places public, and naming them all in an
    IAM policy would be a very awkward way to do it. So the old behavior is
    restored here.

    :param bucket_name: Name of the bucket to adjust.
    """
    client = session.client('s3')

    # The new prohibit-public-access default is implemented as extra settings
    # attached to the bucket; with those removed the bucket falls back to the
    # old defaults. See
    # <https://aws.amazon.com/blogs/aws/heads-up-amazon-s3-security-changes-are-coming-in-april-of-2023/>.

    # Stop blocking public access.
    client.delete_public_access_block(Bucket=bucket_name)

    # Drop the ownership controls setting that prohibits ACLs.
    client.delete_bucket_ownership_controls(Bucket=bucket_name)
254
+
255
class NoBucketLocationError(Exception):
    """
    Raised when no location could be determined for a bucket.
    """
260
+
261
def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only_strategies: Optional[Set[int]] = None) -> str:
    """
    Get the AWS region name associated with the given S3 bucket, or raise NoBucketLocationError.

    Several strategies are tried in order; the first one that answers wins.

    Does not log at info level or above when this does not work; failures are expected in some contexts.

    :param bucket_name: Name of the bucket to locate.
    :param endpoint_url: Optional S3 API URL override.
    :param only_strategies: For testing, use only strategies with 1-based numbers in this set.
    :raises NoBucketLocationError: If no strategy produced a location. The
        last strategy failure, if there was one, is chained as the cause.
    """

    s3_client = session.client('s3', endpoint_url=endpoint_url)

    def attempt_get_bucket_location() -> Optional[str]:
        """
        Try and get the bucket location from the normal API call.
        """
        return s3_client.get_bucket_location(Bucket=bucket_name).get('LocationConstraint', None)

    def attempt_get_bucket_location_from_us_east_1() -> Optional[str]:
        """
        Try and get the bucket location from the normal API call, but against us-east-1
        """
        # Sometimes we aren't allowed to GetBucketLocation. At least some of
        # the time, that's only true when we talk to whatever S3 API servers we
        # usually use, and we can get around this lack of permission by talking
        # to us-east-1 instead. We've been told that this is because us-east-1
        # is special and will answer the question when other regions won't.
        # See:
        # <https://ucsc-gi.slack.com/archives/C027D41M6UA/p1652819831740169?thread_ts=1652817377.594539&cid=C027D41M6UA>
        # It could also be because AWS open data buckets (which we tend to
        # encounter this problem for) tend to actually themselves be in
        # us-east-1.
        backup_s3_client = session.client('s3', region_name='us-east-1')
        return backup_s3_client.get_bucket_location(Bucket=bucket_name).get('LocationConstraint', None)

    def attempt_head_bucket() -> Optional[str]:
        """
        Try and get the bucket location from calling HeadBucket and inspecting
        the headers.
        """
        # If that also doesn't work, we can try HEAD-ing the bucket and looking
        # for an 'x-amz-bucket-region' header on the response, which can tell
        # us where the bucket is. See
        # <https://github.com/aws/aws-sdk-cpp/issues/844#issuecomment-383747871>
        info = s3_client.head_bucket(Bucket=bucket_name)
        return info['ResponseMetadata']['HTTPHeaders']['x-amz-bucket-region']

    # Compose a list of strategies we want to try in order, which may work.
    # None is an acceptable return type that actually means something.
    strategies: List[Callable[[], Optional[str]]] = []
    strategies.append(attempt_get_bucket_location)
    if not endpoint_url:
        # We should only try to talk to us-east-1 if we don't have a custom
        # URL.
        strategies.append(attempt_get_bucket_location_from_us_east_1)
    strategies.append(attempt_head_bucket)

    error_logs: List[Tuple[int, str]] = []
    # Bound up front so the final raise still works when no strategy ever ran
    # or failed (for example when only_strategies excludes all of them).
    last_error: Optional[Exception] = None
    for attempt in retry_s3():
        with attempt:
            for i, strategy in enumerate(strategies):
                if only_strategies is not None and i + 1 not in only_strategies:
                    # We want to test running without this strategy.
                    continue
                try:
                    location = bucket_location_to_region(strategy())
                    logger.debug('Got bucket location from strategy %d', i + 1)
                    return location
                except ClientError as e:
                    if get_error_code(e) == 'AccessDenied' and not endpoint_url:
                        logger.debug('Strategy %d to get bucket location did not work: %s', i + 1, e)
                        error_logs.append((i + 1, str(e)))
                        last_error = e
                        # We were blocked with this strategy. Move on to the
                        # next strategy which might work.
                        continue
                    else:
                        # Anything else is passed up to the retry logic.
                        raise
                except KeyError as e:
                    # If we get a weird head response we will have a KeyError
                    logger.debug('Strategy %d to get bucket location did not work: %s', i + 1, e)
                    error_logs.append((i + 1, str(e)))
                    last_error = e

    error_messages = [
        f"Strategy {rank} failed to get bucket location because: {message}"
        for rank, message in error_logs
    ]
    # If we get here we ran out of attempts.
    raise NoBucketLocationError("Could not get bucket location: " + "\n".join(error_messages)) from last_error
351
+
352
def region_to_bucket_location(region: str) -> str:
    """
    Convert a region name to a bucket location string.

    The us-east-1 region maps to the empty string; any other region name is
    returned unchanged.
    """
    if region == 'us-east-1':
        return ''
    return region
354
+
355
def bucket_location_to_region(location: Optional[str]) -> str:
    """
    Convert a bucket location string to a region name.

    A location of None or the empty string maps to us-east-1; any other
    location is returned unchanged.
    """
    if location is None or location == "":
        return "us-east-1"
    return location
357
+
358
def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3Object":
    """
    Extracts a key (object) from a given parsed s3:// URL.

    The bucket is taken from the URL's netloc and the key from its path with
    the leading slash removed. The S3 endpoint can be overridden with the
    TOIL_S3_HOST, TOIL_S3_PORT and TOIL_S3_USE_SSL environment variables.

    :param url: The parsed s3:// URL to look up.
    :param bool existing: If True, key is expected to exist. If False, key is expected not to
           exist and it will be created. If None, the key will be created if it doesn't exist.
    :raises FileNotFoundError: If existing is True and the object does not exist.
    :raises RuntimeError: If existing is False and the object already exists.
    """

    key_name = url.path[1:]
    bucket_name = url.netloc

    # Decide if we need to override Boto's built-in URL here.
    endpoint_url: Optional[str] = None
    host = os.environ.get('TOIL_S3_HOST', None)
    port = os.environ.get('TOIL_S3_PORT', None)
    protocol = 'https'
    # Only the exact string 'False' turns SSL off.
    if os.environ.get('TOIL_S3_USE_SSL', True) == 'False':
        protocol = 'http'
    if host:
        # The conditional must be parenthesized: without the parentheses it
        # would apply to the whole concatenation, and a host with no port
        # would produce an empty endpoint URL.
        endpoint_url = f'{protocol}://{host}' + (f':{port}' if port else '')

    # TODO: OrdinaryCallingFormat equivalent in boto3?
    # if botoargs:
    #     botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()

    try:
        # Get the bucket's region to avoid a redirect per request
        region = get_bucket_region(bucket_name, endpoint_url=endpoint_url)
        s3 = session.resource('s3', region_name=region, endpoint_url=endpoint_url)
    except NoBucketLocationError as e:
        # Probably don't have permission.
        # TODO: check if it is that
        logger.debug("Couldn't get bucket location: %s", e)
        logger.debug("Fall back to not specifying location")
        s3 = session.resource('s3', endpoint_url=endpoint_url)

    obj = s3.Object(bucket_name, key_name)
    objExists = True

    try:
        obj.load()
    except ClientError as e:
        # A 404 just means the object is not there; anything else is a real error.
        if get_error_status(e) == 404:
            objExists = False
        else:
            raise
    if existing is True and not objExists:
        raise FileNotFoundError(f"Key '{key_name}' does not exist in bucket '{bucket_name}'.")
    elif existing is False and objExists:
        raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")

    if not objExists:
        obj.put()  # write an empty file
    return obj
414
+
415
+
416
@retry(errors=[AWSServerErrors])
def list_objects_for_url(url: ParseResult) -> List[str]:
    """
    Lists the entries directly under the prefix named by a parsed s3:// URL.

    The URL's key will be supplemented with a trailing slash if it is
    missing, so that it is treated as a "directory" prefix. The S3 endpoint
    can be overridden with the TOIL_S3_HOST, TOIL_S3_PORT and TOIL_S3_USE_SSL
    environment variables.

    :param url: Parsed s3:// URL naming the bucket and prefix to list.
    :return: Names found under the prefix, each with the prefix itself
             stripped off. Both common prefixes (as reported by S3 for the
             '/' delimiter) and object keys are included; the key equal to
             the prefix itself is skipped.
    """
    key_name = url.path[1:]
    bucket_name = url.netloc

    if key_name != '' and not key_name.endswith('/'):
        # Make sure to put the trailing slash on the key, or else we'll see
        # a prefix of just it.
        key_name = key_name + '/'

    # Decide if we need to override Boto's built-in URL here.
    # TODO: Deduplicate with get_object_for_url, or push down into session module
    endpoint_url: Optional[str] = None
    host = os.environ.get('TOIL_S3_HOST', None)
    port = os.environ.get('TOIL_S3_PORT', None)
    protocol = 'https'
    if os.environ.get('TOIL_S3_USE_SSL', True) == 'False':
        protocol = 'http'
    if host:
        # The parentheses matter: without them the conditional expression
        # swallows the whole concatenation, and a host without a port would
        # produce an empty endpoint URL.
        endpoint_url = f'{protocol}://{host}' + (f':{port}' if port else '')

    client = session.client('s3', endpoint_url=endpoint_url)

    listing = []
    # Every returned name is reported relative to the prefix.
    prefix_length = len(key_name)

    paginator = client.get_paginator('list_objects_v2')
    result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter='/')
    for page in result:
        if 'CommonPrefixes' in page:
            for prefix_item in page['CommonPrefixes']:
                listing.append(prefix_item['Prefix'][prefix_length:])
        if 'Contents' in page:
            for content_item in page['Contents']:
                if content_item['Key'] == key_name:
                    # Ignore folder name itself
                    continue
                listing.append(content_item['Key'][prefix_length:])

    logger.debug('Found in %s items: %s', url, listing)
    return listing
460
+
461
def flatten_tags(tags: Dict[str, str]) -> List[Dict[str, str]]:
    """
    Turn a mapping of tag names to tag values into a list of
    {'Key': name, 'Value': value} dicts, one per tag, in the mapping's
    iteration order.
    """
    flattened: List[Dict[str, str]] = []
    for tag_name, tag_value in tags.items():
        flattened.append({'Key': tag_name, 'Value': tag_value})
    return flattened
466
+
467
+
468
def boto3_pager(requestor_callable: Callable[..., Any], result_attribute_name: str,
                **kwargs: Any) -> Iterable[Any]:
    """
    Generate every result of a paged Boto 3 operation.

    The given callable must be a bound client method: its owner is used as
    the client and its name as the operation for which a built-in Boto 3
    paginator is requested. See
    <https://boto3.amazonaws.com/v1/documentation/api/latest/guide/paginators.html>

    :param requestor_callable: Bound Boto 3 client method to page through.
    :param result_attribute_name: Key in each response page that holds the
           list of results; pages lacking the key contribute nothing.
    :param kwargs: Keyword arguments forwarded to the paginator.
    :return: Iterator over the items from all pages, in page order.
    """

    # A bound method remembers both its client and its operation name.
    boto_client = requestor_callable.__self__  # type: ignore[attr-defined]
    operation = requestor_callable.__name__

    pages = boto_client.get_paginator(operation).paginate(**kwargs)
    for response in pages:
        # Hand out the items of this page before fetching the next one.
        yield from response.get(result_attribute_name, [])
488
+
489
+
490
def get_item_from_attributes(attributes: List["AttributeTypeDef"], name: str) -> Any:
    """
    Look up an attribute by name in a list of attributes and return its value.

    Each entry of `attributes` is a dict-like record (such as the TypedDicts
    boto3 SDB functions commonly return) with a "Name" and a "Value" key.
    The value of the first entry whose "Name" equals `name` is returned.

    If no entry has that name, None is returned.

    :param attributes: list of attributes
    :param name: name of the attribute
    :return: value of the attribute
    """
    for entry in attributes:
        if entry["Name"] == name:
            # First match wins; later duplicates are never inspected.
            return entry["Value"]
    return None
toil/lib/bioio.py CHANGED
@@ -20,7 +20,7 @@ from toil.test import get_temp_file
20
20
 
21
21
  # used by cactus
22
22
  # TODO: only used in utilsTest.py; move this there once out of cactus
23
- def system(command):
23
+ def system(command: list[str]) -> None:
24
24
  """
25
25
  A convenience wrapper around subprocess.check_call that logs the command before passing it
26
26
  on. The command can be either a string or a sequence of strings. If it is a string shell=True