toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/lib/aws/utils.py CHANGED
@@ -15,45 +15,33 @@ import errno
 import logging
 import os
 import socket
-import sys
-from typing import (Any,
-                    Callable,
-                    ContextManager,
-                    Dict,
-                    Iterable,
-                    Iterator,
-                    List,
-                    Optional,
-                    Set,
-                    Union,
-                    cast)
+from collections.abc import Iterable, Iterator
+from typing import TYPE_CHECKING, Any, Callable, ContextManager, Optional, cast
 from urllib.parse import ParseResult
 
-from toil.lib.aws import session
+from toil.lib.aws import AWSRegionName, AWSServerErrors, session
+from toil.lib.conversions import strtobool
 from toil.lib.misc import printq
-from toil.lib.retry import (DEFAULT_DELAYS,
-                            DEFAULT_TIMEOUT,
-                            get_error_code,
-                            get_error_status,
-                            old_retry,
-                            retry)
-
-if sys.version_info >= (3, 8):
-    from typing import Literal
-else:
-    from typing_extensions import Literal
+from toil.lib.retry import (
+    DEFAULT_DELAYS,
+    DEFAULT_TIMEOUT,
+    get_error_code,
+    get_error_status,
+    old_retry,
+    retry,
+)
+
+if TYPE_CHECKING:
+    from mypy_boto3_s3 import S3ServiceResource
+    from mypy_boto3_s3.service_resource import Bucket
+    from mypy_boto3_s3.service_resource import Object as S3Object
+    from mypy_boto3_sdb.type_defs import AttributeTypeDef
 
 try:
-    from boto.exception import BotoServerError, S3ResponseError
-    from botocore.exceptions import ClientError
-    from mypy_boto3_iam import IAMClient, IAMServiceResource
-    from mypy_boto3_s3 import S3Client, S3ServiceResource
-    from mypy_boto3_s3.literals import BucketLocationConstraintType
-    from mypy_boto3_s3.service_resource import Bucket, Object
-    from mypy_boto3_sdb import SimpleDBClient
+    from botocore.exceptions import ClientError, EndpointConnectionError
 except ImportError:
-    BotoServerError = None # type: ignore
    ClientError = None # type: ignore
+    EndpointConnectionError = None # type: ignore
    # AWS/boto extra is not installed
 
 logger = logging.getLogger(__name__)
@@ -61,73 +49,28 @@ logger = logging.getLogger(__name__)
 # These are error codes we expect from AWS if we are making requests too fast.
 # https://github.com/boto/botocore/blob/49f87350d54f55b687969ec8bf204df785975077/botocore/retries/standard.py#L316
 THROTTLED_ERROR_CODES = [
-    'Throttling',
-    'ThrottlingException',
-    'ThrottledException',
-    'RequestThrottledException',
-    'TooManyRequestsException',
-    'ProvisionedThroughputExceededException',
-    'TransactionInProgressException',
-    'RequestLimitExceeded',
-    'BandwidthLimitExceeded',
-    'LimitExceededException',
-    'RequestThrottled',
-    'SlowDown',
-    'PriorRequestNotComplete',
-    'EC2ThrottledException',
+    "Throttling",
+    "ThrottlingException",
+    "ThrottledException",
+    "RequestThrottledException",
+    "TooManyRequestsException",
+    "ProvisionedThroughputExceededException",
+    "TransactionInProgressException",
+    "RequestLimitExceeded",
+    "BandwidthLimitExceeded",
+    "LimitExceededException",
+    "RequestThrottled",
+    "SlowDown",
+    "PriorRequestNotComplete",
+    "EC2ThrottledException",
 ]
 
-@retry(errors=[BotoServerError])
-def delete_iam_role(
-    role_name: str, region: Optional[str] = None, quiet: bool = True
-) -> None:
-    from boto.iam.connection import IAMConnection
-
-    # TODO: the Boto3 type hints are a bit oversealous here; they want hundreds
-    # of overloads of the client-getting methods to exist based on the literal
-    # string passed in, to return exactly the right kind of client or resource.
-    # So we end up having to wrap all the calls in casts, which kind of defeats
-    # the point of a nice fluent method you can call with the name of the thing
-    # you want; we should have been calling iam_client() and so on all along if
-    # we wanted MyPy to be able to understand us. So at some point we should
-    # consider revising our API here to be less annoying to explain to the type
-    # checker.
-    iam_client = cast(IAMClient, session.client('iam', region_name=region))
-    iam_resource = cast(IAMServiceResource, session.resource('iam', region_name=region))
-    boto_iam_connection = IAMConnection()
-    role = iam_resource.Role(role_name)
-    # normal policies
-    for attached_policy in role.attached_policies.all():
-        printq(f'Now dissociating policy: {attached_policy.policy_name} from role {role.name}', quiet)
-        role.detach_policy(PolicyArn=attached_policy.arn)
-    # inline policies
-    for inline_policy in role.policies.all():
-        printq(f'Deleting inline policy: {inline_policy.policy_name} from role {role.name}', quiet)
-        # couldn't find an easy way to remove inline policies with boto3; use boto
-        boto_iam_connection.delete_role_policy(role.name, inline_policy.policy_name)
-    iam_client.delete_role(RoleName=role_name)
-    printq(f'Role {role_name} successfully deleted.', quiet)
-
-
-@retry(errors=[BotoServerError])
-def delete_iam_instance_profile(
-    instance_profile_name: str, region: Optional[str] = None, quiet: bool = True
-) -> None:
-    iam_resource = cast(IAMServiceResource, session.resource("iam", region_name=region))
-    instance_profile = iam_resource.InstanceProfile(instance_profile_name)
-    if instance_profile.roles is not None:
-        for role in instance_profile.roles:
-            printq(f'Now dissociating role: {role.name} from instance profile {instance_profile_name}', quiet)
-            instance_profile.remove_role(RoleName=role.name)
-    instance_profile.delete()
-    printq(f'Instance profile "{instance_profile_name}" successfully deleted.', quiet)
 
-
-@retry(errors=[BotoServerError])
+@retry(errors=[AWSServerErrors])
 def delete_sdb_domain(
     sdb_domain_name: str, region: Optional[str] = None, quiet: bool = True
 ) -> None:
-    sdb_client = cast(SimpleDBClient, session.client("sdb", region_name=region))
+    sdb_client = session.client("sdb", region_name=region)
    sdb_client.delete_domain(DomainName=sdb_domain_name)
    printq(f'SBD Domain: "{sdb_domain_name}" successfully deleted.', quiet)
 
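Note on the hunk above: the boto 2 based `delete_iam_role` and `delete_iam_instance_profile` helpers are removed, and the remaining helpers now retry on the `AWSServerErrors` condition imported from `toil.lib.aws` rather than boto 2's `BotoServerError`. A minimal, illustrative call of one of the retry-decorated helpers follows; the domain name and region are hypothetical, not taken from the package:

    from toil.lib.aws.utils import delete_sdb_domain

    # The @retry(errors=[AWSServerErrors]) decorator re-runs the call with
    # backoff when AWS reports a server-side error.
    delete_sdb_domain("example-jobstore--files", region="us-west-2", quiet=False)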
@@ -141,39 +84,60 @@ def connection_reset(e: Exception) -> bool:
     # errno is listed as 104. To be safe, we check for both:
     return isinstance(e, socket.error) and e.errno in (errno.ECONNRESET, 104)
 
+
+def connection_error(e: Exception) -> bool:
+    """
+    Return True if an error represents a failure to make a network connection.
+    """
+    return connection_reset(e) or isinstance(e, EndpointConnectionError)
+
+
 # TODO: Replace with: @retry and ErrorCondition
 def retryable_s3_errors(e: Exception) -> bool:
     """
     Return true if this is an error from S3 that looks like we ought to retry our request.
     """
-    return (connection_reset(e)
-            or (isinstance(e, BotoServerError) and e.status in (429, 500))
-            or (isinstance(e, BotoServerError) and e.code in THROTTLED_ERROR_CODES)
-            # boto3 errors
-            or (isinstance(e, (S3ResponseError, ClientError)) and get_error_code(e) in THROTTLED_ERROR_CODES)
-            or (isinstance(e, ClientError) and 'BucketNotEmpty' in str(e))
-            or (isinstance(e, ClientError) and e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') == 409 and 'try again' in str(e))
-            or (isinstance(e, ClientError) and e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') in (404, 429, 500, 502, 503, 504)))
+    return (
+        connection_error(e)
+        or (isinstance(e, ClientError) and get_error_status(e) in (429, 500))
+        or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
+        # boto3 errors
+        or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
+        or (isinstance(e, ClientError) and "BucketNotEmpty" in str(e))
+        or (
+            isinstance(e, ClientError)
+            and e.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 409
+            and "try again" in str(e)
+        )
+        or (
+            isinstance(e, ClientError)
+            and e.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
+            in (404, 429, 500, 502, 503, 504)
+        )
+    )
 
 
-def retry_s3(delays: Iterable[float] = DEFAULT_DELAYS, timeout: float = DEFAULT_TIMEOUT, predicate: Callable[[Exception], bool] = retryable_s3_errors) -> Iterator[ContextManager[None]]:
+def retry_s3(
+    delays: Iterable[float] = DEFAULT_DELAYS,
+    timeout: float = DEFAULT_TIMEOUT,
+    predicate: Callable[[Exception], bool] = retryable_s3_errors,
+) -> Iterator[ContextManager[None]]:
     """
     Retry iterator of context managers specifically for S3 operations.
     """
     return old_retry(delays=delays, timeout=timeout, predicate=predicate)
 
-@retry(errors=[BotoServerError])
+
+@retry(errors=[AWSServerErrors])
 def delete_s3_bucket(
-    s3_resource: "S3ServiceResource",
-    bucket: str,
-    quiet: bool = True
+    s3_resource: "S3ServiceResource", bucket: str, quiet: bool = True
 ) -> None:
     """
     Delete the given S3 bucket.
     """
-    printq(f'Deleting s3 bucket: {bucket}', quiet)
+    printq(f"Deleting s3 bucket: {bucket}", quiet)
 
-    paginator = s3_resource.meta.client.get_paginator('list_object_versions')
+    paginator = s3_resource.meta.client.get_paginator("list_object_versions")
     try:
         for response in paginator.paginate(Bucket=bucket):
             # Versions and delete markers can both go in here to be deleted.
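The hunk above adds `connection_error`, rewrites the boto 2 checks in `retryable_s3_errors` in terms of `ClientError`, and reformats `retry_s3`, which yields context managers from `old_retry`. A sketch of how `retry_s3` is consumed; this is the same pattern `get_bucket_region` uses later in this file, with a hypothetical bucket name:

    from toil.lib.aws import session
    from toil.lib.aws.utils import retry_s3

    s3_client = session.client("s3")
    for attempt in retry_s3():
        with attempt:
            # The body is re-run, with increasing delays, for as long as the
            # raised exception satisfies retryable_s3_errors.
            s3_client.head_bucket(Bucket="some-example-bucket")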
@@ -181,21 +145,26 @@ def delete_s3_bucket(
             # defined for them in the stubs to express that. See
             # <https://github.com/vemel/mypy_boto3_builder/issues/123>. So we
             # have to do gymnastics to get them into the same list.
-            to_delete: List[Dict[str, Any]] = cast(List[Dict[str, Any]], response.get('Versions', [])) + \
-                cast(List[Dict[str, Any]], response.get('DeleteMarkers', []))
+            to_delete: list[dict[str, Any]] = cast(
+                list[dict[str, Any]], response.get("Versions", [])
+            ) + cast(list[dict[str, Any]], response.get("DeleteMarkers", []))
             for entry in to_delete:
-                printq(f" Deleting {entry['Key']} version {entry['VersionId']}", quiet)
-                s3_resource.meta.client.delete_object(Bucket=bucket, Key=entry['Key'], VersionId=entry['VersionId'])
+                printq(
+                    f" Deleting {entry['Key']} version {entry['VersionId']}", quiet
+                )
+                s3_resource.meta.client.delete_object(
+                    Bucket=bucket, Key=entry["Key"], VersionId=entry["VersionId"]
+                )
         s3_resource.Bucket(bucket).delete()
-        printq(f'\n * Deleted s3 bucket successfully: {bucket}\n\n', quiet)
+        printq(f"\n * Deleted s3 bucket successfully: {bucket}\n\n", quiet)
     except s3_resource.meta.client.exceptions.NoSuchBucket:
-        printq(f'\n * S3 bucket no longer exists: {bucket}\n\n', quiet)
+        printq(f"\n * S3 bucket no longer exists: {bucket}\n\n", quiet)
 
 
 def create_s3_bucket(
     s3_resource: "S3ServiceResource",
     bucket_name: str,
-    region: Union["BucketLocationConstraintType", Literal["us-east-1"]],
+    region: AWSRegionName,
 ) -> "Bucket":
     """
     Create an AWS S3 bucket, using the given Boto3 S3 session, with the
@@ -205,7 +174,7 @@ def create_s3_bucket(
 
     *ALL* S3 bucket creation should use this function.
     """
-    logger.debug("Creating bucket '%s' in region %s.", bucket_name, region)
+    logger.info("Creating bucket '%s' in region %s.", bucket_name, region)
     if region == "us-east-1": # see https://github.com/boto/boto3/issues/125
         bucket = s3_resource.create_bucket(Bucket=bucket_name)
     else:
@@ -215,6 +184,7 @@
     )
     return bucket
 
+
 @retry(errors=[ClientError])
 def enable_public_objects(bucket_name: str) -> None:
     """
@@ -238,7 +208,7 @@ def enable_public_objects(bucket_name: str) -> None:
     would be a very awkward way to do it. So we restore the old behavior.
     """
 
-    s3_client = cast(S3Client, session.client('s3'))
+    s3_client = session.client("s3")
 
     # Even though the new default is for public access to be prohibited, this
     # is implemented by adding new things attached to the bucket. If we remove
@@ -252,22 +222,36 @@ def enable_public_objects(bucket_name: str) -> None:
     s3_client.delete_bucket_ownership_controls(Bucket=bucket_name)
 
 
-def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only_strategies: Optional[Set[int]] = None) -> str:
+class NoBucketLocationError(Exception):
+    """
+    Error to represent that we could not get a location for a bucket.
     """
-    Get the AWS region name associated with the given S3 bucket.
+
+
+def get_bucket_region(
+    bucket_name: str,
+    endpoint_url: Optional[str] = None,
+    only_strategies: Optional[set[int]] = None,
+) -> str:
+    """
+    Get the AWS region name associated with the given S3 bucket, or raise NoBucketLocationError.
+
+    Does not log at info level or above when this does not work; failures are expected in some contexts.
 
     Takes an optional S3 API URL override.
 
     :param only_strategies: For testing, use only strategies with 1-based numbers in this set.
     """
 
-    s3_client = cast(S3Client, session.client('s3', endpoint_url=endpoint_url))
+    s3_client = session.client("s3", endpoint_url=endpoint_url)
 
     def attempt_get_bucket_location() -> Optional[str]:
         """
         Try and get the bucket location from the normal API call.
         """
-        return s3_client.get_bucket_location(Bucket=bucket_name).get('LocationConstraint', None)
+        return s3_client.get_bucket_location(Bucket=bucket_name).get(
+            "LocationConstraint", None
+        )
 
     def attempt_get_bucket_location_from_us_east_1() -> Optional[str]:
         """
@@ -283,8 +267,10 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only_strategies: Optional[Set[int]] = None) -> str:
         # It could also be because AWS open data buckets (which we tend to
         # encounter this problem for) tend to actually themselves be in
         # us-east-1.
-        backup_s3_client = cast(S3Client, session.client('s3', region_name='us-east-1'))
-        return backup_s3_client.get_bucket_location(Bucket=bucket_name).get('LocationConstraint', None)
+        backup_s3_client = session.client("s3", region_name="us-east-1")
+        return backup_s3_client.get_bucket_location(Bucket=bucket_name).get(
+            "LocationConstraint", None
+        )
 
     def attempt_head_bucket() -> Optional[str]:
         """
@@ -296,11 +282,11 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only_strategies: Optional[Set[int]] = None) -> str:
         # us where the bucket is. See
         # <https://github.com/aws/aws-sdk-cpp/issues/844#issuecomment-383747871>
         info = s3_client.head_bucket(Bucket=bucket_name)
-        return info['ResponseMetadata']['HTTPHeaders']['x-amz-bucket-region']
+        return info["ResponseMetadata"]["HTTPHeaders"]["x-amz-bucket-region"]
 
     # Compose a list of strategies we want to try in order, which may work.
     # None is an acceptable return type that actually means something.
-    strategies: List[Callable[[], Optional[str]]] = []
+    strategies: list[Callable[[], Optional[str]]] = []
     strategies.append(attempt_get_bucket_location)
     if not endpoint_url:
         # We should only try to talk to us-east-1 if we don't have a custom
@@ -308,17 +294,25 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only_strategies: Optional[Set[int]] = None) -> str:
         strategies.append(attempt_get_bucket_location_from_us_east_1)
     strategies.append(attempt_head_bucket)
 
+    error_logs: list[tuple[int, str]] = []
     for attempt in retry_s3():
         with attempt:
             for i, strategy in enumerate(strategies):
-                if only_strategies is not None and i+1 not in only_strategies:
+                if only_strategies is not None and i + 1 not in only_strategies:
                     # We want to test running without this strategy.
                     continue
                 try:
-                    return bucket_location_to_region(strategy())
+                    location = bucket_location_to_region(strategy())
+                    logger.debug("Got bucket location from strategy %d", i + 1)
+                    return location
                 except ClientError as e:
-                    if get_error_code(e) == 'AccessDenied' and not endpoint_url:
-                        logger.warning('Strategy %d to get bucket location did not work: %s', i + 1, e)
+                    if get_error_code(e) == "AccessDenied" and not endpoint_url:
+                        logger.debug(
+                            "Strategy %d to get bucket location did not work: %s",
+                            i + 1,
+                            e,
+                        )
+                        error_logs.append((i + 1, str(e)))
                         last_error: Exception = e
                         # We were blocked with this strategy. Move on to the
                         # next strategy which might work.
@@ -327,120 +321,182 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only_strategies: Optional[Set[int]] = None) -> str:
                         raise
                 except KeyError as e:
                     # If we get a weird head response we will have a KeyError
-                    logger.warning('Strategy %d to get bucket location did not work: %s', i + 1, e)
+                    logger.debug(
+                        "Strategy %d to get bucket location did not work: %s", i + 1, e
+                    )
+                    error_logs.append((i + 1, str(e)))
                     last_error = e
-    # If we get here we ran out of attempts. Raise whatever the last problem was.
-    raise last_error
+
+    error_messages = []
+    for rank, message in error_logs:
+        error_messages.append(
+            f"Strategy {rank} failed to get bucket location because: {message}"
+        )
+    # If we get here we ran out of attempts.
+    raise NoBucketLocationError(
+        "Could not get bucket location: " + "\n".join(error_messages)
+    ) from last_error
+
 
 def region_to_bucket_location(region: str) -> str:
-    return '' if region == 'us-east-1' else region
+    return "" if region == "us-east-1" else region
+
 
 def bucket_location_to_region(location: Optional[str]) -> str:
     return "us-east-1" if location == "" or location is None else location
 
-def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "Object":
-    """
-    Extracts a key (object) from a given parsed s3:// URL.
 
-    If existing is true and the object does not exist, raises FileNotFoundError.
+def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3Object":
+    """
+    Extracts a key (object) from a given parsed s3:// URL.
 
-    :param bool existing: If True, key is expected to exist. If False, key is expected not to
-        exists and it will be created. If None, the key will be created if it doesn't exist.
-    """
+    If existing is true and the object does not exist, raises FileNotFoundError.
 
-    key_name = url.path[1:]
-    bucket_name = url.netloc
-
-    # Decide if we need to override Boto's built-in URL here.
-    endpoint_url: Optional[str] = None
-    host = os.environ.get('TOIL_S3_HOST', None)
-    port = os.environ.get('TOIL_S3_PORT', None)
-    protocol = 'https'
-    if os.environ.get('TOIL_S3_USE_SSL', True) == 'False':
-        protocol = 'http'
-    if host:
-        endpoint_url = f'{protocol}://{host}' + f':{port}' if port else ''
-
-    # TODO: OrdinaryCallingFormat equivalent in boto3?
-    # if botoargs:
-    #     botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
-
-    try:
-        # Get the bucket's region to avoid a redirect per request
-        region = get_bucket_region(bucket_name, endpoint_url=endpoint_url)
-        s3 = cast(S3ServiceResource, session.resource('s3', region_name=region, endpoint_url=endpoint_url))
-    except ClientError:
-        # Probably don't have permission.
-        # TODO: check if it is that
-        s3 = cast(S3ServiceResource, session.resource('s3', endpoint_url=endpoint_url))
-
-    obj = s3.Object(bucket_name, key_name)
-    objExists = True
-
-    try:
-        obj.load()
-    except ClientError as e:
-        if get_error_status(e) == 404:
-            objExists = False
-        else:
-            raise
-    if existing is True and not objExists:
-        raise FileNotFoundError(f"Key '{key_name}' does not exist in bucket '{bucket_name}'.")
-    elif existing is False and objExists:
-        raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
-
-    if not objExists:
-        obj.put() # write an empty file
-    return obj
-
-
-@retry(errors=[BotoServerError])
-def list_objects_for_url(url: ParseResult) -> List[str]:
-    """
-    Extracts a key (object) from a given parsed s3:// URL. The URL will be
-    supplemented with a trailing slash if it is missing.
-    """
-    key_name = url.path[1:]
-    bucket_name = url.netloc
-
-    if key_name != '' and not key_name.endswith('/'):
-        # Make sure to put the trailing slash on the key, or else we'll see
-        # a prefix of just it.
-        key_name = key_name + '/'
-
-    # Decide if we need to override Boto's built-in URL here.
-    # TODO: Deduplicate with get_object_for_url, or push down into session module
-    endpoint_url: Optional[str] = None
-    host = os.environ.get('TOIL_S3_HOST', None)
-    port = os.environ.get('TOIL_S3_PORT', None)
-    protocol = 'https'
-    if os.environ.get('TOIL_S3_USE_SSL', True) == 'False':
-        protocol = 'http'
-    if host:
-        endpoint_url = f'{protocol}://{host}' + f':{port}' if port else ''
-
-    client = cast(S3Client, session.client('s3', endpoint_url=endpoint_url))
-
-    listing = []
-
-    paginator = client.get_paginator('list_objects_v2')
-    result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter='/')
-    for page in result:
-        if 'CommonPrefixes' in page:
-            for prefix_item in page['CommonPrefixes']:
-                listing.append(prefix_item['Prefix'][len(key_name):])
-        if 'Contents' in page:
-            for content_item in page['Contents']:
-                if content_item['Key'] == key_name:
-                    # Ignore folder name itself
-                    continue
-                listing.append(content_item['Key'][len(key_name):])
+    :param bool existing: If True, key is expected to exist. If False, key is expected not to
+        exists and it will be created. If None, the key will be created if it doesn't exist.
+    """
+
+    key_name = url.path[1:]
+    bucket_name = url.netloc
+
+    # Decide if we need to override Boto's built-in URL here.
+    endpoint_url: Optional[str] = None
+    host = os.environ.get("TOIL_S3_HOST", None)
+    port = os.environ.get("TOIL_S3_PORT", None)
+    protocol = "https"
+    if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
+        protocol = "http"
+    if host:
+        endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
+
+    # TODO: OrdinaryCallingFormat equivalent in boto3?
+    # if botoargs:
+    #     botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
+
+    try:
+        # Get the bucket's region to avoid a redirect per request
+        region = get_bucket_region(bucket_name, endpoint_url=endpoint_url)
+        s3 = session.resource("s3", region_name=region, endpoint_url=endpoint_url)
+    except NoBucketLocationError as e:
+        # Probably don't have permission.
+        # TODO: check if it is that
+        logger.debug("Couldn't get bucket location: %s", e)
+        logger.debug("Fall back to not specifying location")
+        s3 = session.resource("s3", endpoint_url=endpoint_url)
+
+    obj = s3.Object(bucket_name, key_name)
+    objExists = True
+
+    try:
+        obj.load()
+    except ClientError as e:
+        if get_error_status(e) == 404:
+            objExists = False
+        else:
+            raise
+    if existing is True and not objExists:
+        raise FileNotFoundError(
+            f"Key '{key_name}' does not exist in bucket '{bucket_name}'."
+        )
+    elif existing is False and objExists:
+        raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
+
+    if not objExists:
+        obj.put() # write an empty file
+    return obj
+
+
+@retry(errors=[AWSServerErrors])
+def list_objects_for_url(url: ParseResult) -> list[str]:
+    """
+    Extracts a key (object) from a given parsed s3:// URL. The URL will be
+    supplemented with a trailing slash if it is missing.
+    """
+    key_name = url.path[1:]
+    bucket_name = url.netloc
+
+    if key_name != "" and not key_name.endswith("/"):
+        # Make sure to put the trailing slash on the key, or else we'll see
+        # a prefix of just it.
+        key_name = key_name + "/"
+
+    # Decide if we need to override Boto's built-in URL here.
+    # TODO: Deduplicate with get_object_for_url, or push down into session module
+    endpoint_url: Optional[str] = None
+    host = os.environ.get("TOIL_S3_HOST", None)
+    port = os.environ.get("TOIL_S3_PORT", None)
+    protocol = "https"
+    if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
+        protocol = "http"
+    if host:
+        endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
+
+    client = session.client("s3", endpoint_url=endpoint_url)
+
+    listing = []
+
+    paginator = client.get_paginator("list_objects_v2")
+    result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter="/")
+    for page in result:
+        if "CommonPrefixes" in page:
+            for prefix_item in page["CommonPrefixes"]:
+                listing.append(prefix_item["Prefix"][len(key_name) :])
+        if "Contents" in page:
+            for content_item in page["Contents"]:
+                if content_item["Key"] == key_name:
+                    # Ignore folder name itself
+                    continue
+                listing.append(content_item["Key"][len(key_name) :])
 
-    logger.debug('Found in %s items: %s', url, listing)
-    return listing
+    logger.debug("Found in %s items: %s", url, listing)
+    return listing
 
-def flatten_tags(tags: Dict[str, str]) -> List[Dict[str, str]]:
+
+def flatten_tags(tags: dict[str, str]) -> list[dict[str, str]]:
     """
     Convert tags from a key to value dict into a list of 'Key': xxx, 'Value': xxx dicts.
     """
-    return [{'Key': k, 'Value': v} for k, v in tags.items()]
+    return [{"Key": k, "Value": v} for k, v in tags.items()]
+
+
+def boto3_pager(
+    requestor_callable: Callable[..., Any], result_attribute_name: str, **kwargs: Any
+) -> Iterable[Any]:
+    """
+    Yield all the results from calling the given Boto 3 method with the
+    given keyword arguments, paging through the results using the Marker or
+    NextToken, and fetching out and looping over the list in the response
+    with the given attribute name.
+    """
+
+    # Recover the Boto3 client, and the name of the operation
+    client = requestor_callable.__self__ # type: ignore[attr-defined]
+    op_name = requestor_callable.__name__
+
+    # grab a Boto 3 built-in paginator. See
+    # <https://boto3.amazonaws.com/v1/documentation/api/latest/guide/paginators.html>
+    paginator = client.get_paginator(op_name)
+
+    for page in paginator.paginate(**kwargs):
+        # Invoke it and go through the pages, yielding from them
+        yield from page.get(result_attribute_name, [])
+
+
+def get_item_from_attributes(attributes: list["AttributeTypeDef"], name: str) -> Any:
+    """
+    Given a list of attributes, find the attribute associated with the name and return its corresponding value.
+
+    The `attribute_list` will be a list of TypedDict's (which boto3 SDB functions commonly return),
+    where each TypedDict has a "Name" and "Value" key value pair.
+    This function grabs the value out of the associated TypedDict.
+
+    If the attribute with the name does not exist, the function will return None.
+
+    :param attributes: list of attributes
+    :param name: name of the attribute
+    :return: value of the attribute
+    """
+    return next(
+        (attribute["Value"] for attribute in attributes if attribute["Name"] == name),
+        None,
+    )
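The file now ends with the new `boto3_pager` and `get_item_from_attributes` helpers shown above. A hedged usage sketch based only on their docstrings; the SimpleDB domain, item names, and attribute values below are hypothetical:

    from toil.lib.aws import session
    from toil.lib.aws.utils import boto3_pager, get_item_from_attributes

    sdb = session.client("sdb", region_name="us-west-2")

    # boto3_pager looks up the client's built-in paginator for the bound method
    # and yields every entry under the named result key across all pages.
    for item in boto3_pager(sdb.select, "Items",
                            SelectExpression="select * from example_domain"):
        # Each SimpleDB item carries a list of {"Name": ..., "Value": ...} dicts;
        # get_item_from_attributes returns the value for a name, or None.
        owner = get_item_from_attributes(item["Attributes"], "owner")
        print(item["Name"], owner)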