toil 7.0.0__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. toil/__init__.py +121 -83
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +137 -77
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
  5. toil/batchSystems/awsBatch.py +237 -128
  6. toil/batchSystems/cleanup_support.py +22 -16
  7. toil/batchSystems/contained_executor.py +30 -26
  8. toil/batchSystems/gridengine.py +85 -49
  9. toil/batchSystems/htcondor.py +164 -87
  10. toil/batchSystems/kubernetes.py +622 -386
  11. toil/batchSystems/local_support.py +17 -12
  12. toil/batchSystems/lsf.py +132 -79
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +288 -149
  16. toil/batchSystems/mesos/executor.py +77 -49
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +38 -29
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +293 -123
  21. toil/batchSystems/slurm.py +489 -137
  22. toil/batchSystems/torque.py +46 -32
  23. toil/bus.py +141 -73
  24. toil/common.py +630 -359
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1114 -532
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +62 -41
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +88 -57
  32. toil/fileStores/cachingFileStore.py +711 -247
  33. toil/fileStores/nonCachingFileStore.py +113 -75
  34. toil/job.py +988 -315
  35. toil/jobStores/abstractJobStore.py +387 -243
  36. toil/jobStores/aws/jobStore.py +727 -403
  37. toil/jobStores/aws/utils.py +161 -109
  38. toil/jobStores/conftest.py +1 -0
  39. toil/jobStores/fileJobStore.py +289 -151
  40. toil/jobStores/googleJobStore.py +137 -70
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +614 -269
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +55 -28
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +193 -58
  49. toil/lib/aws/utils.py +238 -218
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +83 -49
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +322 -209
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +4 -2
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +99 -11
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +65 -18
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +19 -7
  71. toil/lib/retry.py +115 -77
  72. toil/lib/threading.py +282 -80
  73. toil/lib/throttle.py +15 -14
  74. toil/options/common.py +834 -401
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +70 -19
  78. toil/provisioners/__init__.py +111 -46
  79. toil/provisioners/abstractProvisioner.py +322 -157
  80. toil/provisioners/aws/__init__.py +62 -30
  81. toil/provisioners/aws/awsProvisioner.py +980 -627
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +147 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +127 -61
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +148 -64
  98. toil/test/__init__.py +263 -179
  99. toil/test/batchSystems/batchSystemTest.py +438 -195
  100. toil/test/batchSystems/batch_system_plugin_test.py +18 -7
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +93 -47
  104. toil/test/cactus/test_cactus_integration.py +20 -22
  105. toil/test/cwl/cwlTest.py +271 -71
  106. toil/test/cwl/measure_default_memory.cwl +12 -0
  107. toil/test/cwl/not_run_required_input.cwl +29 -0
  108. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  109. toil/test/docs/scriptsTest.py +60 -34
  110. toil/test/jobStores/jobStoreTest.py +412 -235
  111. toil/test/lib/aws/test_iam.py +116 -48
  112. toil/test/lib/aws/test_s3.py +16 -9
  113. toil/test/lib/aws/test_utils.py +5 -6
  114. toil/test/lib/dockerTest.py +118 -141
  115. toil/test/lib/test_conversions.py +113 -115
  116. toil/test/lib/test_ec2.py +57 -49
  117. toil/test/lib/test_integration.py +104 -0
  118. toil/test/lib/test_misc.py +12 -5
  119. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  120. toil/test/mesos/helloWorld.py +7 -6
  121. toil/test/mesos/stress.py +25 -20
  122. toil/test/options/options.py +7 -2
  123. toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
  124. toil/test/provisioners/clusterScalerTest.py +440 -250
  125. toil/test/provisioners/clusterTest.py +81 -42
  126. toil/test/provisioners/gceProvisionerTest.py +174 -100
  127. toil/test/provisioners/provisionerTest.py +25 -13
  128. toil/test/provisioners/restartScript.py +5 -4
  129. toil/test/server/serverTest.py +188 -141
  130. toil/test/sort/restart_sort.py +137 -68
  131. toil/test/sort/sort.py +134 -66
  132. toil/test/sort/sortTest.py +91 -49
  133. toil/test/src/autoDeploymentTest.py +140 -100
  134. toil/test/src/busTest.py +20 -18
  135. toil/test/src/checkpointTest.py +8 -2
  136. toil/test/src/deferredFunctionTest.py +49 -35
  137. toil/test/src/dockerCheckTest.py +33 -26
  138. toil/test/src/environmentTest.py +20 -10
  139. toil/test/src/fileStoreTest.py +538 -271
  140. toil/test/src/helloWorldTest.py +7 -4
  141. toil/test/src/importExportFileTest.py +61 -31
  142. toil/test/src/jobDescriptionTest.py +32 -17
  143. toil/test/src/jobEncapsulationTest.py +2 -0
  144. toil/test/src/jobFileStoreTest.py +74 -50
  145. toil/test/src/jobServiceTest.py +187 -73
  146. toil/test/src/jobTest.py +120 -70
  147. toil/test/src/miscTests.py +19 -18
  148. toil/test/src/promisedRequirementTest.py +82 -36
  149. toil/test/src/promisesTest.py +7 -6
  150. toil/test/src/realtimeLoggerTest.py +6 -6
  151. toil/test/src/regularLogTest.py +71 -37
  152. toil/test/src/resourceTest.py +80 -49
  153. toil/test/src/restartDAGTest.py +36 -22
  154. toil/test/src/resumabilityTest.py +9 -2
  155. toil/test/src/retainTempDirTest.py +45 -14
  156. toil/test/src/systemTest.py +12 -8
  157. toil/test/src/threadingTest.py +44 -25
  158. toil/test/src/toilContextManagerTest.py +10 -7
  159. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  160. toil/test/src/workerTest.py +33 -16
  161. toil/test/utils/toilDebugTest.py +70 -58
  162. toil/test/utils/toilKillTest.py +4 -5
  163. toil/test/utils/utilsTest.py +239 -102
  164. toil/test/wdl/wdltoil_test.py +789 -148
  165. toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
  166. toil/toilState.py +52 -26
  167. toil/utils/toilConfig.py +13 -4
  168. toil/utils/toilDebugFile.py +44 -27
  169. toil/utils/toilDebugJob.py +85 -25
  170. toil/utils/toilDestroyCluster.py +11 -6
  171. toil/utils/toilKill.py +8 -3
  172. toil/utils/toilLaunchCluster.py +251 -145
  173. toil/utils/toilMain.py +37 -16
  174. toil/utils/toilRsyncCluster.py +27 -14
  175. toil/utils/toilSshCluster.py +45 -22
  176. toil/utils/toilStats.py +75 -36
  177. toil/utils/toilStatus.py +226 -119
  178. toil/utils/toilUpdateEC2Instances.py +3 -1
  179. toil/version.py +11 -11
  180. toil/wdl/utils.py +5 -5
  181. toil/wdl/wdltoil.py +3513 -1052
  182. toil/worker.py +269 -128
  183. toil-8.0.0.dist-info/METADATA +173 -0
  184. toil-8.0.0.dist-info/RECORD +253 -0
  185. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  186. toil-7.0.0.dist-info/METADATA +0 -158
  187. toil-7.0.0.dist-info/RECORD +0 -244
  188. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/LICENSE +0 -0
  189. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  190. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/lib/aws/utils.py CHANGED
@@ -15,35 +15,30 @@ import errno
15
15
  import logging
16
16
  import os
17
17
  import socket
18
- from typing import (Any,
19
- Callable,
20
- ContextManager,
21
- Dict,
22
- Iterable,
23
- Iterator,
24
- List,
25
- Optional,
26
- Set,
27
- cast)
18
+ from collections.abc import Iterable, Iterator
19
+ from typing import TYPE_CHECKING, Any, Callable, ContextManager, Optional, cast
28
20
  from urllib.parse import ParseResult
29
21
 
30
- from mypy_boto3_sdb.type_defs import AttributeTypeDef
31
- from toil.lib.aws import session, AWSRegionName, AWSServerErrors
22
+ from toil.lib.aws import AWSRegionName, AWSServerErrors, session
23
+ from toil.lib.conversions import strtobool
32
24
  from toil.lib.misc import printq
33
- from toil.lib.retry import (DEFAULT_DELAYS,
34
- DEFAULT_TIMEOUT,
35
- get_error_code,
36
- get_error_status,
37
- old_retry,
38
- retry, ErrorCondition)
25
+ from toil.lib.retry import (
26
+ DEFAULT_DELAYS,
27
+ DEFAULT_TIMEOUT,
28
+ get_error_code,
29
+ get_error_status,
30
+ old_retry,
31
+ retry,
32
+ )
33
+
34
+ if TYPE_CHECKING:
35
+ from mypy_boto3_s3 import S3ServiceResource
36
+ from mypy_boto3_s3.service_resource import Bucket
37
+ from mypy_boto3_s3.service_resource import Object as S3Object
38
+ from mypy_boto3_sdb.type_defs import AttributeTypeDef
39
39
 
40
40
  try:
41
41
  from botocore.exceptions import ClientError, EndpointConnectionError
42
- from mypy_boto3_iam import IAMClient, IAMServiceResource
43
- from mypy_boto3_s3 import S3Client, S3ServiceResource
44
- from mypy_boto3_s3.literals import BucketLocationConstraintType
45
- from mypy_boto3_s3.service_resource import Bucket, Object
46
- from mypy_boto3_sdb import SimpleDBClient
47
42
  except ImportError:
48
43
  ClientError = None # type: ignore
49
44
  EndpointConnectionError = None # type: ignore
@@ -54,63 +49,22 @@ logger = logging.getLogger(__name__)
54
49
  # These are error codes we expect from AWS if we are making requests too fast.
55
50
  # https://github.com/boto/botocore/blob/49f87350d54f55b687969ec8bf204df785975077/botocore/retries/standard.py#L316
56
51
  THROTTLED_ERROR_CODES = [
57
- 'Throttling',
58
- 'ThrottlingException',
59
- 'ThrottledException',
60
- 'RequestThrottledException',
61
- 'TooManyRequestsException',
62
- 'ProvisionedThroughputExceededException',
63
- 'TransactionInProgressException',
64
- 'RequestLimitExceeded',
65
- 'BandwidthLimitExceeded',
66
- 'LimitExceededException',
67
- 'RequestThrottled',
68
- 'SlowDown',
69
- 'PriorRequestNotComplete',
70
- 'EC2ThrottledException',
52
+ "Throttling",
53
+ "ThrottlingException",
54
+ "ThrottledException",
55
+ "RequestThrottledException",
56
+ "TooManyRequestsException",
57
+ "ProvisionedThroughputExceededException",
58
+ "TransactionInProgressException",
59
+ "RequestLimitExceeded",
60
+ "BandwidthLimitExceeded",
61
+ "LimitExceededException",
62
+ "RequestThrottled",
63
+ "SlowDown",
64
+ "PriorRequestNotComplete",
65
+ "EC2ThrottledException",
71
66
  ]
72
67
 
73
- @retry(errors=[AWSServerErrors])
74
- def delete_iam_role(
75
- role_name: str, region: Optional[str] = None, quiet: bool = True
76
- ) -> None:
77
- # TODO: the Boto3 type hints are a bit oversealous here; they want hundreds
78
- # of overloads of the client-getting methods to exist based on the literal
79
- # string passed in, to return exactly the right kind of client or resource.
80
- # So we end up having to wrap all the calls in casts, which kind of defeats
81
- # the point of a nice fluent method you can call with the name of the thing
82
- # you want; we should have been calling iam_client() and so on all along if
83
- # we wanted MyPy to be able to understand us. So at some point we should
84
- # consider revising our API here to be less annoying to explain to the type
85
- # checker.
86
- iam_client = session.client('iam', region_name=region)
87
- iam_resource = session.resource('iam', region_name=region)
88
- role = iam_resource.Role(role_name)
89
- # normal policies
90
- for attached_policy in role.attached_policies.all():
91
- printq(f'Now dissociating policy: {attached_policy.policy_name} from role {role.name}', quiet)
92
- role.detach_policy(PolicyArn=attached_policy.arn)
93
- # inline policies
94
- for inline_policy in role.policies.all():
95
- printq(f'Deleting inline policy: {inline_policy.policy_name} from role {role.name}', quiet)
96
- iam_client.delete_role_policy(RoleName=role.name, PolicyName=inline_policy.policy_name)
97
- iam_client.delete_role(RoleName=role_name)
98
- printq(f'Role {role_name} successfully deleted.', quiet)
99
-
100
-
101
- @retry(errors=[AWSServerErrors])
102
- def delete_iam_instance_profile(
103
- instance_profile_name: str, region: Optional[str] = None, quiet: bool = True
104
- ) -> None:
105
- iam_resource = session.resource("iam", region_name=region)
106
- instance_profile = iam_resource.InstanceProfile(instance_profile_name)
107
- if instance_profile.roles is not None:
108
- for role in instance_profile.roles:
109
- printq(f'Now dissociating role: {role.name} from instance profile {instance_profile_name}', quiet)
110
- instance_profile.remove_role(RoleName=role.name)
111
- instance_profile.delete()
112
- printq(f'Instance profile "{instance_profile_name}" successfully deleted.', quiet)
113
-
114
68
 
115
69
  @retry(errors=[AWSServerErrors])
116
70
  def delete_sdb_domain(
@@ -130,12 +84,12 @@ def connection_reset(e: Exception) -> bool:
130
84
  # errno is listed as 104. To be safe, we check for both:
131
85
  return isinstance(e, socket.error) and e.errno in (errno.ECONNRESET, 104)
132
86
 
87
+
133
88
  def connection_error(e: Exception) -> bool:
134
89
  """
135
90
  Return True if an error represents a failure to make a network connection.
136
91
  """
137
- return (connection_reset(e)
138
- or isinstance(e, EndpointConnectionError))
92
+ return connection_reset(e) or isinstance(e, EndpointConnectionError)
139
93
 
140
94
 
141
95
  # TODO: Replace with: @retry and ErrorCondition
@@ -143,34 +97,47 @@ def retryable_s3_errors(e: Exception) -> bool:
143
97
  """
144
98
  Return true if this is an error from S3 that looks like we ought to retry our request.
145
99
  """
146
- return (connection_error(e)
147
- or (isinstance(e, ClientError) and get_error_status(e) in (429, 500))
148
- or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
149
- # boto3 errors
150
- or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
151
- or (isinstance(e, ClientError) and 'BucketNotEmpty' in str(e))
152
- or (isinstance(e, ClientError) and e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') == 409 and 'try again' in str(e))
153
- or (isinstance(e, ClientError) and e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') in (404, 429, 500, 502, 503, 504)))
100
+ return (
101
+ connection_error(e)
102
+ or (isinstance(e, ClientError) and get_error_status(e) in (429, 500))
103
+ or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
104
+ # boto3 errors
105
+ or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
106
+ or (isinstance(e, ClientError) and "BucketNotEmpty" in str(e))
107
+ or (
108
+ isinstance(e, ClientError)
109
+ and e.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 409
110
+ and "try again" in str(e)
111
+ )
112
+ or (
113
+ isinstance(e, ClientError)
114
+ and e.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
115
+ in (404, 429, 500, 502, 503, 504)
116
+ )
117
+ )
154
118
 
155
119
 
156
- def retry_s3(delays: Iterable[float] = DEFAULT_DELAYS, timeout: float = DEFAULT_TIMEOUT, predicate: Callable[[Exception], bool] = retryable_s3_errors) -> Iterator[ContextManager[None]]:
120
+ def retry_s3(
121
+ delays: Iterable[float] = DEFAULT_DELAYS,
122
+ timeout: float = DEFAULT_TIMEOUT,
123
+ predicate: Callable[[Exception], bool] = retryable_s3_errors,
124
+ ) -> Iterator[ContextManager[None]]:
157
125
  """
158
126
  Retry iterator of context managers specifically for S3 operations.
159
127
  """
160
128
  return old_retry(delays=delays, timeout=timeout, predicate=predicate)
161
129
 
130
+
162
131
  @retry(errors=[AWSServerErrors])
163
132
  def delete_s3_bucket(
164
- s3_resource: "S3ServiceResource",
165
- bucket: str,
166
- quiet: bool = True
133
+ s3_resource: "S3ServiceResource", bucket: str, quiet: bool = True
167
134
  ) -> None:
168
135
  """
169
136
  Delete the given S3 bucket.
170
137
  """
171
- printq(f'Deleting s3 bucket: {bucket}', quiet)
138
+ printq(f"Deleting s3 bucket: {bucket}", quiet)
172
139
 
173
- paginator = s3_resource.meta.client.get_paginator('list_object_versions')
140
+ paginator = s3_resource.meta.client.get_paginator("list_object_versions")
174
141
  try:
175
142
  for response in paginator.paginate(Bucket=bucket):
176
143
  # Versions and delete markers can both go in here to be deleted.
@@ -178,15 +145,20 @@ def delete_s3_bucket(
178
145
  # defined for them in the stubs to express that. See
179
146
  # <https://github.com/vemel/mypy_boto3_builder/issues/123>. So we
180
147
  # have to do gymnastics to get them into the same list.
181
- to_delete: List[Dict[str, Any]] = cast(List[Dict[str, Any]], response.get('Versions', [])) + \
182
- cast(List[Dict[str, Any]], response.get('DeleteMarkers', []))
148
+ to_delete: list[dict[str, Any]] = cast(
149
+ list[dict[str, Any]], response.get("Versions", [])
150
+ ) + cast(list[dict[str, Any]], response.get("DeleteMarkers", []))
183
151
  for entry in to_delete:
184
- printq(f" Deleting {entry['Key']} version {entry['VersionId']}", quiet)
185
- s3_resource.meta.client.delete_object(Bucket=bucket, Key=entry['Key'], VersionId=entry['VersionId'])
152
+ printq(
153
+ f" Deleting {entry['Key']} version {entry['VersionId']}", quiet
154
+ )
155
+ s3_resource.meta.client.delete_object(
156
+ Bucket=bucket, Key=entry["Key"], VersionId=entry["VersionId"]
157
+ )
186
158
  s3_resource.Bucket(bucket).delete()
187
- printq(f'\n * Deleted s3 bucket successfully: {bucket}\n\n', quiet)
159
+ printq(f"\n * Deleted s3 bucket successfully: {bucket}\n\n", quiet)
188
160
  except s3_resource.meta.client.exceptions.NoSuchBucket:
189
- printq(f'\n * S3 bucket no longer exists: {bucket}\n\n', quiet)
161
+ printq(f"\n * S3 bucket no longer exists: {bucket}\n\n", quiet)
190
162
 
191
163
 
192
164
  def create_s3_bucket(
@@ -202,7 +174,7 @@ def create_s3_bucket(
202
174
 
203
175
  *ALL* S3 bucket creation should use this function.
204
176
  """
205
- logger.debug("Creating bucket '%s' in region %s.", bucket_name, region)
177
+ logger.info("Creating bucket '%s' in region %s.", bucket_name, region)
206
178
  if region == "us-east-1": # see https://github.com/boto/boto3/issues/125
207
179
  bucket = s3_resource.create_bucket(Bucket=bucket_name)
208
180
  else:
@@ -212,6 +184,7 @@ def create_s3_bucket(
212
184
  )
213
185
  return bucket
214
186
 
187
+
215
188
  @retry(errors=[ClientError])
216
189
  def enable_public_objects(bucket_name: str) -> None:
217
190
  """
@@ -235,7 +208,7 @@ def enable_public_objects(bucket_name: str) -> None:
235
208
  would be a very awkward way to do it. So we restore the old behavior.
236
209
  """
237
210
 
238
- s3_client = session.client('s3')
211
+ s3_client = session.client("s3")
239
212
 
240
213
  # Even though the new default is for public access to be prohibited, this
241
214
  # is implemented by adding new things attached to the bucket. If we remove
@@ -249,22 +222,36 @@ def enable_public_objects(bucket_name: str) -> None:
249
222
  s3_client.delete_bucket_ownership_controls(Bucket=bucket_name)
250
223
 
251
224
 
252
- def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only_strategies: Optional[Set[int]] = None) -> str:
225
+ class NoBucketLocationError(Exception):
226
+ """
227
+ Error to represent that we could not get a location for a bucket.
228
+ """
229
+
230
+
231
+ def get_bucket_region(
232
+ bucket_name: str,
233
+ endpoint_url: Optional[str] = None,
234
+ only_strategies: Optional[set[int]] = None,
235
+ ) -> str:
253
236
  """
254
- Get the AWS region name associated with the given S3 bucket.
237
+ Get the AWS region name associated with the given S3 bucket, or raise NoBucketLocationError.
238
+
239
+ Does not log at info level or above when this does not work; failures are expected in some contexts.
255
240
 
256
241
  Takes an optional S3 API URL override.
257
242
 
258
243
  :param only_strategies: For testing, use only strategies with 1-based numbers in this set.
259
244
  """
260
245
 
261
- s3_client = session.client('s3', endpoint_url=endpoint_url)
246
+ s3_client = session.client("s3", endpoint_url=endpoint_url)
262
247
 
263
248
  def attempt_get_bucket_location() -> Optional[str]:
264
249
  """
265
250
  Try and get the bucket location from the normal API call.
266
251
  """
267
- return s3_client.get_bucket_location(Bucket=bucket_name).get('LocationConstraint', None)
252
+ return s3_client.get_bucket_location(Bucket=bucket_name).get(
253
+ "LocationConstraint", None
254
+ )
268
255
 
269
256
  def attempt_get_bucket_location_from_us_east_1() -> Optional[str]:
270
257
  """
@@ -280,8 +267,10 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
280
267
  # It could also be because AWS open data buckets (which we tend to
281
268
  # encounter this problem for) tend to actually themselves be in
282
269
  # us-east-1.
283
- backup_s3_client = session.client('s3', region_name='us-east-1')
284
- return backup_s3_client.get_bucket_location(Bucket=bucket_name).get('LocationConstraint', None)
270
+ backup_s3_client = session.client("s3", region_name="us-east-1")
271
+ return backup_s3_client.get_bucket_location(Bucket=bucket_name).get(
272
+ "LocationConstraint", None
273
+ )
285
274
 
286
275
  def attempt_head_bucket() -> Optional[str]:
287
276
  """
@@ -293,11 +282,11 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
293
282
  # us where the bucket is. See
294
283
  # <https://github.com/aws/aws-sdk-cpp/issues/844#issuecomment-383747871>
295
284
  info = s3_client.head_bucket(Bucket=bucket_name)
296
- return info['ResponseMetadata']['HTTPHeaders']['x-amz-bucket-region']
285
+ return info["ResponseMetadata"]["HTTPHeaders"]["x-amz-bucket-region"]
297
286
 
298
287
  # Compose a list of strategies we want to try in order, which may work.
299
288
  # None is an acceptable return type that actually means something.
300
- strategies: List[Callable[[], Optional[str]]] = []
289
+ strategies: list[Callable[[], Optional[str]]] = []
301
290
  strategies.append(attempt_get_bucket_location)
302
291
  if not endpoint_url:
303
292
  # We should only try to talk to us-east-1 if we don't have a custom
@@ -305,17 +294,25 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
305
294
  strategies.append(attempt_get_bucket_location_from_us_east_1)
306
295
  strategies.append(attempt_head_bucket)
307
296
 
297
+ error_logs: list[tuple[int, str]] = []
308
298
  for attempt in retry_s3():
309
299
  with attempt:
310
300
  for i, strategy in enumerate(strategies):
311
- if only_strategies is not None and i+1 not in only_strategies:
301
+ if only_strategies is not None and i + 1 not in only_strategies:
312
302
  # We want to test running without this strategy.
313
303
  continue
314
304
  try:
315
- return bucket_location_to_region(strategy())
305
+ location = bucket_location_to_region(strategy())
306
+ logger.debug("Got bucket location from strategy %d", i + 1)
307
+ return location
316
308
  except ClientError as e:
317
- if get_error_code(e) == 'AccessDenied' and not endpoint_url:
318
- logger.warning('Strategy %d to get bucket location did not work: %s', i + 1, e)
309
+ if get_error_code(e) == "AccessDenied" and not endpoint_url:
310
+ logger.debug(
311
+ "Strategy %d to get bucket location did not work: %s",
312
+ i + 1,
313
+ e,
314
+ )
315
+ error_logs.append((i + 1, str(e)))
319
316
  last_error: Exception = e
320
317
  # We were blocked with this strategy. Move on to the
321
318
  # next strategy which might work.
@@ -324,127 +321,147 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
324
321
  raise
325
322
  except KeyError as e:
326
323
  # If we get a weird head response we will have a KeyError
327
- logger.warning('Strategy %d to get bucket location did not work: %s', i + 1, e)
324
+ logger.debug(
325
+ "Strategy %d to get bucket location did not work: %s", i + 1, e
326
+ )
327
+ error_logs.append((i + 1, str(e)))
328
328
  last_error = e
329
- # If we get here we ran out of attempts. Raise whatever the last problem was.
330
- raise last_error
329
+
330
+ error_messages = []
331
+ for rank, message in error_logs:
332
+ error_messages.append(
333
+ f"Strategy {rank} failed to get bucket location because: {message}"
334
+ )
335
+ # If we get here we ran out of attempts.
336
+ raise NoBucketLocationError(
337
+ "Could not get bucket location: " + "\n".join(error_messages)
338
+ ) from last_error
339
+
331
340
 
332
341
  def region_to_bucket_location(region: str) -> str:
333
- return '' if region == 'us-east-1' else region
342
+ return "" if region == "us-east-1" else region
343
+
334
344
 
335
345
  def bucket_location_to_region(location: Optional[str]) -> str:
336
346
  return "us-east-1" if location == "" or location is None else location
337
347
 
338
- def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "Object":
339
- """
340
- Extracts a key (object) from a given parsed s3:// URL.
341
348
 
342
- If existing is true and the object does not exist, raises FileNotFoundError.
349
+ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3Object":
350
+ """
351
+ Extracts a key (object) from a given parsed s3:// URL.
343
352
 
344
- :param bool existing: If True, key is expected to exist. If False, key is expected not to
345
- exists and it will be created. If None, the key will be created if it doesn't exist.
346
- """
353
+ If existing is true and the object does not exist, raises FileNotFoundError.
354
+
355
+ :param bool existing: If True, key is expected to exist. If False, key is expected not to
356
+ exists and it will be created. If None, the key will be created if it doesn't exist.
357
+ """
358
+
359
+ key_name = url.path[1:]
360
+ bucket_name = url.netloc
361
+
362
+ # Decide if we need to override Boto's built-in URL here.
363
+ endpoint_url: Optional[str] = None
364
+ host = os.environ.get("TOIL_S3_HOST", None)
365
+ port = os.environ.get("TOIL_S3_PORT", None)
366
+ protocol = "https"
367
+ if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
368
+ protocol = "http"
369
+ if host:
370
+ endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
371
+
372
+ # TODO: OrdinaryCallingFormat equivalent in boto3?
373
+ # if botoargs:
374
+ # botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
375
+
376
+ try:
377
+ # Get the bucket's region to avoid a redirect per request
378
+ region = get_bucket_region(bucket_name, endpoint_url=endpoint_url)
379
+ s3 = session.resource("s3", region_name=region, endpoint_url=endpoint_url)
380
+ except NoBucketLocationError as e:
381
+ # Probably don't have permission.
382
+ # TODO: check if it is that
383
+ logger.debug("Couldn't get bucket location: %s", e)
384
+ logger.debug("Fall back to not specifying location")
385
+ s3 = session.resource("s3", endpoint_url=endpoint_url)
386
+
387
+ obj = s3.Object(bucket_name, key_name)
388
+ objExists = True
389
+
390
+ try:
391
+ obj.load()
392
+ except ClientError as e:
393
+ if get_error_status(e) == 404:
394
+ objExists = False
395
+ else:
396
+ raise
397
+ if existing is True and not objExists:
398
+ raise FileNotFoundError(
399
+ f"Key '{key_name}' does not exist in bucket '{bucket_name}'."
400
+ )
401
+ elif existing is False and objExists:
402
+ raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
347
403
 
348
- key_name = url.path[1:]
349
- bucket_name = url.netloc
350
-
351
- # Decide if we need to override Boto's built-in URL here.
352
- endpoint_url: Optional[str] = None
353
- host = os.environ.get('TOIL_S3_HOST', None)
354
- port = os.environ.get('TOIL_S3_PORT', None)
355
- protocol = 'https'
356
- if os.environ.get('TOIL_S3_USE_SSL', True) == 'False':
357
- protocol = 'http'
358
- if host:
359
- endpoint_url = f'{protocol}://{host}' + f':{port}' if port else ''
360
-
361
- # TODO: OrdinaryCallingFormat equivalent in boto3?
362
- # if botoargs:
363
- # botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
364
-
365
- try:
366
- # Get the bucket's region to avoid a redirect per request
367
- region = get_bucket_region(bucket_name, endpoint_url=endpoint_url)
368
- s3 = session.resource('s3', region_name=region, endpoint_url=endpoint_url)
369
- except ClientError:
370
- # Probably don't have permission.
371
- # TODO: check if it is that
372
- s3 = session.resource('s3', endpoint_url=endpoint_url)
373
-
374
- obj = s3.Object(bucket_name, key_name)
375
- objExists = True
376
-
377
- try:
378
- obj.load()
379
- except ClientError as e:
380
- if get_error_status(e) == 404:
381
- objExists = False
382
- else:
383
- raise
384
- if existing is True and not objExists:
385
- raise FileNotFoundError(f"Key '{key_name}' does not exist in bucket '{bucket_name}'.")
386
- elif existing is False and objExists:
387
- raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
388
-
389
- if not objExists:
390
- obj.put() # write an empty file
391
- return obj
404
+ if not objExists:
405
+ obj.put() # write an empty file
406
+ return obj
392
407
 
393
408
 
394
409
  @retry(errors=[AWSServerErrors])
395
- def list_objects_for_url(url: ParseResult) -> List[str]:
396
- """
397
- Extracts a key (object) from a given parsed s3:// URL. The URL will be
398
- supplemented with a trailing slash if it is missing.
399
- """
400
- key_name = url.path[1:]
401
- bucket_name = url.netloc
402
-
403
- if key_name != '' and not key_name.endswith('/'):
404
- # Make sure to put the trailing slash on the key, or else we'll see
405
- # a prefix of just it.
406
- key_name = key_name + '/'
407
-
408
- # Decide if we need to override Boto's built-in URL here.
409
- # TODO: Deduplicate with get_object_for_url, or push down into session module
410
- endpoint_url: Optional[str] = None
411
- host = os.environ.get('TOIL_S3_HOST', None)
412
- port = os.environ.get('TOIL_S3_PORT', None)
413
- protocol = 'https'
414
- if os.environ.get('TOIL_S3_USE_SSL', True) == 'False':
415
- protocol = 'http'
416
- if host:
417
- endpoint_url = f'{protocol}://{host}' + f':{port}' if port else ''
418
-
419
- client = session.client('s3', endpoint_url=endpoint_url)
420
-
421
- listing = []
422
-
423
- paginator = client.get_paginator('list_objects_v2')
424
- result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter='/')
425
- for page in result:
426
- if 'CommonPrefixes' in page:
427
- for prefix_item in page['CommonPrefixes']:
428
- listing.append(prefix_item['Prefix'][len(key_name):])
429
- if 'Contents' in page:
430
- for content_item in page['Contents']:
431
- if content_item['Key'] == key_name:
432
- # Ignore folder name itself
433
- continue
434
- listing.append(content_item['Key'][len(key_name):])
410
+ def list_objects_for_url(url: ParseResult) -> list[str]:
411
+ """
412
+ Extracts a key (object) from a given parsed s3:// URL. The URL will be
413
+ supplemented with a trailing slash if it is missing.
414
+ """
415
+ key_name = url.path[1:]
416
+ bucket_name = url.netloc
417
+
418
+ if key_name != "" and not key_name.endswith("/"):
419
+ # Make sure to put the trailing slash on the key, or else we'll see
420
+ # a prefix of just it.
421
+ key_name = key_name + "/"
422
+
423
+ # Decide if we need to override Boto's built-in URL here.
424
+ # TODO: Deduplicate with get_object_for_url, or push down into session module
425
+ endpoint_url: Optional[str] = None
426
+ host = os.environ.get("TOIL_S3_HOST", None)
427
+ port = os.environ.get("TOIL_S3_PORT", None)
428
+ protocol = "https"
429
+ if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
430
+ protocol = "http"
431
+ if host:
432
+ endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
433
+
434
+ client = session.client("s3", endpoint_url=endpoint_url)
435
+
436
+ listing = []
437
+
438
+ paginator = client.get_paginator("list_objects_v2")
439
+ result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter="/")
440
+ for page in result:
441
+ if "CommonPrefixes" in page:
442
+ for prefix_item in page["CommonPrefixes"]:
443
+ listing.append(prefix_item["Prefix"][len(key_name) :])
444
+ if "Contents" in page:
445
+ for content_item in page["Contents"]:
446
+ if content_item["Key"] == key_name:
447
+ # Ignore folder name itself
448
+ continue
449
+ listing.append(content_item["Key"][len(key_name) :])
450
+
451
+ logger.debug("Found in %s items: %s", url, listing)
452
+ return listing
435
453
 
436
- logger.debug('Found in %s items: %s', url, listing)
437
- return listing
438
454
 
439
- def flatten_tags(tags: Dict[str, str]) -> List[Dict[str, str]]:
455
+ def flatten_tags(tags: dict[str, str]) -> list[dict[str, str]]:
440
456
  """
441
457
  Convert tags from a key to value dict into a list of 'Key': xxx, 'Value': xxx dicts.
442
458
  """
443
- return [{'Key': k, 'Value': v} for k, v in tags.items()]
459
+ return [{"Key": k, "Value": v} for k, v in tags.items()]
444
460
 
445
461
 
446
- def boto3_pager(requestor_callable: Callable[..., Any], result_attribute_name: str,
447
- **kwargs: Any) -> Iterable[Any]:
462
+ def boto3_pager(
463
+ requestor_callable: Callable[..., Any], result_attribute_name: str, **kwargs: Any
464
+ ) -> Iterable[Any]:
448
465
  """
449
466
  Yield all the results from calling the given Boto 3 method with the
450
467
  given keyword arguments, paging through the results using the Marker or
@@ -465,7 +482,7 @@ def boto3_pager(requestor_callable: Callable[..., Any], result_attribute_name: s
465
482
  yield from page.get(result_attribute_name, [])
466
483
 
467
484
 
468
- def get_item_from_attributes(attributes: List[AttributeTypeDef], name: str) -> Any:
485
+ def get_item_from_attributes(attributes: list["AttributeTypeDef"], name: str) -> Any:
469
486
  """
470
487
  Given a list of attributes, find the attribute associated with the name and return its corresponding value.
471
488
 
@@ -475,8 +492,11 @@ def get_item_from_attributes(attributes: List[AttributeTypeDef], name: str) -> A
475
492
 
476
493
  If the attribute with the name does not exist, the function will return None.
477
494
 
478
- :param attributes: list of attributes as List[AttributeTypeDef]
495
+ :param attributes: list of attributes
479
496
  :param name: name of the attribute
480
497
  :return: value of the attribute
481
498
  """
482
- return next((attribute["Value"] for attribute in attributes if attribute["Name"] == name), None)
499
+ return next(
500
+ (attribute["Value"] for attribute in attributes if attribute["Name"] == name),
501
+ None,
502
+ )