toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. toil/__init__.py +124 -86
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +137 -77
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
  5. toil/batchSystems/awsBatch.py +237 -128
  6. toil/batchSystems/cleanup_support.py +22 -16
  7. toil/batchSystems/contained_executor.py +30 -26
  8. toil/batchSystems/gridengine.py +85 -49
  9. toil/batchSystems/htcondor.py +164 -87
  10. toil/batchSystems/kubernetes.py +622 -386
  11. toil/batchSystems/local_support.py +17 -12
  12. toil/batchSystems/lsf.py +132 -79
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +288 -149
  16. toil/batchSystems/mesos/executor.py +77 -49
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +39 -29
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +293 -123
  21. toil/batchSystems/slurm.py +651 -155
  22. toil/batchSystems/torque.py +46 -32
  23. toil/bus.py +141 -73
  24. toil/common.py +784 -397
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1137 -534
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +62 -41
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +88 -57
  32. toil/fileStores/cachingFileStore.py +711 -247
  33. toil/fileStores/nonCachingFileStore.py +113 -75
  34. toil/job.py +1031 -349
  35. toil/jobStores/abstractJobStore.py +387 -243
  36. toil/jobStores/aws/jobStore.py +772 -412
  37. toil/jobStores/aws/utils.py +161 -109
  38. toil/jobStores/conftest.py +1 -0
  39. toil/jobStores/fileJobStore.py +289 -151
  40. toil/jobStores/googleJobStore.py +137 -70
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +614 -269
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +55 -28
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +204 -58
  49. toil/lib/aws/utils.py +290 -213
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +83 -49
  53. toil/lib/docker.py +131 -103
  54. toil/lib/dockstore.py +379 -0
  55. toil/lib/ec2.py +322 -209
  56. toil/lib/ec2nodes.py +174 -105
  57. toil/lib/encryption/_dummy.py +5 -3
  58. toil/lib/encryption/_nacl.py +10 -6
  59. toil/lib/encryption/conftest.py +1 -0
  60. toil/lib/exceptions.py +26 -7
  61. toil/lib/expando.py +4 -2
  62. toil/lib/ftp_utils.py +217 -0
  63. toil/lib/generatedEC2Lists.py +127 -19
  64. toil/lib/history.py +1271 -0
  65. toil/lib/history_submission.py +681 -0
  66. toil/lib/humanize.py +6 -2
  67. toil/lib/io.py +121 -12
  68. toil/lib/iterables.py +4 -2
  69. toil/lib/memoize.py +12 -8
  70. toil/lib/misc.py +83 -18
  71. toil/lib/objects.py +2 -2
  72. toil/lib/resources.py +19 -7
  73. toil/lib/retry.py +125 -87
  74. toil/lib/threading.py +282 -80
  75. toil/lib/throttle.py +15 -14
  76. toil/lib/trs.py +390 -0
  77. toil/lib/web.py +38 -0
  78. toil/options/common.py +850 -402
  79. toil/options/cwl.py +185 -90
  80. toil/options/runner.py +50 -0
  81. toil/options/wdl.py +70 -19
  82. toil/provisioners/__init__.py +111 -46
  83. toil/provisioners/abstractProvisioner.py +322 -157
  84. toil/provisioners/aws/__init__.py +62 -30
  85. toil/provisioners/aws/awsProvisioner.py +980 -627
  86. toil/provisioners/clusterScaler.py +541 -279
  87. toil/provisioners/gceProvisioner.py +283 -180
  88. toil/provisioners/node.py +147 -79
  89. toil/realtimeLogger.py +34 -22
  90. toil/resource.py +137 -75
  91. toil/server/app.py +127 -61
  92. toil/server/celery_app.py +3 -1
  93. toil/server/cli/wes_cwl_runner.py +84 -55
  94. toil/server/utils.py +56 -31
  95. toil/server/wes/abstract_backend.py +64 -26
  96. toil/server/wes/amazon_wes_utils.py +21 -15
  97. toil/server/wes/tasks.py +121 -63
  98. toil/server/wes/toil_backend.py +142 -107
  99. toil/server/wsgi_app.py +4 -3
  100. toil/serviceManager.py +58 -22
  101. toil/statsAndLogging.py +183 -65
  102. toil/test/__init__.py +263 -179
  103. toil/test/batchSystems/batchSystemTest.py +438 -195
  104. toil/test/batchSystems/batch_system_plugin_test.py +18 -7
  105. toil/test/batchSystems/test_gridengine.py +173 -0
  106. toil/test/batchSystems/test_lsf_helper.py +67 -58
  107. toil/test/batchSystems/test_slurm.py +265 -49
  108. toil/test/cactus/test_cactus_integration.py +20 -22
  109. toil/test/cwl/conftest.py +39 -0
  110. toil/test/cwl/cwlTest.py +375 -72
  111. toil/test/cwl/measure_default_memory.cwl +12 -0
  112. toil/test/cwl/not_run_required_input.cwl +29 -0
  113. toil/test/cwl/optional-file.cwl +18 -0
  114. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  115. toil/test/docs/scriptsTest.py +60 -34
  116. toil/test/jobStores/jobStoreTest.py +412 -235
  117. toil/test/lib/aws/test_iam.py +116 -48
  118. toil/test/lib/aws/test_s3.py +16 -9
  119. toil/test/lib/aws/test_utils.py +5 -6
  120. toil/test/lib/dockerTest.py +118 -141
  121. toil/test/lib/test_conversions.py +113 -115
  122. toil/test/lib/test_ec2.py +57 -49
  123. toil/test/lib/test_history.py +212 -0
  124. toil/test/lib/test_misc.py +12 -5
  125. toil/test/lib/test_trs.py +161 -0
  126. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  127. toil/test/mesos/helloWorld.py +7 -6
  128. toil/test/mesos/stress.py +25 -20
  129. toil/test/options/options.py +7 -2
  130. toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
  131. toil/test/provisioners/clusterScalerTest.py +440 -250
  132. toil/test/provisioners/clusterTest.py +81 -42
  133. toil/test/provisioners/gceProvisionerTest.py +174 -100
  134. toil/test/provisioners/provisionerTest.py +25 -13
  135. toil/test/provisioners/restartScript.py +5 -4
  136. toil/test/server/serverTest.py +188 -141
  137. toil/test/sort/restart_sort.py +137 -68
  138. toil/test/sort/sort.py +134 -66
  139. toil/test/sort/sortTest.py +91 -49
  140. toil/test/src/autoDeploymentTest.py +140 -100
  141. toil/test/src/busTest.py +20 -18
  142. toil/test/src/checkpointTest.py +8 -2
  143. toil/test/src/deferredFunctionTest.py +49 -35
  144. toil/test/src/dockerCheckTest.py +33 -26
  145. toil/test/src/environmentTest.py +20 -10
  146. toil/test/src/fileStoreTest.py +538 -271
  147. toil/test/src/helloWorldTest.py +7 -4
  148. toil/test/src/importExportFileTest.py +61 -31
  149. toil/test/src/jobDescriptionTest.py +32 -17
  150. toil/test/src/jobEncapsulationTest.py +2 -0
  151. toil/test/src/jobFileStoreTest.py +74 -50
  152. toil/test/src/jobServiceTest.py +187 -73
  153. toil/test/src/jobTest.py +120 -70
  154. toil/test/src/miscTests.py +19 -18
  155. toil/test/src/promisedRequirementTest.py +82 -36
  156. toil/test/src/promisesTest.py +7 -6
  157. toil/test/src/realtimeLoggerTest.py +6 -6
  158. toil/test/src/regularLogTest.py +71 -37
  159. toil/test/src/resourceTest.py +80 -49
  160. toil/test/src/restartDAGTest.py +36 -22
  161. toil/test/src/resumabilityTest.py +9 -2
  162. toil/test/src/retainTempDirTest.py +45 -14
  163. toil/test/src/systemTest.py +12 -8
  164. toil/test/src/threadingTest.py +44 -25
  165. toil/test/src/toilContextManagerTest.py +10 -7
  166. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  167. toil/test/src/workerTest.py +33 -16
  168. toil/test/utils/toilDebugTest.py +70 -58
  169. toil/test/utils/toilKillTest.py +4 -5
  170. toil/test/utils/utilsTest.py +239 -102
  171. toil/test/wdl/wdltoil_test.py +789 -148
  172. toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
  173. toil/toilState.py +52 -26
  174. toil/utils/toilConfig.py +13 -4
  175. toil/utils/toilDebugFile.py +44 -27
  176. toil/utils/toilDebugJob.py +85 -25
  177. toil/utils/toilDestroyCluster.py +11 -6
  178. toil/utils/toilKill.py +8 -3
  179. toil/utils/toilLaunchCluster.py +251 -145
  180. toil/utils/toilMain.py +37 -16
  181. toil/utils/toilRsyncCluster.py +27 -14
  182. toil/utils/toilSshCluster.py +45 -22
  183. toil/utils/toilStats.py +75 -36
  184. toil/utils/toilStatus.py +226 -119
  185. toil/utils/toilUpdateEC2Instances.py +3 -1
  186. toil/version.py +6 -6
  187. toil/wdl/utils.py +5 -5
  188. toil/wdl/wdltoil.py +3528 -1053
  189. toil/worker.py +370 -149
  190. toil-8.1.0b1.dist-info/METADATA +178 -0
  191. toil-8.1.0b1.dist-info/RECORD +259 -0
  192. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
  193. toil-7.0.0.dist-info/METADATA +0 -158
  194. toil-7.0.0.dist-info/RECORD +0 -244
  195. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
  196. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
  197. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/lib/aws/utils.py CHANGED
@@ -15,102 +15,53 @@ import errno
15
15
  import logging
16
16
  import os
17
17
  import socket
18
- from typing import (Any,
19
- Callable,
20
- ContextManager,
21
- Dict,
22
- Iterable,
23
- Iterator,
24
- List,
25
- Optional,
26
- Set,
27
- cast)
18
+ from collections.abc import Iterable, Iterator
19
+ from typing import TYPE_CHECKING, Any, Callable, ContextManager, Optional, cast
28
20
  from urllib.parse import ParseResult
29
21
 
30
- from mypy_boto3_sdb.type_defs import AttributeTypeDef
31
- from toil.lib.aws import session, AWSRegionName, AWSServerErrors
22
+ # To import toil.lib.aws.session, the AWS libraries must be installed
23
+ from toil.lib.aws import AWSRegionName, AWSServerErrors, session
24
+ from toil.lib.conversions import strtobool
25
+ from toil.lib.memoize import memoize
32
26
  from toil.lib.misc import printq
33
- from toil.lib.retry import (DEFAULT_DELAYS,
34
- DEFAULT_TIMEOUT,
35
- get_error_code,
36
- get_error_status,
37
- old_retry,
38
- retry, ErrorCondition)
39
-
40
- try:
41
- from botocore.exceptions import ClientError, EndpointConnectionError
42
- from mypy_boto3_iam import IAMClient, IAMServiceResource
43
- from mypy_boto3_s3 import S3Client, S3ServiceResource
44
- from mypy_boto3_s3.literals import BucketLocationConstraintType
45
- from mypy_boto3_s3.service_resource import Bucket, Object
46
- from mypy_boto3_sdb import SimpleDBClient
47
- except ImportError:
48
- ClientError = None # type: ignore
49
- EndpointConnectionError = None # type: ignore
50
- # AWS/boto extra is not installed
27
+ from toil.lib.retry import (
28
+ DEFAULT_DELAYS,
29
+ DEFAULT_TIMEOUT,
30
+ get_error_code,
31
+ get_error_status,
32
+ old_retry,
33
+ retry,
34
+ )
35
+
36
+ if TYPE_CHECKING:
37
+ from mypy_boto3_s3 import S3ServiceResource
38
+ from mypy_boto3_s3.service_resource import Bucket
39
+ from mypy_boto3_s3.service_resource import Object as S3Object
40
+ from mypy_boto3_sdb.type_defs import AttributeTypeDef
41
+
42
+ from botocore.exceptions import ClientError, EndpointConnectionError
51
43
 
52
44
  logger = logging.getLogger(__name__)
53
45
 
54
46
  # These are error codes we expect from AWS if we are making requests too fast.
55
47
  # https://github.com/boto/botocore/blob/49f87350d54f55b687969ec8bf204df785975077/botocore/retries/standard.py#L316
56
48
  THROTTLED_ERROR_CODES = [
57
- 'Throttling',
58
- 'ThrottlingException',
59
- 'ThrottledException',
60
- 'RequestThrottledException',
61
- 'TooManyRequestsException',
62
- 'ProvisionedThroughputExceededException',
63
- 'TransactionInProgressException',
64
- 'RequestLimitExceeded',
65
- 'BandwidthLimitExceeded',
66
- 'LimitExceededException',
67
- 'RequestThrottled',
68
- 'SlowDown',
69
- 'PriorRequestNotComplete',
70
- 'EC2ThrottledException',
49
+ "Throttling",
50
+ "ThrottlingException",
51
+ "ThrottledException",
52
+ "RequestThrottledException",
53
+ "TooManyRequestsException",
54
+ "ProvisionedThroughputExceededException",
55
+ "TransactionInProgressException",
56
+ "RequestLimitExceeded",
57
+ "BandwidthLimitExceeded",
58
+ "LimitExceededException",
59
+ "RequestThrottled",
60
+ "SlowDown",
61
+ "PriorRequestNotComplete",
62
+ "EC2ThrottledException",
71
63
  ]
72
64
 
73
- @retry(errors=[AWSServerErrors])
74
- def delete_iam_role(
75
- role_name: str, region: Optional[str] = None, quiet: bool = True
76
- ) -> None:
77
- # TODO: the Boto3 type hints are a bit oversealous here; they want hundreds
78
- # of overloads of the client-getting methods to exist based on the literal
79
- # string passed in, to return exactly the right kind of client or resource.
80
- # So we end up having to wrap all the calls in casts, which kind of defeats
81
- # the point of a nice fluent method you can call with the name of the thing
82
- # you want; we should have been calling iam_client() and so on all along if
83
- # we wanted MyPy to be able to understand us. So at some point we should
84
- # consider revising our API here to be less annoying to explain to the type
85
- # checker.
86
- iam_client = session.client('iam', region_name=region)
87
- iam_resource = session.resource('iam', region_name=region)
88
- role = iam_resource.Role(role_name)
89
- # normal policies
90
- for attached_policy in role.attached_policies.all():
91
- printq(f'Now dissociating policy: {attached_policy.policy_name} from role {role.name}', quiet)
92
- role.detach_policy(PolicyArn=attached_policy.arn)
93
- # inline policies
94
- for inline_policy in role.policies.all():
95
- printq(f'Deleting inline policy: {inline_policy.policy_name} from role {role.name}', quiet)
96
- iam_client.delete_role_policy(RoleName=role.name, PolicyName=inline_policy.policy_name)
97
- iam_client.delete_role(RoleName=role_name)
98
- printq(f'Role {role_name} successfully deleted.', quiet)
99
-
100
-
101
- @retry(errors=[AWSServerErrors])
102
- def delete_iam_instance_profile(
103
- instance_profile_name: str, region: Optional[str] = None, quiet: bool = True
104
- ) -> None:
105
- iam_resource = session.resource("iam", region_name=region)
106
- instance_profile = iam_resource.InstanceProfile(instance_profile_name)
107
- if instance_profile.roles is not None:
108
- for role in instance_profile.roles:
109
- printq(f'Now dissociating role: {role.name} from instance profile {instance_profile_name}', quiet)
110
- instance_profile.remove_role(RoleName=role.name)
111
- instance_profile.delete()
112
- printq(f'Instance profile "{instance_profile_name}" successfully deleted.', quiet)
113
-
114
65
 
115
66
  @retry(errors=[AWSServerErrors])
116
67
  def delete_sdb_domain(
@@ -130,12 +81,12 @@ def connection_reset(e: Exception) -> bool:
130
81
  # errno is listed as 104. To be safe, we check for both:
131
82
  return isinstance(e, socket.error) and e.errno in (errno.ECONNRESET, 104)
132
83
 
84
+
133
85
  def connection_error(e: Exception) -> bool:
134
86
  """
135
87
  Return True if an error represents a failure to make a network connection.
136
88
  """
137
- return (connection_reset(e)
138
- or isinstance(e, EndpointConnectionError))
89
+ return connection_reset(e) or isinstance(e, EndpointConnectionError)
139
90
 
140
91
 
141
92
  # TODO: Replace with: @retry and ErrorCondition
@@ -143,34 +94,47 @@ def retryable_s3_errors(e: Exception) -> bool:
143
94
  """
144
95
  Return true if this is an error from S3 that looks like we ought to retry our request.
145
96
  """
146
- return (connection_error(e)
147
- or (isinstance(e, ClientError) and get_error_status(e) in (429, 500))
148
- or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
149
- # boto3 errors
150
- or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
151
- or (isinstance(e, ClientError) and 'BucketNotEmpty' in str(e))
152
- or (isinstance(e, ClientError) and e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') == 409 and 'try again' in str(e))
153
- or (isinstance(e, ClientError) and e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') in (404, 429, 500, 502, 503, 504)))
97
+ return (
98
+ connection_error(e)
99
+ or (isinstance(e, ClientError) and get_error_status(e) in (429, 500))
100
+ or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
101
+ # boto3 errors
102
+ or (isinstance(e, ClientError) and get_error_code(e) in THROTTLED_ERROR_CODES)
103
+ or (isinstance(e, ClientError) and "BucketNotEmpty" in str(e))
104
+ or (
105
+ isinstance(e, ClientError)
106
+ and e.response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 409
107
+ and "try again" in str(e)
108
+ )
109
+ or (
110
+ isinstance(e, ClientError)
111
+ and e.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
112
+ in (404, 429, 500, 502, 503, 504)
113
+ )
114
+ )
154
115
 
155
116
 
156
- def retry_s3(delays: Iterable[float] = DEFAULT_DELAYS, timeout: float = DEFAULT_TIMEOUT, predicate: Callable[[Exception], bool] = retryable_s3_errors) -> Iterator[ContextManager[None]]:
117
+ def retry_s3(
118
+ delays: Iterable[float] = DEFAULT_DELAYS,
119
+ timeout: float = DEFAULT_TIMEOUT,
120
+ predicate: Callable[[Exception], bool] = retryable_s3_errors,
121
+ ) -> Iterator[ContextManager[None]]:
157
122
  """
158
123
  Retry iterator of context managers specifically for S3 operations.
159
124
  """
160
125
  return old_retry(delays=delays, timeout=timeout, predicate=predicate)
161
126
 
127
+
162
128
  @retry(errors=[AWSServerErrors])
163
129
  def delete_s3_bucket(
164
- s3_resource: "S3ServiceResource",
165
- bucket: str,
166
- quiet: bool = True
130
+ s3_resource: "S3ServiceResource", bucket: str, quiet: bool = True
167
131
  ) -> None:
168
132
  """
169
133
  Delete the given S3 bucket.
170
134
  """
171
- printq(f'Deleting s3 bucket: {bucket}', quiet)
135
+ printq(f"Deleting s3 bucket: {bucket}", quiet)
172
136
 
173
- paginator = s3_resource.meta.client.get_paginator('list_object_versions')
137
+ paginator = s3_resource.meta.client.get_paginator("list_object_versions")
174
138
  try:
175
139
  for response in paginator.paginate(Bucket=bucket):
176
140
  # Versions and delete markers can both go in here to be deleted.
@@ -178,15 +142,20 @@ def delete_s3_bucket(
178
142
  # defined for them in the stubs to express that. See
179
143
  # <https://github.com/vemel/mypy_boto3_builder/issues/123>. So we
180
144
  # have to do gymnastics to get them into the same list.
181
- to_delete: List[Dict[str, Any]] = cast(List[Dict[str, Any]], response.get('Versions', [])) + \
182
- cast(List[Dict[str, Any]], response.get('DeleteMarkers', []))
145
+ to_delete: list[dict[str, Any]] = cast(
146
+ list[dict[str, Any]], response.get("Versions", [])
147
+ ) + cast(list[dict[str, Any]], response.get("DeleteMarkers", []))
183
148
  for entry in to_delete:
184
- printq(f" Deleting {entry['Key']} version {entry['VersionId']}", quiet)
185
- s3_resource.meta.client.delete_object(Bucket=bucket, Key=entry['Key'], VersionId=entry['VersionId'])
149
+ printq(
150
+ f" Deleting {entry['Key']} version {entry['VersionId']}", quiet
151
+ )
152
+ s3_resource.meta.client.delete_object(
153
+ Bucket=bucket, Key=entry["Key"], VersionId=entry["VersionId"]
154
+ )
186
155
  s3_resource.Bucket(bucket).delete()
187
- printq(f'\n * Deleted s3 bucket successfully: {bucket}\n\n', quiet)
156
+ printq(f"\n * Deleted s3 bucket successfully: {bucket}\n\n", quiet)
188
157
  except s3_resource.meta.client.exceptions.NoSuchBucket:
189
- printq(f'\n * S3 bucket no longer exists: {bucket}\n\n', quiet)
158
+ printq(f"\n * S3 bucket no longer exists: {bucket}\n\n", quiet)
190
159
 
191
160
 
192
161
  def create_s3_bucket(
@@ -202,7 +171,7 @@ def create_s3_bucket(
202
171
 
203
172
  *ALL* S3 bucket creation should use this function.
204
173
  """
205
- logger.debug("Creating bucket '%s' in region %s.", bucket_name, region)
174
+ logger.info("Creating bucket '%s' in region %s.", bucket_name, region)
206
175
  if region == "us-east-1": # see https://github.com/boto/boto3/issues/125
207
176
  bucket = s3_resource.create_bucket(Bucket=bucket_name)
208
177
  else:
@@ -212,6 +181,7 @@ def create_s3_bucket(
212
181
  )
213
182
  return bucket
214
183
 
184
+
215
185
  @retry(errors=[ClientError])
216
186
  def enable_public_objects(bucket_name: str) -> None:
217
187
  """
@@ -235,7 +205,7 @@ def enable_public_objects(bucket_name: str) -> None:
235
205
  would be a very awkward way to do it. So we restore the old behavior.
236
206
  """
237
207
 
238
- s3_client = session.client('s3')
208
+ s3_client = session.client("s3")
239
209
 
240
210
  # Even though the new default is for public access to be prohibited, this
241
211
  # is implemented by adding new things attached to the bucket. If we remove
@@ -249,22 +219,41 @@ def enable_public_objects(bucket_name: str) -> None:
249
219
  s3_client.delete_bucket_ownership_controls(Bucket=bucket_name)
250
220
 
251
221
 
252
- def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only_strategies: Optional[Set[int]] = None) -> str:
222
+ class NoBucketLocationError(Exception):
223
+ """
224
+ Error to represent that we could not get a location for a bucket.
253
225
  """
254
- Get the AWS region name associated with the given S3 bucket.
226
+
227
+
228
+ def get_bucket_region(
229
+ bucket_name: str,
230
+ endpoint_url: Optional[str] = None,
231
+ only_strategies: Optional[set[int]] = None,
232
+ anonymous: Optional[bool] = None
233
+ ) -> str:
234
+ """
235
+ Get the AWS region name associated with the given S3 bucket, or raise NoBucketLocationError.
236
+
237
+ Does not log at info level or above when this does not work; failures are expected in some contexts.
255
238
 
256
239
  Takes an optional S3 API URL override.
257
240
 
258
241
  :param only_strategies: For testing, use only strategies with 1-based numbers in this set.
242
+
243
+ :raises NoBucketLocationError: if the bucket's region cannot be determined
244
+ (possibly due to lack of permissions).
259
245
  """
260
246
 
261
- s3_client = session.client('s3', endpoint_url=endpoint_url)
247
+ config = session.ANONYMOUS_CONFIG if anonymous else None
248
+ s3_client = session.client("s3", endpoint_url=endpoint_url, config=config)
262
249
 
263
250
  def attempt_get_bucket_location() -> Optional[str]:
264
251
  """
265
252
  Try and get the bucket location from the normal API call.
266
253
  """
267
- return s3_client.get_bucket_location(Bucket=bucket_name).get('LocationConstraint', None)
254
+ return s3_client.get_bucket_location(Bucket=bucket_name).get(
255
+ "LocationConstraint", None
256
+ )
268
257
 
269
258
  def attempt_get_bucket_location_from_us_east_1() -> Optional[str]:
270
259
  """
@@ -280,8 +269,10 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
280
269
  # It could also be because AWS open data buckets (which we tend to
281
270
  # encounter this problem for) tend to actually themselves be in
282
271
  # us-east-1.
283
- backup_s3_client = session.client('s3', region_name='us-east-1')
284
- return backup_s3_client.get_bucket_location(Bucket=bucket_name).get('LocationConstraint', None)
272
+ backup_s3_client = session.client("s3", region_name="us-east-1", config=config)
273
+ return backup_s3_client.get_bucket_location(Bucket=bucket_name).get(
274
+ "LocationConstraint", None
275
+ )
285
276
 
286
277
  def attempt_head_bucket() -> Optional[str]:
287
278
  """
@@ -293,11 +284,11 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
293
284
  # us where the bucket is. See
294
285
  # <https://github.com/aws/aws-sdk-cpp/issues/844#issuecomment-383747871>
295
286
  info = s3_client.head_bucket(Bucket=bucket_name)
296
- return info['ResponseMetadata']['HTTPHeaders']['x-amz-bucket-region']
287
+ return info["ResponseMetadata"]["HTTPHeaders"]["x-amz-bucket-region"]
297
288
 
298
289
  # Compose a list of strategies we want to try in order, which may work.
299
290
  # None is an acceptable return type that actually means something.
300
- strategies: List[Callable[[], Optional[str]]] = []
291
+ strategies: list[Callable[[], Optional[str]]] = []
301
292
  strategies.append(attempt_get_bucket_location)
302
293
  if not endpoint_url:
303
294
  # We should only try to talk to us-east-1 if we don't have a custom
@@ -305,17 +296,25 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
305
296
  strategies.append(attempt_get_bucket_location_from_us_east_1)
306
297
  strategies.append(attempt_head_bucket)
307
298
 
299
+ error_logs: list[tuple[int, str]] = []
308
300
  for attempt in retry_s3():
309
301
  with attempt:
310
302
  for i, strategy in enumerate(strategies):
311
- if only_strategies is not None and i+1 not in only_strategies:
303
+ if only_strategies is not None and i + 1 not in only_strategies:
312
304
  # We want to test running without this strategy.
313
305
  continue
314
306
  try:
315
- return bucket_location_to_region(strategy())
307
+ location = bucket_location_to_region(strategy())
308
+ logger.debug("Got bucket location from strategy %d", i + 1)
309
+ return location
316
310
  except ClientError as e:
317
- if get_error_code(e) == 'AccessDenied' and not endpoint_url:
318
- logger.warning('Strategy %d to get bucket location did not work: %s', i + 1, e)
311
+ if get_error_code(e) == "AccessDenied" and not endpoint_url:
312
+ logger.debug(
313
+ "Strategy %d to get bucket location did not work: %s",
314
+ i + 1,
315
+ e,
316
+ )
317
+ error_logs.append((i + 1, str(e)))
319
318
  last_error: Exception = e
320
319
  # We were blocked with this strategy. Move on to the
321
320
  # next strategy which might work.
@@ -324,127 +323,202 @@ def get_bucket_region(bucket_name: str, endpoint_url: Optional[str] = None, only
324
323
  raise
325
324
  except KeyError as e:
326
325
  # If we get a weird head response we will have a KeyError
327
- logger.warning('Strategy %d to get bucket location did not work: %s', i + 1, e)
326
+ logger.debug(
327
+ "Strategy %d to get bucket location did not work: %s", i + 1, e
328
+ )
329
+ error_logs.append((i + 1, str(e)))
328
330
  last_error = e
329
- # If we get here we ran out of attempts. Raise whatever the last problem was.
330
- raise last_error
331
+
332
+ error_messages = []
333
+ for rank, message in error_logs:
334
+ error_messages.append(
335
+ f"Strategy {rank} failed to get bucket location because: {message}"
336
+ )
337
+ # If we get here we ran out of attempts.
338
+ raise NoBucketLocationError(
339
+ "Could not get bucket location: " + "\n".join(error_messages)
340
+ ) from last_error
341
+
342
+ @memoize
343
+ def get_bucket_region_if_available(
344
+ bucket_name: str,
345
+ endpoint_url: Optional[str] = None,
346
+ only_strategies: Optional[set[int]] = None,
347
+ anonymous: Optional[bool] = None
348
+ ) -> Optional[str]:
349
+ """
350
+ Get the AWS region name associated with the given S3 bucket, or return None.
351
+
352
+ Caches results, so may not return the location for a bucket that has been
353
+ created but was previously observed to be nonexistent.
354
+
355
+ :param only_strategies: For testing, use only strategies with 1-based numbers in this set.
356
+ """
357
+
358
+ try:
359
+ return get_bucket_region(bucket_name, endpoint_url, only_strategies, anonymous)
360
+ except Exception as e:
361
+ if isinstance(e, NoBucketLocationError) or (isinstance(e, ClientError) and get_error_status(e) == 403):
362
+ # We can't know
363
+ return None
364
+ else:
365
+ raise
331
366
 
332
367
  def region_to_bucket_location(region: str) -> str:
333
- return '' if region == 'us-east-1' else region
368
+ return "" if region == "us-east-1" else region
369
+
334
370
 
335
371
  def bucket_location_to_region(location: Optional[str]) -> str:
336
372
  return "us-east-1" if location == "" or location is None else location
337
373
 
338
- def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "Object":
339
- """
340
- Extracts a key (object) from a given parsed s3:// URL.
341
374
 
342
- If existing is true and the object does not exist, raises FileNotFoundError.
375
+ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None, anonymous: Optional[bool] = None) -> "S3Object":
376
+ """
377
+ Extracts a key (object) from a given parsed s3:// URL.
343
378
 
344
- :param bool existing: If True, key is expected to exist. If False, key is expected not to
345
- exists and it will be created. If None, the key will be created if it doesn't exist.
346
- """
379
+ If existing is true and the object does not exist, raises FileNotFoundError.
347
380
 
348
- key_name = url.path[1:]
349
- bucket_name = url.netloc
381
+ :param bool existing: If True, key is expected to exist. If False, key is expected not to
382
+ exists and it will be created. If None, the key will be created if it doesn't exist.
350
383
 
351
- # Decide if we need to override Boto's built-in URL here.
352
- endpoint_url: Optional[str] = None
353
- host = os.environ.get('TOIL_S3_HOST', None)
354
- port = os.environ.get('TOIL_S3_PORT', None)
355
- protocol = 'https'
356
- if os.environ.get('TOIL_S3_USE_SSL', True) == 'False':
357
- protocol = 'http'
358
- if host:
359
- endpoint_url = f'{protocol}://{host}' + f':{port}' if port else ''
384
+ :raises FileNotFoundError: when existing is True and the object does not exist.
385
+ :raises RuntimeError: when existing is False but the object exists.
386
+ :raises PermissionError: when we are not authorized to look at the object.
387
+ """
360
388
 
361
- # TODO: OrdinaryCallingFormat equivalent in boto3?
362
- # if botoargs:
363
- # botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
389
+ key_name = url.path[1:]
390
+ bucket_name = url.netloc
391
+
392
+ # Decide if we need to override Boto's built-in URL here.
393
+ endpoint_url: Optional[str] = None
394
+ host = os.environ.get("TOIL_S3_HOST", None)
395
+ port = os.environ.get("TOIL_S3_PORT", None)
396
+ protocol = "https"
397
+ if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
398
+ protocol = "http"
399
+ if host:
400
+ endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
401
+
402
+ # TODO: OrdinaryCallingFormat equivalent in boto3?
403
+ # if botoargs:
404
+ # botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
405
+
406
+ config = session.ANONYMOUS_CONFIG if anonymous else None
407
+ # Get the bucket's region to avoid a redirect per request.
408
+ # Cache the result
409
+ region = get_bucket_region_if_available(bucket_name, endpoint_url=endpoint_url, anonymous=anonymous)
410
+ if region is not None:
411
+ s3 = session.resource("s3", region_name=region, endpoint_url=endpoint_url, config=config)
412
+ else:
413
+ # We can't get the bucket location, perhaps because we don't have
414
+ # permission to do that.
415
+ logger.debug("Couldn't get bucket location")
416
+ logger.debug("Fall back to not specifying location")
417
+ s3 = session.resource("s3", endpoint_url=endpoint_url, config=config)
364
418
 
365
- try:
366
- # Get the bucket's region to avoid a redirect per request
367
- region = get_bucket_region(bucket_name, endpoint_url=endpoint_url)
368
- s3 = session.resource('s3', region_name=region, endpoint_url=endpoint_url)
369
- except ClientError:
370
- # Probably don't have permission.
371
- # TODO: check if it is that
372
- s3 = session.resource('s3', endpoint_url=endpoint_url)
419
+ obj = s3.Object(bucket_name, key_name)
420
+ objExists = True
373
421
 
374
- obj = s3.Object(bucket_name, key_name)
375
- objExists = True
422
+ try:
423
+ obj.load()
424
+ except ClientError as e:
425
+ if get_error_status(e) == 404:
426
+ objExists = False
427
+ elif get_error_status(e) == 403:
428
+ raise PermissionError(
429
+ f"Key '{key_name}' is not accessible in bucket '{bucket_name}'."
430
+ ) from e
431
+ else:
432
+ raise
433
+ if existing is True and not objExists:
434
+ raise FileNotFoundError(
435
+ f"Key '{key_name}' does not exist in bucket '{bucket_name}'."
436
+ )
437
+ elif existing is False and objExists:
438
+ raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
376
439
 
440
+ if not objExists:
377
441
  try:
378
- obj.load()
442
+ obj.put() # write an empty file
379
443
  except ClientError as e:
380
- if get_error_status(e) == 404:
381
- objExists = False
444
+ if get_error_status(e) == 403:
445
+ raise PermissionError(
446
+ f"Key '{key_name}' is not writable in bucket '{bucket_name}'."
447
+ ) from e
382
448
  else:
383
449
  raise
384
- if existing is True and not objExists:
385
- raise FileNotFoundError(f"Key '{key_name}' does not exist in bucket '{bucket_name}'.")
386
- elif existing is False and objExists:
387
- raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
388
-
389
- if not objExists:
390
- obj.put() # write an empty file
391
- return obj
450
+ return obj
392
451
 
393
452
 
394
453
@retry(errors=[AWSServerErrors])
def list_objects_for_url(url: ParseResult, anonymous: Optional[bool] = None) -> list[str]:
    """
    List the immediate children of a given parsed s3:// URL.

    Extracts a key (object) from a given parsed s3:// URL. The URL will be
    supplemented with a trailing slash if it is missing.

    :param url: parsed s3:// URL naming a bucket and an optional key prefix
    :param anonymous: if truthy, make the request without AWS credentials
    :raises PermissionError: when we are not authorized to do the list operation.
    :return: item names under the prefix, relative to the prefix
    """

    key_name = url.path[1:]
    bucket_name = url.netloc

    if key_name != "" and not key_name.endswith("/"):
        # Make sure to put the trailing slash on the key, or else we'll see
        # a prefix of just it.
        key_name = key_name + "/"

    # Decide if we need to override Boto's built-in URL here.
    # TODO: Deduplicate with get_object_for_url, or push down into session module
    endpoint_url: Optional[str] = None
    host = os.environ.get("TOIL_S3_HOST", None)
    port = os.environ.get("TOIL_S3_PORT", None)
    protocol = "https"
    if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
        protocol = "http"
    if host:
        # The port suffix must be parenthesized: without parentheses the
        # conditional expression binds the whole concatenation, so a host
        # with no TOIL_S3_PORT would produce endpoint_url == "" and the
        # custom host would be silently ignored.
        endpoint_url = f"{protocol}://{host}" + (f":{port}" if port else "")

    # Anonymous access needs an unsigned-request config; None means default
    # credential resolution.
    config = session.ANONYMOUS_CONFIG if anonymous else None
    client = session.client("s3", endpoint_url=endpoint_url, config=config)

    listing = []

    try:
        paginator = client.get_paginator("list_objects_v2")
        result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter="/")
        for page in result:
            if "CommonPrefixes" in page:
                # "Subdirectories" come back as common prefixes because of
                # the "/" delimiter.
                for prefix_item in page["CommonPrefixes"]:
                    listing.append(prefix_item["Prefix"][len(key_name):])
            if "Contents" in page:
                for content_item in page["Contents"]:
                    if content_item["Key"] == key_name:
                        # Ignore folder name itself
                        continue
                    listing.append(content_item["Key"][len(key_name):])
    except ClientError as e:
        if get_error_status(e) == 403:
            raise PermissionError(
                f"Prefix '{key_name}' is not authorized to be listed in bucket '{bucket_name}'."
            ) from e
        else:
            raise

    logger.debug("Found in %s items: %s", url, listing)
    return listing
438
511
 
439
def flatten_tags(tags: dict[str, str]) -> list[dict[str, str]]:
    """
    Convert tags from a key to value dict into a list of 'Key': xxx, 'Value': xxx dicts.
    """
    flattened = []
    for tag_key, tag_value in tags.items():
        flattened.append({"Key": tag_key, "Value": tag_value})
    return flattened
444
517
 
445
518
 
446
- def boto3_pager(requestor_callable: Callable[..., Any], result_attribute_name: str,
447
- **kwargs: Any) -> Iterable[Any]:
519
+ def boto3_pager(
520
+ requestor_callable: Callable[..., Any], result_attribute_name: str, **kwargs: Any
521
+ ) -> Iterable[Any]:
448
522
  """
449
523
  Yield all the results from calling the given Boto 3 method with the
450
524
  given keyword arguments, paging through the results using the Marker or
@@ -465,7 +539,7 @@ def boto3_pager(requestor_callable: Callable[..., Any], result_attribute_name: s
465
539
  yield from page.get(result_attribute_name, [])
466
540
 
467
541
 
468
def get_item_from_attributes(attributes: list["AttributeTypeDef"], name: str) -> Any:
    """
    Given a list of attributes, find the attribute associated with the name and return its corresponding value.

    If the attribute with the name does not exist, the function will return None.

    :param attributes: list of attributes
    :param name: name of the attribute
    :return: value of the attribute
    """
    # Scan in order and return the first match, mirroring next()'s behavior
    # over a generator.
    for attribute in attributes:
        if attribute["Name"] == name:
            return attribute["Value"]
    return None