toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. toil/__init__.py +124 -86
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +137 -77
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
  5. toil/batchSystems/awsBatch.py +237 -128
  6. toil/batchSystems/cleanup_support.py +22 -16
  7. toil/batchSystems/contained_executor.py +30 -26
  8. toil/batchSystems/gridengine.py +85 -49
  9. toil/batchSystems/htcondor.py +164 -87
  10. toil/batchSystems/kubernetes.py +622 -386
  11. toil/batchSystems/local_support.py +17 -12
  12. toil/batchSystems/lsf.py +132 -79
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +288 -149
  16. toil/batchSystems/mesos/executor.py +77 -49
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +39 -29
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +293 -123
  21. toil/batchSystems/slurm.py +651 -155
  22. toil/batchSystems/torque.py +46 -32
  23. toil/bus.py +141 -73
  24. toil/common.py +784 -397
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1137 -534
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +62 -41
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +88 -57
  32. toil/fileStores/cachingFileStore.py +711 -247
  33. toil/fileStores/nonCachingFileStore.py +113 -75
  34. toil/job.py +1031 -349
  35. toil/jobStores/abstractJobStore.py +387 -243
  36. toil/jobStores/aws/jobStore.py +772 -412
  37. toil/jobStores/aws/utils.py +161 -109
  38. toil/jobStores/conftest.py +1 -0
  39. toil/jobStores/fileJobStore.py +289 -151
  40. toil/jobStores/googleJobStore.py +137 -70
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +614 -269
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +55 -28
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +204 -58
  49. toil/lib/aws/utils.py +290 -213
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +83 -49
  53. toil/lib/docker.py +131 -103
  54. toil/lib/dockstore.py +379 -0
  55. toil/lib/ec2.py +322 -209
  56. toil/lib/ec2nodes.py +174 -105
  57. toil/lib/encryption/_dummy.py +5 -3
  58. toil/lib/encryption/_nacl.py +10 -6
  59. toil/lib/encryption/conftest.py +1 -0
  60. toil/lib/exceptions.py +26 -7
  61. toil/lib/expando.py +4 -2
  62. toil/lib/ftp_utils.py +217 -0
  63. toil/lib/generatedEC2Lists.py +127 -19
  64. toil/lib/history.py +1271 -0
  65. toil/lib/history_submission.py +681 -0
  66. toil/lib/humanize.py +6 -2
  67. toil/lib/io.py +121 -12
  68. toil/lib/iterables.py +4 -2
  69. toil/lib/memoize.py +12 -8
  70. toil/lib/misc.py +83 -18
  71. toil/lib/objects.py +2 -2
  72. toil/lib/resources.py +19 -7
  73. toil/lib/retry.py +125 -87
  74. toil/lib/threading.py +282 -80
  75. toil/lib/throttle.py +15 -14
  76. toil/lib/trs.py +390 -0
  77. toil/lib/web.py +38 -0
  78. toil/options/common.py +850 -402
  79. toil/options/cwl.py +185 -90
  80. toil/options/runner.py +50 -0
  81. toil/options/wdl.py +70 -19
  82. toil/provisioners/__init__.py +111 -46
  83. toil/provisioners/abstractProvisioner.py +322 -157
  84. toil/provisioners/aws/__init__.py +62 -30
  85. toil/provisioners/aws/awsProvisioner.py +980 -627
  86. toil/provisioners/clusterScaler.py +541 -279
  87. toil/provisioners/gceProvisioner.py +283 -180
  88. toil/provisioners/node.py +147 -79
  89. toil/realtimeLogger.py +34 -22
  90. toil/resource.py +137 -75
  91. toil/server/app.py +127 -61
  92. toil/server/celery_app.py +3 -1
  93. toil/server/cli/wes_cwl_runner.py +84 -55
  94. toil/server/utils.py +56 -31
  95. toil/server/wes/abstract_backend.py +64 -26
  96. toil/server/wes/amazon_wes_utils.py +21 -15
  97. toil/server/wes/tasks.py +121 -63
  98. toil/server/wes/toil_backend.py +142 -107
  99. toil/server/wsgi_app.py +4 -3
  100. toil/serviceManager.py +58 -22
  101. toil/statsAndLogging.py +183 -65
  102. toil/test/__init__.py +263 -179
  103. toil/test/batchSystems/batchSystemTest.py +438 -195
  104. toil/test/batchSystems/batch_system_plugin_test.py +18 -7
  105. toil/test/batchSystems/test_gridengine.py +173 -0
  106. toil/test/batchSystems/test_lsf_helper.py +67 -58
  107. toil/test/batchSystems/test_slurm.py +265 -49
  108. toil/test/cactus/test_cactus_integration.py +20 -22
  109. toil/test/cwl/conftest.py +39 -0
  110. toil/test/cwl/cwlTest.py +375 -72
  111. toil/test/cwl/measure_default_memory.cwl +12 -0
  112. toil/test/cwl/not_run_required_input.cwl +29 -0
  113. toil/test/cwl/optional-file.cwl +18 -0
  114. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  115. toil/test/docs/scriptsTest.py +60 -34
  116. toil/test/jobStores/jobStoreTest.py +412 -235
  117. toil/test/lib/aws/test_iam.py +116 -48
  118. toil/test/lib/aws/test_s3.py +16 -9
  119. toil/test/lib/aws/test_utils.py +5 -6
  120. toil/test/lib/dockerTest.py +118 -141
  121. toil/test/lib/test_conversions.py +113 -115
  122. toil/test/lib/test_ec2.py +57 -49
  123. toil/test/lib/test_history.py +212 -0
  124. toil/test/lib/test_misc.py +12 -5
  125. toil/test/lib/test_trs.py +161 -0
  126. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  127. toil/test/mesos/helloWorld.py +7 -6
  128. toil/test/mesos/stress.py +25 -20
  129. toil/test/options/options.py +7 -2
  130. toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
  131. toil/test/provisioners/clusterScalerTest.py +440 -250
  132. toil/test/provisioners/clusterTest.py +81 -42
  133. toil/test/provisioners/gceProvisionerTest.py +174 -100
  134. toil/test/provisioners/provisionerTest.py +25 -13
  135. toil/test/provisioners/restartScript.py +5 -4
  136. toil/test/server/serverTest.py +188 -141
  137. toil/test/sort/restart_sort.py +137 -68
  138. toil/test/sort/sort.py +134 -66
  139. toil/test/sort/sortTest.py +91 -49
  140. toil/test/src/autoDeploymentTest.py +140 -100
  141. toil/test/src/busTest.py +20 -18
  142. toil/test/src/checkpointTest.py +8 -2
  143. toil/test/src/deferredFunctionTest.py +49 -35
  144. toil/test/src/dockerCheckTest.py +33 -26
  145. toil/test/src/environmentTest.py +20 -10
  146. toil/test/src/fileStoreTest.py +538 -271
  147. toil/test/src/helloWorldTest.py +7 -4
  148. toil/test/src/importExportFileTest.py +61 -31
  149. toil/test/src/jobDescriptionTest.py +32 -17
  150. toil/test/src/jobEncapsulationTest.py +2 -0
  151. toil/test/src/jobFileStoreTest.py +74 -50
  152. toil/test/src/jobServiceTest.py +187 -73
  153. toil/test/src/jobTest.py +120 -70
  154. toil/test/src/miscTests.py +19 -18
  155. toil/test/src/promisedRequirementTest.py +82 -36
  156. toil/test/src/promisesTest.py +7 -6
  157. toil/test/src/realtimeLoggerTest.py +6 -6
  158. toil/test/src/regularLogTest.py +71 -37
  159. toil/test/src/resourceTest.py +80 -49
  160. toil/test/src/restartDAGTest.py +36 -22
  161. toil/test/src/resumabilityTest.py +9 -2
  162. toil/test/src/retainTempDirTest.py +45 -14
  163. toil/test/src/systemTest.py +12 -8
  164. toil/test/src/threadingTest.py +44 -25
  165. toil/test/src/toilContextManagerTest.py +10 -7
  166. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  167. toil/test/src/workerTest.py +33 -16
  168. toil/test/utils/toilDebugTest.py +70 -58
  169. toil/test/utils/toilKillTest.py +4 -5
  170. toil/test/utils/utilsTest.py +239 -102
  171. toil/test/wdl/wdltoil_test.py +789 -148
  172. toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
  173. toil/toilState.py +52 -26
  174. toil/utils/toilConfig.py +13 -4
  175. toil/utils/toilDebugFile.py +44 -27
  176. toil/utils/toilDebugJob.py +85 -25
  177. toil/utils/toilDestroyCluster.py +11 -6
  178. toil/utils/toilKill.py +8 -3
  179. toil/utils/toilLaunchCluster.py +251 -145
  180. toil/utils/toilMain.py +37 -16
  181. toil/utils/toilRsyncCluster.py +27 -14
  182. toil/utils/toilSshCluster.py +45 -22
  183. toil/utils/toilStats.py +75 -36
  184. toil/utils/toilStatus.py +226 -119
  185. toil/utils/toilUpdateEC2Instances.py +3 -1
  186. toil/version.py +6 -6
  187. toil/wdl/utils.py +5 -5
  188. toil/wdl/wdltoil.py +3528 -1053
  189. toil/worker.py +370 -149
  190. toil-8.1.0b1.dist-info/METADATA +178 -0
  191. toil-8.1.0b1.dist-info/RECORD +259 -0
  192. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
  193. toil-7.0.0.dist-info/METADATA +0 -158
  194. toil-7.0.0.dist-info/RECORD +0 -244
  195. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
  196. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
  197. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
@@ -21,46 +21,53 @@ import reprlib
  import stat
  import time
  import uuid
+ from collections.abc import Generator
  from contextlib import contextmanager
  from io import BytesIO
- from typing import List, Optional, IO, Dict, Union, Generator, Tuple, cast, TYPE_CHECKING
+ from typing import IO, TYPE_CHECKING, Optional, Union, cast
  from urllib.parse import ParseResult, parse_qs, urlencode, urlsplit, urlunsplit

  from botocore.exceptions import ClientError
- from mypy_boto3_sdb import SimpleDBClient
- from mypy_boto3_sdb.type_defs import ReplaceableItemTypeDef, ReplaceableAttributeTypeDef, SelectResultTypeDef, ItemTypeDef, AttributeTypeDef, DeletableItemTypeDef, UpdateConditionTypeDef

  import toil.lib.encryption as encryption
  from toil.fileStores import FileID
  from toil.job import Job, JobDescription
- from toil.jobStores.abstractJobStore import (AbstractJobStore,
- ConcurrentFileModificationException,
- JobStoreExistsException,
- NoSuchFileException,
- NoSuchJobException,
- NoSuchJobStoreException, LocatorException)
- from toil.jobStores.aws.utils import (SDBHelper,
- ServerSideCopyProhibitedError,
- copyKeyMultipart,
- fileSizeAndTime,
- no_such_sdb_domain,
- retry_sdb,
- sdb_unavailable,
- uploadFile,
- uploadFromPath)
- from toil.jobStores.utils import (ReadablePipe,
- ReadableTransformingPipe,
- WritablePipe)
+ from toil.jobStores.abstractJobStore import (
+ AbstractJobStore,
+ ConcurrentFileModificationException,
+ JobStoreExistsException,
+ LocatorException,
+ NoSuchFileException,
+ NoSuchJobException,
+ NoSuchJobStoreException,
+ )
+ from toil.jobStores.aws.utils import (
+ SDBHelper,
+ ServerSideCopyProhibitedError,
+ copyKeyMultipart,
+ fileSizeAndTime,
+ no_such_sdb_domain,
+ retry_sdb,
+ sdb_unavailable,
+ uploadFile,
+ uploadFromPath,
+ )
+ from toil.jobStores.utils import ReadablePipe, ReadableTransformingPipe, WritablePipe
  from toil.lib.aws import build_tag_dict_from_env
  from toil.lib.aws.session import establish_boto3_session
- from toil.lib.aws.utils import (create_s3_bucket,
- enable_public_objects,
- flatten_tags,
- get_bucket_region,
- get_object_for_url,
- list_objects_for_url,
- retry_s3,
- retryable_s3_errors, boto3_pager, get_item_from_attributes)
+ from toil.lib.aws.utils import (
+ NoBucketLocationError,
+ boto3_pager,
+ create_s3_bucket,
+ enable_public_objects,
+ flatten_tags,
+ get_bucket_region,
+ get_item_from_attributes,
+ get_object_for_url,
+ list_objects_for_url,
+ retry_s3,
+ retryable_s3_errors,
+ )
  from toil.lib.compatibility import compat_bytes
  from toil.lib.ec2nodes import EC2Regions
  from toil.lib.exceptions import panic
@@ -70,11 +77,20 @@ from toil.lib.objects import InnerClass
  from toil.lib.retry import get_error_code, get_error_status, retry

  if TYPE_CHECKING:
+ from mypy_boto3_sdb.type_defs import (
+ AttributeTypeDef,
+ DeletableItemTypeDef,
+ ItemTypeDef,
+ ReplaceableAttributeTypeDef,
+ ReplaceableItemTypeDef,
+ UpdateConditionTypeDef,
+ )
+
  from toil import Config

  boto3_session = establish_boto3_session()
- s3_boto3_resource = boto3_session.resource('s3')
- s3_boto3_client = boto3_session.client('s3')
+ s3_boto3_resource = boto3_session.resource("s3")
+ s3_boto3_client = boto3_session.client("s3")
  logger = logging.getLogger(__name__)

  # Sometimes we have to wait for multipart uploads to become real. How long
@@ -89,6 +105,7 @@ class ChecksumError(Exception):

  class DomainDoesNotExist(Exception):
  """Raised when a domain that is expected to exist does not exist."""
+
  def __init__(self, domain_name):
  super().__init__(f"Expected domain {domain_name} to exist!")

@@ -106,14 +123,14 @@ class AWSJobStore(AbstractJobStore):
  # URLs where the may interfere with the certificate common name. We use a double
  # underscore as a separator instead.
  #
- bucketNameRe = re.compile(r'^[a-z0-9][a-z0-9-]+[a-z0-9]$')
+ bucketNameRe = re.compile(r"^[a-z0-9][a-z0-9-]+[a-z0-9]$")

  # See http://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html
  #
  minBucketNameLen = 3
  maxBucketNameLen = 63
  maxNameLen = 10
- nameSeparator = '--'
+ nameSeparator = "--"

  def __init__(self, locator: str, partSize: int = 50 << 20) -> None:
  """
@@ -124,23 +141,35 @@ class AWSJobStore(AbstractJobStore):
  whole file
  """
  super().__init__(locator)
- region, namePrefix = locator.split(':')
+ region, namePrefix = locator.split(":")
  regions = EC2Regions.keys()
  if region not in regions:
  raise ValueError(f'Region "{region}" is not one of: {regions}')
  if not self.bucketNameRe.match(namePrefix):
- raise ValueError("Invalid name prefix '%s'. Name prefixes must contain only digits, "
- "hyphens or lower-case letters and must not start or end in a "
- "hyphen." % namePrefix)
+ raise ValueError(
+ "Invalid name prefix '%s'. Name prefixes must contain only digits, "
+ "hyphens or lower-case letters and must not start or end in a "
+ "hyphen." % namePrefix
+ )
  # Reserve 13 for separator and suffix
- if len(namePrefix) > self.maxBucketNameLen - self.maxNameLen - len(self.nameSeparator):
- raise ValueError("Invalid name prefix '%s'. Name prefixes may not be longer than 50 "
- "characters." % namePrefix)
- if '--' in namePrefix:
- raise ValueError("Invalid name prefix '%s'. Name prefixes may not contain "
- "%s." % (namePrefix, self.nameSeparator))
- logger.debug("Instantiating %s for region %s and name prefix '%s'",
- self.__class__, region, namePrefix)
+ if len(namePrefix) > self.maxBucketNameLen - self.maxNameLen - len(
+ self.nameSeparator
+ ):
+ raise ValueError(
+ "Invalid name prefix '%s'. Name prefixes may not be longer than 50 "
+ "characters." % namePrefix
+ )
+ if "--" in namePrefix:
+ raise ValueError(
+ "Invalid name prefix '%s'. Name prefixes may not contain "
+ "%s." % (namePrefix, self.nameSeparator)
+ )
+ logger.debug(
+ "Instantiating %s for region %s and name prefix '%s'",
+ self.__class__,
+ region,
+ namePrefix,
+ )
  self.region = region
  self.name_prefix = namePrefix
  self.part_size = partSize
@@ -149,7 +178,7 @@ class AWSJobStore(AbstractJobStore):
  self.files_bucket = None
  self.db = boto3_session.client(service_name="sdb", region_name=region)

- self.s3_resource = boto3_session.resource('s3', region_name=self.region)
+ self.s3_resource = boto3_session.resource("s3", region_name=self.region)
  self.s3_client = self.s3_resource.meta.client

  def initialize(self, config: "Config") -> None:
@@ -176,7 +205,12 @@ class AWSJobStore(AbstractJobStore):
  self._bind(create=False)
  super().resume()

- def _bind(self, create: bool = False, block: bool = True, check_versioning_consistency: bool = True) -> None:
+ def _bind(
+ self,
+ create: bool = False,
+ block: bool = True,
+ check_versioning_consistency: bool = True,
+ ) -> None:
  def qualify(name):
  assert len(name) <= self.maxNameLen
  return self.name_prefix + self.nameSeparator + name
@@ -191,11 +225,13 @@ class AWSJobStore(AbstractJobStore):
  self.files_domain_name = qualify("files")
  self._bindDomain(self.files_domain_name, create=create, block=block)
  if self.files_bucket is None:
- self.files_bucket = self._bindBucket(qualify('files'),
- create=create,
- block=block,
- versioning=True,
- check_versioning_consistency=check_versioning_consistency)
+ self.files_bucket = self._bindBucket(
+ qualify("files"),
+ create=create,
+ block=block,
+ versioning=True,
+ check_versioning_consistency=check_versioning_consistency,
+ )

  @property
  def _registered(self) -> Optional[bool]:
@@ -217,25 +253,31 @@ class AWSJobStore(AbstractJobStore):
  # can't handle job stores that were partially created by 3.3.0, though.
  registry_domain_name = "toil-registry"
  try:
- self._bindDomain(domain_name=registry_domain_name,
- create=False,
- block=False)
+ self._bindDomain(
+ domain_name=registry_domain_name, create=False, block=False
+ )
  except DomainDoesNotExist:
  return False

  for attempt in retry_sdb():
  with attempt:
- get_result = self.db.get_attributes(DomainName=registry_domain_name,
- ItemName=self.name_prefix,
- AttributeNames=['exists'],
- ConsistentRead=True)
- attributes: List[AttributeTypeDef] = get_result.get("Attributes", []) # the documentation says 'Attributes' should always exist, but this is not true
- exists: Optional[str] = get_item_from_attributes(attributes=attributes, name="exists")
+ get_result = self.db.get_attributes(
+ DomainName=registry_domain_name,
+ ItemName=self.name_prefix,
+ AttributeNames=["exists"],
+ ConsistentRead=True,
+ )
+ attributes: list["AttributeTypeDef"] = get_result.get(
+ "Attributes", []
+ ) # the documentation says 'Attributes' should always exist, but this is not true
+ exists: Optional[str] = get_item_from_attributes(
+ attributes=attributes, name="exists"
+ )
  if exists is None:
  return False
- elif exists == 'True':
+ elif exists == "True":
  return True
- elif exists == 'False':
+ elif exists == "False":
  return None
  else:
  assert False
@@ -244,31 +286,40 @@ class AWSJobStore(AbstractJobStore):
  def _registered(self, value: bool) -> None:
  registry_domain_name = "toil-registry"
  try:
- self._bindDomain(domain_name=registry_domain_name,
- # Only create registry domain when registering or
- # transitioning a store
- create=value is not False,
- block=False)
+ self._bindDomain(
+ domain_name=registry_domain_name,
+ # Only create registry domain when registering or
+ # transitioning a store
+ create=value is not False,
+ block=False,
+ )
  except DomainDoesNotExist:
  pass
  else:
  for attempt in retry_sdb():
  with attempt:
  if value is False:
- self.db.delete_attributes(DomainName=registry_domain_name,
- ItemName=self.name_prefix)
+ self.db.delete_attributes(
+ DomainName=registry_domain_name, ItemName=self.name_prefix
+ )
  else:
  if value is True:
- attributes: List[ReplaceableAttributeTypeDef] = [{"Name": "exists", "Value": "True", "Replace": True}]
+ attributes: list["ReplaceableAttributeTypeDef"] = [
+ {"Name": "exists", "Value": "True", "Replace": True}
+ ]
  elif value is None:
- attributes = [{"Name": "exists", "Value": "False", "Replace": True}]
+ attributes = [
+ {"Name": "exists", "Value": "False", "Replace": True}
+ ]
  else:
  assert False
- self.db.put_attributes(DomainName=registry_domain_name,
- ItemName=self.name_prefix,
- Attributes=attributes)
+ self.db.put_attributes(
+ DomainName=registry_domain_name,
+ ItemName=self.name_prefix,
+ Attributes=attributes,
+ )

- def _checkItem(self, item: ItemTypeDef, enforce: bool = True) -> None:
+ def _checkItem(self, item: "ItemTypeDef", enforce: bool = True) -> None:
  """
  Make sure that the given SimpleDB item actually has the attributes we think it should.

@@ -278,22 +329,31 @@ class AWSJobStore(AbstractJobStore):
  """
  self._checkAttributes(item["Attributes"], enforce)

- def _checkAttributes(self, attributes: List[AttributeTypeDef], enforce: bool = True) -> None:
+ def _checkAttributes(
+ self, attributes: list["AttributeTypeDef"], enforce: bool = True
+ ) -> None:
  if get_item_from_attributes(attributes=attributes, name="overlargeID") is None:
- logger.error("overlargeID attribute isn't present: either SimpleDB entry is "
- "corrupt or jobstore is from an extremely old Toil: %s", attributes)
+ logger.error(
+ "overlargeID attribute isn't present: either SimpleDB entry is "
+ "corrupt or jobstore is from an extremely old Toil: %s",
+ attributes,
+ )
  if enforce:
- raise RuntimeError("encountered SimpleDB entry missing required attribute "
- "'overlargeID'; is your job store ancient?")
+ raise RuntimeError(
+ "encountered SimpleDB entry missing required attribute "
+ "'overlargeID'; is your job store ancient?"
+ )

- def _awsJobFromAttributes(self, attributes: List[AttributeTypeDef]) -> Job:
+ def _awsJobFromAttributes(self, attributes: list["AttributeTypeDef"]) -> Job:
  """
  Get a Toil Job object from attributes that are defined in an item from the DB
  :param attributes: List of attributes
  :return: Toil job
  """
  self._checkAttributes(attributes)
- overlarge_id_value = get_item_from_attributes(attributes=attributes, name="overlargeID")
+ overlarge_id_value = get_item_from_attributes(
+ attributes=attributes, name="overlargeID"
+ )
  if overlarge_id_value:
  assert self.file_exists(overlarge_id_value)
  # This is an overlarge job, download the actual attributes
@@ -309,15 +369,14 @@ class AWSJobStore(AbstractJobStore):
  job.assignConfig(self.config)
  return job

- def _awsJobFromItem(self, item: ItemTypeDef) -> Job:
+ def _awsJobFromItem(self, item: "ItemTypeDef") -> Job:
  """
  Get a Toil Job object from an item from the DB
- :param item: ItemTypeDef
  :return: Toil Job
  """
  return self._awsJobFromAttributes(item["Attributes"])

- def _awsJobToAttributes(self, job: JobDescription) -> List[AttributeTypeDef]:
+ def _awsJobToAttributes(self, job: JobDescription) -> list["AttributeTypeDef"]:
  binary = pickle.dumps(job, protocol=pickle.HIGHEST_PROTOCOL)
  if len(binary) > SDBHelper.maxBinarySize(extraReservedChunks=1):
  # Store as an overlarge job in S3
@@ -330,7 +389,7 @@ class AWSJobStore(AbstractJobStore):
  item["overlargeID"] = ""
  return SDBHelper.attributeDictToList(item)

- def _awsJobToItem(self, job: JobDescription, name: str) -> ItemTypeDef:
+ def _awsJobToItem(self, job: JobDescription, name: str) -> "ItemTypeDef":
  return {"Name": name, "Attributes": self._awsJobToAttributes(job)}

  jobsPerBatchInsert = 25
@@ -339,27 +398,34 @@ class AWSJobStore(AbstractJobStore):
  def batch(self) -> None:
  self._batchedUpdates = []
  yield
- batches = [self._batchedUpdates[i:i + self.jobsPerBatchInsert] for i in
- range(0, len(self._batchedUpdates), self.jobsPerBatchInsert)]
+ batches = [
+ self._batchedUpdates[i : i + self.jobsPerBatchInsert]
+ for i in range(0, len(self._batchedUpdates), self.jobsPerBatchInsert)
+ ]

  for batch in batches:
- items: List[ReplaceableItemTypeDef] = []
+ items: list["ReplaceableItemTypeDef"] = []
  for jobDescription in batch:
- item_attributes: List[ReplaceableAttributeTypeDef] = []
+ item_attributes: list["ReplaceableAttributeTypeDef"] = []
  jobDescription.pre_update_hook()
  item_name = compat_bytes(jobDescription.jobStoreID)
- got_job_attributes: List[AttributeTypeDef] = self._awsJobToAttributes(jobDescription)
+ got_job_attributes: list["AttributeTypeDef"] = self._awsJobToAttributes(
+ jobDescription
+ )
  for each_attribute in got_job_attributes:
- new_attribute: ReplaceableAttributeTypeDef = {"Name": each_attribute["Name"],
- "Value": each_attribute["Value"],
- "Replace": True}
+ new_attribute: "ReplaceableAttributeTypeDef" = {
+ "Name": each_attribute["Name"],
+ "Value": each_attribute["Value"],
+ "Replace": True,
+ }
  item_attributes.append(new_attribute)
- items.append({"Name": item_name,
- "Attributes": item_attributes})
+ items.append({"Name": item_name, "Attributes": item_attributes})

  for attempt in retry_sdb():
  with attempt:
- self.db.batch_put_attributes(DomainName=self.jobs_domain_name, Items=items)
+ self.db.batch_put_attributes(
+ DomainName=self.jobs_domain_name, Items=items
+ )
  self._batchedUpdates = None

  def assign_job_id(self, job_description: JobDescription) -> None:
@@ -377,19 +443,28 @@ class AWSJobStore(AbstractJobStore):
  def job_exists(self, job_id: Union[bytes, str]) -> bool:
  for attempt in retry_sdb():
  with attempt:
- return len(self.db.get_attributes(DomainName=self.jobs_domain_name,
- ItemName=compat_bytes(job_id),
- AttributeNames=[SDBHelper.presenceIndicator()],
- ConsistentRead=True).get("Attributes", [])) > 0
+ return (
+ len(
+ self.db.get_attributes(
+ DomainName=self.jobs_domain_name,
+ ItemName=compat_bytes(job_id),
+ AttributeNames=[SDBHelper.presenceIndicator()],
+ ConsistentRead=True,
+ ).get("Attributes", [])
+ )
+ > 0
+ )

  def jobs(self) -> Generator[Job, None, None]:
- job_items: Optional[List[ItemTypeDef]] = None
+ job_items: Optional[list["ItemTypeDef"]] = None
  for attempt in retry_sdb():
  with attempt:
- job_items = boto3_pager(self.db.select,
- "Items",
- ConsistentRead=True,
- SelectExpression="select * from `%s`" % self.jobs_domain_name)
+ job_items = boto3_pager(
+ self.db.select,
+ "Items",
+ ConsistentRead=True,
+ SelectExpression="select * from `%s`" % self.jobs_domain_name,
+ )
  assert job_items is not None
  for jobItem in job_items:
  yield self._awsJobFromItem(jobItem)
@@ -398,9 +473,11 @@ class AWSJobStore(AbstractJobStore):
  item_attributes = None
  for attempt in retry_sdb():
  with attempt:
- item_attributes = self.db.get_attributes(DomainName=self.jobs_domain_name,
- ItemName=compat_bytes(job_id),
- ConsistentRead=True).get("Attributes", [])
+ item_attributes = self.db.get_attributes(
+ DomainName=self.jobs_domain_name,
+ ItemName=compat_bytes(job_id),
+ ConsistentRead=True,
+ ).get("Attributes", [])
  if not item_attributes:
  raise NoSuchJobException(job_id)
  job = self._awsJobFromAttributes(item_attributes)
@@ -413,11 +490,17 @@ class AWSJobStore(AbstractJobStore):
  logger.debug("Updating job %s", job_description.jobStoreID)
  job_description.pre_update_hook()
  job_attributes = self._awsJobToAttributes(job_description)
- update_attributes: List[ReplaceableAttributeTypeDef] = [{"Name": attribute["Name"], "Value": attribute["Value"], "Replace": True}
- for attribute in job_attributes]
+ update_attributes: list["ReplaceableAttributeTypeDef"] = [
+ {"Name": attribute["Name"], "Value": attribute["Value"], "Replace": True}
+ for attribute in job_attributes
+ ]
  for attempt in retry_sdb():
  with attempt:
- self.db.put_attributes(DomainName=self.jobs_domain_name, ItemName=compat_bytes(job_description.jobStoreID), Attributes=update_attributes)
+ self.db.put_attributes(
+ DomainName=self.jobs_domain_name,
+ ItemName=compat_bytes(job_description.jobStoreID),
+ Attributes=update_attributes,
+ )

  itemsPerBatchDelete = 25

@@ -428,51 +511,75 @@ class AWSJobStore(AbstractJobStore):
  # If the job is overlarge, delete its file from the filestore
  for attempt in retry_sdb():
  with attempt:
- attributes = self.db.get_attributes(DomainName=self.jobs_domain_name,
- ItemName=compat_bytes(job_id),
- ConsistentRead=True).get("Attributes", [])
+ attributes = self.db.get_attributes(
+ DomainName=self.jobs_domain_name,
+ ItemName=compat_bytes(job_id),
+ ConsistentRead=True,
+ ).get("Attributes", [])
  # If the overlargeID has fallen off, maybe we partially deleted the
  # attributes of the item? Or raced on it? Or hit SimpleDB being merely
  # eventually consistent? We should still be able to get rid of it.
  self._checkAttributes(attributes, enforce=False)
- overlarge_id_value = get_item_from_attributes(attributes=attributes, name="overlargeID")
+ overlarge_id_value = get_item_from_attributes(
+ attributes=attributes, name="overlargeID"
+ )
  if overlarge_id_value:
  logger.debug("Deleting job from filestore")
  self.delete_file(overlarge_id_value)
  for attempt in retry_sdb():
  with attempt:
- self.db.delete_attributes(DomainName=self.jobs_domain_name, ItemName=compat_bytes(job_id))
- items: Optional[List[ItemTypeDef]] = None
+ self.db.delete_attributes(
+ DomainName=self.jobs_domain_name, ItemName=compat_bytes(job_id)
+ )
+ items: Optional[list["ItemTypeDef"]] = None
  for attempt in retry_sdb():
  with attempt:
- items = list(boto3_pager(self.db.select,
- "Items",
- ConsistentRead=True,
- SelectExpression=f"select version from `{self.files_domain_name}` where ownerID='{job_id}'"))
+ items = list(
+ boto3_pager(
+ self.db.select,
+ "Items",
+ ConsistentRead=True,
+ SelectExpression=f"select version from `{self.files_domain_name}` where ownerID='{job_id}'",
+ )
+ )
  assert items is not None
  if items:
- logger.debug("Deleting %d file(s) associated with job %s", len(items), job_id)
+ logger.debug(
+ "Deleting %d file(s) associated with job %s", len(items), job_id
+ )
  n = self.itemsPerBatchDelete
- batches = [items[i:i + n] for i in range(0, len(items), n)]
+ batches = [items[i : i + n] for i in range(0, len(items), n)]
  for batch in batches:
- delete_items: List[DeletableItemTypeDef] = [{"Name": item["Name"]} for item in batch]
+ delete_items: list["DeletableItemTypeDef"] = [
+ {"Name": item["Name"]} for item in batch
+ ]
  for attempt in retry_sdb():
  with attempt:
- self.db.batch_delete_attributes(DomainName=self.files_domain_name, Items=delete_items)
+ self.db.batch_delete_attributes(
+ DomainName=self.files_domain_name, Items=delete_items
+ )
  for item in items:
- item: ItemTypeDef
- version = get_item_from_attributes(attributes=item["Attributes"], name="version")
+ item: "ItemTypeDef"
+ version = get_item_from_attributes(
+ attributes=item["Attributes"], name="version"
+ )
  for attempt in retry_s3():
  with attempt:
  if version:
- self.s3_client.delete_object(Bucket=self.files_bucket.name,
- Key=compat_bytes(item["Name"]),
- VersionId=version)
+ self.s3_client.delete_object(
+ Bucket=self.files_bucket.name,
+ Key=compat_bytes(item["Name"]),
+ VersionId=version,
+ )
  else:
- self.s3_client.delete_object(Bucket=self.files_bucket.name,
- Key=compat_bytes(item["Name"]))
+ self.s3_client.delete_object(
+ Bucket=self.files_bucket.name,
+ Key=compat_bytes(item["Name"]),
+ )

- def get_empty_file_store_id(self, jobStoreID=None, cleanup=False, basename=None) -> FileID:
+ def get_empty_file_store_id(
+ self, jobStoreID=None, cleanup=False, basename=None
+ ) -> FileID:
  info = self.FileInfo.create(jobStoreID if cleanup else None)
  with info.uploadStream() as _:
  # Empty
@@ -481,8 +588,14 @@ class AWSJobStore(AbstractJobStore):
  logger.debug("Created %r.", info)
  return info.fileID

- def _import_file(self, otherCls, uri: ParseResult, shared_file_name: Optional[str] = None,
- hardlink: bool = False, symlink: bool = True) -> Optional[FileID]:
+ def _import_file(
+ self,
+ otherCls,
+ uri: ParseResult,
+ shared_file_name: Optional[str] = None,
+ hardlink: bool = False,
+ symlink: bool = True,
+ ) -> Optional[FileID]:
  try:
  if issubclass(otherCls, AWSJobStore):
  srcObj = get_object_for_url(uri, existing=True)
@@ -492,15 +605,19 @@ class AWSJobStore(AbstractJobStore):
  else:
  self._requireValidSharedFileName(shared_file_name)
  jobStoreFileID = self._shared_file_id(shared_file_name)
- info = self.FileInfo.loadOrCreate(jobStoreFileID=jobStoreFileID,
- ownerID=str(self.sharedFileOwnerID),
- encrypted=None)
+ info = self.FileInfo.loadOrCreate(
+ jobStoreFileID=jobStoreFileID,
+ ownerID=str(self.sharedFileOwnerID),
+ encrypted=None,
+ )
  info.copyFrom(srcObj)
  info.save()
  return FileID(info.fileID, size) if shared_file_name is None else None
- except ServerSideCopyProhibitedError:
- # AWS refuses to do this copy for us
- logger.warning("Falling back to copying via the local machine. This could get expensive!")
+ except (NoBucketLocationError, ServerSideCopyProhibitedError):
+ # AWS refuses to tell us where the bucket is or do this copy for us
+ logger.warning(
+ "Falling back to copying via the local machine. This could get expensive!"
+ )

  # copy if exception
  return super()._import_file(otherCls, uri, shared_file_name=shared_file_name)
@@ -512,71 +629,112 @@ class AWSJobStore(AbstractJobStore):
  info = self.FileInfo.loadOrFail(file_id)
  info.copyTo(dstObj)
  return
- except ServerSideCopyProhibitedError:
- # AWS refuses to do this copy for us
- logger.warning("Falling back to copying via the local machine. This could get expensive!")
+ except (NoBucketLocationError, ServerSideCopyProhibitedError):
+ # AWS refuses to tell us where the bucket is or do this copy for us
+ logger.warning(
+ "Falling back to copying via the local machine. This could get expensive!"
+ )
  else:
  super()._default_export_file(otherCls, file_id, uri)

+ ###
+ # URL access implementation
+ ###
+
+ # URL access methods aren't used by the rest of the job store methods.
+
  @classmethod
  def _url_exists(cls, url: ParseResult) -> bool:
  try:
- get_object_for_url(url, existing=True)
+ try:
+ get_object_for_url(url, existing=True, anonymous=True)
+ except PermissionError:
+ # If we can't look anonymously, log in
+ get_object_for_url(url, existing=True)
  return True
  except FileNotFoundError:
  # Not a file
- # Might be a directory.
+ # Might be a directory. Or we might not have access to know.
+ # See if it's a directory.
  return cls._get_is_directory(url)

  @classmethod
  def _get_size(cls, url: ParseResult) -> int:
- return get_object_for_url(url, existing=True).content_length
+ try:
+ src_obj = get_object_for_url(url, existing=True, anonymous=True)
+ except PermissionError:
+ src_obj = get_object_for_url(url, existing=True)
+ return src_obj.content_length

  @classmethod
  def _read_from_url(cls, url: ParseResult, writable):
- srcObj = get_object_for_url(url, existing=True)
- srcObj.download_fileobj(writable)
- return (
- srcObj.content_length,
- False # executable bit is always False
- )
+ try:
+ src_obj = get_object_for_url(url, existing=True, anonymous=True)
+ src_obj.download_fileobj(writable)
+ except Exception as e:
+ if isinstance(e, PermissionError) or (isinstance(e, ClientError) and get_error_status(e) == 403):
+ # The object setup or the download does not have permission. Try again with a login.
+ src_obj = get_object_for_url(url, existing=True)
+ src_obj.download_fileobj(writable)
+ else:
+ raise
+ return (src_obj.content_length, False) # executable bit is always False

  @classmethod
  def _open_url(cls, url: ParseResult) -> IO[bytes]:
- src_obj = get_object_for_url(url, existing=True)
- response = src_obj.get()
+ try:
+ src_obj = get_object_for_url(url, existing=True, anonymous=True)
+ response = src_obj.get()
+ except Exception as e:
+ if isinstance(e, PermissionError) or (isinstance(e, ClientError) and get_error_status(e) == 403):
+ # The object setup or the download does not have permission. Try again with a login.
+ src_obj = get_object_for_url(url, existing=True)
+ response = src_obj.get()
+ else:
+ raise
  # We should get back a response with a stream in 'Body'
- if 'Body' not in response:
+ if "Body" not in response:
  raise RuntimeError(f"Could not fetch body stream for {url}")
- return response['Body']
+ return response["Body"]

  @classmethod
- def _write_to_url(cls, readable, url: ParseResult, executable: bool = False) -> None:
+ def _write_to_url(
+ cls, readable, url: ParseResult, executable: bool = False
+ ) -> None:
+ # Don't try to do anonympus writes.
  dstObj = get_object_for_url(url)

  logger.debug("Uploading %s", dstObj.key)
  # uploadFile takes care of using multipart upload if the file is larger than partSize (default to 5MB)
- uploadFile(readable=readable,
- resource=s3_boto3_resource,
- bucketName=dstObj.bucket_name,
- fileID=dstObj.key,
- partSize=5 * 1000 * 1000)
+ uploadFile(
+ readable=readable,
+ resource=s3_boto3_resource,
+ bucketName=dstObj.bucket_name,
+ fileID=dstObj.key,
+ partSize=5 * 1000 * 1000,
+ )

  @classmethod
- def _list_url(cls, url: ParseResult) -> List[str]:
- return list_objects_for_url(url)
+ def _list_url(cls, url: ParseResult) -> list[str]:
+ try:
+ return list_objects_for_url(url, anonymous=True)
+ except PermissionError:
+ return list_objects_for_url(url)
+

  @classmethod
  def _get_is_directory(cls, url: ParseResult) -> bool:
  # We consider it a directory if anything is in it.
  # TODO: Can we just get the first item and not the whole list?
- return len(list_objects_for_url(url)) > 0
+ return len(cls._list_url(url)) > 0

  @classmethod
  def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
- return url.scheme.lower() == 's3'
+ return url.scheme.lower() == "s3"

- def write_file(self, local_path: FileID, job_id: Optional[FileID] = None, cleanup: bool = False) -> FileID:
+ def write_file(
+ self, local_path: FileID, job_id: Optional[FileID] = None, cleanup: bool = False
+ ) -> FileID:
  info = self.FileInfo.create(job_id if cleanup else None)
  info.upload(local_path, not self.config.disableJobStoreChecksumVerification)
  info.save()
@@ -584,7 +742,14 @@ class AWSJobStore(AbstractJobStore):
  return info.fileID

  @contextmanager
- def write_file_stream(self, job_id: Optional[FileID] = None, cleanup: bool = False, basename=None, encoding=None, errors=None):
+ def write_file_stream(
+ self,
+ job_id: Optional[FileID] = None,
+ cleanup: bool = False,
+ basename=None,
+ encoding=None,
+ errors=None,
+ ):
  info = self.FileInfo.create(job_id if cleanup else None)
  with info.uploadStream(encoding=encoding, errors=errors) as writable:
  yield writable, info.fileID
@@ -592,11 +757,15 @@ class AWSJobStore(AbstractJobStore):
  logger.debug("Wrote %r.", info)

  @contextmanager
- def write_shared_file_stream(self, shared_file_name, encrypted=None, encoding=None, errors=None):
+ def write_shared_file_stream(
+ self, shared_file_name, encrypted=None, encoding=None, errors=None
+ ):
  self._requireValidSharedFileName(shared_file_name)
- info = self.FileInfo.loadOrCreate(jobStoreFileID=self._shared_file_id(shared_file_name),
- ownerID=str(self.sharedFileOwnerID),
- encrypted=encrypted)
+ info = self.FileInfo.loadOrCreate(
+ jobStoreFileID=self._shared_file_id(shared_file_name),
+ ownerID=str(self.sharedFileOwnerID),
+ encrypted=encrypted,
+ )
  with info.uploadStream(encoding=encoding, errors=errors) as writable:
  yield writable
  info.save()
@@ -629,7 +798,7 @@ class AWSJobStore(AbstractJobStore):
  info = self.FileInfo.loadOrFail(file_id)
  logger.debug("Reading %r into %r.", info, local_path)
  info.download(local_path, not self.config.disableJobStoreChecksumVerification)
- if getattr(file_id, 'executable', False):
+ if getattr(file_id, "executable", False):
  os.chmod(local_path, os.stat(local_path).st_mode | stat.S_IXUSR)

  @contextmanager
@@ -644,7 +813,9 @@ class AWSJobStore(AbstractJobStore):
  self._requireValidSharedFileName(shared_file_name)
  jobStoreFileID = self._shared_file_id(shared_file_name)
  info = self.FileInfo.loadOrFail(jobStoreFileID, customName=shared_file_name)
- logger.debug("Reading %r for shared file %r into stream.", info, shared_file_name)
+ logger.debug(
+ "Reading %r for shared file %r into stream.", info, shared_file_name
+ )
  with info.downloadStream(encoding=encoding, errors=errors) as readable:
  yield readable

@@ -660,7 +831,7 @@ class AWSJobStore(AbstractJobStore):
  with info.uploadStream(multipart=False) as writeable:
  if isinstance(msg, str):
  # This stream is for binary data, so encode any non-encoded things
- msg = msg.encode('utf-8', errors='ignore')
+ msg = msg.encode("utf-8", errors="ignore")
  writeable.write(msg)
  info.save()

@@ -682,10 +853,12 @@ class AWSJobStore(AbstractJobStore):
  items = None
  for attempt in retry_sdb():
  with attempt:
- items = boto3_pager(self.db.select,
- "Items",
- ConsistentRead=True,
- SelectExpression="select * from `{}` where ownerID='{}'".format(self.files_domain_name, str(ownerId)))
+ items = boto3_pager(
+ self.db.select,
+ "Items",
+ ConsistentRead=True,
+ SelectExpression=f"select * from `{self.files_domain_name}` where ownerID='{str(ownerId)}'",
+ )
  assert items is not None
  for item in items:
  info = self.FileInfo.fromItem(item)
@@ -702,13 +875,19 @@ class AWSJobStore(AbstractJobStore):
  with info.uploadStream(allowInlining=False) as f:
  f.write(info.content)

- self.files_bucket.Object(compat_bytes(jobStoreFileID)).Acl().put(ACL='public-read')
+ self.files_bucket.Object(compat_bytes(jobStoreFileID)).Acl().put(
+ ACL="public-read"
+ )

- url = self.s3_client.generate_presigned_url('get_object',
- Params={'Bucket': self.files_bucket.name,
- 'Key': compat_bytes(jobStoreFileID),
- 'VersionId': info.version},
- ExpiresIn=self.publicUrlExpiration.total_seconds())
+ url = self.s3_client.generate_presigned_url(
+ "get_object",
+ Params={
+ "Bucket": self.files_bucket.name,
+ "Key": compat_bytes(jobStoreFileID),
+ "VersionId": info.version,
+ },
+ ExpiresIn=self.publicUrlExpiration.total_seconds(),
+ )

  # boto doesn't properly remove the x-amz-security-token parameter when
  # query_auth is False when using an IAM role (see issue #2043). Including the
@@ -716,12 +895,12 @@ class AWSJobStore(AbstractJobStore):
  # even if the resource is public, so we need to remove it.
  scheme, netloc, path, query, fragment = urlsplit(url)
  params = parse_qs(query)
- if 'x-amz-security-token' in params:
- del params['x-amz-security-token']
- if 'AWSAccessKeyId' in params:
- del params['AWSAccessKeyId']
- if 'Signature' in params:
- del params['Signature']
+ if "x-amz-security-token" in params:
+ del params["x-amz-security-token"]
+ if "AWSAccessKeyId" in params:
+ del params["AWSAccessKeyId"]
+ if "Signature" in params:
+ del params["Signature"]
  query = urlencode(params, doseq=True)
  url = urlunsplit((scheme, netloc, path, query, fragment))
  return url
@@ -730,12 +909,14 @@ class AWSJobStore(AbstractJobStore):
  self._requireValidSharedFileName(shared_file_name)
  return self.get_public_url(self._shared_file_id(shared_file_name))

- def _bindBucket(self,
- bucket_name: str,
- create: bool = False,
- block: bool = True,
- versioning: bool = False,
- check_versioning_consistency: bool = True):
+ def _bindBucket(
+ self,
+ bucket_name: str,
+ create: bool = False,
+ block: bool = True,
+ versioning: bool = False,
+ check_versioning_consistency: bool = True,
+ ):
  """
  Return the Boto Bucket object representing the S3 bucket with the given name. If the
  bucket does not exist and `create` is True, it will be created.
@@ -760,8 +941,7 @@ class AWSJobStore(AbstractJobStore):
  Decide, given an error, whether we should retry binding the bucket.
  """

- if (isinstance(error, ClientError) and
- get_error_status(error) in (404, 409)):
+ if isinstance(error, ClientError) and get_error_status(error) in (404, 409):
  # Handle cases where the bucket creation is in a weird state that might let us proceed.
  # https://github.com/BD2KGenomics/toil/issues/955
  # https://github.com/BD2KGenomics/toil/issues/995
@@ -771,7 +951,7 @@ class AWSJobStore(AbstractJobStore):
  # OperationAborted == 409
  # NoSuchBucket == 404
  return True
- if get_error_code(error) == 'SlowDown':
+ if get_error_code(error) == "SlowDown":
  # We may get told to SlowDown by AWS when we try to create our
  # bucket. In that case, we should retry and use the exponential
  # backoff.
@@ -804,15 +984,17 @@ class AWSJobStore(AbstractJobStore):
  # NoSuchBucket. We let that kick us back up to the
  # main retry loop.
  assert (
- get_bucket_region(bucket_name) == self.region
+ get_bucket_region(bucket_name) == self.region
  ), f"bucket_name: {bucket_name}, {get_bucket_region(bucket_name)} != {self.region}"

  tags = build_tag_dict_from_env()

  if tags:
  flat_tags = flatten_tags(tags)
- bucket_tagging = self.s3_resource.BucketTagging(bucket_name)
- bucket_tagging.put(Tagging={'TagSet': flat_tags})
+ bucket_tagging = self.s3_resource.BucketTagging(
+ bucket_name
+ )
+ bucket_tagging.put(Tagging={"TagSet": flat_tags})

  # Configure bucket so that we can make objects in
  # it public, which was the historical default.
@@ -825,7 +1007,9 @@ class AWSJobStore(AbstractJobStore):
  # This is raised if the user attempts to get a bucket in a region outside
  # the specified one, if the specified one is not `us-east-1`. The us-east-1
  # server allows a user to use buckets from any region.
- raise BucketLocationConflictException(get_bucket_region(bucket_name))
+ raise BucketLocationConflictException(
+ get_bucket_region(bucket_name)
+ )
  else:
  raise
  else:
@@ -842,24 +1026,32 @@ class AWSJobStore(AbstractJobStore):
  # consistent?
  time.sleep(1)
  while not self._getBucketVersioning(bucket_name):
- logger.warning(f"Waiting for versioning activation on bucket '{bucket_name}'...")
+ logger.warning(
+ f"Waiting for versioning activation on bucket '{bucket_name}'..."
+ )
  time.sleep(1)
  elif check_versioning_consistency:
  # now test for versioning consistency
  # we should never see any of these errors since 'versioning' should always be true
  bucket_versioning = self._getBucketVersioning(bucket_name)
  if bucket_versioning != versioning:
- assert False, 'Cannot modify versioning on existing bucket'
+ assert False, "Cannot modify versioning on existing bucket"
  elif bucket_versioning is None:
- assert False, 'Cannot use a bucket with versioning suspended'
+ assert False, "Cannot use a bucket with versioning suspended"
  if bucketExisted:
- logger.debug(f"Using pre-existing job store bucket '{bucket_name}'.")
+ logger.debug(
+ f"Using pre-existing job store bucket '{bucket_name}'."
+ )
  else:
- logger.debug(f"Created new job store bucket '{bucket_name}' with versioning state {versioning}.")
+ logger.debug(
+ f"Created new job store bucket '{bucket_name}' with versioning state {versioning}."
+ )

  return bucket

- def _bindDomain(self, domain_name: str, create: bool = False, block: bool = True) -> None:
+ def _bindDomain(
+ self, domain_name: str, create: bool = False, block: bool = True
+ ) -> None:
  """
  Return the Boto3 domain name representing the SDB domain. When create=True, it will
  create the domain if it does not exist.
@@ -878,9 +1070,11 @@ class AWSJobStore(AbstractJobStore):
  retry timeout expires.
  """
  logger.debug("Binding to job store domain '%s'.", domain_name)
- retryargs = dict(predicate=lambda e: no_such_sdb_domain(e) or sdb_unavailable(e))
+ retryargs = dict(
+ predicate=lambda e: no_such_sdb_domain(e) or sdb_unavailable(e)
+ )
  if not block:
- retryargs['timeout'] = 15
+ retryargs["timeout"] = 15
  for attempt in retry_sdb(**retryargs):
  with attempt:
  try:
@@ -902,13 +1096,13 @@ class AWSJobStore(AbstractJobStore):
  return str(uuid.uuid4())

  # A dummy job ID under which all shared files are stored
- sharedFileOwnerID = uuid.UUID('891f7db6-e4d9-4221-a58e-ab6cc4395f94')
+ sharedFileOwnerID = uuid.UUID("891f7db6-e4d9-4221-a58e-ab6cc4395f94")

  # A dummy job ID under which all unread stats files are stored
- statsFileOwnerID = uuid.UUID('bfcf5286-4bc7-41ef-a85d-9ab415b69d53')
+ statsFileOwnerID = uuid.UUID("bfcf5286-4bc7-41ef-a85d-9ab415b69d53")

  # A dummy job ID under which all read stats files are stored
- readStatsFileOwnerID = uuid.UUID('e77fc3aa-d232-4255-ae04-f64ee8eb0bfa')
+ readStatsFileOwnerID = uuid.UUID("e77fc3aa-d232-4255-ae04-f64ee8eb0bfa")

  def _shared_file_id(self, shared_file_name):
  return str(uuid.uuid5(self.sharedFileOwnerID, shared_file_name))
@@ -918,13 +1112,22 @@ class AWSJobStore(AbstractJobStore):
  """
  Represents a file in this job store.
  """
+
  outer = None
  """
  :type: AWSJobStore
  """

- def __init__(self, fileID, ownerID, encrypted,
- version=None, content=None, numContentChunks=0, checksum=None):
+ def __init__(
+ self,
+ fileID,
+ ownerID,
+ encrypted,
+ version=None,
+ content=None,
+ numContentChunks=0,
+ checksum=None,
+ ):
  """
  :type fileID: str
  :param fileID: the file's ID
@@ -1003,24 +1206,30 @@ class AWSJobStore(AbstractJobStore):
  assert content is None or isinstance(content, bytes)
  self._content = content
  if content is not None:
- self.version = ''
+ self.version = ""

  @classmethod
  def create(cls, ownerID: str):
- return cls(str(uuid.uuid4()), ownerID, encrypted=cls.outer.sseKeyPath is not None)
+ return cls(
+ str(uuid.uuid4()), ownerID, encrypted=cls.outer.sseKeyPath is not None
+ )

  @classmethod
  def presenceIndicator(cls):
- return 'encrypted'
+ return "encrypted"

  @classmethod
  def exists(cls, jobStoreFileID):
  for attempt in retry_sdb():
  with attempt:
- return bool(cls.outer.db.get_attributes(DomainName=cls.outer.files_domain_name,
- ItemName=compat_bytes(jobStoreFileID),
- AttributeNames=[cls.presenceIndicator()],
- ConsistentRead=True).get("Attributes", []))
+ return bool(
+ cls.outer.db.get_attributes(
+ DomainName=cls.outer.files_domain_name,
+ ItemName=compat_bytes(jobStoreFileID),
+ AttributeNames=[cls.presenceIndicator()],
+ ConsistentRead=True,
+ ).get("Attributes", [])
+ )

  @classmethod
  def load(cls, jobStoreFileID):
@@ -1029,10 +1238,13 @@ class AWSJobStore(AbstractJobStore):
  self = cls.fromItem(
  {
  "Name": compat_bytes(jobStoreFileID),
- "Attributes": cls.outer.db.get_attributes(DomainName=cls.outer.files_domain_name,
- ItemName=compat_bytes(jobStoreFileID),
- ConsistentRead=True).get("Attributes", [])
- })
+ "Attributes": cls.outer.db.get_attributes(
+ DomainName=cls.outer.files_domain_name,
+ ItemName=compat_bytes(jobStoreFileID),
+ ConsistentRead=True,
+ ).get("Attributes", []),
+ }
+ )
  return self

  @classmethod
@@ -1062,7 +1274,7 @@ class AWSJobStore(AbstractJobStore):
  return self

  @classmethod
- def fromItem(cls, item: ItemTypeDef):
+ def fromItem(cls, item: "ItemTypeDef"):
  """
  Convert an SDB item to an instance of this class.

@@ -1075,7 +1287,9 @@ class AWSJobStore(AbstractJobStore):
  return s if s is None else str(s)

  # ownerID and encrypted are the only mandatory attributes
- ownerID, encrypted, version, checksum = SDBHelper.get_attributes_from_item(item, ["ownerID", "encrypted", "version", "checksum"])
+ ownerID, encrypted, version, checksum = SDBHelper.get_attributes_from_item(
+ item, ["ownerID", "encrypted", "version", "checksum"]
+ )
  if ownerID is None:
  assert encrypted is None
  return None
@@ -1085,14 +1299,23 @@ class AWSJobStore(AbstractJobStore):
  if encrypted:
  sseKeyPath = cls.outer.sseKeyPath
  if sseKeyPath is None:
- raise AssertionError('Content is encrypted but no key was provided.')
+ raise AssertionError(
+ "Content is encrypted but no key was provided."
+ )
  if content is not None:
  content = encryption.decrypt(content, sseKeyPath)
- self = cls(fileID=item["Name"], ownerID=ownerID, encrypted=encrypted, version=version,
- content=content, numContentChunks=numContentChunks, checksum=checksum)
+ self = cls(
+ fileID=item["Name"],
+ ownerID=ownerID,
+ encrypted=encrypted,
+ version=version,
+ content=content,
+ numContentChunks=numContentChunks,
+ checksum=checksum,
+ )
  return self

- def toItem(self) -> Tuple[Dict[str, str], int]:
+ def toItem(self) -> tuple[dict[str, str], int]:
  """
  Convert this instance to a dictionary of attribute names to values

@@ -1104,15 +1327,21 @@ class AWSJobStore(AbstractJobStore):
  if self.encrypted and content is not None:
  sseKeyPath = self.outer.sseKeyPath
  if sseKeyPath is None:
- raise AssertionError('Encryption requested but no key was provided.')
+ raise AssertionError(
+ "Encryption requested but no key was provided."
+ )
  content = encryption.encrypt(content, sseKeyPath)
  assert content is None or isinstance(content, bytes)
  attributes = self.binaryToAttributes(content)
- numChunks = int(attributes['numChunks'])
- attributes.update(dict(ownerID=self.ownerID or '',
- encrypted=str(self.encrypted),
- version=self.version or '',
- checksum=self.checksum or ''))
+ numChunks = int(attributes["numChunks"])
+ attributes.update(
+ dict(
+ ownerID=self.ownerID or "",
+ encrypted=str(self.encrypted),
+ version=self.version or "",
+ checksum=self.checksum or "",
+ )
+ )
  return attributes, numChunks

  @classmethod
@@ -1128,24 +1357,37 @@ class AWSJobStore(AbstractJobStore):
  attributes_boto3 = SDBHelper.attributeDictToList(attributes)
  # False stands for absence
  if self.previousVersion is None:
- expected: UpdateConditionTypeDef = {"Name": 'version', "Exists": False}
+ expected: "UpdateConditionTypeDef" = {
+ "Name": "version",
+ "Exists": False,
+ }
  else:
- expected = {"Name": 'version', "Value": cast(str, self.previousVersion)}
+ expected = {"Name": "version", "Value": cast(str, self.previousVersion)}
  try:
  for attempt in retry_sdb():
  with attempt:
- self.outer.db.put_attributes(DomainName=self.outer.files_domain_name,
- ItemName=compat_bytes(self.fileID),
- Attributes=[{"Name": attribute["Name"], "Value": attribute["Value"], "Replace": True}
- for attribute in attributes_boto3],
- Expected=expected)
+ self.outer.db.put_attributes(
+ DomainName=self.outer.files_domain_name,
+ ItemName=compat_bytes(self.fileID),
+ Attributes=[
+ {
+ "Name": attribute["Name"],
+ "Value": attribute["Value"],
+ "Replace": True,
+ }
+ for attribute in attributes_boto3
+ ],
+ Expected=expected,
+ )
  # clean up the old version of the file if necessary and safe
  if self.previousVersion and (self.previousVersion != self.version):
  for attempt in retry_s3():
  with attempt:
- self.outer.s3_client.delete_object(Bucket=self.outer.files_bucket.name,
- Key=compat_bytes(self.fileID),
- VersionId=self.previousVersion)
+ self.outer.s3_client.delete_object(
+ Bucket=self.outer.files_bucket.name,
+ Key=compat_bytes(self.fileID),
+ VersionId=self.previousVersion,
1390
+ )
1149
1391
  self._previousVersion = self._version
1150
1392
  if numNewContentChunks < self._numContentChunks:
1151
1393
  residualChunks = range(numNewContentChunks, self._numContentChunks)
@@ -1153,19 +1395,26 @@ class AWSJobStore(AbstractJobStore):
1153
1395
  # boto3 requires providing the value as well as the name in the attribute, and we don't store it locally
1154
1396
  # the php sdk resolves this issue by not requiring the Value key https://github.com/aws/aws-sdk-php/issues/185
1155
1397
  # but this doesnt extend to boto3
1156
- delete_attributes = self.outer.db.get_attributes(DomainName=self.outer.files_domain_name,
1157
- ItemName=compat_bytes(self.fileID),
1158
- AttributeNames=[chunk for chunk in residual_chunk_names]).get("Attributes")
1398
+ delete_attributes = self.outer.db.get_attributes(
1399
+ DomainName=self.outer.files_domain_name,
1400
+ ItemName=compat_bytes(self.fileID),
1401
+ AttributeNames=[chunk for chunk in residual_chunk_names],
1402
+ ).get("Attributes")
1159
1403
  for attempt in retry_sdb():
1160
1404
  with attempt:
1161
- self.outer.db.delete_attributes(DomainName=self.outer.files_domain_name,
1162
- ItemName=compat_bytes(self.fileID),
1163
- Attributes=delete_attributes)
1164
- self.outer.db.get_attributes(DomainName=self.outer.files_domain_name, ItemName=compat_bytes(self.fileID))
1405
+ self.outer.db.delete_attributes(
1406
+ DomainName=self.outer.files_domain_name,
1407
+ ItemName=compat_bytes(self.fileID),
1408
+ Attributes=delete_attributes,
1409
+ )
1410
+ self.outer.db.get_attributes(
1411
+ DomainName=self.outer.files_domain_name,
1412
+ ItemName=compat_bytes(self.fileID),
1413
+ )
1165
1414
 
1166
1415
  self._numContentChunks = numNewContentChunks
1167
1416
  except ClientError as e:
1168
- if get_error_code(e) == 'ConditionalCheckFailed':
1417
+ if get_error_code(e) == "ConditionalCheckFailed":
1169
1418
  raise ConcurrentFileModificationException(self.fileID)
1170
1419
  else:
1171
1420
  raise
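
The save() hunks above are pure reformatting, but they make the optimistic-concurrency scheme easier to see: put_attributes is issued with an Expected condition on the version attribute (Exists: False for a first write, otherwise the previously observed version), and a ConditionalCheckFailed error code is translated into ConcurrentFileModificationException. A minimal sketch of that conditional-write pattern, with hypothetical domain, item, and attribute values:

import boto3
from botocore.exceptions import ClientError

sdb = boto3.client("sdb", region_name="us-west-2")
DOMAIN, ITEM = "example--files", "example-file-id"
previous_version = None  # or the version string read earlier

# Only write if nobody else has changed `version` since we last read it.
expected = (
    {"Name": "version", "Exists": False}
    if previous_version is None
    else {"Name": "version", "Value": previous_version}
)
try:
    sdb.put_attributes(
        DomainName=DOMAIN,
        ItemName=ITEM,
        Attributes=[{"Name": "version", "Value": "v2", "Replace": True}],
        Expected=expected,
    )
except ClientError as e:
    # Toil wraps this in ConcurrentFileModificationException; a generic error here.
    if e.response["Error"]["Code"] == "ConditionalCheckFailed":
        raise RuntimeError("concurrent modification detected") from e
    raise

The conditional write is what lets two racing writers of the same file record be detected without any separate locking service.
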
@@ -1173,24 +1422,30 @@ class AWSJobStore(AbstractJobStore):
1173
1422
  def upload(self, localFilePath, calculateChecksum=True):
1174
1423
  file_size, file_time = fileSizeAndTime(localFilePath)
1175
1424
  if file_size <= self.maxInlinedSize():
1176
- with open(localFilePath, 'rb') as f:
1425
+ with open(localFilePath, "rb") as f:
1177
1426
  self.content = f.read()
1178
1427
  # Clear out any old checksum in case of overwrite
1179
- self.checksum = ''
1428
+ self.checksum = ""
1180
1429
  else:
1181
1430
  headerArgs = self._s3EncryptionArgs()
1182
1431
  # Create a new Resource in case it needs to be on its own thread
1183
- resource = boto3_session.resource('s3', region_name=self.outer.region)
1184
-
1185
- self.checksum = self._get_file_checksum(localFilePath) if calculateChecksum else None
1186
- self.version = uploadFromPath(localFilePath,
1187
- resource=resource,
1188
- bucketName=self.outer.files_bucket.name,
1189
- fileID=compat_bytes(self.fileID),
1190
- headerArgs=headerArgs,
1191
- partSize=self.outer.part_size)
1192
-
1193
- def _start_checksum(self, to_match=None, algorithm='sha1'):
1432
+ resource = boto3_session.resource("s3", region_name=self.outer.region)
1433
+
1434
+ self.checksum = (
1435
+ self._get_file_checksum(localFilePath)
1436
+ if calculateChecksum
1437
+ else None
1438
+ )
1439
+ self.version = uploadFromPath(
1440
+ localFilePath,
1441
+ resource=resource,
1442
+ bucketName=self.outer.files_bucket.name,
1443
+ fileID=compat_bytes(self.fileID),
1444
+ headerArgs=headerArgs,
1445
+ partSize=self.outer.part_size,
1446
+ )
1447
+
1448
+ def _start_checksum(self, to_match=None, algorithm="sha1"):
1194
1449
  """
1195
1450
  Get a hasher that can be used with _update_checksum and
1196
1451
  _finish_checksum.
@@ -1208,12 +1463,12 @@ class AWSJobStore(AbstractJobStore):
1208
1463
  expected = None
1209
1464
 
1210
1465
  if to_match is not None:
1211
- parts = to_match.split('$')
1466
+ parts = to_match.split("$")
1212
1467
  algorithm = parts[0]
1213
1468
  expected = parts[1]
1214
1469
 
1215
1470
  wrapped = getattr(hashlib, algorithm)()
1216
- logger.debug(f'Starting {algorithm} checksum to match {expected}')
1471
+ logger.debug(f"Starting {algorithm} checksum to match {expected}")
1217
1472
  return algorithm, wrapped, expected
1218
1473
 
1219
1474
  def _update_checksum(self, checksum_in_progress, data):
@@ -1230,26 +1485,32 @@ class AWSJobStore(AbstractJobStore):
1230
1485
 
1231
1486
  result_hash = checksum_in_progress[1].hexdigest()
1232
1487
 
1233
- logger.debug(f'Completed checksum with hash {result_hash} vs. expected {checksum_in_progress[2]}')
1488
+ logger.debug(
1489
+ f"Completed checksum with hash {result_hash} vs. expected {checksum_in_progress[2]}"
1490
+ )
1234
1491
  if checksum_in_progress[2] is not None:
1235
1492
  # We expected a particular hash
1236
1493
  if result_hash != checksum_in_progress[2]:
1237
- raise ChecksumError('Checksum mismatch. Expected: %s Actual: %s' %
1238
- (checksum_in_progress[2], result_hash))
1494
+ raise ChecksumError(
1495
+ "Checksum mismatch. Expected: %s Actual: %s"
1496
+ % (checksum_in_progress[2], result_hash)
1497
+ )
1239
1498
 
1240
- return '$'.join([checksum_in_progress[0], result_hash])
1499
+ return "$".join([checksum_in_progress[0], result_hash])
1241
1500
 
1242
1501
  def _get_file_checksum(self, localFilePath, to_match=None):
1243
- with open(localFilePath, 'rb') as f:
1502
+ with open(localFilePath, "rb") as f:
1244
1503
  hasher = self._start_checksum(to_match=to_match)
1245
1504
  contents = f.read(1024 * 1024)
1246
- while contents != b'':
1505
+ while contents != b"":
1247
1506
  self._update_checksum(hasher, contents)
1248
1507
  contents = f.read(1024 * 1024)
1249
1508
  return self._finish_checksum(hasher)
1250
1509
 
1251
1510
  @contextmanager
1252
- def uploadStream(self, multipart=True, allowInlining=True, encoding=None, errors=None):
1511
+ def uploadStream(
1512
+ self, multipart=True, allowInlining=True, encoding=None, errors=None
1513
+ ):
1253
1514
  """
1254
1515
  Context manager that gives out a binary or text mode upload stream to upload data.
1255
1516
  """
@@ -1270,14 +1531,14 @@ class AWSJobStore(AbstractJobStore):
1270
1531
  assert isinstance(buf, bytes)
1271
1532
 
1272
1533
  if allowInlining and len(buf) <= info.maxInlinedSize():
1273
- logger.debug('Inlining content of %d bytes', len(buf))
1534
+ logger.debug("Inlining content of %d bytes", len(buf))
1274
1535
  info.content = buf
1275
1536
  # There will be no checksum
1276
- info.checksum = ''
1537
+ info.checksum = ""
1277
1538
  else:
1278
1539
  # We will compute a checksum
1279
1540
  hasher = info._start_checksum()
1280
- logger.debug('Updating checksum with %d bytes', len(buf))
1541
+ logger.debug("Updating checksum with %d bytes", len(buf))
1281
1542
  info._update_checksum(hasher, buf)
1282
1543
 
1283
1544
  client = store.s3_client
@@ -1286,47 +1547,72 @@ class AWSJobStore(AbstractJobStore):
1286
1547
 
1287
1548
  for attempt in retry_s3():
1288
1549
  with attempt:
1289
- logger.debug('Starting multipart upload')
1550
+ logger.debug("Starting multipart upload")
1290
1551
  # low-level clients are thread safe
1291
- upload = client.create_multipart_upload(Bucket=bucket_name,
1292
- Key=compat_bytes(info.fileID),
1293
- **headerArgs)
1294
- uploadId = upload['UploadId']
1552
+ upload = client.create_multipart_upload(
1553
+ Bucket=bucket_name,
1554
+ Key=compat_bytes(info.fileID),
1555
+ **headerArgs,
1556
+ )
1557
+ uploadId = upload["UploadId"]
1295
1558
  parts = []
1296
- logger.debug('Multipart upload started as %s', uploadId)
1559
+ logger.debug("Multipart upload started as %s", uploadId)
1297
1560
 
1298
1561
  for attempt in retry_s3():
1299
1562
  with attempt:
1300
1563
  for i in range(CONSISTENCY_TICKS):
1301
1564
  # Sometimes we can create a multipart upload and not see it. Wait around for it.
1302
- response = client.list_multipart_uploads(Bucket=bucket_name,
1303
- MaxUploads=1,
1304
- Prefix=compat_bytes(info.fileID))
1305
- if ('Uploads' in response and
1306
- len(response['Uploads']) != 0 and
1307
- response['Uploads'][0]['UploadId'] == uploadId):
1308
-
1309
- logger.debug('Multipart upload visible as %s', uploadId)
1565
+ response = client.list_multipart_uploads(
1566
+ Bucket=bucket_name,
1567
+ MaxUploads=1,
1568
+ Prefix=compat_bytes(info.fileID),
1569
+ )
1570
+ if (
1571
+ "Uploads" in response
1572
+ and len(response["Uploads"]) != 0
1573
+ and response["Uploads"][0]["UploadId"]
1574
+ == uploadId
1575
+ ):
1576
+
1577
+ logger.debug(
1578
+ "Multipart upload visible as %s", uploadId
1579
+ )
1310
1580
  break
1311
1581
  else:
1312
- logger.debug('Multipart upload %s is not visible; we see %s', uploadId, response.get('Uploads'))
1313
- time.sleep(CONSISTENCY_TIME * 2 ** i)
1582
+ logger.debug(
1583
+ "Multipart upload %s is not visible; we see %s",
1584
+ uploadId,
1585
+ response.get("Uploads"),
1586
+ )
1587
+ time.sleep(CONSISTENCY_TIME * 2**i)
1314
1588
 
1315
1589
  try:
1316
1590
  for part_num in itertools.count():
1317
1591
  for attempt in retry_s3():
1318
1592
  with attempt:
1319
- logger.debug('Uploading part %d of %d bytes to %s', part_num + 1, len(buf), uploadId)
1593
+ logger.debug(
1594
+ "Uploading part %d of %d bytes to %s",
1595
+ part_num + 1,
1596
+ len(buf),
1597
+ uploadId,
1598
+ )
1320
1599
  # TODO: include the Content-MD5 header:
1321
1600
  # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.complete_multipart_upload
1322
- part = client.upload_part(Bucket=bucket_name,
1323
- Key=compat_bytes(info.fileID),
1324
- PartNumber=part_num + 1,
1325
- UploadId=uploadId,
1326
- Body=BytesIO(buf),
1327
- **headerArgs)
1328
-
1329
- parts.append({"PartNumber": part_num + 1, "ETag": part["ETag"]})
1601
+ part = client.upload_part(
1602
+ Bucket=bucket_name,
1603
+ Key=compat_bytes(info.fileID),
1604
+ PartNumber=part_num + 1,
1605
+ UploadId=uploadId,
1606
+ Body=BytesIO(buf),
1607
+ **headerArgs,
1608
+ )
1609
+
1610
+ parts.append(
1611
+ {
1612
+ "PartNumber": part_num + 1,
1613
+ "ETag": part["ETag"],
1614
+ }
1615
+ )
1330
1616
 
1331
1617
  # Get the next block of data we want to put
1332
1618
  buf = readable.read(info.outer.part_size)
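
This hunk re-indents the multipart branch of uploadStream(): create the upload, wait with exponential backoff until list_multipart_uploads can see it, then send fixed-size parts with upload_part while collecting each part's ETag; the hunks that follow either abort the upload on failure or complete it with the collected parts. A condensed standalone sketch of that boto3 flow, with a hypothetical bucket and key and with the retry and SSE-C plumbing omitted:

import io
import boto3

s3 = boto3.client("s3")
BUCKET, KEY = "example-bucket", "example-key"
PART_SIZE = 50 << 20  # hypothetical; all parts except the last must be >= 5 MiB

def multipart_upload(readable) -> None:
    upload = s3.create_multipart_upload(Bucket=BUCKET, Key=KEY)
    upload_id = upload["UploadId"]
    parts = []
    try:
        part_number = 1
        while True:
            buf = readable.read(PART_SIZE)
            # Always send at least one part so an empty stream still completes.
            if part_number > 1 and buf == b"":
                break
            part = s3.upload_part(
                Bucket=BUCKET,
                Key=KEY,
                PartNumber=part_number,
                UploadId=upload_id,
                Body=io.BytesIO(buf),
            )
            parts.append({"PartNumber": part_number, "ETag": part["ETag"]})
            part_number += 1
    except BaseException:
        # Give the upload up so orphaned parts do not linger, then re-raise.
        s3.abort_multipart_upload(Bucket=BUCKET, Key=KEY, UploadId=upload_id)
        raise
    else:
        s3.complete_multipart_upload(
            Bucket=BUCKET,
            Key=KEY,
            UploadId=upload_id,
            MultipartUpload={"Parts": parts},
        )

Keeping the (PartNumber, ETag) pairs is mandatory: complete_multipart_upload rejects the request if any listed part is missing or mismatched.
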
@@ -1339,15 +1625,21 @@ class AWSJobStore(AbstractJobStore):
1339
1625
  with panic(log=logger):
1340
1626
  for attempt in retry_s3():
1341
1627
  with attempt:
1342
- client.abort_multipart_upload(Bucket=bucket_name,
1343
- Key=compat_bytes(info.fileID),
1344
- UploadId=uploadId)
1628
+ client.abort_multipart_upload(
1629
+ Bucket=bucket_name,
1630
+ Key=compat_bytes(info.fileID),
1631
+ UploadId=uploadId,
1632
+ )
1345
1633
 
1346
1634
  else:
1347
1635
 
1348
- while not store._getBucketVersioning(store.files_bucket.name):
1349
- logger.warning('Versioning does not appear to be enabled yet. Deferring multipart '
1350
- 'upload completion...')
1636
+ while not store._getBucketVersioning(
1637
+ store.files_bucket.name
1638
+ ):
1639
+ logger.warning(
1640
+ "Versioning does not appear to be enabled yet. Deferring multipart "
1641
+ "upload completion..."
1642
+ )
1351
1643
  time.sleep(1)
1352
1644
 
1353
1645
  # Save the checksum
@@ -1359,32 +1651,46 @@ class AWSJobStore(AbstractJobStore):
1359
1651
  # in tests
1360
1652
  # (https://github.com/DataBiosphere/toil/issues/3894)
1361
1653
  with attempt:
1362
- logger.debug('Attempting to complete upload...')
1654
+ logger.debug("Attempting to complete upload...")
1363
1655
  completed = client.complete_multipart_upload(
1364
1656
  Bucket=bucket_name,
1365
1657
  Key=compat_bytes(info.fileID),
1366
1658
  UploadId=uploadId,
1367
- MultipartUpload={"Parts": parts})
1368
-
1369
- logger.debug('Completed upload object of type %s: %s', str(type(completed)),
1370
- repr(completed))
1371
- info.version = completed.get('VersionId')
1372
- logger.debug('Completed upload with version %s', str(info.version))
1659
+ MultipartUpload={"Parts": parts},
1660
+ )
1661
+
1662
+ logger.debug(
1663
+ "Completed upload object of type %s: %s",
1664
+ str(type(completed)),
1665
+ repr(completed),
1666
+ )
1667
+ info.version = completed.get("VersionId")
1668
+ logger.debug(
1669
+ "Completed upload with version %s",
1670
+ str(info.version),
1671
+ )
1373
1672
 
1374
1673
  if info.version is None:
1375
1674
  # Somehow we don't know the version. Try and get it.
1376
- for attempt in retry_s3(predicate=lambda e: retryable_s3_errors(e) or isinstance(e, AssertionError)):
1675
+ for attempt in retry_s3(
1676
+ predicate=lambda e: retryable_s3_errors(e)
1677
+ or isinstance(e, AssertionError)
1678
+ ):
1377
1679
  with attempt:
1378
- version = client.head_object(Bucket=bucket_name,
1379
- Key=compat_bytes(info.fileID),
1380
- **headerArgs).get('VersionId', None)
1381
- logger.warning('Loaded key for upload with no version and got version %s',
1382
- str(version))
1680
+ version = client.head_object(
1681
+ Bucket=bucket_name,
1682
+ Key=compat_bytes(info.fileID),
1683
+ **headerArgs,
1684
+ ).get("VersionId", None)
1685
+ logger.warning(
1686
+ "Loaded key for upload with no version and got version %s",
1687
+ str(version),
1688
+ )
1383
1689
  info.version = version
1384
1690
  assert info.version is not None
1385
1691
 
1386
1692
  # Make sure we actually wrote something, even if an empty file
1387
- assert (bool(info.version) or info.content is not None)
1693
+ assert bool(info.version) or info.content is not None
1388
1694
 
1389
1695
  class SinglePartPipe(WritablePipe):
1390
1696
  def readFrom(self, readable):
@@ -1392,10 +1698,10 @@ class AWSJobStore(AbstractJobStore):
1392
1698
  assert isinstance(buf, bytes)
1393
1699
  dataLength = len(buf)
1394
1700
  if allowInlining and dataLength <= info.maxInlinedSize():
1395
- logger.debug('Inlining content of %d bytes', len(buf))
1701
+ logger.debug("Inlining content of %d bytes", len(buf))
1396
1702
  info.content = buf
1397
1703
  # There will be no checksum
1398
- info.checksum = ''
1704
+ info.checksum = ""
1399
1705
  else:
1400
1706
  # We will compute a checksum
1401
1707
  hasher = info._start_checksum()
@@ -1409,39 +1715,57 @@ class AWSJobStore(AbstractJobStore):
1409
1715
  buf = BytesIO(buf)
1410
1716
 
1411
1717
  while not store._getBucketVersioning(bucket_name):
1412
- logger.warning('Versioning does not appear to be enabled yet. Deferring single part '
1413
- 'upload...')
1718
+ logger.warning(
1719
+ "Versioning does not appear to be enabled yet. Deferring single part "
1720
+ "upload..."
1721
+ )
1414
1722
  time.sleep(1)
1415
1723
 
1416
1724
  for attempt in retry_s3():
1417
1725
  with attempt:
1418
- logger.debug('Uploading single part of %d bytes', dataLength)
1419
- client.upload_fileobj(Bucket=bucket_name,
1420
- Key=compat_bytes(info.fileID),
1421
- Fileobj=buf,
1422
- ExtraArgs=headerArgs)
1726
+ logger.debug(
1727
+ "Uploading single part of %d bytes", dataLength
1728
+ )
1729
+ client.upload_fileobj(
1730
+ Bucket=bucket_name,
1731
+ Key=compat_bytes(info.fileID),
1732
+ Fileobj=buf,
1733
+ ExtraArgs=headerArgs,
1734
+ )
1423
1735
 
1424
1736
  # use head_object with the SSE headers to access versionId and content_length attributes
1425
- headObj = client.head_object(Bucket=bucket_name,
1426
- Key=compat_bytes(info.fileID),
1427
- **headerArgs)
1428
- assert dataLength == headObj.get('ContentLength', None)
1429
- info.version = headObj.get('VersionId', None)
1430
- logger.debug('Upload received version %s', str(info.version))
1737
+ headObj = client.head_object(
1738
+ Bucket=bucket_name,
1739
+ Key=compat_bytes(info.fileID),
1740
+ **headerArgs,
1741
+ )
1742
+ assert dataLength == headObj.get("ContentLength", None)
1743
+ info.version = headObj.get("VersionId", None)
1744
+ logger.debug(
1745
+ "Upload received version %s", str(info.version)
1746
+ )
1431
1747
 
1432
1748
  if info.version is None:
1433
1749
  # Somehow we don't know the version
1434
- for attempt in retry_s3(predicate=lambda e: retryable_s3_errors(e) or isinstance(e, AssertionError)):
1750
+ for attempt in retry_s3(
1751
+ predicate=lambda e: retryable_s3_errors(e)
1752
+ or isinstance(e, AssertionError)
1753
+ ):
1435
1754
  with attempt:
1436
- headObj = client.head_object(Bucket=bucket_name,
1437
- Key=compat_bytes(info.fileID),
1438
- **headerArgs)
1439
- info.version = headObj.get('VersionId', None)
1440
- logger.warning('Reloaded key with no version and got version %s', str(info.version))
1755
+ headObj = client.head_object(
1756
+ Bucket=bucket_name,
1757
+ Key=compat_bytes(info.fileID),
1758
+ **headerArgs,
1759
+ )
1760
+ info.version = headObj.get("VersionId", None)
1761
+ logger.warning(
1762
+ "Reloaded key with no version and got version %s",
1763
+ str(info.version),
1764
+ )
1441
1765
  assert info.version is not None
1442
1766
 
1443
1767
  # Make sure we actually wrote something, even if an empty file
1444
- assert (bool(info.version) or info.content is not None)
1768
+ assert bool(info.version) or info.content is not None
1445
1769
 
1446
1770
  if multipart:
1447
1771
  pipe = MultiPartPipe(encoding=encoding, errors=errors)
@@ -1452,20 +1776,22 @@ class AWSJobStore(AbstractJobStore):
1452
1776
  yield writable
1453
1777
 
1454
1778
  if not pipe.reader_done:
1455
- logger.debug(f'Version: {self.version} Content: {self.content}')
1456
- raise RuntimeError('Escaped context manager without written data being read!')
1779
+ logger.debug(f"Version: {self.version} Content: {self.content}")
1780
+ raise RuntimeError(
1781
+ "Escaped context manager without written data being read!"
1782
+ )
1457
1783
 
1458
1784
  # We check our work to make sure we have exactly one of embedded
1459
1785
  # content or a real object version.
1460
1786
 
1461
1787
  if self.content is None:
1462
1788
  if not bool(self.version):
1463
- logger.debug(f'Version: {self.version} Content: {self.content}')
1464
- raise RuntimeError('No content added and no version created')
1789
+ logger.debug(f"Version: {self.version} Content: {self.content}")
1790
+ raise RuntimeError("No content added and no version created")
1465
1791
  else:
1466
1792
  if bool(self.version):
1467
- logger.debug(f'Version: {self.version} Content: {self.content}')
1468
- raise RuntimeError('Content added and version created')
1793
+ logger.debug(f"Version: {self.version} Content: {self.content}")
1794
+ raise RuntimeError("Content added and version created")
1469
1795
 
1470
1796
  def copyFrom(self, srcObj):
1471
1797
  """
@@ -1475,18 +1801,20 @@ class AWSJobStore(AbstractJobStore):
1475
1801
  """
1476
1802
  assert srcObj.content_length is not None
1477
1803
  if srcObj.content_length <= self.maxInlinedSize():
1478
- self.content = srcObj.get().get('Body').read()
1804
+ self.content = srcObj.get().get("Body").read()
1479
1805
  else:
1480
1806
  # Create a new Resource in case it needs to be on its own thread
1481
- resource = boto3_session.resource('s3', region_name=self.outer.region)
1482
- self.version = copyKeyMultipart(resource,
1483
- srcBucketName=compat_bytes(srcObj.bucket_name),
1484
- srcKeyName=compat_bytes(srcObj.key),
1485
- srcKeyVersion=compat_bytes(srcObj.version_id),
1486
- dstBucketName=compat_bytes(self.outer.files_bucket.name),
1487
- dstKeyName=compat_bytes(self._fileID),
1488
- sseAlgorithm='AES256',
1489
- sseKey=self._getSSEKey())
1807
+ resource = boto3_session.resource("s3", region_name=self.outer.region)
1808
+ self.version = copyKeyMultipart(
1809
+ resource,
1810
+ srcBucketName=compat_bytes(srcObj.bucket_name),
1811
+ srcKeyName=compat_bytes(srcObj.key),
1812
+ srcKeyVersion=compat_bytes(srcObj.version_id),
1813
+ dstBucketName=compat_bytes(self.outer.files_bucket.name),
1814
+ dstKeyName=compat_bytes(self._fileID),
1815
+ sseAlgorithm="AES256",
1816
+ sseKey=self._getSSEKey(),
1817
+ )
1490
1818
 
1491
1819
  def copyTo(self, dstObj):
1492
1820
  """
@@ -1500,35 +1828,43 @@ class AWSJobStore(AbstractJobStore):
1500
1828
  dstObj.put(Body=self.content)
1501
1829
  elif self.version:
1502
1830
  # Create a new Resource in case it needs to be on its own thread
1503
- resource = boto3_session.resource('s3', region_name=self.outer.region)
1831
+ resource = boto3_session.resource("s3", region_name=self.outer.region)
1504
1832
 
1505
1833
  for attempt in retry_s3():
1506
1834
  # encrypted = True if self.outer.sseKeyPath else False
1507
1835
  with attempt:
1508
- copyKeyMultipart(resource,
1509
- srcBucketName=compat_bytes(self.outer.files_bucket.name),
1510
- srcKeyName=compat_bytes(self.fileID),
1511
- srcKeyVersion=compat_bytes(self.version),
1512
- dstBucketName=compat_bytes(dstObj.bucket_name),
1513
- dstKeyName=compat_bytes(dstObj.key),
1514
- copySourceSseAlgorithm='AES256',
1515
- copySourceSseKey=self._getSSEKey())
1836
+ copyKeyMultipart(
1837
+ resource,
1838
+ srcBucketName=compat_bytes(self.outer.files_bucket.name),
1839
+ srcKeyName=compat_bytes(self.fileID),
1840
+ srcKeyVersion=compat_bytes(self.version),
1841
+ dstBucketName=compat_bytes(dstObj.bucket_name),
1842
+ dstKeyName=compat_bytes(dstObj.key),
1843
+ copySourceSseAlgorithm="AES256",
1844
+ copySourceSseKey=self._getSSEKey(),
1845
+ )
1516
1846
  else:
1517
1847
  assert False
1518
1848
 
1519
1849
  def download(self, localFilePath, verifyChecksum=True):
1520
1850
  if self.content is not None:
1521
1851
  with AtomicFileCreate(localFilePath) as tmpPath:
1522
- with open(tmpPath, 'wb') as f:
1852
+ with open(tmpPath, "wb") as f:
1523
1853
  f.write(self.content)
1524
1854
  elif self.version:
1525
1855
  headerArgs = self._s3EncryptionArgs()
1526
1856
  obj = self.outer.files_bucket.Object(compat_bytes(self.fileID))
1527
1857
 
1528
- for attempt in retry_s3(predicate=lambda e: retryable_s3_errors(e) or isinstance(e, ChecksumError)):
1858
+ for attempt in retry_s3(
1859
+ predicate=lambda e: retryable_s3_errors(e)
1860
+ or isinstance(e, ChecksumError)
1861
+ ):
1529
1862
  with attempt:
1530
1863
  with AtomicFileCreate(localFilePath) as tmpPath:
1531
- obj.download_file(Filename=tmpPath, ExtraArgs={'VersionId': self.version, **headerArgs})
1864
+ obj.download_file(
1865
+ Filename=tmpPath,
1866
+ ExtraArgs={"VersionId": self.version, **headerArgs},
1867
+ )
1532
1868
 
1533
1869
  if verifyChecksum and self.checksum:
1534
1870
  try:
@@ -1536,7 +1872,10 @@ class AWSJobStore(AbstractJobStore):
1536
1872
  self._get_file_checksum(localFilePath, self.checksum)
1537
1873
  except ChecksumError as e:
1538
1874
  # Annotate checksum mismatches with file name
1539
- raise ChecksumError('Checksums do not match for file %s.' % localFilePath) from e
1875
+ raise ChecksumError(
1876
+ "Checksums do not match for file %s."
1877
+ % localFilePath
1878
+ ) from e
1540
1879
  # The error will get caught and result in a retry of the download until we run out of retries.
1541
1880
  # TODO: handle obviously truncated downloads by resuming instead.
1542
1881
  else:
@@ -1558,7 +1897,10 @@ class AWSJobStore(AbstractJobStore):
1558
1897
  obj = info.outer.files_bucket.Object(compat_bytes(info.fileID))
1559
1898
  for attempt in retry_s3():
1560
1899
  with attempt:
1561
- obj.download_fileobj(writable, ExtraArgs={'VersionId': info.version, **headerArgs})
1900
+ obj.download_fileobj(
1901
+ writable,
1902
+ ExtraArgs={"VersionId": info.version, **headerArgs},
1903
+ )
1562
1904
  else:
1563
1905
  assert False
1564
1906
 
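On the read side, the reformatted download() and the streaming download path above fetch one specific object version by passing VersionId, together with the SSE-C headers, through ExtraArgs, writing either to a temporary file or to a pipe. A minimal sketch of both forms, with hypothetical bucket, key, version, and key material:

import boto3

s3 = boto3.resource("s3")
obj = s3.Object("example-versioned-bucket", "example-key")

# Hypothetical values: a previously recorded VersionId and a 32-byte SSE-C key.
version_id = "example-version-id"
sse_args = {"SSECustomerAlgorithm": "AES256", "SSECustomerKey": b"0" * 32}

# Download a specific version to a local file...
obj.download_file(
    Filename="/tmp/example-download",
    ExtraArgs={"VersionId": version_id, **sse_args},
)

# ...or stream the same version into any writable binary file object.
with open("/tmp/example-stream", "wb") as writable:
    obj.download_fileobj(writable, ExtraArgs={"VersionId": version_id, **sse_args})
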
@@ -1574,7 +1916,7 @@ class AWSJobStore(AbstractJobStore):
1574
1916
  def transform(self, readable, writable):
1575
1917
  hasher = info._start_checksum(to_match=info.checksum)
1576
1918
  contents = readable.read(1024 * 1024)
1577
- while contents != b'':
1919
+ while contents != b"":
1578
1920
  info._update_checksum(hasher, contents)
1579
1921
  try:
1580
1922
  writable.write(contents)
@@ -1591,7 +1933,9 @@ class AWSJobStore(AbstractJobStore):
1591
1933
  if verifyChecksum and self.checksum:
1592
1934
  with DownloadPipe() as readable:
1593
1935
  # Interpose a pipe to check the hash
1594
- with HashingPipe(readable, encoding=encoding, errors=errors) as verified:
1936
+ with HashingPipe(
1937
+ readable, encoding=encoding, errors=errors
1938
+ ) as verified:
1595
1939
  yield verified
1596
1940
  else:
1597
1941
  # Readable end of pipe produces text mode output if encoding specified
@@ -1602,18 +1946,25 @@ class AWSJobStore(AbstractJobStore):
1602
1946
  def delete(self):
1603
1947
  store = self.outer
1604
1948
  if self.previousVersion is not None:
1605
- expected: UpdateConditionTypeDef = {"Name": 'version', "Value": cast(str, self.previousVersion)}
1949
+ expected: "UpdateConditionTypeDef" = {
1950
+ "Name": "version",
1951
+ "Value": cast(str, self.previousVersion),
1952
+ }
1606
1953
  for attempt in retry_sdb():
1607
1954
  with attempt:
1608
- store.db.delete_attributes(DomainName=store.files_domain_name,
1609
- ItemName=compat_bytes(self.fileID),
1610
- Expected=expected)
1955
+ store.db.delete_attributes(
1956
+ DomainName=store.files_domain_name,
1957
+ ItemName=compat_bytes(self.fileID),
1958
+ Expected=expected,
1959
+ )
1611
1960
  if self.previousVersion:
1612
1961
  for attempt in retry_s3():
1613
1962
  with attempt:
1614
- store.s3_client.delete_object(Bucket=store.files_bucket.name,
1615
- Key=compat_bytes(self.fileID),
1616
- VersionId=self.previousVersion)
1963
+ store.s3_client.delete_object(
1964
+ Bucket=store.files_bucket.name,
1965
+ Key=compat_bytes(self.fileID),
1966
+ VersionId=self.previousVersion,
1967
+ )
1617
1968
 
1618
1969
  def getSize(self):
1619
1970
  """
@@ -1632,7 +1983,7 @@ class AWSJobStore(AbstractJobStore):
1632
1983
  def _getSSEKey(self) -> Optional[bytes]:
1633
1984
  sseKeyPath = self.outer.sseKeyPath
1634
1985
  if sseKeyPath:
1635
- with open(sseKeyPath, 'rb') as f:
1986
+ with open(sseKeyPath, "rb") as f:
1636
1987
  sseKey = f.read()
1637
1988
  return sseKey
1638
1989
 
@@ -1641,25 +1992,30 @@ class AWSJobStore(AbstractJobStore):
1641
1992
  # parameters and will be used to set the http headers
1642
1993
  if self.encrypted:
1643
1994
  sseKey = self._getSSEKey()
1644
- assert sseKey is not None, 'Content is encrypted but no key was provided.'
1995
+ assert (
1996
+ sseKey is not None
1997
+ ), "Content is encrypted but no key was provided."
1645
1998
  assert len(sseKey) == 32
1646
1999
  # boto3 encodes the key and calculates the MD5 for us
1647
- return {'SSECustomerAlgorithm': 'AES256', 'SSECustomerKey': sseKey}
2000
+ return {"SSECustomerAlgorithm": "AES256", "SSECustomerKey": sseKey}
1648
2001
  else:
1649
2002
  return {}
1650
2003
 
1651
2004
  def __repr__(self):
1652
2005
  r = custom_repr
1653
- d = (('fileID', r(self.fileID)),
1654
- ('ownerID', r(self.ownerID)),
1655
- ('encrypted', r(self.encrypted)),
1656
- ('version', r(self.version)),
1657
- ('previousVersion', r(self.previousVersion)),
1658
- ('content', r(self.content)),
1659
- ('checksum', r(self.checksum)),
1660
- ('_numContentChunks', r(self._numContentChunks)))
1661
- return "{}({})".format(type(self).__name__,
1662
- ', '.join(f'{k}={v}' for k, v in d))
2006
+ d = (
2007
+ ("fileID", r(self.fileID)),
2008
+ ("ownerID", r(self.ownerID)),
2009
+ ("encrypted", r(self.encrypted)),
2010
+ ("version", r(self.version)),
2011
+ ("previousVersion", r(self.previousVersion)),
2012
+ ("content", r(self.content)),
2013
+ ("checksum", r(self.checksum)),
2014
+ ("_numContentChunks", r(self._numContentChunks)),
2015
+ )
2016
+ return "{}({})".format(
2017
+ type(self).__name__, ", ".join(f"{k}={v}" for k, v in d)
2018
+ )
1663
2019
 
1664
2020
  versionings = dict(Enabled=True, Disabled=False, Suspended=None)
1665
2021
 
@@ -1696,7 +2052,7 @@ class AWSJobStore(AbstractJobStore):
1696
2052
  if self.files_bucket is not None:
1697
2053
  self._delete_bucket(self.files_bucket)
1698
2054
  self.files_bucket = None
1699
- for name in 'files_domain_name', 'jobs_domain_name':
2055
+ for name in "files_domain_name", "jobs_domain_name":
1700
2056
  domainName = getattr(self, name)
1701
2057
  if domainName is not None:
1702
2058
  self._delete_domain(domainName)
@@ -1720,12 +2076,14 @@ class AWSJobStore(AbstractJobStore):
1720
2076
  for attempt in retry_s3():
1721
2077
  with attempt:
1722
2078
  try:
1723
- uploads = s3_boto3_client.list_multipart_uploads(Bucket=bucket.name).get('Uploads')
2079
+ uploads = s3_boto3_client.list_multipart_uploads(
2080
+ Bucket=bucket.name
2081
+ ).get("Uploads")
1724
2082
  if uploads:
1725
2083
  for u in uploads:
1726
- s3_boto3_client.abort_multipart_upload(Bucket=bucket.name,
1727
- Key=u["Key"],
1728
- UploadId=u["UploadId"])
2084
+ s3_boto3_client.abort_multipart_upload(
2085
+ Bucket=bucket.name, Key=u["Key"], UploadId=u["UploadId"]
2086
+ )
1729
2087
 
1730
2088
  bucket.objects.all().delete()
1731
2089
  bucket.object_versions.delete()
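
The destroy() hunks above keep the same teardown order while re-wrapping the calls: abort any in-flight multipart uploads, then delete all live objects and all old object versions so that the bucket itself can then be deleted. A condensed sketch of that cleanup with a hypothetical bucket name and no retry wrappers:

import boto3

BUCKET = "example-versioned-bucket"
s3_client = boto3.client("s3")
bucket = boto3.resource("s3").Bucket(BUCKET)

# 1. Abort multipart uploads that were never completed; their stored parts
#    otherwise keep accruing cost and are not removed by object deletes.
uploads = s3_client.list_multipart_uploads(Bucket=BUCKET).get("Uploads")
if uploads:
    for u in uploads:
        s3_client.abort_multipart_upload(
            Bucket=BUCKET, Key=u["Key"], UploadId=u["UploadId"]
        )

# 2. Delete the live objects, then every old version and delete marker.
bucket.objects.all().delete()
bucket.object_versions.delete()

# 3. Once empty, the bucket itself can be deleted.
bucket.delete()
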
@@ -1745,5 +2103,7 @@ custom_repr = aRepr.repr
1745
2103
  class BucketLocationConflictException(LocatorException):
1746
2104
  def __init__(self, bucketRegion):
1747
2105
  super().__init__(
1748
- 'A bucket with the same name as the jobstore was found in another region (%s). '
1749
- 'Cannot proceed as the unique bucket name is already in use.', locator=bucketRegion)
2106
+ "A bucket with the same name as the jobstore was found in another region (%s). "
2107
+ "Cannot proceed as the unique bucket name is already in use.",
2108
+ locator=bucketRegion,
2109
+ )