toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that public registry.
Files changed (193)
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/jobStores/aws/utils.py

@@ -17,25 +17,28 @@ import logging
 import os
 import types
 from ssl import SSLError
-from typing import Optional, cast
+from typing import TYPE_CHECKING, Optional, cast
 
 from boto3.s3.transfer import TransferConfig
-from boto.exception import SDBResponseError
 from botocore.client import Config
 from botocore.exceptions import ClientError
-from mypy_boto3_s3 import S3Client, S3ServiceResource
 
-from toil.lib.aws import session
-from toil.lib.aws.utils import connection_reset, get_bucket_region
+from toil.lib.aws import AWSServerErrors, session
+from toil.lib.aws.utils import connection_error, get_bucket_region
 from toil.lib.compatibility import compat_bytes
-from toil.lib.retry import (DEFAULT_DELAYS,
-                            DEFAULT_TIMEOUT,
-                            ErrorCondition,
-                            get_error_code,
-                            get_error_message,
-                            get_error_status,
-                            old_retry,
-                            retry)
+from toil.lib.retry import (
+    DEFAULT_DELAYS,
+    DEFAULT_TIMEOUT,
+    get_error_code,
+    get_error_message,
+    get_error_status,
+    old_retry,
+    retry,
+)
+
+if TYPE_CHECKING:
+    from mypy_boto3_s3 import S3ServiceResource
+    from mypy_boto3_sdb.type_defs import AttributeTypeDef, ItemTypeDef
 
 logger = logging.getLogger(__name__)
 
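
Moving the mypy_boto3_* imports under TYPE_CHECKING turns those stub-only packages into a type-check-time dependency rather than a runtime one; the trade-off is that annotations referring to them must become string literals, as later hunks do with "S3ServiceResource" and "S3Client". A minimal sketch of the pattern (describe() is a hypothetical function, not toil code):

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Only imported by type checkers; the package may be absent at runtime.
        from mypy_boto3_s3 import S3ServiceResource

    def describe(resource: "S3ServiceResource") -> str:
        # The quoted annotation is never evaluated at runtime.
        return resource.meta.client.meta.region_name
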
@@ -46,10 +49,10 @@ logger = logging.getLogger(__name__)
 # also need to set a special flag to make sure we don't use the generic
 # s3.amazonaws.com for us-east-1, or else we might not actually end up talking
 # to us-east-1 when a bucket is there.
-DIAL_SPECIFIC_REGION_CONFIG = Config(s3={
-    'addressing_style': 'path',
-    'us_east_1_regional_endpoint': 'regional'
-})
+DIAL_SPECIFIC_REGION_CONFIG = Config(
+    s3={"addressing_style": "path", "us_east_1_regional_endpoint": "regional"}
+)
+
 
 class SDBHelper:
     """
@@ -89,6 +92,7 @@ class SDBHelper:
     True
 
     """
+
     # The SDB documentation is not clear as to whether the attribute value size limit of 1024
     # applies to the base64-encoded value or the raw value. It suggests that responses are
     # automatically encoded from which I conclude that the limit should apply to the raw,
@@ -101,8 +105,8 @@ class SDBHelper:
     maxValueSize = 1024
     maxRawValueSize = maxValueSize * 3 // 4
     # Just make sure we don't have a problem with padding or integer truncation:
-    assert len(base64.b64encode(b' ' * maxRawValueSize)) == 1024
-    assert len(base64.b64encode(b' ' * (1 + maxRawValueSize))) > 1024
+    assert len(base64.b64encode(b" " * maxRawValueSize)) == 1024
+    assert len(base64.b64encode(b" " * (1 + maxRawValueSize))) > 1024
 
     @classmethod
     def _reservedAttributes(cls):
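
The maxRawValueSize arithmetic follows from base64 expanding every 3 raw bytes into 4 output characters, so SimpleDB's 1024-character attribute limit leaves room for 1024 * 3 // 4 = 768 raw bytes per chunk. The same check as the asserts above, in standalone form:

    import base64

    max_value_size = 1024                         # SimpleDB per-attribute limit
    max_raw_value_size = max_value_size * 3 // 4  # = 768 raw bytes
    assert len(base64.b64encode(b"x" * max_raw_value_size)) == 1024
    assert len(base64.b64encode(b"x" * (max_raw_value_size + 1))) > 1024
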
@@ -118,35 +122,79 @@ class SDBHelper:
 
     @classmethod
     def maxBinarySize(cls, extraReservedChunks=0):
-        return (cls._maxChunks() - extraReservedChunks) * cls.maxRawValueSize - 1  # for the 'C' or 'U' prefix
+        return (
+            cls._maxChunks() - extraReservedChunks
+        ) * cls.maxRawValueSize - 1  # for the 'C' or 'U' prefix
 
     @classmethod
     def _maxEncodedSize(cls):
         return cls._maxChunks() * cls.maxValueSize
 
     @classmethod
-    def binaryToAttributes(cls, binary):
+    def binaryToAttributes(cls, binary) -> dict[str, str]:
         """
         Turn a bytestring, or None, into SimpleDB attributes.
         """
-        if binary is None: return {'numChunks': 0}
+        if binary is None:
+            return {"numChunks": "0"}
         assert isinstance(binary, bytes)
         assert len(binary) <= cls.maxBinarySize()
         # The use of compression is just an optimization. We can't include it in the maxValueSize
         # computation because the compression ratio depends on the input.
         compressed = bz2.compress(binary)
         if len(compressed) > len(binary):
-            compressed = b'U' + binary
+            compressed = b"U" + binary
         else:
-            compressed = b'C' + compressed
+            compressed = b"C" + compressed
         encoded = base64.b64encode(compressed)
         assert len(encoded) <= cls._maxEncodedSize()
         n = cls.maxValueSize
-        chunks = (encoded[i:i + n] for i in range(0, len(encoded), n))
-        attributes = {cls._chunkName(i): chunk for i, chunk in enumerate(chunks)}
-        attributes.update({'numChunks': len(attributes)})
+        chunks = (encoded[i : i + n] for i in range(0, len(encoded), n))
+        attributes = {
+            cls._chunkName(i): chunk.decode("utf-8") for i, chunk in enumerate(chunks)
+        }
+        attributes.update({"numChunks": str(len(attributes))})
         return attributes
 
+    @classmethod
+    def attributeDictToList(
+        cls, attributes: dict[str, str]
+    ) -> list["AttributeTypeDef"]:
+        """
+        Convert the attribute dict (ex: from binaryToAttributes) into a list of attribute typed dicts
+        to be compatible with boto3 argument syntax
+        :param attributes: Dict[str, str], attribute in object form
+        :return: list of attributes in typed dict form
+        """
+        return [{"Name": name, "Value": value} for name, value in attributes.items()]
+
+    @classmethod
+    def attributeListToDict(
+        cls, attributes: list["AttributeTypeDef"]
+    ) -> dict[str, str]:
+        """
+        Convert the attribute boto3 representation of list of attribute typed dicts
+        back to a dictionary with name, value pairs
+        :param attribute: attribute in typed dict form
+        :return: Dict[str, str], attribute in dict form
+        """
+        return {attribute["Name"]: attribute["Value"] for attribute in attributes}
+
+    @classmethod
+    def get_attributes_from_item(
+        cls, item: "ItemTypeDef", keys: list[str]
+    ) -> list[Optional[str]]:
+        return_values: list[Optional[str]] = [None for _ in keys]
+        mapped_indices: dict[str, int] = {
+            name: index for index, name in enumerate(keys)
+        }
+        for attribute in item["Attributes"]:
+            name = attribute["Name"]
+            value = attribute["Value"]
+            if name in mapped_indices:
+                return_values[mapped_indices[name]] = value
+        return return_values
+
     @classmethod
     def _chunkName(cls, i):
         return str(i).zfill(3)
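
Together, the new helpers convert between the plain dict form that binaryToAttributes() produces and the list-of-typed-dicts form that boto3's SimpleDB calls expect. A minimal round-trip sketch, assuming toil 8.0.0 is installed (the payload is made up):

    from toil.jobStores.aws.utils import SDBHelper

    attrs = SDBHelper.binaryToAttributes(b"some job payload")
    # e.g. {"000": "<base64 chunk>", "numChunks": "1"}
    typed = SDBHelper.attributeDictToList(attrs)
    # e.g. [{"Name": "000", "Value": "<base64 chunk>"},
    #       {"Name": "numChunks", "Value": "1"}]
    assert SDBHelper.attributeListToDict(typed) == attrs
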
@@ -162,26 +210,35 @@ class SDBHelper:
         Assuming that binaryToAttributes() is used with SDB's PutAttributes, the return value of
         this method could be used to detect the presence/absence of an item in SDB.
         """
-        return 'numChunks'
+        return "numChunks"
 
     @classmethod
-    def attributesToBinary(cls, attributes):
+    def attributesToBinary(
+        cls, attributes: list["AttributeTypeDef"]
+    ) -> tuple[bytes, int]:
         """
         :rtype: (str|None,int)
         :return: the binary data and the number of chunks it was composed from
         """
-        chunks = [(int(k), v) for k, v in attributes.items() if cls._isValidChunkName(k)]
+        chunks = []
+        numChunks: int = 0
+        for attribute in attributes:
+            name = attribute["Name"]
+            value = attribute["Value"]
+            if cls._isValidChunkName(name):
+                chunks.append((int(name), value))
+            if name == "numChunks":
+                numChunks = int(value)
         chunks.sort()
-        numChunks = int(attributes['numChunks'])
         if numChunks:
-            serializedJob = b''.join(v.encode() for k, v in chunks)
+            serializedJob = b"".join(v.encode() for k, v in chunks)
             compressed = base64.b64decode(serializedJob)
-            if compressed[0] == b'C'[0]:
+            if compressed[0] == b"C"[0]:
                 binary = bz2.decompress(compressed[1:])
-            elif compressed[0] == b'U'[0]:
+            elif compressed[0] == b"U"[0]:
                 binary = compressed[1:]
             else:
-                raise RuntimeError(f'Unexpected prefix {compressed[0]}')
+                raise RuntimeError(f"Unexpected prefix {compressed[0]}")
         else:
             binary = None
         return binary, numChunks
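
The 'C'/'U' prefix scheme compresses only when compression actually shrinks the payload, and attributesToBinary() dispatches on that prefix when decoding. A self-contained sketch of the scheme itself (standard library only, not toil's API):

    import base64
    import bz2

    def encode(binary: bytes) -> bytes:
        # Compress only when it helps; tag with 'C' (compressed) or 'U' (raw).
        compressed = bz2.compress(binary)
        tagged = b"U" + binary if len(compressed) > len(binary) else b"C" + compressed
        return base64.b64encode(tagged)

    def decode(encoded: bytes) -> bytes:
        tagged = base64.b64decode(encoded)
        return bz2.decompress(tagged[1:]) if tagged[:1] == b"C" else tagged[1:]

    assert decode(encode(b"x" * 10_000)) == b"x" * 10_000  # compresses well
    assert decode(encode(b"tiny")) == b"tiny"              # stays uncompressed
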
@@ -192,16 +249,15 @@ def fileSizeAndTime(localFilePath):
     return file_stat.st_size, file_stat.st_mtime
 
 
-@retry(errors=[ErrorCondition(
-    error=ClientError,
-    error_codes=[404, 500, 502, 503, 504]
-)])
-def uploadFromPath(localFilePath: str,
-                   resource,
-                   bucketName: str,
-                   fileID: str,
-                   headerArgs: Optional[dict] = None,
-                   partSize: int = 50 << 20):
+@retry(errors=[AWSServerErrors])
+def uploadFromPath(
+    localFilePath: str,
+    resource,
+    bucketName: str,
+    fileID: str,
+    headerArgs: Optional[dict] = None,
+    partSize: int = 50 << 20,
+):
     """
     Uploads a file to s3, using multipart uploading if applicable
 
@@ -220,9 +276,13 @@ def uploadFromPath(localFilePath: str,
     client = resource.meta.client
     file_size, file_time = fileSizeAndTime(localFilePath)
 
-    version = uploadFile(localFilePath, resource, bucketName, fileID, headerArgs, partSize)
-    info = client.head_object(Bucket=bucketName, Key=compat_bytes(fileID), VersionId=version, **headerArgs)
-    size = info.get('ContentLength')
+    version = uploadFile(
+        localFilePath, resource, bucketName, fileID, headerArgs, partSize
+    )
+    info = client.head_object(
+        Bucket=bucketName, Key=compat_bytes(fileID), VersionId=version, **headerArgs
+    )
+    size = info.get("ContentLength")
 
     assert size == file_size
 
@@ -231,16 +291,15 @@ def uploadFromPath(localFilePath: str,
     return version
 
 
-@retry(errors=[ErrorCondition(
-    error=ClientError,
-    error_codes=[404, 500, 502, 503, 504]
-)])
-def uploadFile(readable,
-               resource,
-               bucketName: str,
-               fileID: str,
-               headerArgs: Optional[dict] = None,
-               partSize: int = 50 << 20):
+@retry(errors=[AWSServerErrors])
+def uploadFile(
+    readable,
+    resource,
+    bucketName: str,
+    fileID: str,
+    headerArgs: Optional[dict] = None,
+    partSize: int = 50 << 20,
+):
     """
     Upload a readable object to s3, using multipart uploading if applicable.
     :param readable: a readable stream or a file path to upload to s3
@@ -256,29 +315,32 @@ def uploadFile(readable,
 
     client = resource.meta.client
     config = TransferConfig(
-        multipart_threshold=partSize,
-        multipart_chunksize=partSize,
-        use_threads=True
+        multipart_threshold=partSize, multipart_chunksize=partSize, use_threads=True
     )
     if isinstance(readable, str):
-        client.upload_file(Filename=readable,
-                           Bucket=bucketName,
-                           Key=compat_bytes(fileID),
-                           ExtraArgs=headerArgs,
-                           Config=config)
+        client.upload_file(
+            Filename=readable,
+            Bucket=bucketName,
+            Key=compat_bytes(fileID),
+            ExtraArgs=headerArgs,
+            Config=config,
+        )
     else:
-        client.upload_fileobj(Fileobj=readable,
-                              Bucket=bucketName,
-                              Key=compat_bytes(fileID),
-                              ExtraArgs=headerArgs,
-                              Config=config)
+        client.upload_fileobj(
+            Fileobj=readable,
+            Bucket=bucketName,
+            Key=compat_bytes(fileID),
+            ExtraArgs=headerArgs,
+            Config=config,
+        )
 
     # Wait until the object exists before calling head_object
     object_summary = resource.ObjectSummary(bucketName, compat_bytes(fileID))
     object_summary.wait_until_exists(**headerArgs)
 
     info = client.head_object(Bucket=bucketName, Key=compat_bytes(fileID), **headerArgs)
-    return info.get('VersionId', None)
+    return info.get("VersionId", None)
+
 
 class ServerSideCopyProhibitedError(RuntimeError):
     """
@@ -286,20 +348,20 @@ class ServerSideCopyProhibitedError(RuntimeError):
     insists that you pay to download and upload the data yourself instead.
     """
 
-@retry(errors=[ErrorCondition(
-    error=ClientError,
-    error_codes=[404, 500, 502, 503, 504]
-)])
-def copyKeyMultipart(resource: S3ServiceResource,
-                     srcBucketName: str,
-                     srcKeyName: str,
-                     srcKeyVersion: str,
-                     dstBucketName: str,
-                     dstKeyName: str,
-                     sseAlgorithm: Optional[str] = None,
-                     sseKey: Optional[str] = None,
-                     copySourceSseAlgorithm: Optional[str] = None,
-                     copySourceSseKey: Optional[str] = None):
+
+@retry(errors=[AWSServerErrors])
+def copyKeyMultipart(
+    resource: "S3ServiceResource",
+    srcBucketName: str,
+    srcKeyName: str,
+    srcKeyVersion: str,
+    dstBucketName: str,
+    dstKeyName: str,
+    sseAlgorithm: Optional[str] = None,
+    sseKey: Optional[str] = None,
+    copySourceSseAlgorithm: Optional[str] = None,
+    copySourceSseKey: Optional[str] = None,
+):
     """
     Copies a key from a source key to a destination key in multiple parts. Note that if the
     destination key exists it will be overwritten implicitly, and if it does not exist a new
@@ -336,9 +398,12 @@ def copyKeyMultipart(resource: S3ServiceResource,
     """
    dstBucket = resource.Bucket(compat_bytes(dstBucketName))
     dstObject = dstBucket.Object(compat_bytes(dstKeyName))
-    copySource = {'Bucket': compat_bytes(srcBucketName), 'Key': compat_bytes(srcKeyName)}
+    copySource = {
+        "Bucket": compat_bytes(srcBucketName),
+        "Key": compat_bytes(srcKeyName),
+    }
     if srcKeyVersion is not None:
-        copySource['VersionId'] = compat_bytes(srcKeyVersion)
+        copySource["VersionId"] = compat_bytes(srcKeyVersion)
 
     # Get a client to the source region, which may not be the same as the one
     # this resource is connected to. We should probably talk to it for source
@@ -346,12 +411,10 @@ def copyKeyMultipart(resource: S3ServiceResource,
     # not wherever the bucket virtual hostnames go.
     source_region = get_bucket_region(srcBucketName)
     source_client = cast(
-        S3Client,
+        "S3Client",
         session.client(
-            's3',
-            region_name=source_region,
-            config=DIAL_SPECIFIC_REGION_CONFIG
-        )
+            "s3", region_name=source_region, config=DIAL_SPECIFIC_REGION_CONFIG
+        ),
     )
 
     # The boto3 functions don't allow passing parameters as None to
@@ -360,19 +423,28 @@ def copyKeyMultipart(resource: S3ServiceResource,
     # required.
     destEncryptionArgs = {}
     if sseKey is not None:
-        destEncryptionArgs.update({'SSECustomerAlgorithm': sseAlgorithm,
-                                   'SSECustomerKey': sseKey})
+        destEncryptionArgs.update(
+            {"SSECustomerAlgorithm": sseAlgorithm, "SSECustomerKey": sseKey}
+        )
     copyEncryptionArgs = {}
     if copySourceSseKey is not None:
-        copyEncryptionArgs.update({'CopySourceSSECustomerAlgorithm': copySourceSseAlgorithm,
-                                   'CopySourceSSECustomerKey': copySourceSseKey})
+        copyEncryptionArgs.update(
+            {
+                "CopySourceSSECustomerAlgorithm": copySourceSseAlgorithm,
+                "CopySourceSSECustomerKey": copySourceSseKey,
+            }
+        )
     copyEncryptionArgs.update(destEncryptionArgs)
 
     try:
         # Kick off a server-side copy operation
-        dstObject.copy(copySource, SourceClient=source_client, ExtraArgs=copyEncryptionArgs)
+        dstObject.copy(
+            copySource, SourceClient=source_client, ExtraArgs=copyEncryptionArgs
+        )
     except ClientError as e:
-        if get_error_code(e) == 'AccessDenied' and 'cross-region' in get_error_message(e):
+        if get_error_code(e) == "AccessDenied" and "cross-region" in get_error_message(
+            e
+        ):
             # We have this problem: <https://aws.amazon.com/premiumsupport/knowledge-center/s3-troubleshoot-copy-between-buckets/#Cross-Region_request_issues_with_VPC_endpoints_for_Amazon_S3>
             # The Internet and AWS docs say that we just can't do a
             # cross-region CopyObject from inside a VPC with an endpoint. The
@@ -382,13 +454,16 @@ def copyKeyMultipart(resource: S3ServiceResource,
             # the source region's API servers, they reject it and tell us to
             # talk to the destination region's API servers instead. Which we
             # can't reach.
-            logger.error('Amazon is refusing to perform a server-side copy of %s: %s', copySource, e)
+            logger.error(
+                "Amazon is refusing to perform a server-side copy of %s: %s",
+                copySource,
+                e,
+            )
             raise ServerSideCopyProhibitedError()
         else:
             # Some other ClientError happened
             raise
 
-
     # Wait until the object exists before calling head_object
     object_summary = resource.ObjectSummary(dstObject.bucket_name, dstObject.key)
     object_summary.wait_until_exists(**destEncryptionArgs)
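
A hedged usage sketch for the copy path and the fallback the new exception enables (bucket and key names are hypothetical; requires AWS credentials and toil 8.0.0):

    import boto3
    from toil.jobStores.aws.utils import (
        ServerSideCopyProhibitedError,
        copyKeyMultipart,
    )

    s3 = boto3.resource("s3")
    try:
        version = copyKeyMultipart(
            s3,
            srcBucketName="src-bucket",
            srcKeyName="some/key",
            srcKeyVersion=None,
            dstBucketName="dst-bucket",
            dstKeyName="some/key",
        )
    except ServerSideCopyProhibitedError:
        # e.g. a cross-region copy blocked by a VPC S3 endpoint; the caller
        # has to download and re-upload the data itself.
        ...
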
@@ -398,14 +473,15 @@ def copyKeyMultipart(resource: S3ServiceResource,
     # after, leaving open the possibility that it may have been
     # modified again in the few seconds since the copy finished. There
     # isn't much we can do about it.
-    info = resource.meta.client.head_object(Bucket=dstObject.bucket_name,
-                                            Key=dstObject.key,
-                                            **destEncryptionArgs)
-    return info.get('VersionId', None)
+    info = resource.meta.client.head_object(
+        Bucket=dstObject.bucket_name, Key=dstObject.key, **destEncryptionArgs
+    )
+    return info.get("VersionId", None)
 
 
-def _put_attributes_using_post(self, domain_or_name, item_name, attributes,
-                               replace=True, expected_value=None):
+def _put_attributes_using_post(
+    self, domain_or_name, item_name, attributes, replace=True, expected_value=None
+):
     """
     Monkey-patched version of SDBConnection.put_attributes that uses POST instead of GET
 
@@ -415,13 +491,12 @@ def _put_attributes_using_post(self, domain_or_name, item_name, attributes,
     https://github.com/BD2KGenomics/toil/issues/502
     """
     domain, domain_name = self.get_domain_and_name(domain_or_name)
-    params = {'DomainName': domain_name,
-              'ItemName': item_name}
+    params = {"DomainName": domain_name, "ItemName": item_name}
     self._build_name_value_list(params, attributes, replace)
     if expected_value:
         self._build_expected_value(params, expected_value)
     # The addition of the verb keyword argument is the only difference to put_attributes (Hannes)
-    return self.get_status('PutAttributes', params, verb='POST')
+    return self.get_status("PutAttributes", params, verb="POST")
 
 
 def monkeyPatchSdbConnection(sdb):
@@ -430,6 +505,7 @@ def monkeyPatchSdbConnection(sdb):
     """
     sdb.put_attributes = types.MethodType(_put_attributes_using_post, sdb)
 
+
 def sdb_unavailable(e):
     # Since we're checking against a collection here we absolutely need an
     # integer status code. This is probably a BotoServerError, but other 500s
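
types.MethodType is what lets monkeyPatchSdbConnection() bind the patched function to one connection object without touching its class. A self-contained illustration of that binding (the Greeter class is made up):

    import types

    class Greeter:
        pass

    def shout(self):
        return "HELLO"

    g = Greeter()
    g.greet = types.MethodType(shout, g)  # bound to this instance only
    assert g.greet() == "HELLO"
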
@@ -438,23 +514,28 @@ def sdb_unavailable(e):
 
 
 def no_such_sdb_domain(e):
-    return (isinstance(e, SDBResponseError)
-            and e.error_code
-            and e.error_code.endswith('NoSuchDomain'))
+    return (
+        isinstance(e, ClientError)
+        and get_error_code(e)
+        and get_error_code(e).endswith("NoSuchDomain")
+    )
 
 
 def retryable_ssl_error(e):
     # https://github.com/BD2KGenomics/toil/issues/978
-    return isinstance(e, SSLError) and e.reason == 'DECRYPTION_FAILED_OR_BAD_RECORD_MAC'
+    return isinstance(e, SSLError) and e.reason == "DECRYPTION_FAILED_OR_BAD_RECORD_MAC"
 
 
 def retryable_sdb_errors(e):
-    return (sdb_unavailable(e)
-            or no_such_sdb_domain(e)
-            or connection_reset(e)
-            or retryable_ssl_error(e))
+    return (
+        sdb_unavailable(e)
+        or no_such_sdb_domain(e)
+        or connection_error(e)
+        or retryable_ssl_error(e)
+    )
 
 
-def retry_sdb(delays=DEFAULT_DELAYS, timeout=DEFAULT_TIMEOUT, predicate=retryable_sdb_errors):
+def retry_sdb(
+    delays=DEFAULT_DELAYS, timeout=DEFAULT_TIMEOUT, predicate=retryable_sdb_errors
+):
     return old_retry(delays=delays, timeout=timeout, predicate=predicate)
-
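
These predicates feed toil's generator-based old_retry() helper, so retry_sdb() is used as a retrying loop around individual SimpleDB calls. A sketch of that idiom as used elsewhere in the AWS job store (hedged; the domain and item names are made up, and boto3's "sdb" client needs AWS credentials):

    import boto3
    from toil.jobStores.aws.utils import retry_sdb

    sdb = boto3.client("sdb", region_name="us-west-2")
    for attempt in retry_sdb():
        with attempt:
            # Retried on 5xx, NoSuchDomain, connection errors, and the known
            # retryable SSL failure, per retryable_sdb_errors() above.
            item = sdb.get_attributes(DomainName="toil-registry", ItemName="job-0")
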
toil/jobStores/conftest.py

@@ -17,7 +17,8 @@
 collect_ignore = []
 
 try:
-    import boto
-    print(boto.__file__)  # prevent this import from being removed
+    import boto3
+
+    print(boto3.__file__)  # prevent this import from being removed
 except ImportError:
     collect_ignore.append("aws")