toil-6.1.0a1-py3-none-any.whl → toil-8.0.0-py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
Files changed (193)
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
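
The hunks below are from toil/jobStores/aws/jobStore.py (entry 36 in the list above). The central change is that the AWS job store drops the legacy boto/boto.sdb bindings and talks to SimpleDB through a boto3 client: domain objects (jobsDomain, filesDomain) become plain domain names, every call passes DomainName/ItemName explicitly, and responses are unpacked from lists of attribute dicts. The following is a minimal sketch of that call pattern, not code from the diff; it assumes AWS credentials are configured, and the domain, item, and region names are illustrative placeholders.

# Sketch (assumptions noted above): the boto3 SimpleDB call pattern that
# replaces the old boto.sdb Domain objects in the updated job store.
from toil.lib.aws.session import establish_boto3_session

session = establish_boto3_session()
db = session.client(service_name="sdb", region_name="us-west-2")  # example region

db.create_domain(DomainName="example--jobs")  # old: db.create_domain(name)

# old: domain.put_attributes(item_name, {"exists": "True"}) took a plain dict;
# boto3 takes a list of {"Name", "Value", "Replace"} attribute dicts.
db.put_attributes(
    DomainName="example--jobs",
    ItemName="example-item",
    Attributes=[{"Name": "exists", "Value": "True", "Replace": True}],
)

# old: domain.get_attributes(...) returned a dict-like item; boto3 returns a
# response whose "Attributes" key is a list and may be missing entirely,
# hence the .get("Attributes", []) guards seen throughout the diff.
response = db.get_attributes(
    DomainName="example--jobs",
    ItemName="example-item",
    AttributeNames=["exists"],
    ConsistentRead=True,
)
attributes = response.get("Attributes", [])

The same shift appears in the other SimpleDB operations: select results are now paginated via boto3_pager, and batch_put_attributes/batch_delete_attributes take Items lists instead of dicts keyed by item name.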
@@ -21,47 +21,53 @@ import reprlib
21
21
  import stat
22
22
  import time
23
23
  import uuid
24
+ from collections.abc import Generator
24
25
  from contextlib import contextmanager
25
26
  from io import BytesIO
26
- from typing import List, Optional, IO
27
+ from typing import IO, TYPE_CHECKING, Optional, Union, cast
27
28
  from urllib.parse import ParseResult, parse_qs, urlencode, urlsplit, urlunsplit
28
29
 
29
- import boto.s3.connection
30
- import boto.sdb
31
- from boto.exception import SDBResponseError
32
30
  from botocore.exceptions import ClientError
33
31
 
34
32
  import toil.lib.encryption as encryption
35
33
  from toil.fileStores import FileID
36
- from toil.jobStores.abstractJobStore import (AbstractJobStore,
37
- ConcurrentFileModificationException,
38
- JobStoreExistsException,
39
- NoSuchFileException,
40
- NoSuchJobException,
41
- NoSuchJobStoreException)
42
- from toil.jobStores.aws.utils import (SDBHelper,
43
- ServerSideCopyProhibitedError,
44
- copyKeyMultipart,
45
- fileSizeAndTime,
46
- monkeyPatchSdbConnection,
47
- no_such_sdb_domain,
48
- retry_sdb,
49
- sdb_unavailable,
50
- uploadFile,
51
- uploadFromPath)
52
- from toil.jobStores.utils import (ReadablePipe,
53
- ReadableTransformingPipe,
54
- WritablePipe)
34
+ from toil.job import Job, JobDescription
35
+ from toil.jobStores.abstractJobStore import (
36
+ AbstractJobStore,
37
+ ConcurrentFileModificationException,
38
+ JobStoreExistsException,
39
+ LocatorException,
40
+ NoSuchFileException,
41
+ NoSuchJobException,
42
+ NoSuchJobStoreException,
43
+ )
44
+ from toil.jobStores.aws.utils import (
45
+ SDBHelper,
46
+ ServerSideCopyProhibitedError,
47
+ copyKeyMultipart,
48
+ fileSizeAndTime,
49
+ no_such_sdb_domain,
50
+ retry_sdb,
51
+ sdb_unavailable,
52
+ uploadFile,
53
+ uploadFromPath,
54
+ )
55
+ from toil.jobStores.utils import ReadablePipe, ReadableTransformingPipe, WritablePipe
55
56
  from toil.lib.aws import build_tag_dict_from_env
56
57
  from toil.lib.aws.session import establish_boto3_session
57
- from toil.lib.aws.utils import (create_s3_bucket,
58
- enable_public_objects,
59
- flatten_tags,
60
- get_bucket_region,
61
- get_object_for_url,
62
- list_objects_for_url,
63
- retry_s3,
64
- retryable_s3_errors)
58
+ from toil.lib.aws.utils import (
59
+ NoBucketLocationError,
60
+ boto3_pager,
61
+ create_s3_bucket,
62
+ enable_public_objects,
63
+ flatten_tags,
64
+ get_bucket_region,
65
+ get_item_from_attributes,
66
+ get_object_for_url,
67
+ list_objects_for_url,
68
+ retry_s3,
69
+ retryable_s3_errors,
70
+ )
65
71
  from toil.lib.compatibility import compat_bytes
66
72
  from toil.lib.ec2nodes import EC2Regions
67
73
  from toil.lib.exceptions import panic
@@ -70,9 +76,21 @@ from toil.lib.memoize import strict_bool
70
76
  from toil.lib.objects import InnerClass
71
77
  from toil.lib.retry import get_error_code, get_error_status, retry
72
78
 
79
+ if TYPE_CHECKING:
80
+ from mypy_boto3_sdb.type_defs import (
81
+ AttributeTypeDef,
82
+ DeletableItemTypeDef,
83
+ ItemTypeDef,
84
+ ReplaceableAttributeTypeDef,
85
+ ReplaceableItemTypeDef,
86
+ UpdateConditionTypeDef,
87
+ )
88
+
89
+ from toil import Config
90
+
73
91
  boto3_session = establish_boto3_session()
74
- s3_boto3_resource = boto3_session.resource('s3')
75
- s3_boto3_client = boto3_session.client('s3')
92
+ s3_boto3_resource = boto3_session.resource("s3")
93
+ s3_boto3_client = boto3_session.client("s3")
76
94
  logger = logging.getLogger(__name__)
77
95
 
78
96
  # Sometimes we have to wait for multipart uploads to become real. How long
@@ -85,6 +103,13 @@ class ChecksumError(Exception):
85
103
  """Raised when a download from AWS does not contain the correct data."""
86
104
 
87
105
 
106
+ class DomainDoesNotExist(Exception):
107
+ """Raised when a domain that is expected to exist does not exist."""
108
+
109
+ def __init__(self, domain_name):
110
+ super().__init__(f"Expected domain {domain_name} to exist!")
111
+
112
+
88
113
  class AWSJobStore(AbstractJobStore):
89
114
  """
90
115
  A job store that uses Amazon's S3 for file storage and SimpleDB for storing job info and
@@ -98,14 +123,14 @@ class AWSJobStore(AbstractJobStore):
98
123
  # URLs where the may interfere with the certificate common name. We use a double
99
124
  # underscore as a separator instead.
100
125
  #
101
- bucketNameRe = re.compile(r'^[a-z0-9][a-z0-9-]+[a-z0-9]$')
126
+ bucketNameRe = re.compile(r"^[a-z0-9][a-z0-9-]+[a-z0-9]$")
102
127
 
103
128
  # See http://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html
104
129
  #
105
130
  minBucketNameLen = 3
106
131
  maxBucketNameLen = 63
107
132
  maxNameLen = 10
108
- nameSeparator = '--'
133
+ nameSeparator = "--"
109
134
 
110
135
  def __init__(self, locator: str, partSize: int = 50 << 20) -> None:
111
136
  """
@@ -116,37 +141,49 @@ class AWSJobStore(AbstractJobStore):
116
141
  whole file
117
142
  """
118
143
  super().__init__(locator)
119
- region, namePrefix = locator.split(':')
144
+ region, namePrefix = locator.split(":")
120
145
  regions = EC2Regions.keys()
121
146
  if region not in regions:
122
147
  raise ValueError(f'Region "{region}" is not one of: {regions}')
123
148
  if not self.bucketNameRe.match(namePrefix):
124
- raise ValueError("Invalid name prefix '%s'. Name prefixes must contain only digits, "
125
- "hyphens or lower-case letters and must not start or end in a "
126
- "hyphen." % namePrefix)
149
+ raise ValueError(
150
+ "Invalid name prefix '%s'. Name prefixes must contain only digits, "
151
+ "hyphens or lower-case letters and must not start or end in a "
152
+ "hyphen." % namePrefix
153
+ )
127
154
  # Reserve 13 for separator and suffix
128
- if len(namePrefix) > self.maxBucketNameLen - self.maxNameLen - len(self.nameSeparator):
129
- raise ValueError("Invalid name prefix '%s'. Name prefixes may not be longer than 50 "
130
- "characters." % namePrefix)
131
- if '--' in namePrefix:
132
- raise ValueError("Invalid name prefix '%s'. Name prefixes may not contain "
133
- "%s." % (namePrefix, self.nameSeparator))
134
- logger.debug("Instantiating %s for region %s and name prefix '%s'",
135
- self.__class__, region, namePrefix)
155
+ if len(namePrefix) > self.maxBucketNameLen - self.maxNameLen - len(
156
+ self.nameSeparator
157
+ ):
158
+ raise ValueError(
159
+ "Invalid name prefix '%s'. Name prefixes may not be longer than 50 "
160
+ "characters." % namePrefix
161
+ )
162
+ if "--" in namePrefix:
163
+ raise ValueError(
164
+ "Invalid name prefix '%s'. Name prefixes may not contain "
165
+ "%s." % (namePrefix, self.nameSeparator)
166
+ )
167
+ logger.debug(
168
+ "Instantiating %s for region %s and name prefix '%s'",
169
+ self.__class__,
170
+ region,
171
+ namePrefix,
172
+ )
136
173
  self.region = region
137
- self.namePrefix = namePrefix
138
- self.partSize = partSize
139
- self.jobsDomain = None
140
- self.filesDomain = None
141
- self.filesBucket = None
142
- self.db = self._connectSimpleDB()
143
-
144
- self.s3_resource = boto3_session.resource('s3', region_name=self.region)
174
+ self.name_prefix = namePrefix
175
+ self.part_size = partSize
176
+ self.jobs_domain_name: Optional[str] = None
177
+ self.files_domain_name: Optional[str] = None
178
+ self.files_bucket = None
179
+ self.db = boto3_session.client(service_name="sdb", region_name=region)
180
+
181
+ self.s3_resource = boto3_session.resource("s3", region_name=self.region)
145
182
  self.s3_client = self.s3_resource.meta.client
146
183
 
147
- def initialize(self, config):
184
+ def initialize(self, config: "Config") -> None:
148
185
  if self._registered:
149
- raise JobStoreExistsException(self.locator)
186
+ raise JobStoreExistsException(self.locator, "aws")
150
187
  self._registered = None
151
188
  try:
152
189
  self._bind(create=True)
@@ -159,36 +196,45 @@ class AWSJobStore(AbstractJobStore):
159
196
  self._registered = True
160
197
 
161
198
  @property
162
- def sseKeyPath(self):
199
+ def sseKeyPath(self) -> Optional[str]:
163
200
  return self.config.sseKey
164
201
 
165
- def resume(self):
202
+ def resume(self) -> None:
166
203
  if not self._registered:
167
- raise NoSuchJobStoreException(self.locator)
204
+ raise NoSuchJobStoreException(self.locator, "aws")
168
205
  self._bind(create=False)
169
206
  super().resume()
170
207
 
171
- def _bind(self, create=False, block=True, check_versioning_consistency=True):
208
+ def _bind(
209
+ self,
210
+ create: bool = False,
211
+ block: bool = True,
212
+ check_versioning_consistency: bool = True,
213
+ ) -> None:
172
214
  def qualify(name):
173
215
  assert len(name) <= self.maxNameLen
174
- return self.namePrefix + self.nameSeparator + name
216
+ return self.name_prefix + self.nameSeparator + name
175
217
 
176
218
  # The order in which this sequence of events happens is important. We can easily handle the
177
219
  # inability to bind a domain, but it is a little harder to handle some cases of binding the
178
220
  # jobstore bucket. Maintaining this order allows for an easier `destroy` method.
179
- if self.jobsDomain is None:
180
- self.jobsDomain = self._bindDomain(qualify('jobs'), create=create, block=block)
181
- if self.filesDomain is None:
182
- self.filesDomain = self._bindDomain(qualify('files'), create=create, block=block)
183
- if self.filesBucket is None:
184
- self.filesBucket = self._bindBucket(qualify('files'),
185
- create=create,
186
- block=block,
187
- versioning=True,
188
- check_versioning_consistency=check_versioning_consistency)
221
+ if self.jobs_domain_name is None:
222
+ self.jobs_domain_name = qualify("jobs")
223
+ self._bindDomain(self.jobs_domain_name, create=create, block=block)
224
+ if self.files_domain_name is None:
225
+ self.files_domain_name = qualify("files")
226
+ self._bindDomain(self.files_domain_name, create=create, block=block)
227
+ if self.files_bucket is None:
228
+ self.files_bucket = self._bindBucket(
229
+ qualify("files"),
230
+ create=create,
231
+ block=block,
232
+ versioning=True,
233
+ check_versioning_consistency=check_versioning_consistency,
234
+ )
189
235
 
190
236
  @property
191
- def _registered(self):
237
+ def _registered(self) -> Optional[bool]:
192
238
  """
193
239
  A optional boolean property indicating whether this job store is registered. The
194
240
  registry is the authority on deciding if a job store exists or not. If True, this job
@@ -205,55 +251,75 @@ class AWSJobStore(AbstractJobStore):
205
251
  # store destruction, indicates a job store in transition, reflecting the fact that 3.3.0
206
252
  # may leak buckets or domains even though the registry reports 'False' for them. We
207
253
  # can't handle job stores that were partially created by 3.3.0, though.
208
- registry_domain = self._bindDomain(domain_name='toil-registry',
209
- create=False,
210
- block=False)
211
- if registry_domain is None:
254
+ registry_domain_name = "toil-registry"
255
+ try:
256
+ self._bindDomain(
257
+ domain_name=registry_domain_name, create=False, block=False
258
+ )
259
+ except DomainDoesNotExist:
212
260
  return False
213
- else:
214
- for attempt in retry_sdb():
215
- with attempt:
216
- attributes = registry_domain.get_attributes(item_name=self.namePrefix,
217
- attribute_name='exists',
218
- consistent_read=True)
219
- try:
220
- exists = attributes['exists']
221
- except KeyError:
222
- return False
223
- else:
224
- if exists == 'True':
225
- return True
226
- elif exists == 'False':
227
- return None
228
- else:
229
- assert False
261
+
262
+ for attempt in retry_sdb():
263
+ with attempt:
264
+ get_result = self.db.get_attributes(
265
+ DomainName=registry_domain_name,
266
+ ItemName=self.name_prefix,
267
+ AttributeNames=["exists"],
268
+ ConsistentRead=True,
269
+ )
270
+ attributes: list["AttributeTypeDef"] = get_result.get(
271
+ "Attributes", []
272
+ ) # the documentation says 'Attributes' should always exist, but this is not true
273
+ exists: Optional[str] = get_item_from_attributes(
274
+ attributes=attributes, name="exists"
275
+ )
276
+ if exists is None:
277
+ return False
278
+ elif exists == "True":
279
+ return True
280
+ elif exists == "False":
281
+ return None
282
+ else:
283
+ assert False
230
284
 
231
285
  @_registered.setter
232
- def _registered(self, value):
233
-
234
- registry_domain = self._bindDomain(domain_name='toil-registry',
235
- # Only create registry domain when registering or
236
- # transitioning a store
237
- create=value is not False,
238
- block=False)
239
- if registry_domain is None and value is False:
286
+ def _registered(self, value: bool) -> None:
287
+ registry_domain_name = "toil-registry"
288
+ try:
289
+ self._bindDomain(
290
+ domain_name=registry_domain_name,
291
+ # Only create registry domain when registering or
292
+ # transitioning a store
293
+ create=value is not False,
294
+ block=False,
295
+ )
296
+ except DomainDoesNotExist:
240
297
  pass
241
298
  else:
242
299
  for attempt in retry_sdb():
243
300
  with attempt:
244
301
  if value is False:
245
- registry_domain.delete_attributes(item_name=self.namePrefix)
302
+ self.db.delete_attributes(
303
+ DomainName=registry_domain_name, ItemName=self.name_prefix
304
+ )
246
305
  else:
247
306
  if value is True:
248
- attributes = dict(exists='True')
307
+ attributes: list["ReplaceableAttributeTypeDef"] = [
308
+ {"Name": "exists", "Value": "True", "Replace": True}
309
+ ]
249
310
  elif value is None:
250
- attributes = dict(exists='False')
311
+ attributes = [
312
+ {"Name": "exists", "Value": "False", "Replace": True}
313
+ ]
251
314
  else:
252
315
  assert False
253
- registry_domain.put_attributes(item_name=self.namePrefix,
254
- attributes=attributes)
316
+ self.db.put_attributes(
317
+ DomainName=registry_domain_name,
318
+ ItemName=self.name_prefix,
319
+ Attributes=attributes,
320
+ )
255
321
 
256
- def _checkItem(self, item, enforce: bool = True):
322
+ def _checkItem(self, item: "ItemTypeDef", enforce: bool = True) -> None:
257
323
  """
258
324
  Make sure that the given SimpleDB item actually has the attributes we think it should.
259
325
 
@@ -261,32 +327,56 @@ class AWSJobStore(AbstractJobStore):
261
327
 
262
328
  If enforce is false, log but don't throw.
263
329
  """
264
-
265
- if "overlargeID" not in item:
266
- logger.error("overlargeID attribute isn't present: either SimpleDB entry is "
267
- "corrupt or jobstore is from an extremely old Toil: %s", item)
330
+ self._checkAttributes(item["Attributes"], enforce)
331
+
332
+ def _checkAttributes(
333
+ self, attributes: list["AttributeTypeDef"], enforce: bool = True
334
+ ) -> None:
335
+ if get_item_from_attributes(attributes=attributes, name="overlargeID") is None:
336
+ logger.error(
337
+ "overlargeID attribute isn't present: either SimpleDB entry is "
338
+ "corrupt or jobstore is from an extremely old Toil: %s",
339
+ attributes,
340
+ )
268
341
  if enforce:
269
- raise RuntimeError("encountered SimpleDB entry missing required attribute "
270
- "'overlargeID'; is your job store ancient?")
342
+ raise RuntimeError(
343
+ "encountered SimpleDB entry missing required attribute "
344
+ "'overlargeID'; is your job store ancient?"
345
+ )
271
346
 
272
- def _awsJobFromItem(self, item):
273
- self._checkItem(item)
274
- if item.get("overlargeID", None):
275
- assert self.file_exists(item["overlargeID"])
347
+ def _awsJobFromAttributes(self, attributes: list["AttributeTypeDef"]) -> Job:
348
+ """
349
+ Get a Toil Job object from attributes that are defined in an item from the DB
350
+ :param attributes: List of attributes
351
+ :return: Toil job
352
+ """
353
+ self._checkAttributes(attributes)
354
+ overlarge_id_value = get_item_from_attributes(
355
+ attributes=attributes, name="overlargeID"
356
+ )
357
+ if overlarge_id_value:
358
+ assert self.file_exists(overlarge_id_value)
276
359
  # This is an overlarge job, download the actual attributes
277
360
  # from the file store
278
361
  logger.debug("Loading overlarge job from S3.")
279
- with self.read_file_stream(item["overlargeID"]) as fh:
362
+ with self.read_file_stream(overlarge_id_value) as fh:
280
363
  binary = fh.read()
281
364
  else:
282
- binary, _ = SDBHelper.attributesToBinary(item)
365
+ binary, _ = SDBHelper.attributesToBinary(attributes)
283
366
  assert binary is not None
284
367
  job = pickle.loads(binary)
285
368
  if job is not None:
286
369
  job.assignConfig(self.config)
287
370
  return job
288
371
 
289
- def _awsJobToItem(self, job):
372
+ def _awsJobFromItem(self, item: "ItemTypeDef") -> Job:
373
+ """
374
+ Get a Toil Job object from an item from the DB
375
+ :return: Toil Job
376
+ """
377
+ return self._awsJobFromAttributes(item["Attributes"])
378
+
379
+ def _awsJobToAttributes(self, job: JobDescription) -> list["AttributeTypeDef"]:
290
380
  binary = pickle.dumps(job, protocol=pickle.HIGHEST_PROTOCOL)
291
381
  if len(binary) > SDBHelper.maxBinarySize(extraReservedChunks=1):
292
382
  # Store as an overlarge job in S3
@@ -297,66 +387,100 @@ class AWSJobStore(AbstractJobStore):
297
387
  else:
298
388
  item = SDBHelper.binaryToAttributes(binary)
299
389
  item["overlargeID"] = ""
300
- return item
390
+ return SDBHelper.attributeDictToList(item)
391
+
392
+ def _awsJobToItem(self, job: JobDescription, name: str) -> "ItemTypeDef":
393
+ return {"Name": name, "Attributes": self._awsJobToAttributes(job)}
301
394
 
302
395
  jobsPerBatchInsert = 25
303
396
 
304
397
  @contextmanager
305
- def batch(self):
398
+ def batch(self) -> None:
306
399
  self._batchedUpdates = []
307
400
  yield
308
- batches = [self._batchedUpdates[i:i + self.jobsPerBatchInsert] for i in
309
- range(0, len(self._batchedUpdates), self.jobsPerBatchInsert)]
401
+ batches = [
402
+ self._batchedUpdates[i : i + self.jobsPerBatchInsert]
403
+ for i in range(0, len(self._batchedUpdates), self.jobsPerBatchInsert)
404
+ ]
310
405
 
311
406
  for batch in batches:
407
+ items: list["ReplaceableItemTypeDef"] = []
312
408
  for jobDescription in batch:
409
+ item_attributes: list["ReplaceableAttributeTypeDef"] = []
313
410
  jobDescription.pre_update_hook()
314
- items = {compat_bytes(jobDescription.jobStoreID): self._awsJobToItem(jobDescription) for jobDescription in batch}
411
+ item_name = compat_bytes(jobDescription.jobStoreID)
412
+ got_job_attributes: list["AttributeTypeDef"] = self._awsJobToAttributes(
413
+ jobDescription
414
+ )
415
+ for each_attribute in got_job_attributes:
416
+ new_attribute: "ReplaceableAttributeTypeDef" = {
417
+ "Name": each_attribute["Name"],
418
+ "Value": each_attribute["Value"],
419
+ "Replace": True,
420
+ }
421
+ item_attributes.append(new_attribute)
422
+ items.append({"Name": item_name, "Attributes": item_attributes})
423
+
315
424
  for attempt in retry_sdb():
316
425
  with attempt:
317
- assert self.jobsDomain.batch_put_attributes(items)
426
+ self.db.batch_put_attributes(
427
+ DomainName=self.jobs_domain_name, Items=items
428
+ )
318
429
  self._batchedUpdates = None
319
430
 
320
- def assign_job_id(self, job_description):
431
+ def assign_job_id(self, job_description: JobDescription) -> None:
321
432
  jobStoreID = self._new_job_id()
322
- logger.debug("Assigning ID to job %s for '%s'",
323
- jobStoreID, '<no command>' if job_description.command is None else job_description.command)
433
+ logger.debug("Assigning ID to job %s", jobStoreID)
324
434
  job_description.jobStoreID = jobStoreID
325
435
 
326
- def create_job(self, job_description):
436
+ def create_job(self, job_description: JobDescription) -> JobDescription:
327
437
  if hasattr(self, "_batchedUpdates") and self._batchedUpdates is not None:
328
438
  self._batchedUpdates.append(job_description)
329
439
  else:
330
440
  self.update_job(job_description)
331
441
  return job_description
332
442
 
333
- def job_exists(self, job_id):
443
+ def job_exists(self, job_id: Union[bytes, str]) -> bool:
334
444
  for attempt in retry_sdb():
335
445
  with attempt:
336
- return bool(self.jobsDomain.get_attributes(
337
- item_name=compat_bytes(job_id),
338
- attribute_name=[SDBHelper.presenceIndicator()],
339
- consistent_read=True))
340
-
341
- def jobs(self):
342
- result = None
446
+ return (
447
+ len(
448
+ self.db.get_attributes(
449
+ DomainName=self.jobs_domain_name,
450
+ ItemName=compat_bytes(job_id),
451
+ AttributeNames=[SDBHelper.presenceIndicator()],
452
+ ConsistentRead=True,
453
+ ).get("Attributes", [])
454
+ )
455
+ > 0
456
+ )
457
+
458
+ def jobs(self) -> Generator[Job, None, None]:
459
+ job_items: Optional[list["ItemTypeDef"]] = None
343
460
  for attempt in retry_sdb():
344
461
  with attempt:
345
- result = list(self.jobsDomain.select(
346
- consistent_read=True,
347
- query="select * from `%s`" % self.jobsDomain.name))
348
- assert result is not None
349
- for jobItem in result:
462
+ job_items = boto3_pager(
463
+ self.db.select,
464
+ "Items",
465
+ ConsistentRead=True,
466
+ SelectExpression="select * from `%s`" % self.jobs_domain_name,
467
+ )
468
+ assert job_items is not None
469
+ for jobItem in job_items:
350
470
  yield self._awsJobFromItem(jobItem)
351
471
 
352
- def load_job(self, job_id):
353
- item = None
472
+ def load_job(self, job_id: FileID) -> Job:
473
+ item_attributes = None
354
474
  for attempt in retry_sdb():
355
475
  with attempt:
356
- item = self.jobsDomain.get_attributes(compat_bytes(job_id), consistent_read=True)
357
- if not item:
476
+ item_attributes = self.db.get_attributes(
477
+ DomainName=self.jobs_domain_name,
478
+ ItemName=compat_bytes(job_id),
479
+ ConsistentRead=True,
480
+ ).get("Attributes", [])
481
+ if not item_attributes:
358
482
  raise NoSuchJobException(job_id)
359
- job = self._awsJobFromItem(item)
483
+ job = self._awsJobFromAttributes(item_attributes)
360
484
  if job is None:
361
485
  raise NoSuchJobException(job_id)
362
486
  logger.debug("Loaded job %s", job_id)
@@ -365,10 +489,18 @@ class AWSJobStore(AbstractJobStore):
365
489
  def update_job(self, job_description):
366
490
  logger.debug("Updating job %s", job_description.jobStoreID)
367
491
  job_description.pre_update_hook()
368
- item = self._awsJobToItem(job_description)
492
+ job_attributes = self._awsJobToAttributes(job_description)
493
+ update_attributes: list["ReplaceableAttributeTypeDef"] = [
494
+ {"Name": attribute["Name"], "Value": attribute["Value"], "Replace": True}
495
+ for attribute in job_attributes
496
+ ]
369
497
  for attempt in retry_sdb():
370
498
  with attempt:
371
- assert self.jobsDomain.put_attributes(compat_bytes(job_description.jobStoreID), item)
499
+ self.db.put_attributes(
500
+ DomainName=self.jobs_domain_name,
501
+ ItemName=compat_bytes(job_description.jobStoreID),
502
+ Attributes=update_attributes,
503
+ )
372
504
 
373
505
  itemsPerBatchDelete = 25
374
506
 
@@ -377,49 +509,77 @@ class AWSJobStore(AbstractJobStore):
377
509
  logger.debug("Deleting job %s", job_id)
378
510
 
379
511
  # If the job is overlarge, delete its file from the filestore
380
- item = None
381
512
  for attempt in retry_sdb():
382
513
  with attempt:
383
- item = self.jobsDomain.get_attributes(compat_bytes(job_id), consistent_read=True)
514
+ attributes = self.db.get_attributes(
515
+ DomainName=self.jobs_domain_name,
516
+ ItemName=compat_bytes(job_id),
517
+ ConsistentRead=True,
518
+ ).get("Attributes", [])
384
519
  # If the overlargeID has fallen off, maybe we partially deleted the
385
520
  # attributes of the item? Or raced on it? Or hit SimpleDB being merely
386
521
  # eventually consistent? We should still be able to get rid of it.
387
- self._checkItem(item, enforce = False)
388
- if item.get("overlargeID", None):
522
+ self._checkAttributes(attributes, enforce=False)
523
+ overlarge_id_value = get_item_from_attributes(
524
+ attributes=attributes, name="overlargeID"
525
+ )
526
+ if overlarge_id_value:
389
527
  logger.debug("Deleting job from filestore")
390
- self.delete_file(item["overlargeID"])
528
+ self.delete_file(overlarge_id_value)
391
529
  for attempt in retry_sdb():
392
530
  with attempt:
393
- self.jobsDomain.delete_attributes(item_name=compat_bytes(job_id))
394
- items = None
531
+ self.db.delete_attributes(
532
+ DomainName=self.jobs_domain_name, ItemName=compat_bytes(job_id)
533
+ )
534
+ items: Optional[list["ItemTypeDef"]] = None
395
535
  for attempt in retry_sdb():
396
536
  with attempt:
397
- items = list(self.filesDomain.select(
398
- consistent_read=True,
399
- query=f"select version from `{self.filesDomain.name}` where ownerID='{job_id}'"))
537
+ items = list(
538
+ boto3_pager(
539
+ self.db.select,
540
+ "Items",
541
+ ConsistentRead=True,
542
+ SelectExpression=f"select version from `{self.files_domain_name}` where ownerID='{job_id}'",
543
+ )
544
+ )
400
545
  assert items is not None
401
546
  if items:
402
- logger.debug("Deleting %d file(s) associated with job %s", len(items), job_id)
547
+ logger.debug(
548
+ "Deleting %d file(s) associated with job %s", len(items), job_id
549
+ )
403
550
  n = self.itemsPerBatchDelete
404
- batches = [items[i:i + n] for i in range(0, len(items), n)]
551
+ batches = [items[i : i + n] for i in range(0, len(items), n)]
405
552
  for batch in batches:
406
- itemsDict = {item.name: None for item in batch}
553
+ delete_items: list["DeletableItemTypeDef"] = [
554
+ {"Name": item["Name"]} for item in batch
555
+ ]
407
556
  for attempt in retry_sdb():
408
557
  with attempt:
409
- self.filesDomain.batch_delete_attributes(itemsDict)
558
+ self.db.batch_delete_attributes(
559
+ DomainName=self.files_domain_name, Items=delete_items
560
+ )
410
561
  for item in items:
411
- version = item.get('version')
562
+ item: "ItemTypeDef"
563
+ version = get_item_from_attributes(
564
+ attributes=item["Attributes"], name="version"
565
+ )
412
566
  for attempt in retry_s3():
413
567
  with attempt:
414
568
  if version:
415
- self.s3_client.delete_object(Bucket=self.filesBucket.name,
416
- Key=compat_bytes(item.name),
417
- VersionId=version)
569
+ self.s3_client.delete_object(
570
+ Bucket=self.files_bucket.name,
571
+ Key=compat_bytes(item["Name"]),
572
+ VersionId=version,
573
+ )
418
574
  else:
419
- self.s3_client.delete_object(Bucket=self.filesBucket.name,
420
- Key=compat_bytes(item.name))
575
+ self.s3_client.delete_object(
576
+ Bucket=self.files_bucket.name,
577
+ Key=compat_bytes(item["Name"]),
578
+ )
421
579
 
422
- def get_empty_file_store_id(self, jobStoreID=None, cleanup=False, basename=None):
580
+ def get_empty_file_store_id(
581
+ self, jobStoreID=None, cleanup=False, basename=None
582
+ ) -> FileID:
423
583
  info = self.FileInfo.create(jobStoreID if cleanup else None)
424
584
  with info.uploadStream() as _:
425
585
  # Empty
@@ -428,7 +588,14 @@ class AWSJobStore(AbstractJobStore):
428
588
  logger.debug("Created %r.", info)
429
589
  return info.fileID
430
590
 
431
- def _import_file(self, otherCls, uri, shared_file_name=None, hardlink=False, symlink=True):
591
+ def _import_file(
592
+ self,
593
+ otherCls,
594
+ uri: ParseResult,
595
+ shared_file_name: Optional[str] = None,
596
+ hardlink: bool = False,
597
+ symlink: bool = True,
598
+ ) -> Optional[FileID]:
432
599
  try:
433
600
  if issubclass(otherCls, AWSJobStore):
434
601
  srcObj = get_object_for_url(uri, existing=True)
@@ -438,29 +605,35 @@ class AWSJobStore(AbstractJobStore):
438
605
  else:
439
606
  self._requireValidSharedFileName(shared_file_name)
440
607
  jobStoreFileID = self._shared_file_id(shared_file_name)
441
- info = self.FileInfo.loadOrCreate(jobStoreFileID=jobStoreFileID,
442
- ownerID=str(self.sharedFileOwnerID),
443
- encrypted=None)
608
+ info = self.FileInfo.loadOrCreate(
609
+ jobStoreFileID=jobStoreFileID,
610
+ ownerID=str(self.sharedFileOwnerID),
611
+ encrypted=None,
612
+ )
444
613
  info.copyFrom(srcObj)
445
614
  info.save()
446
615
  return FileID(info.fileID, size) if shared_file_name is None else None
447
- except ServerSideCopyProhibitedError:
448
- # AWS refuses to do this copy for us
449
- logger.warning("Falling back to copying via the local machine. This could get expensive!")
616
+ except (NoBucketLocationError, ServerSideCopyProhibitedError):
617
+ # AWS refuses to tell us where the bucket is or do this copy for us
618
+ logger.warning(
619
+ "Falling back to copying via the local machine. This could get expensive!"
620
+ )
450
621
 
451
622
  # copy if exception
452
623
  return super()._import_file(otherCls, uri, shared_file_name=shared_file_name)
453
624
 
454
- def _export_file(self, otherCls, file_id, uri):
625
+ def _export_file(self, otherCls, file_id: FileID, uri: ParseResult) -> None:
455
626
  try:
456
627
  if issubclass(otherCls, AWSJobStore):
457
628
  dstObj = get_object_for_url(uri)
458
629
  info = self.FileInfo.loadOrFail(file_id)
459
630
  info.copyTo(dstObj)
460
631
  return
461
- except ServerSideCopyProhibitedError:
462
- # AWS refuses to do this copy for us
463
- logger.warning("Falling back to copying via the local machine. This could get expensive!")
632
+ except (NoBucketLocationError, ServerSideCopyProhibitedError):
633
+ # AWS refuses to tell us where the bucket is or do this copy for us
634
+ logger.warning(
635
+ "Falling back to copying via the local machine. This could get expensive!"
636
+ )
464
637
  else:
465
638
  super()._default_export_file(otherCls, file_id, uri)
466
639
 
@@ -475,41 +648,42 @@ class AWSJobStore(AbstractJobStore):
475
648
  return cls._get_is_directory(url)
476
649
 
477
650
  @classmethod
478
- def _get_size(cls, url):
651
+ def _get_size(cls, url: ParseResult) -> int:
479
652
  return get_object_for_url(url, existing=True).content_length
480
653
 
481
654
  @classmethod
482
- def _read_from_url(cls, url, writable):
655
+ def _read_from_url(cls, url: ParseResult, writable):
483
656
  srcObj = get_object_for_url(url, existing=True)
484
657
  srcObj.download_fileobj(writable)
485
- return (
486
- srcObj.content_length,
487
- False # executable bit is always False
488
- )
658
+ return (srcObj.content_length, False) # executable bit is always False
489
659
 
490
660
  @classmethod
491
661
  def _open_url(cls, url: ParseResult) -> IO[bytes]:
492
662
  src_obj = get_object_for_url(url, existing=True)
493
663
  response = src_obj.get()
494
664
  # We should get back a response with a stream in 'Body'
495
- if 'Body' not in response:
665
+ if "Body" not in response:
496
666
  raise RuntimeError(f"Could not fetch body stream for {url}")
497
- return response['Body']
667
+ return response["Body"]
498
668
 
499
669
  @classmethod
500
- def _write_to_url(cls, readable, url, executable=False):
670
+ def _write_to_url(
671
+ cls, readable, url: ParseResult, executable: bool = False
672
+ ) -> None:
501
673
  dstObj = get_object_for_url(url)
502
674
 
503
675
  logger.debug("Uploading %s", dstObj.key)
504
676
  # uploadFile takes care of using multipart upload if the file is larger than partSize (default to 5MB)
505
- uploadFile(readable=readable,
506
- resource=s3_boto3_resource,
507
- bucketName=dstObj.bucket_name,
508
- fileID=dstObj.key,
509
- partSize=5 * 1000 * 1000)
677
+ uploadFile(
678
+ readable=readable,
679
+ resource=s3_boto3_resource,
680
+ bucketName=dstObj.bucket_name,
681
+ fileID=dstObj.key,
682
+ partSize=5 * 1000 * 1000,
683
+ )
510
684
 
511
685
  @classmethod
512
- def _list_url(cls, url: ParseResult) -> List[str]:
686
+ def _list_url(cls, url: ParseResult) -> list[str]:
513
687
  return list_objects_for_url(url)
514
688
 
515
689
  @classmethod
@@ -519,10 +693,12 @@ class AWSJobStore(AbstractJobStore):
519
693
  return len(list_objects_for_url(url)) > 0
520
694
 
521
695
  @classmethod
522
- def _supports_url(cls, url, export=False):
523
- return url.scheme.lower() == 's3'
696
+ def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
697
+ return url.scheme.lower() == "s3"
524
698
 
525
- def write_file(self, local_path, job_id=None, cleanup=False):
699
+ def write_file(
700
+ self, local_path: FileID, job_id: Optional[FileID] = None, cleanup: bool = False
701
+ ) -> FileID:
526
702
  info = self.FileInfo.create(job_id if cleanup else None)
527
703
  info.upload(local_path, not self.config.disableJobStoreChecksumVerification)
528
704
  info.save()
@@ -530,7 +706,14 @@ class AWSJobStore(AbstractJobStore):
530
706
  return info.fileID
531
707
 
532
708
  @contextmanager
533
- def write_file_stream(self, job_id=None, cleanup=False, basename=None, encoding=None, errors=None):
709
+ def write_file_stream(
710
+ self,
711
+ job_id: Optional[FileID] = None,
712
+ cleanup: bool = False,
713
+ basename=None,
714
+ encoding=None,
715
+ errors=None,
716
+ ):
534
717
  info = self.FileInfo.create(job_id if cleanup else None)
535
718
  with info.uploadStream(encoding=encoding, errors=errors) as writable:
536
719
  yield writable, info.fileID
@@ -538,11 +721,15 @@ class AWSJobStore(AbstractJobStore):
538
721
  logger.debug("Wrote %r.", info)
539
722
 
540
723
  @contextmanager
541
- def write_shared_file_stream(self, shared_file_name, encrypted=None, encoding=None, errors=None):
724
+ def write_shared_file_stream(
725
+ self, shared_file_name, encrypted=None, encoding=None, errors=None
726
+ ):
542
727
  self._requireValidSharedFileName(shared_file_name)
543
- info = self.FileInfo.loadOrCreate(jobStoreFileID=self._shared_file_id(shared_file_name),
544
- ownerID=str(self.sharedFileOwnerID),
545
- encrypted=encrypted)
728
+ info = self.FileInfo.loadOrCreate(
729
+ jobStoreFileID=self._shared_file_id(shared_file_name),
730
+ ownerID=str(self.sharedFileOwnerID),
731
+ encrypted=encrypted,
732
+ )
546
733
  with info.uploadStream(encoding=encoding, errors=errors) as writable:
547
734
  yield writable
548
735
  info.save()
@@ -575,7 +762,7 @@ class AWSJobStore(AbstractJobStore):
575
762
  info = self.FileInfo.loadOrFail(file_id)
576
763
  logger.debug("Reading %r into %r.", info, local_path)
577
764
  info.download(local_path, not self.config.disableJobStoreChecksumVerification)
578
- if getattr(file_id, 'executable', False):
765
+ if getattr(file_id, "executable", False):
579
766
  os.chmod(local_path, os.stat(local_path).st_mode | stat.S_IXUSR)
580
767
 
581
768
  @contextmanager
@@ -590,7 +777,9 @@ class AWSJobStore(AbstractJobStore):
590
777
  self._requireValidSharedFileName(shared_file_name)
591
778
  jobStoreFileID = self._shared_file_id(shared_file_name)
592
779
  info = self.FileInfo.loadOrFail(jobStoreFileID, customName=shared_file_name)
593
- logger.debug("Reading %r for shared file %r into stream.", info, shared_file_name)
780
+ logger.debug(
781
+ "Reading %r for shared file %r into stream.", info, shared_file_name
782
+ )
594
783
  with info.downloadStream(encoding=encoding, errors=errors) as readable:
595
784
  yield readable
596
785
 
@@ -606,7 +795,7 @@ class AWSJobStore(AbstractJobStore):
606
795
  with info.uploadStream(multipart=False) as writeable:
607
796
  if isinstance(msg, str):
608
797
  # This stream is for binary data, so encode any non-encoded things
609
- msg = msg.encode('utf-8', errors='ignore')
798
+ msg = msg.encode("utf-8", errors="ignore")
610
799
  writeable.write(msg)
611
800
  info.save()
612
801
 
@@ -614,7 +803,7 @@ class AWSJobStore(AbstractJobStore):
614
803
  itemsProcessed = 0
615
804
 
616
805
  for info in self._read_logs(callback, self.statsFileOwnerID):
617
- info._ownerID = self.readStatsFileOwnerID
806
+ info._ownerID = str(self.readStatsFileOwnerID) # boto3 requires strings
618
807
  info.save()
619
808
  itemsProcessed += 1
620
809
 
@@ -628,10 +817,12 @@ class AWSJobStore(AbstractJobStore):
628
817
  items = None
629
818
  for attempt in retry_sdb():
630
819
  with attempt:
631
- items = list(self.filesDomain.select(
632
- consistent_read=True,
633
- query="select * from `{}` where ownerID='{}'".format(
634
- self.filesDomain.name, str(ownerId))))
820
+ items = boto3_pager(
821
+ self.db.select,
822
+ "Items",
823
+ ConsistentRead=True,
824
+ SelectExpression=f"select * from `{self.files_domain_name}` where ownerID='{str(ownerId)}'",
825
+ )
635
826
  assert items is not None
636
827
  for item in items:
637
828
  info = self.FileInfo.fromItem(item)
@@ -648,13 +839,19 @@ class AWSJobStore(AbstractJobStore):
648
839
  with info.uploadStream(allowInlining=False) as f:
649
840
  f.write(info.content)
650
841
 
651
- self.filesBucket.Object(compat_bytes(jobStoreFileID)).Acl().put(ACL='public-read')
842
+ self.files_bucket.Object(compat_bytes(jobStoreFileID)).Acl().put(
843
+ ACL="public-read"
844
+ )
652
845
 
653
- url = self.s3_client.generate_presigned_url('get_object',
654
- Params={'Bucket': self.filesBucket.name,
655
- 'Key': compat_bytes(jobStoreFileID),
656
- 'VersionId': info.version},
657
- ExpiresIn=self.publicUrlExpiration.total_seconds())
846
+ url = self.s3_client.generate_presigned_url(
847
+ "get_object",
848
+ Params={
849
+ "Bucket": self.files_bucket.name,
850
+ "Key": compat_bytes(jobStoreFileID),
851
+ "VersionId": info.version,
852
+ },
853
+ ExpiresIn=self.publicUrlExpiration.total_seconds(),
854
+ )
658
855
 
659
856
  # boto doesn't properly remove the x-amz-security-token parameter when
660
857
  # query_auth is False when using an IAM role (see issue #2043). Including the
@@ -662,12 +859,12 @@ class AWSJobStore(AbstractJobStore):
662
859
  # even if the resource is public, so we need to remove it.
663
860
  scheme, netloc, path, query, fragment = urlsplit(url)
664
861
  params = parse_qs(query)
665
- if 'x-amz-security-token' in params:
666
- del params['x-amz-security-token']
667
- if 'AWSAccessKeyId' in params:
668
- del params['AWSAccessKeyId']
669
- if 'Signature' in params:
670
- del params['Signature']
862
+ if "x-amz-security-token" in params:
863
+ del params["x-amz-security-token"]
864
+ if "AWSAccessKeyId" in params:
865
+ del params["AWSAccessKeyId"]
866
+ if "Signature" in params:
867
+ del params["Signature"]
671
868
  query = urlencode(params, doseq=True)
672
869
  url = urlunsplit((scheme, netloc, path, query, fragment))
673
870
  return url
@@ -676,22 +873,14 @@ class AWSJobStore(AbstractJobStore):
676
873
  self._requireValidSharedFileName(shared_file_name)
677
874
  return self.get_public_url(self._shared_file_id(shared_file_name))
678
875
 
679
- def _connectSimpleDB(self):
680
- """
681
- :rtype: SDBConnection
682
- """
683
- db = boto.sdb.connect_to_region(self.region)
684
- if db is None:
685
- raise ValueError("Could not connect to SimpleDB. Make sure '%s' is a valid SimpleDB region." % self.region)
686
- monkeyPatchSdbConnection(db)
687
- return db
688
-
689
- def _bindBucket(self,
690
- bucket_name: str,
691
- create: bool = False,
692
- block: bool = True,
693
- versioning: bool = False,
694
- check_versioning_consistency: bool = True):
876
+ def _bindBucket(
877
+ self,
878
+ bucket_name: str,
879
+ create: bool = False,
880
+ block: bool = True,
881
+ versioning: bool = False,
882
+ check_versioning_consistency: bool = True,
883
+ ):
695
884
  """
696
885
  Return the Boto Bucket object representing the S3 bucket with the given name. If the
697
886
  bucket does not exist and `create` is True, it will be created.
@@ -716,8 +905,7 @@ class AWSJobStore(AbstractJobStore):
716
905
  Decide, given an error, whether we should retry binding the bucket.
717
906
  """
718
907
 
719
- if (isinstance(error, ClientError) and
720
- get_error_status(error) in (404, 409)):
908
+ if isinstance(error, ClientError) and get_error_status(error) in (404, 409):
721
909
  # Handle cases where the bucket creation is in a weird state that might let us proceed.
722
910
  # https://github.com/BD2KGenomics/toil/issues/955
723
911
  # https://github.com/BD2KGenomics/toil/issues/995
@@ -727,7 +915,7 @@ class AWSJobStore(AbstractJobStore):
727
915
  # OperationAborted == 409
728
916
  # NoSuchBucket == 404
729
917
  return True
730
- if get_error_code(error) == 'SlowDown':
918
+ if get_error_code(error) == "SlowDown":
731
919
  # We may get told to SlowDown by AWS when we try to create our
732
920
  # bucket. In that case, we should retry and use the exponential
733
921
  # backoff.
@@ -767,8 +955,10 @@ class AWSJobStore(AbstractJobStore):
767
955
 
768
956
  if tags:
769
957
  flat_tags = flatten_tags(tags)
770
- bucket_tagging = self.s3_resource.BucketTagging(bucket_name)
771
- bucket_tagging.put(Tagging={'TagSet': flat_tags})
958
+ bucket_tagging = self.s3_resource.BucketTagging(
959
+ bucket_name
960
+ )
961
+ bucket_tagging.put(Tagging={"TagSet": flat_tags})
772
962
 
773
963
  # Configure bucket so that we can make objects in
774
964
  # it public, which was the historical default.
@@ -781,7 +971,9 @@ class AWSJobStore(AbstractJobStore):
781
971
  # This is raised if the user attempts to get a bucket in a region outside
782
972
  # the specified one, if the specified one is not `us-east-1`. The us-east-1
783
973
  # server allows a user to use buckets from any region.
784
- raise BucketLocationConflictException(get_bucket_region(bucket_name))
974
+ raise BucketLocationConflictException(
975
+ get_bucket_region(bucket_name)
976
+ )
785
977
  else:
786
978
  raise
787
979
  else:
@@ -798,25 +990,35 @@ class AWSJobStore(AbstractJobStore):
798
990
  # consistent?
799
991
  time.sleep(1)
800
992
  while not self._getBucketVersioning(bucket_name):
801
- logger.warning(f"Waiting for versioning activation on bucket '{bucket_name}'...")
993
+ logger.warning(
994
+ f"Waiting for versioning activation on bucket '{bucket_name}'..."
995
+ )
802
996
  time.sleep(1)
803
997
  elif check_versioning_consistency:
804
998
  # now test for versioning consistency
805
999
  # we should never see any of these errors since 'versioning' should always be true
806
1000
  bucket_versioning = self._getBucketVersioning(bucket_name)
807
1001
  if bucket_versioning != versioning:
808
- assert False, 'Cannot modify versioning on existing bucket'
1002
+ assert False, "Cannot modify versioning on existing bucket"
809
1003
  elif bucket_versioning is None:
810
- assert False, 'Cannot use a bucket with versioning suspended'
1004
+ assert False, "Cannot use a bucket with versioning suspended"
811
1005
  if bucketExisted:
812
- logger.debug(f"Using pre-existing job store bucket '{bucket_name}'.")
1006
+ logger.debug(
1007
+ f"Using pre-existing job store bucket '{bucket_name}'."
1008
+ )
813
1009
  else:
814
- logger.debug(f"Created new job store bucket '{bucket_name}' with versioning state {versioning}.")
1010
+ logger.debug(
1011
+ f"Created new job store bucket '{bucket_name}' with versioning state {versioning}."
1012
+ )
815
1013
 
816
1014
  return bucket
817
1015
 
818
- def _bindDomain(self, domain_name, create=False, block=True):
1016
+ def _bindDomain(
1017
+ self, domain_name: str, create: bool = False, block: bool = True
1018
+ ) -> None:
819
1019
  """
1020
+ Return the Boto3 domain name representing the SDB domain. When create=True, it will
1021
+ create the domain if it does not exist.
820
1022
  Return the Boto Domain object representing the SDB domain of the given name. If the
821
1023
  domain does not exist and `create` is True, it will be created.
822
1024
 
@@ -824,29 +1026,33 @@ class AWSJobStore(AbstractJobStore):
824
1026
 
825
1027
  :param bool create: True if domain should be created if it doesn't exist
826
1028
 
827
- :param bool block: If False, return None if the domain doesn't exist. If True, wait until
1029
+ :param bool block: If False, raise DomainDoesNotExist if the domain doesn't exist. If True, wait until
828
1030
  domain appears. This parameter is ignored if create is True.
829
1031
 
830
- :rtype: Domain|None
831
- :raises SDBResponseError: If `block` is True and the domain still doesn't exist after the
1032
+ :rtype: None
1033
+ :raises ClientError: If `block` is True and the domain still doesn't exist after the
832
1034
  retry timeout expires.
833
1035
  """
834
1036
  logger.debug("Binding to job store domain '%s'.", domain_name)
835
- retryargs = dict(predicate=lambda e: no_such_sdb_domain(e) or sdb_unavailable(e))
1037
+ retryargs = dict(
1038
+ predicate=lambda e: no_such_sdb_domain(e) or sdb_unavailable(e)
1039
+ )
836
1040
  if not block:
837
- retryargs['timeout'] = 15
1041
+ retryargs["timeout"] = 15
838
1042
  for attempt in retry_sdb(**retryargs):
839
1043
  with attempt:
840
1044
  try:
841
- return self.db.get_domain(domain_name)
842
- except SDBResponseError as e:
1045
+ self.db.domain_metadata(DomainName=domain_name)
1046
+ return
1047
+ except ClientError as e:
843
1048
  if no_such_sdb_domain(e):
844
1049
  if create:
845
- return self.db.create_domain(domain_name)
1050
+ self.db.create_domain(DomainName=domain_name)
1051
+ return
846
1052
  elif block:
847
1053
  raise
848
1054
  else:
849
- return None
1055
+ raise DomainDoesNotExist(domain_name)
850
1056
  else:
851
1057
  raise
852
1058
 
@@ -854,13 +1060,13 @@ class AWSJobStore(AbstractJobStore):
854
1060
  return str(uuid.uuid4())
855
1061
 
856
1062
  # A dummy job ID under which all shared files are stored
857
- sharedFileOwnerID = uuid.UUID('891f7db6-e4d9-4221-a58e-ab6cc4395f94')
1063
+ sharedFileOwnerID = uuid.UUID("891f7db6-e4d9-4221-a58e-ab6cc4395f94")
858
1064
 
859
1065
  # A dummy job ID under which all unread stats files are stored
860
- statsFileOwnerID = uuid.UUID('bfcf5286-4bc7-41ef-a85d-9ab415b69d53')
1066
+ statsFileOwnerID = uuid.UUID("bfcf5286-4bc7-41ef-a85d-9ab415b69d53")
861
1067
 
862
1068
  # A dummy job ID under which all read stats files are stored
863
- readStatsFileOwnerID = uuid.UUID('e77fc3aa-d232-4255-ae04-f64ee8eb0bfa')
1069
+ readStatsFileOwnerID = uuid.UUID("e77fc3aa-d232-4255-ae04-f64ee8eb0bfa")
864
1070
 
865
1071
  def _shared_file_id(self, shared_file_name):
866
1072
  return str(uuid.uuid5(self.sharedFileOwnerID, shared_file_name))
@@ -870,13 +1076,22 @@ class AWSJobStore(AbstractJobStore):
870
1076
  """
871
1077
  Represents a file in this job store.
872
1078
  """
1079
+
873
1080
  outer = None
874
1081
  """
875
1082
  :type: AWSJobStore
876
1083
  """
877
1084
 
878
- def __init__(self, fileID, ownerID, encrypted,
879
- version=None, content=None, numContentChunks=0, checksum=None):
1085
+ def __init__(
1086
+ self,
1087
+ fileID,
1088
+ ownerID,
1089
+ encrypted,
1090
+ version=None,
1091
+ content=None,
1092
+ numContentChunks=0,
1093
+ checksum=None,
1094
+ ):
880
1095
  """
881
1096
  :type fileID: str
882
1097
  :param fileID: the file's ID
@@ -955,32 +1170,45 @@ class AWSJobStore(AbstractJobStore):
955
1170
  assert content is None or isinstance(content, bytes)
956
1171
  self._content = content
957
1172
  if content is not None:
958
- self.version = ''
1173
+ self.version = ""
959
1174
 
960
1175
  @classmethod
961
- def create(cls, ownerID):
962
- return cls(str(uuid.uuid4()), ownerID, encrypted=cls.outer.sseKeyPath is not None)
1176
+ def create(cls, ownerID: str):
1177
+ return cls(
1178
+ str(uuid.uuid4()), ownerID, encrypted=cls.outer.sseKeyPath is not None
1179
+ )
963
1180
 
964
1181
  @classmethod
965
1182
  def presenceIndicator(cls):
966
- return 'encrypted'
1183
+ return "encrypted"
967
1184
 
968
1185
  @classmethod
969
1186
  def exists(cls, jobStoreFileID):
970
1187
  for attempt in retry_sdb():
971
1188
  with attempt:
972
- return bool(cls.outer.filesDomain.get_attributes(
973
- item_name=compat_bytes(jobStoreFileID),
974
- attribute_name=[cls.presenceIndicator()],
975
- consistent_read=True))
1189
+ return bool(
1190
+ cls.outer.db.get_attributes(
1191
+ DomainName=cls.outer.files_domain_name,
1192
+ ItemName=compat_bytes(jobStoreFileID),
1193
+ AttributeNames=[cls.presenceIndicator()],
1194
+ ConsistentRead=True,
1195
+ ).get("Attributes", [])
1196
+ )
976
1197
 
977
1198
  @classmethod
978
1199
  def load(cls, jobStoreFileID):
979
1200
  for attempt in retry_sdb():
980
1201
  with attempt:
981
1202
  self = cls.fromItem(
982
- cls.outer.filesDomain.get_attributes(item_name=compat_bytes(jobStoreFileID),
983
- consistent_read=True))
1203
+ {
1204
+ "Name": compat_bytes(jobStoreFileID),
1205
+ "Attributes": cls.outer.db.get_attributes(
1206
+ DomainName=cls.outer.files_domain_name,
1207
+ ItemName=compat_bytes(jobStoreFileID),
1208
+ ConsistentRead=True,
1209
+ ).get("Attributes", []),
1210
+ }
1211
+ )
984
1212
  return self
985
1213
 
986
1214
  @classmethod
@@ -1010,7 +1238,7 @@ class AWSJobStore(AbstractJobStore):
1010
1238
  return self
1011
1239
 
1012
1240
  @classmethod
1013
- def fromItem(cls, item):
1241
+ def fromItem(cls, item: "ItemTypeDef"):
1014
1242
  """
1015
1243
  Convert an SDB item to an instance of this class.
1016
1244
 
@@ -1023,31 +1251,37 @@ class AWSJobStore(AbstractJobStore):
1023
1251
  return s if s is None else str(s)
1024
1252
 
1025
1253
  # ownerID and encrypted are the only mandatory attributes
1026
- ownerID = strOrNone(item.get('ownerID'))
1027
- encrypted = item.get('encrypted')
1254
+ ownerID, encrypted, version, checksum = SDBHelper.get_attributes_from_item(
1255
+ item, ["ownerID", "encrypted", "version", "checksum"]
1256
+ )
1028
1257
  if ownerID is None:
1029
1258
  assert encrypted is None
1030
1259
  return None
1031
1260
  else:
1032
- version = strOrNone(item['version'])
1033
- checksum = strOrNone(item.get('checksum'))
1034
1261
  encrypted = strict_bool(encrypted)
1035
- content, numContentChunks = cls.attributesToBinary(item)
1262
+ content, numContentChunks = cls.attributesToBinary(item["Attributes"])
1036
1263
  if encrypted:
1037
1264
  sseKeyPath = cls.outer.sseKeyPath
1038
1265
  if sseKeyPath is None:
1039
- raise AssertionError('Content is encrypted but no key was provided.')
1266
+ raise AssertionError(
1267
+ "Content is encrypted but no key was provided."
1268
+ )
1040
1269
  if content is not None:
1041
1270
  content = encryption.decrypt(content, sseKeyPath)
1042
- self = cls(fileID=item.name, ownerID=ownerID, encrypted=encrypted, version=version,
1043
- content=content, numContentChunks=numContentChunks, checksum=checksum)
1271
+ self = cls(
1272
+ fileID=item["Name"],
1273
+ ownerID=ownerID,
1274
+ encrypted=encrypted,
1275
+ version=version,
1276
+ content=content,
1277
+ numContentChunks=numContentChunks,
1278
+ checksum=checksum,
1279
+ )
1044
1280
  return self
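With boto3, get_attributes() returns a response of the form {"Attributes": [{"Name": ..., "Value": ...}, ...]} rather than a boto 2 item object, which is why load() above reassembles a small {"Name": ..., "Attributes": [...]} dict before handing it to fromItem(). A hedged sketch of the list-to-dict flattening this style of code relies on; the helper below is hypothetical (Toil's real conversions live in SDBHelper), and the item values are made-up examples:

    def attributes_to_dict(item: dict) -> dict:
        """Flatten a boto3 SimpleDB item into {name: value}; last value wins for repeats."""
        return {attr["Name"]: attr["Value"] for attr in item.get("Attributes", [])}

    item = {
        "Name": "example-file-id",
        "Attributes": [
            {"Name": "ownerID", "Value": "891f7db6-e4d9-4221-a58e-ab6cc4395f94"},
            {"Name": "encrypted", "Value": "False"},
        ],
    }
    assert attributes_to_dict(item)["encrypted"] == "False"

Note that SimpleDB stores every value as a string, so booleans and counts (like the encrypted flag or chunk numbers) have to be re-parsed on the way out, as strict_bool() does in fromItem() below.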
1045
1281
 
1046
- def toItem(self):
1282
+ def toItem(self) -> tuple[dict[str, str], int]:
1047
1283
  """
1048
- Convert this instance to an attribute dictionary suitable for SDB put_attributes().
1049
-
1050
- :rtype: (dict,int)
1284
+ Convert this instance to a dictionary of attribute names to values
1051
1285
 
1052
1286
  :return: the attributes dict and an integer specifying the the number of chunk
1053
1287
  attributes in the dictionary that are used for storing inlined content.
@@ -1057,15 +1291,21 @@ class AWSJobStore(AbstractJobStore):
1057
1291
  if self.encrypted and content is not None:
1058
1292
  sseKeyPath = self.outer.sseKeyPath
1059
1293
  if sseKeyPath is None:
1060
- raise AssertionError('Encryption requested but no key was provided.')
1294
+ raise AssertionError(
1295
+ "Encryption requested but no key was provided."
1296
+ )
1061
1297
  content = encryption.encrypt(content, sseKeyPath)
1062
1298
  assert content is None or isinstance(content, bytes)
1063
1299
  attributes = self.binaryToAttributes(content)
1064
- numChunks = attributes['numChunks']
1065
- attributes.update(dict(ownerID=self.ownerID,
1066
- encrypted=self.encrypted,
1067
- version=self.version or '',
1068
- checksum=self.checksum or ''))
1300
+ numChunks = int(attributes["numChunks"])
1301
+ attributes.update(
1302
+ dict(
1303
+ ownerID=self.ownerID or "",
1304
+ encrypted=str(self.encrypted),
1305
+ version=self.version or "",
1306
+ checksum=self.checksum or "",
1307
+ )
1308
+ )
1069
1309
  return attributes, numChunks
1070
1310
 
1071
1311
  @classmethod
@@ -1078,32 +1318,67 @@ class AWSJobStore(AbstractJobStore):
1078
1318
 
1079
1319
  def save(self):
1080
1320
  attributes, numNewContentChunks = self.toItem()
1321
+ attributes_boto3 = SDBHelper.attributeDictToList(attributes)
1081
1322
  # False stands for absence
1082
- expected = ['version', False if self.previousVersion is None else self.previousVersion]
1323
+ if self.previousVersion is None:
1324
+ expected: "UpdateConditionTypeDef" = {
1325
+ "Name": "version",
1326
+ "Exists": False,
1327
+ }
1328
+ else:
1329
+ expected = {"Name": "version", "Value": cast(str, self.previousVersion)}
1083
1330
  try:
1084
1331
  for attempt in retry_sdb():
1085
1332
  with attempt:
1086
- assert self.outer.filesDomain.put_attributes(item_name=compat_bytes(self.fileID),
1087
- attributes=attributes,
1088
- expected_value=expected)
1333
+ self.outer.db.put_attributes(
1334
+ DomainName=self.outer.files_domain_name,
1335
+ ItemName=compat_bytes(self.fileID),
1336
+ Attributes=[
1337
+ {
1338
+ "Name": attribute["Name"],
1339
+ "Value": attribute["Value"],
1340
+ "Replace": True,
1341
+ }
1342
+ for attribute in attributes_boto3
1343
+ ],
1344
+ Expected=expected,
1345
+ )
1089
1346
  # clean up the old version of the file if necessary and safe
1090
1347
  if self.previousVersion and (self.previousVersion != self.version):
1091
1348
  for attempt in retry_s3():
1092
1349
  with attempt:
1093
- self.outer.s3_client.delete_object(Bucket=self.outer.filesBucket.name,
1094
- Key=compat_bytes(self.fileID),
1095
- VersionId=self.previousVersion)
1350
+ self.outer.s3_client.delete_object(
1351
+ Bucket=self.outer.files_bucket.name,
1352
+ Key=compat_bytes(self.fileID),
1353
+ VersionId=self.previousVersion,
1354
+ )
1096
1355
  self._previousVersion = self._version
1097
1356
  if numNewContentChunks < self._numContentChunks:
1098
1357
  residualChunks = range(numNewContentChunks, self._numContentChunks)
1099
- attributes = [self._chunkName(i) for i in residualChunks]
1358
+ residual_chunk_names = [self._chunkName(i) for i in residualChunks]
1359
+ # boto3 requires providing the value as well as the name in the attribute, and we don't store it locally
1360
+ # the php sdk resolves this issue by not requiring the Value key https://github.com/aws/aws-sdk-php/issues/185
1361
+ # but this doesnt extend to boto3
1362
+ delete_attributes = self.outer.db.get_attributes(
1363
+ DomainName=self.outer.files_domain_name,
1364
+ ItemName=compat_bytes(self.fileID),
1365
+ AttributeNames=[chunk for chunk in residual_chunk_names],
1366
+ ).get("Attributes")
1100
1367
  for attempt in retry_sdb():
1101
1368
  with attempt:
1102
- self.outer.filesDomain.delete_attributes(compat_bytes(self.fileID),
1103
- attributes=attributes)
1369
+ self.outer.db.delete_attributes(
1370
+ DomainName=self.outer.files_domain_name,
1371
+ ItemName=compat_bytes(self.fileID),
1372
+ Attributes=delete_attributes,
1373
+ )
1374
+ self.outer.db.get_attributes(
1375
+ DomainName=self.outer.files_domain_name,
1376
+ ItemName=compat_bytes(self.fileID),
1377
+ )
1378
+
1104
1379
  self._numContentChunks = numNewContentChunks
1105
- except SDBResponseError as e:
1106
- if e.error_code == 'ConditionalCheckFailed':
1380
+ except ClientError as e:
1381
+ if get_error_code(e) == "ConditionalCheckFailed":
1107
1382
  raise ConcurrentFileModificationException(self.fileID)
1108
1383
  else:
1109
1384
  raise
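save() above uses SimpleDB's conditional put as an optimistic lock: a brand-new record requires that no version attribute exists yet, an update requires the exact previousVersion that was last read, and a ConditionalCheckFailed error is translated into ConcurrentFileModificationException. A pared-down sketch of that pattern against a boto3 "sdb" client; the domain and item names are placeholders, and the wrapper function is illustrative rather than Toil's API:

    import boto3
    from botocore.exceptions import ClientError
    from typing import Optional

    sdb = boto3.client("sdb")
    DOMAIN, ITEM = "example--files", "example-file-id"  # placeholders

    def conditional_save(new_version: str, previous_version: Optional[str]) -> None:
        if previous_version is None:
            expected = {"Name": "version", "Exists": False}  # item must not have a version yet
        else:
            expected = {"Name": "version", "Value": previous_version}  # must be unchanged
        try:
            sdb.put_attributes(
                DomainName=DOMAIN,
                ItemName=ITEM,
                Attributes=[{"Name": "version", "Value": new_version, "Replace": True}],
                Expected=expected,
            )
        except ClientError as e:
            if e.response["Error"]["Code"] == "ConditionalCheckFailed":
                raise RuntimeError("concurrent modification: version changed underneath us") from e
            raise

The chunk-cleanup block above also shows a boto3 quirk the comment calls out: delete_attributes() wants full {"Name", "Value"} pairs, so the residual chunk attributes have to be read back with get_attributes() before they can be deleted.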
@@ -1111,24 +1386,30 @@ class AWSJobStore(AbstractJobStore):
1111
1386
  def upload(self, localFilePath, calculateChecksum=True):
1112
1387
  file_size, file_time = fileSizeAndTime(localFilePath)
1113
1388
  if file_size <= self.maxInlinedSize():
1114
- with open(localFilePath, 'rb') as f:
1389
+ with open(localFilePath, "rb") as f:
1115
1390
  self.content = f.read()
1116
1391
  # Clear out any old checksum in case of overwrite
1117
- self.checksum = ''
1392
+ self.checksum = ""
1118
1393
  else:
1119
1394
  headerArgs = self._s3EncryptionArgs()
1120
1395
  # Create a new Resource in case it needs to be on its own thread
1121
- resource = boto3_session.resource('s3', region_name=self.outer.region)
1122
-
1123
- self.checksum = self._get_file_checksum(localFilePath) if calculateChecksum else None
1124
- self.version = uploadFromPath(localFilePath,
1125
- resource=resource,
1126
- bucketName=self.outer.filesBucket.name,
1127
- fileID=compat_bytes(self.fileID),
1128
- headerArgs=headerArgs,
1129
- partSize=self.outer.partSize)
1130
-
1131
- def _start_checksum(self, to_match=None, algorithm='sha1'):
1396
+ resource = boto3_session.resource("s3", region_name=self.outer.region)
1397
+
1398
+ self.checksum = (
1399
+ self._get_file_checksum(localFilePath)
1400
+ if calculateChecksum
1401
+ else None
1402
+ )
1403
+ self.version = uploadFromPath(
1404
+ localFilePath,
1405
+ resource=resource,
1406
+ bucketName=self.outer.files_bucket.name,
1407
+ fileID=compat_bytes(self.fileID),
1408
+ headerArgs=headerArgs,
1409
+ partSize=self.outer.part_size,
1410
+ )
1411
+
1412
+ def _start_checksum(self, to_match=None, algorithm="sha1"):
1132
1413
  """
1133
1414
  Get a hasher that can be used with _update_checksum and
1134
1415
  _finish_checksum.
@@ -1146,12 +1427,12 @@ class AWSJobStore(AbstractJobStore):
1146
1427
  expected = None
1147
1428
 
1148
1429
  if to_match is not None:
1149
- parts = to_match.split('$')
1430
+ parts = to_match.split("$")
1150
1431
  algorithm = parts[0]
1151
1432
  expected = parts[1]
1152
1433
 
1153
1434
  wrapped = getattr(hashlib, algorithm)()
1154
- logger.debug(f'Starting {algorithm} checksum to match {expected}')
1435
+ logger.debug(f"Starting {algorithm} checksum to match {expected}")
1155
1436
  return algorithm, wrapped, expected
1156
1437
 
1157
1438
  def _update_checksum(self, checksum_in_progress, data):
@@ -1168,26 +1449,32 @@ class AWSJobStore(AbstractJobStore):
1168
1449
 
1169
1450
  result_hash = checksum_in_progress[1].hexdigest()
1170
1451
 
1171
- logger.debug(f'Completed checksum with hash {result_hash} vs. expected {checksum_in_progress[2]}')
1452
+ logger.debug(
1453
+ f"Completed checksum with hash {result_hash} vs. expected {checksum_in_progress[2]}"
1454
+ )
1172
1455
  if checksum_in_progress[2] is not None:
1173
1456
  # We expected a particular hash
1174
1457
  if result_hash != checksum_in_progress[2]:
1175
- raise ChecksumError('Checksum mismatch. Expected: %s Actual: %s' %
1176
- (checksum_in_progress[2], result_hash))
1458
+ raise ChecksumError(
1459
+ "Checksum mismatch. Expected: %s Actual: %s"
1460
+ % (checksum_in_progress[2], result_hash)
1461
+ )
1177
1462
 
1178
- return '$'.join([checksum_in_progress[0], result_hash])
1463
+ return "$".join([checksum_in_progress[0], result_hash])
1179
1464
 
1180
1465
  def _get_file_checksum(self, localFilePath, to_match=None):
1181
- with open(localFilePath, 'rb') as f:
1466
+ with open(localFilePath, "rb") as f:
1182
1467
  hasher = self._start_checksum(to_match=to_match)
1183
1468
  contents = f.read(1024 * 1024)
1184
- while contents != b'':
1469
+ while contents != b"":
1185
1470
  self._update_checksum(hasher, contents)
1186
1471
  contents = f.read(1024 * 1024)
1187
1472
  return self._finish_checksum(hasher)
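The checksum helpers above store a checksum as a single "algorithm$hexdigest" string (for example "sha1$<digest>"), so _start_checksum() can split to_match on "$" to recover both the algorithm and the expected digest. A small stdlib-only sketch of the same convention, reading the file in 1 MiB chunks just as _get_file_checksum() does; the function name is illustrative:

    import hashlib

    def file_checksum(path: str, algorithm: str = "sha1") -> str:
        hasher = getattr(hashlib, algorithm)()
        with open(path, "rb") as f:
            chunk = f.read(1024 * 1024)
            while chunk != b"":
                hasher.update(chunk)
                chunk = f.read(1024 * 1024)
        # Stored as "<algorithm>$<hexdigest>" so a later reader knows how to re-hash.
        return "$".join([algorithm, hasher.hexdigest()])

Verification is the mirror image: split the stored string on "$", re-hash the downloaded bytes with the named algorithm, and compare digests, raising ChecksumError on mismatch as _finish_checksum() does above.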
1188
1473
 
1189
1474
  @contextmanager
1190
- def uploadStream(self, multipart=True, allowInlining=True, encoding=None, errors=None):
1475
+ def uploadStream(
1476
+ self, multipart=True, allowInlining=True, encoding=None, errors=None
1477
+ ):
1191
1478
  """
1192
1479
  Context manager that gives out a binary or text mode upload stream to upload data.
1193
1480
  """
@@ -1204,71 +1491,95 @@ class AWSJobStore(AbstractJobStore):
1204
1491
  class MultiPartPipe(WritablePipe):
1205
1492
  def readFrom(self, readable):
1206
1493
  # Get the first block of data we want to put
1207
- buf = readable.read(store.partSize)
1494
+ buf = readable.read(store.part_size)
1208
1495
  assert isinstance(buf, bytes)
1209
1496
 
1210
1497
  if allowInlining and len(buf) <= info.maxInlinedSize():
1211
- logger.debug('Inlining content of %d bytes', len(buf))
1498
+ logger.debug("Inlining content of %d bytes", len(buf))
1212
1499
  info.content = buf
1213
1500
  # There will be no checksum
1214
- info.checksum = ''
1501
+ info.checksum = ""
1215
1502
  else:
1216
1503
  # We will compute a checksum
1217
1504
  hasher = info._start_checksum()
1218
- logger.debug('Updating checksum with %d bytes', len(buf))
1505
+ logger.debug("Updating checksum with %d bytes", len(buf))
1219
1506
  info._update_checksum(hasher, buf)
1220
1507
 
1221
1508
  client = store.s3_client
1222
- bucket_name = store.filesBucket.name
1509
+ bucket_name = store.files_bucket.name
1223
1510
  headerArgs = info._s3EncryptionArgs()
1224
1511
 
1225
1512
  for attempt in retry_s3():
1226
1513
  with attempt:
1227
- logger.debug('Starting multipart upload')
1514
+ logger.debug("Starting multipart upload")
1228
1515
  # low-level clients are thread safe
1229
- upload = client.create_multipart_upload(Bucket=bucket_name,
1230
- Key=compat_bytes(info.fileID),
1231
- **headerArgs)
1232
- uploadId = upload['UploadId']
1516
+ upload = client.create_multipart_upload(
1517
+ Bucket=bucket_name,
1518
+ Key=compat_bytes(info.fileID),
1519
+ **headerArgs,
1520
+ )
1521
+ uploadId = upload["UploadId"]
1233
1522
  parts = []
1234
- logger.debug('Multipart upload started as %s', uploadId)
1235
-
1523
+ logger.debug("Multipart upload started as %s", uploadId)
1236
1524
 
1237
1525
  for attempt in retry_s3():
1238
1526
  with attempt:
1239
1527
  for i in range(CONSISTENCY_TICKS):
1240
1528
  # Sometimes we can create a multipart upload and not see it. Wait around for it.
1241
- response = client.list_multipart_uploads(Bucket=bucket_name,
1242
- MaxUploads=1,
1243
- Prefix=compat_bytes(info.fileID))
1244
- if ('Uploads' in response and
1245
- len(response['Uploads']) != 0 and
1246
- response['Uploads'][0]['UploadId'] == uploadId):
1247
-
1248
- logger.debug('Multipart upload visible as %s', uploadId)
1529
+ response = client.list_multipart_uploads(
1530
+ Bucket=bucket_name,
1531
+ MaxUploads=1,
1532
+ Prefix=compat_bytes(info.fileID),
1533
+ )
1534
+ if (
1535
+ "Uploads" in response
1536
+ and len(response["Uploads"]) != 0
1537
+ and response["Uploads"][0]["UploadId"]
1538
+ == uploadId
1539
+ ):
1540
+
1541
+ logger.debug(
1542
+ "Multipart upload visible as %s", uploadId
1543
+ )
1249
1544
  break
1250
1545
  else:
1251
- logger.debug('Multipart upload %s is not visible; we see %s', uploadId, response.get('Uploads'))
1252
- time.sleep(CONSISTENCY_TIME * 2 ** i)
1546
+ logger.debug(
1547
+ "Multipart upload %s is not visible; we see %s",
1548
+ uploadId,
1549
+ response.get("Uploads"),
1550
+ )
1551
+ time.sleep(CONSISTENCY_TIME * 2**i)
1253
1552
 
1254
1553
  try:
1255
1554
  for part_num in itertools.count():
1256
1555
  for attempt in retry_s3():
1257
1556
  with attempt:
1258
- logger.debug('Uploading part %d of %d bytes to %s', part_num + 1, len(buf), uploadId)
1557
+ logger.debug(
1558
+ "Uploading part %d of %d bytes to %s",
1559
+ part_num + 1,
1560
+ len(buf),
1561
+ uploadId,
1562
+ )
1259
1563
  # TODO: include the Content-MD5 header:
1260
1564
  # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.complete_multipart_upload
1261
- part = client.upload_part(Bucket=bucket_name,
1262
- Key=compat_bytes(info.fileID),
1263
- PartNumber=part_num + 1,
1264
- UploadId=uploadId,
1265
- Body=BytesIO(buf),
1266
- **headerArgs)
1267
-
1268
- parts.append({"PartNumber": part_num + 1, "ETag": part["ETag"]})
1565
+ part = client.upload_part(
1566
+ Bucket=bucket_name,
1567
+ Key=compat_bytes(info.fileID),
1568
+ PartNumber=part_num + 1,
1569
+ UploadId=uploadId,
1570
+ Body=BytesIO(buf),
1571
+ **headerArgs,
1572
+ )
1573
+
1574
+ parts.append(
1575
+ {
1576
+ "PartNumber": part_num + 1,
1577
+ "ETag": part["ETag"],
1578
+ }
1579
+ )
1269
1580
 
1270
1581
  # Get the next block of data we want to put
1271
- buf = readable.read(info.outer.partSize)
1582
+ buf = readable.read(info.outer.part_size)
1272
1583
  assert isinstance(buf, bytes)
1273
1584
  if len(buf) == 0:
1274
1585
  # Don't allow any part other than the very first to be empty.
@@ -1278,15 +1589,21 @@ class AWSJobStore(AbstractJobStore):
1278
1589
  with panic(log=logger):
1279
1590
  for attempt in retry_s3():
1280
1591
  with attempt:
1281
- client.abort_multipart_upload(Bucket=bucket_name,
1282
- Key=compat_bytes(info.fileID),
1283
- UploadId=uploadId)
1592
+ client.abort_multipart_upload(
1593
+ Bucket=bucket_name,
1594
+ Key=compat_bytes(info.fileID),
1595
+ UploadId=uploadId,
1596
+ )
1284
1597
 
1285
1598
  else:
1286
1599
 
1287
- while not store._getBucketVersioning(store.filesBucket.name):
1288
- logger.warning('Versioning does not appear to be enabled yet. Deferring multipart '
1289
- 'upload completion...')
1600
+ while not store._getBucketVersioning(
1601
+ store.files_bucket.name
1602
+ ):
1603
+ logger.warning(
1604
+ "Versioning does not appear to be enabled yet. Deferring multipart "
1605
+ "upload completion..."
1606
+ )
1290
1607
  time.sleep(1)
1291
1608
 
1292
1609
  # Save the checksum
@@ -1298,32 +1615,46 @@ class AWSJobStore(AbstractJobStore):
1298
1615
  # in tests
1299
1616
  # (https://github.com/DataBiosphere/toil/issues/3894)
1300
1617
  with attempt:
1301
- logger.debug('Attempting to complete upload...')
1618
+ logger.debug("Attempting to complete upload...")
1302
1619
  completed = client.complete_multipart_upload(
1303
1620
  Bucket=bucket_name,
1304
1621
  Key=compat_bytes(info.fileID),
1305
1622
  UploadId=uploadId,
1306
- MultipartUpload={"Parts": parts})
1307
-
1308
- logger.debug('Completed upload object of type %s: %s', str(type(completed)),
1309
- repr(completed))
1310
- info.version = completed.get('VersionId')
1311
- logger.debug('Completed upload with version %s', str(info.version))
1623
+ MultipartUpload={"Parts": parts},
1624
+ )
1625
+
1626
+ logger.debug(
1627
+ "Completed upload object of type %s: %s",
1628
+ str(type(completed)),
1629
+ repr(completed),
1630
+ )
1631
+ info.version = completed.get("VersionId")
1632
+ logger.debug(
1633
+ "Completed upload with version %s",
1634
+ str(info.version),
1635
+ )
1312
1636
 
1313
1637
  if info.version is None:
1314
1638
  # Somehow we don't know the version. Try and get it.
1315
- for attempt in retry_s3(predicate=lambda e: retryable_s3_errors(e) or isinstance(e, AssertionError)):
1639
+ for attempt in retry_s3(
1640
+ predicate=lambda e: retryable_s3_errors(e)
1641
+ or isinstance(e, AssertionError)
1642
+ ):
1316
1643
  with attempt:
1317
- version = client.head_object(Bucket=bucket_name,
1318
- Key=compat_bytes(info.fileID),
1319
- **headerArgs).get('VersionId', None)
1320
- logger.warning('Loaded key for upload with no version and got version %s',
1321
- str(version))
1644
+ version = client.head_object(
1645
+ Bucket=bucket_name,
1646
+ Key=compat_bytes(info.fileID),
1647
+ **headerArgs,
1648
+ ).get("VersionId", None)
1649
+ logger.warning(
1650
+ "Loaded key for upload with no version and got version %s",
1651
+ str(version),
1652
+ )
1322
1653
  info.version = version
1323
1654
  assert info.version is not None
1324
1655
 
1325
1656
  # Make sure we actually wrote something, even if an empty file
1326
- assert (bool(info.version) or info.content is not None)
1657
+ assert bool(info.version) or info.content is not None
1327
1658
 
1328
1659
  class SinglePartPipe(WritablePipe):
1329
1660
  def readFrom(self, readable):
@@ -1331,56 +1662,74 @@ class AWSJobStore(AbstractJobStore):
1331
1662
  assert isinstance(buf, bytes)
1332
1663
  dataLength = len(buf)
1333
1664
  if allowInlining and dataLength <= info.maxInlinedSize():
1334
- logger.debug('Inlining content of %d bytes', len(buf))
1665
+ logger.debug("Inlining content of %d bytes", len(buf))
1335
1666
  info.content = buf
1336
1667
  # There will be no checksum
1337
- info.checksum = ''
1668
+ info.checksum = ""
1338
1669
  else:
1339
1670
  # We will compute a checksum
1340
1671
  hasher = info._start_checksum()
1341
1672
  info._update_checksum(hasher, buf)
1342
1673
  info.checksum = info._finish_checksum(hasher)
1343
1674
 
1344
- bucket_name = store.filesBucket.name
1675
+ bucket_name = store.files_bucket.name
1345
1676
  headerArgs = info._s3EncryptionArgs()
1346
1677
  client = store.s3_client
1347
1678
 
1348
1679
  buf = BytesIO(buf)
1349
1680
 
1350
1681
  while not store._getBucketVersioning(bucket_name):
1351
- logger.warning('Versioning does not appear to be enabled yet. Deferring single part '
1352
- 'upload...')
1682
+ logger.warning(
1683
+ "Versioning does not appear to be enabled yet. Deferring single part "
1684
+ "upload..."
1685
+ )
1353
1686
  time.sleep(1)
1354
1687
 
1355
1688
  for attempt in retry_s3():
1356
1689
  with attempt:
1357
- logger.debug('Uploading single part of %d bytes', dataLength)
1358
- client.upload_fileobj(Bucket=bucket_name,
1359
- Key=compat_bytes(info.fileID),
1360
- Fileobj=buf,
1361
- ExtraArgs=headerArgs)
1690
+ logger.debug(
1691
+ "Uploading single part of %d bytes", dataLength
1692
+ )
1693
+ client.upload_fileobj(
1694
+ Bucket=bucket_name,
1695
+ Key=compat_bytes(info.fileID),
1696
+ Fileobj=buf,
1697
+ ExtraArgs=headerArgs,
1698
+ )
1362
1699
 
1363
1700
  # use head_object with the SSE headers to access versionId and content_length attributes
1364
- headObj = client.head_object(Bucket=bucket_name,
1365
- Key=compat_bytes(info.fileID),
1366
- **headerArgs)
1367
- assert dataLength == headObj.get('ContentLength', None)
1368
- info.version = headObj.get('VersionId', None)
1369
- logger.debug('Upload received version %s', str(info.version))
1701
+ headObj = client.head_object(
1702
+ Bucket=bucket_name,
1703
+ Key=compat_bytes(info.fileID),
1704
+ **headerArgs,
1705
+ )
1706
+ assert dataLength == headObj.get("ContentLength", None)
1707
+ info.version = headObj.get("VersionId", None)
1708
+ logger.debug(
1709
+ "Upload received version %s", str(info.version)
1710
+ )
1370
1711
 
1371
1712
  if info.version is None:
1372
1713
  # Somehow we don't know the version
1373
- for attempt in retry_s3(predicate=lambda e: retryable_s3_errors(e) or isinstance(e, AssertionError)):
1714
+ for attempt in retry_s3(
1715
+ predicate=lambda e: retryable_s3_errors(e)
1716
+ or isinstance(e, AssertionError)
1717
+ ):
1374
1718
  with attempt:
1375
- headObj = client.head_object(Bucket=bucket_name,
1376
- Key=compat_bytes(info.fileID),
1377
- **headerArgs)
1378
- info.version = headObj.get('VersionId', None)
1379
- logger.warning('Reloaded key with no version and got version %s', str(info.version))
1719
+ headObj = client.head_object(
1720
+ Bucket=bucket_name,
1721
+ Key=compat_bytes(info.fileID),
1722
+ **headerArgs,
1723
+ )
1724
+ info.version = headObj.get("VersionId", None)
1725
+ logger.warning(
1726
+ "Reloaded key with no version and got version %s",
1727
+ str(info.version),
1728
+ )
1380
1729
  assert info.version is not None
1381
1730
 
1382
1731
  # Make sure we actually wrote something, even if an empty file
1383
- assert (bool(info.version) or info.content is not None)
1732
+ assert bool(info.version) or info.content is not None
1384
1733
 
1385
1734
  if multipart:
1386
1735
  pipe = MultiPartPipe(encoding=encoding, errors=errors)
@@ -1391,20 +1740,22 @@ class AWSJobStore(AbstractJobStore):
1391
1740
  yield writable
1392
1741
 
1393
1742
  if not pipe.reader_done:
1394
- logger.debug(f'Version: {self.version} Content: {self.content}')
1395
- raise RuntimeError('Escaped context manager without written data being read!')
1743
+ logger.debug(f"Version: {self.version} Content: {self.content}")
1744
+ raise RuntimeError(
1745
+ "Escaped context manager without written data being read!"
1746
+ )
1396
1747
 
1397
1748
  # We check our work to make sure we have exactly one of embedded
1398
1749
  # content or a real object version.
1399
1750
 
1400
1751
  if self.content is None:
1401
1752
  if not bool(self.version):
1402
- logger.debug(f'Version: {self.version} Content: {self.content}')
1403
- raise RuntimeError('No content added and no version created')
1753
+ logger.debug(f"Version: {self.version} Content: {self.content}")
1754
+ raise RuntimeError("No content added and no version created")
1404
1755
  else:
1405
1756
  if bool(self.version):
1406
- logger.debug(f'Version: {self.version} Content: {self.content}')
1407
- raise RuntimeError('Content added and version created')
1757
+ logger.debug(f"Version: {self.version} Content: {self.content}")
1758
+ raise RuntimeError("Content added and version created")
1408
1759
 
1409
1760
  def copyFrom(self, srcObj):
1410
1761
  """
@@ -1414,18 +1765,20 @@ class AWSJobStore(AbstractJobStore):
1414
1765
  """
1415
1766
  assert srcObj.content_length is not None
1416
1767
  if srcObj.content_length <= self.maxInlinedSize():
1417
- self.content = srcObj.get().get('Body').read()
1768
+ self.content = srcObj.get().get("Body").read()
1418
1769
  else:
1419
1770
  # Create a new Resource in case it needs to be on its own thread
1420
- resource = boto3_session.resource('s3', region_name=self.outer.region)
1421
- self.version = copyKeyMultipart(resource,
1422
- srcBucketName=compat_bytes(srcObj.bucket_name),
1423
- srcKeyName=compat_bytes(srcObj.key),
1424
- srcKeyVersion=compat_bytes(srcObj.version_id),
1425
- dstBucketName=compat_bytes(self.outer.filesBucket.name),
1426
- dstKeyName=compat_bytes(self._fileID),
1427
- sseAlgorithm='AES256',
1428
- sseKey=self._getSSEKey())
1771
+ resource = boto3_session.resource("s3", region_name=self.outer.region)
1772
+ self.version = copyKeyMultipart(
1773
+ resource,
1774
+ srcBucketName=compat_bytes(srcObj.bucket_name),
1775
+ srcKeyName=compat_bytes(srcObj.key),
1776
+ srcKeyVersion=compat_bytes(srcObj.version_id),
1777
+ dstBucketName=compat_bytes(self.outer.files_bucket.name),
1778
+ dstKeyName=compat_bytes(self._fileID),
1779
+ sseAlgorithm="AES256",
1780
+ sseKey=self._getSSEKey(),
1781
+ )
1429
1782
 
1430
1783
  def copyTo(self, dstObj):
1431
1784
  """
@@ -1439,35 +1792,43 @@ class AWSJobStore(AbstractJobStore):
1439
1792
  dstObj.put(Body=self.content)
1440
1793
  elif self.version:
1441
1794
  # Create a new Resource in case it needs to be on its own thread
1442
- resource = boto3_session.resource('s3', region_name=self.outer.region)
1795
+ resource = boto3_session.resource("s3", region_name=self.outer.region)
1443
1796
 
1444
1797
  for attempt in retry_s3():
1445
1798
  # encrypted = True if self.outer.sseKeyPath else False
1446
1799
  with attempt:
1447
- copyKeyMultipart(resource,
1448
- srcBucketName=compat_bytes(self.outer.filesBucket.name),
1449
- srcKeyName=compat_bytes(self.fileID),
1450
- srcKeyVersion=compat_bytes(self.version),
1451
- dstBucketName=compat_bytes(dstObj.bucket_name),
1452
- dstKeyName=compat_bytes(dstObj.key),
1453
- copySourceSseAlgorithm='AES256',
1454
- copySourceSseKey=self._getSSEKey())
1800
+ copyKeyMultipart(
1801
+ resource,
1802
+ srcBucketName=compat_bytes(self.outer.files_bucket.name),
1803
+ srcKeyName=compat_bytes(self.fileID),
1804
+ srcKeyVersion=compat_bytes(self.version),
1805
+ dstBucketName=compat_bytes(dstObj.bucket_name),
1806
+ dstKeyName=compat_bytes(dstObj.key),
1807
+ copySourceSseAlgorithm="AES256",
1808
+ copySourceSseKey=self._getSSEKey(),
1809
+ )
1455
1810
  else:
1456
1811
  assert False
1457
1812
 
1458
1813
  def download(self, localFilePath, verifyChecksum=True):
1459
1814
  if self.content is not None:
1460
1815
  with AtomicFileCreate(localFilePath) as tmpPath:
1461
- with open(tmpPath, 'wb') as f:
1816
+ with open(tmpPath, "wb") as f:
1462
1817
  f.write(self.content)
1463
1818
  elif self.version:
1464
1819
  headerArgs = self._s3EncryptionArgs()
1465
- obj = self.outer.filesBucket.Object(compat_bytes(self.fileID))
1820
+ obj = self.outer.files_bucket.Object(compat_bytes(self.fileID))
1466
1821
 
1467
- for attempt in retry_s3(predicate=lambda e: retryable_s3_errors(e) or isinstance(e, ChecksumError)):
1822
+ for attempt in retry_s3(
1823
+ predicate=lambda e: retryable_s3_errors(e)
1824
+ or isinstance(e, ChecksumError)
1825
+ ):
1468
1826
  with attempt:
1469
1827
  with AtomicFileCreate(localFilePath) as tmpPath:
1470
- obj.download_file(Filename=tmpPath, ExtraArgs={'VersionId': self.version, **headerArgs})
1828
+ obj.download_file(
1829
+ Filename=tmpPath,
1830
+ ExtraArgs={"VersionId": self.version, **headerArgs},
1831
+ )
1471
1832
 
1472
1833
  if verifyChecksum and self.checksum:
1473
1834
  try:
@@ -1475,7 +1836,10 @@ class AWSJobStore(AbstractJobStore):
1475
1836
  self._get_file_checksum(localFilePath, self.checksum)
1476
1837
  except ChecksumError as e:
1477
1838
  # Annotate checksum mismatches with file name
1478
- raise ChecksumError('Checksums do not match for file %s.' % localFilePath) from e
1839
+ raise ChecksumError(
1840
+ "Checksums do not match for file %s."
1841
+ % localFilePath
1842
+ ) from e
1479
1843
  # The error will get caught and result in a retry of the download until we run out of retries.
1480
1844
  # TODO: handle obviously truncated downloads by resuming instead.
1481
1845
  else:
@@ -1494,10 +1858,13 @@ class AWSJobStore(AbstractJobStore):
1494
1858
  writable.write(info.content)
1495
1859
  elif info.version:
1496
1860
  headerArgs = info._s3EncryptionArgs()
1497
- obj = info.outer.filesBucket.Object(compat_bytes(info.fileID))
1861
+ obj = info.outer.files_bucket.Object(compat_bytes(info.fileID))
1498
1862
  for attempt in retry_s3():
1499
1863
  with attempt:
1500
- obj.download_fileobj(writable, ExtraArgs={'VersionId': info.version, **headerArgs})
1864
+ obj.download_fileobj(
1865
+ writable,
1866
+ ExtraArgs={"VersionId": info.version, **headerArgs},
1867
+ )
1501
1868
  else:
1502
1869
  assert False
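Both download() and the streaming DownloadPipe above fetch one specific object version and pass the SSE-C headers through ExtraArgs; _s3EncryptionArgs() (further down in this diff) supplies SSECustomerAlgorithm and SSECustomerKey when the store is encrypted. A minimal sketch of that call shape, assuming a 32-byte customer key and placeholder bucket, key, and version values:

    import boto3

    s3 = boto3.resource("s3")
    sse_key = b"0" * 32  # placeholder 32-byte SSE-C key

    obj = s3.Object("example-bucket", "example-key")
    obj.download_file(
        Filename="/tmp/example-download",
        ExtraArgs={
            "VersionId": "example-version-id",  # pin the exact version recorded at upload time
            "SSECustomerAlgorithm": "AES256",
            "SSECustomerKey": sse_key,  # boto3 encodes the key and adds its MD5 header
        },
    )

Pinning VersionId matters because save() only deletes the previous object version after its conditional SimpleDB update succeeds, so reading the bucket's "latest" object could otherwise observe an in-flight overwrite.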
1503
1870
 
@@ -1513,7 +1880,7 @@ class AWSJobStore(AbstractJobStore):
1513
1880
  def transform(self, readable, writable):
1514
1881
  hasher = info._start_checksum(to_match=info.checksum)
1515
1882
  contents = readable.read(1024 * 1024)
1516
- while contents != b'':
1883
+ while contents != b"":
1517
1884
  info._update_checksum(hasher, contents)
1518
1885
  try:
1519
1886
  writable.write(contents)
@@ -1530,7 +1897,9 @@ class AWSJobStore(AbstractJobStore):
1530
1897
  if verifyChecksum and self.checksum:
1531
1898
  with DownloadPipe() as readable:
1532
1899
  # Interpose a pipe to check the hash
1533
- with HashingPipe(readable, encoding=encoding, errors=errors) as verified:
1900
+ with HashingPipe(
1901
+ readable, encoding=encoding, errors=errors
1902
+ ) as verified:
1534
1903
  yield verified
1535
1904
  else:
1536
1905
  # Readable end of pipe produces text mode output if encoding specified
@@ -1541,17 +1910,25 @@ class AWSJobStore(AbstractJobStore):
1541
1910
  def delete(self):
1542
1911
  store = self.outer
1543
1912
  if self.previousVersion is not None:
1913
+ expected: "UpdateConditionTypeDef" = {
1914
+ "Name": "version",
1915
+ "Value": cast(str, self.previousVersion),
1916
+ }
1544
1917
  for attempt in retry_sdb():
1545
1918
  with attempt:
1546
- store.filesDomain.delete_attributes(
1547
- compat_bytes(self.fileID),
1548
- expected_values=['version', self.previousVersion])
1919
+ store.db.delete_attributes(
1920
+ DomainName=store.files_domain_name,
1921
+ ItemName=compat_bytes(self.fileID),
1922
+ Expected=expected,
1923
+ )
1549
1924
  if self.previousVersion:
1550
1925
  for attempt in retry_s3():
1551
1926
  with attempt:
1552
- store.s3_client.delete_object(Bucket=store.filesBucket.name,
1553
- Key=compat_bytes(self.fileID),
1554
- VersionId=self.previousVersion)
1927
+ store.s3_client.delete_object(
1928
+ Bucket=store.files_bucket.name,
1929
+ Key=compat_bytes(self.fileID),
1930
+ VersionId=self.previousVersion,
1931
+ )
1555
1932
 
1556
1933
  def getSize(self):
1557
1934
  """
@@ -1562,7 +1939,7 @@ class AWSJobStore(AbstractJobStore):
1562
1939
  elif self.version:
1563
1940
  for attempt in retry_s3():
1564
1941
  with attempt:
1565
- obj = self.outer.filesBucket.Object(compat_bytes(self.fileID))
1942
+ obj = self.outer.files_bucket.Object(compat_bytes(self.fileID))
1566
1943
  return obj.content_length
1567
1944
  else:
1568
1945
  return 0
@@ -1570,7 +1947,7 @@ class AWSJobStore(AbstractJobStore):
1570
1947
  def _getSSEKey(self) -> Optional[bytes]:
1571
1948
  sseKeyPath = self.outer.sseKeyPath
1572
1949
  if sseKeyPath:
1573
- with open(sseKeyPath, 'rb') as f:
1950
+ with open(sseKeyPath, "rb") as f:
1574
1951
  sseKey = f.read()
1575
1952
  return sseKey
1576
1953
 
@@ -1579,25 +1956,30 @@ class AWSJobStore(AbstractJobStore):
1579
1956
  # parameters and will be used to set the http headers
1580
1957
  if self.encrypted:
1581
1958
  sseKey = self._getSSEKey()
1582
- assert sseKey is not None, 'Content is encrypted but no key was provided.'
1959
+ assert (
1960
+ sseKey is not None
1961
+ ), "Content is encrypted but no key was provided."
1583
1962
  assert len(sseKey) == 32
1584
1963
  # boto3 encodes the key and calculates the MD5 for us
1585
- return {'SSECustomerAlgorithm': 'AES256', 'SSECustomerKey': sseKey}
1964
+ return {"SSECustomerAlgorithm": "AES256", "SSECustomerKey": sseKey}
1586
1965
  else:
1587
1966
  return {}
1588
1967
 
1589
1968
  def __repr__(self):
1590
1969
  r = custom_repr
1591
- d = (('fileID', r(self.fileID)),
1592
- ('ownerID', r(self.ownerID)),
1593
- ('encrypted', r(self.encrypted)),
1594
- ('version', r(self.version)),
1595
- ('previousVersion', r(self.previousVersion)),
1596
- ('content', r(self.content)),
1597
- ('checksum', r(self.checksum)),
1598
- ('_numContentChunks', r(self._numContentChunks)))
1599
- return "{}({})".format(type(self).__name__,
1600
- ', '.join(f'{k}={v}' for k, v in d))
1970
+ d = (
1971
+ ("fileID", r(self.fileID)),
1972
+ ("ownerID", r(self.ownerID)),
1973
+ ("encrypted", r(self.encrypted)),
1974
+ ("version", r(self.version)),
1975
+ ("previousVersion", r(self.previousVersion)),
1976
+ ("content", r(self.content)),
1977
+ ("checksum", r(self.checksum)),
1978
+ ("_numContentChunks", r(self._numContentChunks)),
1979
+ )
1980
+ return "{}({})".format(
1981
+ type(self).__name__, ", ".join(f"{k}={v}" for k, v in d)
1982
+ )
1601
1983
 
1602
1984
  versionings = dict(Enabled=True, Disabled=False, Suspended=None)
1603
1985
 
@@ -1631,22 +2013,22 @@ class AWSJobStore(AbstractJobStore):
  pass
  # TODO: Add other failure cases to be ignored here.
  self._registered = None
- if self.filesBucket is not None:
- self._delete_bucket(self.filesBucket)
- self.filesBucket = None
- for name in 'filesDomain', 'jobsDomain':
- domain = getattr(self, name)
- if domain is not None:
- self._delete_domain(domain)
+ if self.files_bucket is not None:
+ self._delete_bucket(self.files_bucket)
+ self.files_bucket = None
+ for name in "files_domain_name", "jobs_domain_name":
+ domainName = getattr(self, name)
+ if domainName is not None:
+ self._delete_domain(domainName)
  setattr(self, name, None)
  self._registered = False
 
- def _delete_domain(self, domain):
+ def _delete_domain(self, domainName):
  for attempt in retry_sdb():
  with attempt:
  try:
- domain.delete()
- except SDBResponseError as e:
+ self.db.delete_domain(DomainName=domainName)
+ except ClientError as e:
  if not no_such_sdb_domain(e):
  raise
 
@@ -1658,12 +2040,14 @@ class AWSJobStore(AbstractJobStore):
  for attempt in retry_s3():
  with attempt:
  try:
- uploads = s3_boto3_client.list_multipart_uploads(Bucket=bucket.name).get('Uploads')
+ uploads = s3_boto3_client.list_multipart_uploads(
+ Bucket=bucket.name
+ ).get("Uploads")
  if uploads:
  for u in uploads:
- s3_boto3_client.abort_multipart_upload(Bucket=bucket.name,
- Key=u["Key"],
- UploadId=u["UploadId"])
+ s3_boto3_client.abort_multipart_upload(
+ Bucket=bucket.name, Key=u["Key"], UploadId=u["UploadId"]
+ )
 
  bucket.objects.all().delete()
  bucket.object_versions.delete()
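_delete_bucket() above has to empty the bucket before it can be removed: abort any multipart uploads that never completed, delete all current objects, then delete all historical versions (the files bucket is versioned). A condensed sketch of that teardown with a boto3 resource/client pair; the bucket name is a placeholder, and the final delete() call is the presumed last step rather than something shown in this hunk:

    import boto3

    s3 = boto3.resource("s3")
    s3_client = s3.meta.client
    bucket = s3.Bucket("example-jobstore-bucket")  # placeholder

    # Abort in-flight multipart uploads so their parts stop accruing charges.
    for upload in s3_client.list_multipart_uploads(Bucket=bucket.name).get("Uploads", []):
        s3_client.abort_multipart_upload(
            Bucket=bucket.name, Key=upload["Key"], UploadId=upload["UploadId"]
        )

    bucket.objects.all().delete()    # current objects
    bucket.object_versions.delete()  # old versions and delete markers
    bucket.delete()                  # only an empty bucket can be deleted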
@@ -1680,8 +2064,10 @@ aRepr.maxstring = 38  # so UUIDs don't get truncated (36 for UUID plus 2 for quo
  custom_repr = aRepr.repr
 
 
- class BucketLocationConflictException(Exception):
+ class BucketLocationConflictException(LocatorException):
  def __init__(self, bucketRegion):
  super().__init__(
- 'A bucket with the same name as the jobstore was found in another region (%s). '
- 'Cannot proceed as the unique bucket name is already in use.' % bucketRegion)
+ "A bucket with the same name as the jobstore was found in another region (%s). "
+ "Cannot proceed as the unique bucket name is already in use.",
+ locator=bucketRegion,
+ )