toil 9.1.1__py3-none-any.whl → 9.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. toil/__init__.py +5 -9
  2. toil/batchSystems/abstractBatchSystem.py +23 -22
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -12
  4. toil/batchSystems/awsBatch.py +8 -8
  5. toil/batchSystems/cleanup_support.py +4 -4
  6. toil/batchSystems/contained_executor.py +3 -3
  7. toil/batchSystems/gridengine.py +3 -4
  8. toil/batchSystems/htcondor.py +5 -5
  9. toil/batchSystems/kubernetes.py +65 -63
  10. toil/batchSystems/local_support.py +2 -3
  11. toil/batchSystems/lsf.py +6 -7
  12. toil/batchSystems/mesos/batchSystem.py +11 -7
  13. toil/batchSystems/mesos/test/__init__.py +1 -2
  14. toil/batchSystems/options.py +9 -10
  15. toil/batchSystems/registry.py +3 -7
  16. toil/batchSystems/singleMachine.py +8 -11
  17. toil/batchSystems/slurm.py +49 -38
  18. toil/batchSystems/torque.py +3 -4
  19. toil/bus.py +36 -34
  20. toil/common.py +129 -89
  21. toil/cwl/cwltoil.py +857 -729
  22. toil/cwl/utils.py +44 -35
  23. toil/fileStores/__init__.py +3 -1
  24. toil/fileStores/abstractFileStore.py +28 -30
  25. toil/fileStores/cachingFileStore.py +8 -8
  26. toil/fileStores/nonCachingFileStore.py +10 -21
  27. toil/job.py +159 -158
  28. toil/jobStores/abstractJobStore.py +68 -69
  29. toil/jobStores/aws/jobStore.py +249 -213
  30. toil/jobStores/aws/utils.py +13 -24
  31. toil/jobStores/fileJobStore.py +28 -22
  32. toil/jobStores/googleJobStore.py +21 -17
  33. toil/jobStores/utils.py +3 -7
  34. toil/leader.py +17 -22
  35. toil/lib/accelerators.py +6 -4
  36. toil/lib/aws/__init__.py +9 -10
  37. toil/lib/aws/ami.py +33 -19
  38. toil/lib/aws/iam.py +6 -6
  39. toil/lib/aws/s3.py +259 -157
  40. toil/lib/aws/session.py +76 -76
  41. toil/lib/aws/utils.py +51 -43
  42. toil/lib/checksum.py +19 -15
  43. toil/lib/compatibility.py +3 -2
  44. toil/lib/conversions.py +45 -18
  45. toil/lib/directory.py +29 -26
  46. toil/lib/docker.py +93 -99
  47. toil/lib/dockstore.py +77 -50
  48. toil/lib/ec2.py +39 -38
  49. toil/lib/ec2nodes.py +11 -4
  50. toil/lib/exceptions.py +8 -5
  51. toil/lib/ftp_utils.py +9 -14
  52. toil/lib/generatedEC2Lists.py +161 -20
  53. toil/lib/history.py +141 -97
  54. toil/lib/history_submission.py +163 -72
  55. toil/lib/io.py +27 -17
  56. toil/lib/memoize.py +2 -1
  57. toil/lib/misc.py +15 -11
  58. toil/lib/pipes.py +40 -25
  59. toil/lib/plugins.py +12 -8
  60. toil/lib/resources.py +1 -0
  61. toil/lib/retry.py +32 -38
  62. toil/lib/threading.py +12 -12
  63. toil/lib/throttle.py +1 -2
  64. toil/lib/trs.py +113 -51
  65. toil/lib/url.py +14 -23
  66. toil/lib/web.py +7 -2
  67. toil/options/common.py +18 -15
  68. toil/options/cwl.py +2 -2
  69. toil/options/runner.py +9 -5
  70. toil/options/wdl.py +1 -3
  71. toil/provisioners/__init__.py +9 -9
  72. toil/provisioners/abstractProvisioner.py +22 -20
  73. toil/provisioners/aws/__init__.py +20 -14
  74. toil/provisioners/aws/awsProvisioner.py +10 -8
  75. toil/provisioners/clusterScaler.py +19 -18
  76. toil/provisioners/gceProvisioner.py +2 -3
  77. toil/provisioners/node.py +11 -13
  78. toil/realtimeLogger.py +4 -4
  79. toil/resource.py +5 -5
  80. toil/server/app.py +2 -2
  81. toil/server/cli/wes_cwl_runner.py +11 -11
  82. toil/server/utils.py +18 -21
  83. toil/server/wes/abstract_backend.py +9 -8
  84. toil/server/wes/amazon_wes_utils.py +3 -3
  85. toil/server/wes/tasks.py +3 -5
  86. toil/server/wes/toil_backend.py +17 -21
  87. toil/server/wsgi_app.py +3 -3
  88. toil/serviceManager.py +3 -4
  89. toil/statsAndLogging.py +12 -13
  90. toil/test/__init__.py +33 -24
  91. toil/test/batchSystems/batchSystemTest.py +12 -11
  92. toil/test/batchSystems/batch_system_plugin_test.py +3 -5
  93. toil/test/batchSystems/test_slurm.py +38 -24
  94. toil/test/cwl/conftest.py +5 -6
  95. toil/test/cwl/cwlTest.py +194 -78
  96. toil/test/cwl/download_file_uri.json +6 -0
  97. toil/test/cwl/download_file_uri_no_hostname.json +6 -0
  98. toil/test/docs/scripts/tutorial_staging.py +1 -0
  99. toil/test/jobStores/jobStoreTest.py +9 -7
  100. toil/test/lib/aws/test_iam.py +1 -3
  101. toil/test/lib/aws/test_s3.py +1 -1
  102. toil/test/lib/dockerTest.py +9 -9
  103. toil/test/lib/test_ec2.py +12 -11
  104. toil/test/lib/test_history.py +4 -4
  105. toil/test/lib/test_trs.py +16 -14
  106. toil/test/lib/test_url.py +7 -6
  107. toil/test/lib/url_plugin_test.py +12 -18
  108. toil/test/provisioners/aws/awsProvisionerTest.py +10 -8
  109. toil/test/provisioners/clusterScalerTest.py +2 -5
  110. toil/test/provisioners/clusterTest.py +1 -3
  111. toil/test/server/serverTest.py +13 -4
  112. toil/test/sort/restart_sort.py +2 -6
  113. toil/test/sort/sort.py +3 -8
  114. toil/test/src/deferredFunctionTest.py +7 -7
  115. toil/test/src/environmentTest.py +1 -2
  116. toil/test/src/fileStoreTest.py +5 -5
  117. toil/test/src/importExportFileTest.py +5 -6
  118. toil/test/src/jobServiceTest.py +22 -14
  119. toil/test/src/jobTest.py +121 -25
  120. toil/test/src/miscTests.py +5 -7
  121. toil/test/src/promisedRequirementTest.py +8 -7
  122. toil/test/src/regularLogTest.py +2 -3
  123. toil/test/src/resourceTest.py +5 -8
  124. toil/test/src/restartDAGTest.py +5 -6
  125. toil/test/src/resumabilityTest.py +2 -2
  126. toil/test/src/retainTempDirTest.py +3 -3
  127. toil/test/src/systemTest.py +3 -3
  128. toil/test/src/threadingTest.py +1 -1
  129. toil/test/src/workerTest.py +1 -2
  130. toil/test/utils/toilDebugTest.py +6 -4
  131. toil/test/utils/toilKillTest.py +1 -1
  132. toil/test/utils/utilsTest.py +15 -14
  133. toil/test/wdl/wdltoil_test.py +247 -124
  134. toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
  135. toil/toilState.py +2 -3
  136. toil/utils/toilDebugFile.py +3 -8
  137. toil/utils/toilDebugJob.py +1 -2
  138. toil/utils/toilLaunchCluster.py +1 -2
  139. toil/utils/toilSshCluster.py +2 -0
  140. toil/utils/toilStats.py +19 -24
  141. toil/utils/toilStatus.py +11 -14
  142. toil/version.py +10 -10
  143. toil/wdl/wdltoil.py +313 -209
  144. toil/worker.py +18 -12
  145. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/METADATA +11 -14
  146. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/RECORD +150 -153
  147. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/WHEEL +1 -1
  148. toil/test/cwl/staging_cat.cwl +0 -27
  149. toil/test/cwl/staging_make_file.cwl +0 -25
  150. toil/test/cwl/staging_workflow.cwl +0 -43
  151. toil/test/cwl/zero_default.cwl +0 -61
  152. toil/test/utils/ABCWorkflowDebug/ABC.txt +0 -1
  153. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/entry_points.txt +0 -0
  154. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/licenses/LICENSE +0 -0
  155. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/top_level.txt +0 -0
toil/lib/aws/session.py CHANGED
@@ -15,7 +15,7 @@ import collections
15
15
  import logging
16
16
  import os
17
17
  import threading
18
- from typing import TYPE_CHECKING, Literal, Optional, cast, overload
18
+ from typing import TYPE_CHECKING, Literal, cast, overload
19
19
 
20
20
  import boto3
21
21
  import boto3.resources.base
@@ -55,7 +55,7 @@ ANONYMOUS_CONFIG = Config(signature_version=botocore.UNSIGNED)
55
55
  _init_lock = threading.RLock()
56
56
 
57
57
 
58
- def _new_boto3_session(region_name: Optional[str] = None) -> Session:
58
+ def _new_boto3_session(region_name: str | None = None) -> Session:
59
59
  """
60
60
  This is the One True Place where new Boto3 sessions should be made, and
61
61
  prepares them with the necessary credential caching. Does *not* cache
@@ -115,26 +115,26 @@ class AWSConnectionManager:
115
115
  """
116
116
  # This stores Boto3 sessions in .item of a thread-local storage, by
117
117
  # region.
118
- self.sessions_by_region: dict[Optional[str], threading.local] = (
118
+ self.sessions_by_region: dict[str | None, threading.local] = (
119
119
  collections.defaultdict(threading.local)
120
120
  )
121
121
  # This stores Boto3 resources in .item of a thread-local storage, by
122
122
  # (region, service name, endpoint URL) tuples
123
123
  self.resource_cache: dict[
124
- tuple[Optional[str], str, Optional[str]], threading.local
124
+ tuple[str | None, str, str | None], threading.local
125
125
  ] = collections.defaultdict(threading.local)
126
126
  # This stores Boto3 clients in .item of a thread-local storage, by
127
127
  # (region, service name, endpoint URL) tuples
128
- self.client_cache: dict[
129
- tuple[Optional[str], str, Optional[str]], threading.local
130
- ] = collections.defaultdict(threading.local)
128
+ self.client_cache: dict[tuple[str | None, str, str | None], threading.local] = (
129
+ collections.defaultdict(threading.local)
130
+ )
131
131
  # This stores Boto 2 connections in .item of a thread-local storage, by
132
132
  # (region, service name) tuples.
133
- self.boto2_cache: dict[tuple[Optional[str], str], threading.local] = (
133
+ self.boto2_cache: dict[tuple[str | None, str], threading.local] = (
134
134
  collections.defaultdict(threading.local)
135
135
  )
136
136
 
137
- def session(self, region: Optional[str]) -> boto3.session.Session:
137
+ def session(self, region: str | None) -> boto3.session.Session:
138
138
  """
139
139
  Get the Boto3 Session to use for the given region.
140
140
  """
@@ -148,34 +148,34 @@ class AWSConnectionManager:
148
148
  @overload
149
149
  def resource(
150
150
  self,
151
- region: Optional[str],
151
+ region: str | None,
152
152
  service_name: Literal["s3"],
153
- endpoint_url: Optional[str] = None,
154
- config: Optional[Config] = None,
153
+ endpoint_url: str | None = None,
154
+ config: Config | None = None,
155
155
  ) -> "S3ServiceResource": ...
156
156
  @overload
157
157
  def resource(
158
158
  self,
159
- region: Optional[str],
159
+ region: str | None,
160
160
  service_name: Literal["iam"],
161
- endpoint_url: Optional[str] = None,
162
- config: Optional[Config] = None,
161
+ endpoint_url: str | None = None,
162
+ config: Config | None = None,
163
163
  ) -> "IAMServiceResource": ...
164
164
  @overload
165
165
  def resource(
166
166
  self,
167
- region: Optional[str],
167
+ region: str | None,
168
168
  service_name: Literal["ec2"],
169
- endpoint_url: Optional[str] = None,
170
- config: Optional[Config] = None,
169
+ endpoint_url: str | None = None,
170
+ config: Config | None = None,
171
171
  ) -> "EC2ServiceResource": ...
172
172
 
173
173
  def resource(
174
174
  self,
175
- region: Optional[str],
175
+ region: str | None,
176
176
  service_name: str,
177
- endpoint_url: Optional[str] = None,
178
- config: Optional[Config] = None,
177
+ endpoint_url: str | None = None,
178
+ config: Config | None = None,
179
179
  ) -> boto3.resources.base.ServiceResource:
180
180
  """
181
181
  Get the Boto3 Resource to use with the given service (like 'ec2') in the given region.
@@ -205,58 +205,58 @@ class AWSConnectionManager:
205
205
  @overload
206
206
  def client(
207
207
  self,
208
- region: Optional[str],
208
+ region: str | None,
209
209
  service_name: Literal["ec2"],
210
- endpoint_url: Optional[str] = None,
211
- config: Optional[Config] = None,
210
+ endpoint_url: str | None = None,
211
+ config: Config | None = None,
212
212
  ) -> "EC2Client": ...
213
213
  @overload
214
214
  def client(
215
215
  self,
216
- region: Optional[str],
216
+ region: str | None,
217
217
  service_name: Literal["iam"],
218
- endpoint_url: Optional[str] = None,
219
- config: Optional[Config] = None,
218
+ endpoint_url: str | None = None,
219
+ config: Config | None = None,
220
220
  ) -> "IAMClient": ...
221
221
  @overload
222
222
  def client(
223
223
  self,
224
- region: Optional[str],
224
+ region: str | None,
225
225
  service_name: Literal["s3"],
226
- endpoint_url: Optional[str] = None,
227
- config: Optional[Config] = None,
226
+ endpoint_url: str | None = None,
227
+ config: Config | None = None,
228
228
  ) -> "S3Client": ...
229
229
  @overload
230
230
  def client(
231
231
  self,
232
- region: Optional[str],
232
+ region: str | None,
233
233
  service_name: Literal["sts"],
234
- endpoint_url: Optional[str] = None,
235
- config: Optional[Config] = None,
234
+ endpoint_url: str | None = None,
235
+ config: Config | None = None,
236
236
  ) -> "STSClient": ...
237
237
  @overload
238
238
  def client(
239
239
  self,
240
- region: Optional[str],
240
+ region: str | None,
241
241
  service_name: Literal["sdb"],
242
- endpoint_url: Optional[str] = None,
243
- config: Optional[Config] = None,
242
+ endpoint_url: str | None = None,
243
+ config: Config | None = None,
244
244
  ) -> "SimpleDBClient": ...
245
245
  @overload
246
246
  def client(
247
247
  self,
248
- region: Optional[str],
248
+ region: str | None,
249
249
  service_name: Literal["autoscaling"],
250
- endpoint_url: Optional[str] = None,
251
- config: Optional[Config] = None,
250
+ endpoint_url: str | None = None,
251
+ config: Config | None = None,
252
252
  ) -> "AutoScalingClient": ...
253
253
 
254
254
  def client(
255
255
  self,
256
- region: Optional[str],
256
+ region: str | None,
257
257
  service_name: Literal["ec2", "iam", "s3", "sts", "sdb", "autoscaling"],
258
- endpoint_url: Optional[str] = None,
259
- config: Optional[Config] = None,
258
+ endpoint_url: str | None = None,
259
+ config: Config | None = None,
260
260
  ) -> botocore.client.BaseClient:
261
261
  """
262
262
  Get the Boto3 Client to use with the given service (like 'ec2') in the given region.
@@ -298,7 +298,7 @@ class AWSConnectionManager:
298
298
  _global_manager = AWSConnectionManager()
299
299
 
300
300
 
301
- def establish_boto3_session(region_name: Optional[str] = None) -> Session:
301
+ def establish_boto3_session(region_name: str | None = None) -> Session:
302
302
  """
303
303
  Get a Boto 3 session usable by the current thread.
304
304
 
@@ -312,52 +312,52 @@ def establish_boto3_session(region_name: Optional[str] = None) -> Session:
312
312
  @overload
313
313
  def client(
314
314
  service_name: Literal["ec2"],
315
- region_name: Optional[str] = None,
316
- endpoint_url: Optional[str] = None,
317
- config: Optional[Config] = None,
315
+ region_name: str | None = None,
316
+ endpoint_url: str | None = None,
317
+ config: Config | None = None,
318
318
  ) -> "EC2Client": ...
319
319
  @overload
320
320
  def client(
321
321
  service_name: Literal["iam"],
322
- region_name: Optional[str] = None,
323
- endpoint_url: Optional[str] = None,
324
- config: Optional[Config] = None,
322
+ region_name: str | None = None,
323
+ endpoint_url: str | None = None,
324
+ config: Config | None = None,
325
325
  ) -> "IAMClient": ...
326
326
  @overload
327
327
  def client(
328
328
  service_name: Literal["s3"],
329
- region_name: Optional[str] = None,
330
- endpoint_url: Optional[str] = None,
331
- config: Optional[Config] = None,
329
+ region_name: str | None = None,
330
+ endpoint_url: str | None = None,
331
+ config: Config | None = None,
332
332
  ) -> "S3Client": ...
333
333
  @overload
334
334
  def client(
335
335
  service_name: Literal["sts"],
336
- region_name: Optional[str] = None,
337
- endpoint_url: Optional[str] = None,
338
- config: Optional[Config] = None,
336
+ region_name: str | None = None,
337
+ endpoint_url: str | None = None,
338
+ config: Config | None = None,
339
339
  ) -> "STSClient": ...
340
340
  @overload
341
341
  def client(
342
342
  service_name: Literal["sdb"],
343
- region_name: Optional[str] = None,
344
- endpoint_url: Optional[str] = None,
345
- config: Optional[Config] = None,
343
+ region_name: str | None = None,
344
+ endpoint_url: str | None = None,
345
+ config: Config | None = None,
346
346
  ) -> "SimpleDBClient": ...
347
347
  @overload
348
348
  def client(
349
349
  service_name: Literal["autoscaling"],
350
- region_name: Optional[str] = None,
351
- endpoint_url: Optional[str] = None,
352
- config: Optional[Config] = None,
350
+ region_name: str | None = None,
351
+ endpoint_url: str | None = None,
352
+ config: Config | None = None,
353
353
  ) -> "AutoScalingClient": ...
354
354
 
355
355
 
356
356
  def client(
357
357
  service_name: Literal["ec2", "iam", "s3", "sts", "sdb", "autoscaling"],
358
- region_name: Optional[str] = None,
359
- endpoint_url: Optional[str] = None,
360
- config: Optional[Config] = None,
358
+ region_name: str | None = None,
359
+ endpoint_url: str | None = None,
360
+ config: Config | None = None,
361
361
  ) -> botocore.client.BaseClient:
362
362
  """
363
363
  Get a Boto 3 client for a particular AWS service, usable by the current thread.
@@ -374,31 +374,31 @@ def client(
374
374
  @overload
375
375
  def resource(
376
376
  service_name: Literal["s3"],
377
- region_name: Optional[str] = None,
378
- endpoint_url: Optional[str] = None,
379
- config: Optional[Config] = None,
377
+ region_name: str | None = None,
378
+ endpoint_url: str | None = None,
379
+ config: Config | None = None,
380
380
  ) -> "S3ServiceResource": ...
381
381
  @overload
382
382
  def resource(
383
383
  service_name: Literal["iam"],
384
- region_name: Optional[str] = None,
385
- endpoint_url: Optional[str] = None,
386
- config: Optional[Config] = None,
384
+ region_name: str | None = None,
385
+ endpoint_url: str | None = None,
386
+ config: Config | None = None,
387
387
  ) -> "IAMServiceResource": ...
388
388
  @overload
389
389
  def resource(
390
390
  service_name: Literal["ec2"],
391
- region_name: Optional[str] = None,
392
- endpoint_url: Optional[str] = None,
393
- config: Optional[Config] = None,
391
+ region_name: str | None = None,
392
+ endpoint_url: str | None = None,
393
+ config: Config | None = None,
394
394
  ) -> "EC2ServiceResource": ...
395
395
 
396
396
 
397
397
  def resource(
398
398
  service_name: Literal["s3", "iam", "ec2"],
399
- region_name: Optional[str] = None,
400
- endpoint_url: Optional[str] = None,
401
- config: Optional[Config] = None,
399
+ region_name: str | None = None,
400
+ endpoint_url: str | None = None,
401
+ config: Config | None = None,
402
402
  ) -> boto3.resources.base.ServiceResource:
403
403
  """
404
404
  Get a Boto 3 resource for a particular AWS service, usable by the current thread.
toil/lib/aws/utils.py CHANGED
@@ -15,20 +15,11 @@ import errno
15
15
  import logging
16
16
  import os
17
17
  import socket
18
- from collections.abc import Iterable, Iterator
19
- from typing import (
20
- TYPE_CHECKING,
21
- Any,
22
- Callable,
23
- ContextManager,
24
- Literal,
25
- Optional,
26
- Union,
27
- cast,
28
- )
29
- from urllib.parse import ParseResult, urlparse
18
+ from collections.abc import Callable, Iterable, Iterator
19
+ from typing import TYPE_CHECKING, Any, ContextManager, Literal, cast
20
+ from urllib.parse import ParseResult
30
21
 
31
- # To import toil.lib.aws.session, the AWS libraries must be installed
22
+ # To import toil.lib.aws.session, the AWS libraries must be installed
32
23
  from toil.lib.aws import AWSRegionName, AWSServerErrors, session
33
24
  from toil.lib.conversions import strtobool
34
25
  from toil.lib.memoize import memoize
@@ -73,7 +64,7 @@ THROTTLED_ERROR_CODES = [
73
64
 
74
65
  @retry(errors=[AWSServerErrors])
75
66
  def delete_sdb_domain(
76
- sdb_domain_name: str, region: Optional[str] = None, quiet: bool = True
67
+ sdb_domain_name: str, region: str | None = None, quiet: bool = True
77
68
  ) -> None:
78
69
  sdb_client = session.client("sdb", region_name=region)
79
70
  sdb_client.delete_domain(DomainName=sdb_domain_name)
@@ -168,7 +159,9 @@ def delete_s3_bucket(
168
159
  s3_resource.Bucket(bucket).delete()
169
160
  # S3 bucket deletion is only eventually-consistent. See
170
161
  # <https://docs.aws.amazon.com/AmazonS3/latest/userguide/delete-bucket.html>
171
- printq(f"\n * S3 bucket successfully scheduled for deletion: {bucket}\n\n", quiet)
162
+ printq(
163
+ f"\n * S3 bucket successfully scheduled for deletion: {bucket}\n\n", quiet
164
+ )
172
165
  except s3_resource.meta.client.exceptions.NoSuchBucket:
173
166
  printq(f"\n * S3 bucket no longer exists: {bucket}\n\n", quiet)
174
167
 
@@ -242,9 +235,9 @@ class NoBucketLocationError(Exception):
242
235
 
243
236
  def get_bucket_region(
244
237
  bucket_name: str,
245
- endpoint_url: Optional[str] = None,
246
- only_strategies: Optional[set[int]] = None,
247
- anonymous: Optional[bool] = None
238
+ endpoint_url: str | None = None,
239
+ only_strategies: set[int] | None = None,
240
+ anonymous: bool | None = None,
248
241
  ) -> str:
249
242
  """
250
243
  Get the AWS region name associated with the given S3 bucket, or raise NoBucketLocationError.
@@ -262,7 +255,7 @@ def get_bucket_region(
262
255
  config = session.ANONYMOUS_CONFIG if anonymous else None
263
256
  s3_client = session.client("s3", endpoint_url=endpoint_url, config=config)
264
257
 
265
- def attempt_get_bucket_location() -> Optional[str]:
258
+ def attempt_get_bucket_location() -> str | None:
266
259
  """
267
260
  Try and get the bucket location from the normal API call.
268
261
  """
@@ -270,7 +263,7 @@ def get_bucket_region(
270
263
  "LocationConstraint", None
271
264
  )
272
265
 
273
- def attempt_get_bucket_location_from_us_east_1() -> Optional[str]:
266
+ def attempt_get_bucket_location_from_us_east_1() -> str | None:
274
267
  """
275
268
  Try and get the bucket location from the normal API call, but against us-east-1
276
269
  """
@@ -289,7 +282,7 @@ def get_bucket_region(
289
282
  "LocationConstraint", None
290
283
  )
291
284
 
292
- def attempt_head_bucket() -> Optional[str]:
285
+ def attempt_head_bucket() -> str | None:
293
286
  """
294
287
  Try and get the bucket location from calling HeadBucket and inspecting
295
288
  the headers.
@@ -303,7 +296,7 @@ def get_bucket_region(
303
296
 
304
297
  # Compose a list of strategies we want to try in order, which may work.
305
298
  # None is an acceptable return type that actually means something.
306
- strategies: list[Callable[[], Optional[str]]] = []
299
+ strategies: list[Callable[[], str | None]] = []
307
300
  strategies.append(attempt_get_bucket_location)
308
301
  if not endpoint_url:
309
302
  # We should only try to talk to us-east-1 if we don't have a custom
@@ -338,25 +331,30 @@ def get_bucket_region(
338
331
  raise
339
332
  except KeyError as e:
340
333
  # If we get a weird head response we will have a KeyError
341
- logger.debug("Strategy %d to get bucket location did not work: %s", i + 1, e)
334
+ logger.debug(
335
+ "Strategy %d to get bucket location did not work: %s", i + 1, e
336
+ )
342
337
  error_logs.append((i + 1, str(e)))
343
338
  last_error = e
344
339
 
345
340
  error_messages = []
346
341
  for rank, message in error_logs:
347
- error_messages.append(f"Strategy {rank} failed to get bucket location because: {message}")
342
+ error_messages.append(
343
+ f"Strategy {rank} failed to get bucket location because: {message}"
344
+ )
348
345
  # If we get here we ran out of attempts.
349
346
  raise NoBucketLocationError(
350
347
  "Could not get bucket location: " + "\n".join(error_messages)
351
348
  ) from last_error
352
349
 
350
+
353
351
  @memoize
354
352
  def get_bucket_region_if_available(
355
353
  bucket_name: str,
356
- endpoint_url: Optional[str] = None,
357
- only_strategies: Optional[set[int]] = None,
358
- anonymous: Optional[bool] = None
359
- ) -> Optional[str]:
354
+ endpoint_url: str | None = None,
355
+ only_strategies: set[int] | None = None,
356
+ anonymous: bool | None = None,
357
+ ) -> str | None:
360
358
  """
361
359
  Get the AWS region name associated with the given S3 bucket, or return None.
362
360
 
@@ -369,21 +367,26 @@ def get_bucket_region_if_available(
369
367
  try:
370
368
  return get_bucket_region(bucket_name, endpoint_url, only_strategies, anonymous)
371
369
  except Exception as e:
372
- if isinstance(e, NoBucketLocationError) or (isinstance(e, ClientError) and get_error_status(e) == 403):
370
+ if isinstance(e, NoBucketLocationError) or (
371
+ isinstance(e, ClientError) and get_error_status(e) == 403
372
+ ):
373
373
  # We can't know
374
374
  return None
375
375
  else:
376
376
  raise
377
377
 
378
+
378
379
  def region_to_bucket_location(region: str) -> str:
379
380
  return "" if region == "us-east-1" else region
380
381
 
381
382
 
382
- def bucket_location_to_region(location: Optional[str]) -> str:
383
+ def bucket_location_to_region(location: str | None) -> str:
383
384
  return "us-east-1" if location == "" or location is None else location
384
385
 
385
386
 
386
- def get_object_for_url(url: ParseResult, existing: Optional[bool] = None, anonymous: Optional[bool] = None) -> "S3Object":
387
+ def get_object_for_url(
388
+ url: ParseResult, existing: bool | None = None, anonymous: bool | None = None
389
+ ) -> "S3Object":
387
390
  """
388
391
  Extracts a key (object) from a given parsed s3:// URL.
389
392
 
@@ -400,11 +403,11 @@ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None, anonym
400
403
  bucket_name = url.netloc
401
404
 
402
405
  # Decide if we need to override Boto's built-in URL here.
403
- endpoint_url: Optional[str] = None
406
+ endpoint_url: str | None = None
404
407
  host = os.environ.get("TOIL_S3_HOST", None)
405
408
  port = os.environ.get("TOIL_S3_PORT", None)
406
409
  protocol = "https"
407
- if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
410
+ if strtobool(os.environ.get("TOIL_S3_USE_SSL", "True")) is False:
408
411
  protocol = "http"
409
412
  if host:
410
413
  endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
@@ -412,13 +415,17 @@ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None, anonym
412
415
  # TODO: OrdinaryCallingFormat equivalent in boto3?
413
416
  # if botoargs:
414
417
  # botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
415
-
418
+
416
419
  config = session.ANONYMOUS_CONFIG if anonymous else None
417
420
  # Get the bucket's region to avoid a redirect per request.
418
421
  # Cache the result
419
- region = get_bucket_region_if_available(bucket_name, endpoint_url=endpoint_url, anonymous=anonymous)
422
+ region = get_bucket_region_if_available(
423
+ bucket_name, endpoint_url=endpoint_url, anonymous=anonymous
424
+ )
420
425
  if region is not None:
421
- s3 = session.resource("s3", region_name=region, endpoint_url=endpoint_url, config=config)
426
+ s3 = session.resource(
427
+ "s3", region_name=region, endpoint_url=endpoint_url, config=config
428
+ )
422
429
  else:
423
430
  # We can't get the bucket location, perhaps because we don't have
424
431
  # permission to do that.
@@ -461,7 +468,7 @@ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None, anonym
461
468
 
462
469
 
463
470
  @retry(errors=[AWSServerErrors])
464
- def list_objects_for_url(url: ParseResult, anonymous: Optional[bool] = None) -> list[str]:
471
+ def list_objects_for_url(url: ParseResult, anonymous: bool | None = None) -> list[str]:
465
472
  """
466
473
  Extracts a key (object) from a given parsed s3:// URL. The URL will be
467
474
  supplemented with a trailing slash if it is missing.
@@ -478,20 +485,20 @@ def list_objects_for_url(url: ParseResult, anonymous: Optional[bool] = None) ->
478
485
 
479
486
  # Decide if we need to override Boto's built-in URL here.
480
487
  # TODO: Deduplicate with get_object_for_url, or push down into session module
481
- endpoint_url: Optional[str] = None
488
+ endpoint_url: str | None = None
482
489
  host = os.environ.get("TOIL_S3_HOST", None)
483
490
  port = os.environ.get("TOIL_S3_PORT", None)
484
491
  protocol = "https"
485
- if strtobool(os.environ.get("TOIL_S3_USE_SSL", 'True')) is False:
492
+ if strtobool(os.environ.get("TOIL_S3_USE_SSL", "True")) is False:
486
493
  protocol = "http"
487
494
  if host:
488
495
  endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
489
-
496
+
490
497
  config = session.ANONYMOUS_CONFIG if anonymous else None
491
498
  client = session.client("s3", endpoint_url=endpoint_url, config=config)
492
499
 
493
500
  listing = []
494
-
501
+
495
502
  try:
496
503
  paginator = client.get_paginator("list_objects_v2")
497
504
  result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter="/")
@@ -513,12 +520,13 @@ def list_objects_for_url(url: ParseResult, anonymous: Optional[bool] = None) ->
513
520
  else:
514
521
  raise
515
522
 
516
-
517
523
  logger.debug("Found in %s items: %s", url, listing)
518
524
  return listing
519
525
 
520
526
 
521
- def flatten_tags(tags: dict[str, str]) -> list[dict[Union[Literal["Key"], Literal["Value"]], str]]:
527
+ def flatten_tags(
528
+ tags: dict[str, str],
529
+ ) -> list[dict[Literal["Key"] | Literal["Value"], str]]:
522
530
  """
523
531
  Convert tags from a key to value dict into a list of 'Key': xxx, 'Value': xxx dicts.
524
532
  """
toil/lib/checksum.py CHANGED
@@ -11,11 +11,10 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- import logging
15
14
  import hashlib
16
-
15
+ import logging
17
16
  from io import BytesIO
18
- from typing import BinaryIO, Union, List, TYPE_CHECKING
17
+ from typing import TYPE_CHECKING, BinaryIO
19
18
 
20
19
  from toil.lib.aws.config import S3_PART_SIZE
21
20
 
@@ -33,16 +32,17 @@ class ChecksumError(Exception):
33
32
 
34
33
  class Etag:
35
34
  """A hasher for s3 etags."""
35
+
36
36
  def __init__(self, chunk_size: int) -> None:
37
37
  self.etag_bytes: int = 0
38
- self.etag_parts: List[bytes] = []
38
+ self.etag_parts: list[bytes] = []
39
39
  self.etag_hasher: "_Hash" = hashlib.md5()
40
40
  self.chunk_size: int = chunk_size
41
41
 
42
42
  def update(self, chunk: bytes) -> None:
43
43
  if self.etag_bytes + len(chunk) > self.chunk_size:
44
- chunk_head = chunk[:self.chunk_size - self.etag_bytes]
45
- chunk_tail = chunk[self.chunk_size - self.etag_bytes:]
44
+ chunk_head = chunk[: self.chunk_size - self.etag_bytes]
45
+ chunk_tail = chunk[self.chunk_size - self.etag_bytes :]
46
46
  self.etag_hasher.update(chunk_head)
47
47
  self.etag_parts.append(self.etag_hasher.digest())
48
48
  self.etag_hasher = hashlib.md5()
@@ -58,31 +58,35 @@ class Etag:
58
58
  self.etag_bytes = 0
59
59
  if len(self.etag_parts) > 1:
60
60
  etag = hashlib.md5(b"".join(self.etag_parts)).hexdigest()
61
- return f'{etag}-{len(self.etag_parts)}'
61
+ return f"{etag}-{len(self.etag_parts)}"
62
62
  else:
63
63
  return self.etag_hasher.hexdigest()
64
64
 
65
65
 
66
- hashers = {'sha1': hashlib.sha1(),
67
- 'sha256': hashlib.sha256(),
68
- 'etag': Etag(chunk_size=S3_PART_SIZE)}
66
+ hashers = {
67
+ "sha1": hashlib.sha1(),
68
+ "sha256": hashlib.sha256(),
69
+ "etag": Etag(chunk_size=S3_PART_SIZE),
70
+ }
69
71
 
70
72
 
71
- def compute_checksum_for_file(local_file_path: str, algorithm: str = 'sha1') -> str:
72
- with open(local_file_path, 'rb') as fh:
73
+ def compute_checksum_for_file(local_file_path: str, algorithm: str = "sha1") -> str:
74
+ with open(local_file_path, "rb") as fh:
73
75
  checksum_result = compute_checksum_for_content(fh, algorithm=algorithm)
74
76
  return checksum_result
75
77
 
76
78
 
77
- def compute_checksum_for_content(fh: Union[BinaryIO, BytesIO], algorithm: str = 'sha1') -> str:
79
+ def compute_checksum_for_content(
80
+ fh: BinaryIO | BytesIO, algorithm: str = "sha1"
81
+ ) -> str:
78
82
  """
79
83
  Note: Chunk size matters for s3 etags, and must be the same to get the same hash from the same object.
80
84
  Therefore this buffer is not modifiable throughout Toil.
81
85
  """
82
86
  hasher: "_Hash" = hashers[algorithm] # type: ignore
83
87
  contents = fh.read(S3_PART_SIZE)
84
- while contents != b'':
88
+ while contents != b"":
85
89
  hasher.update(contents)
86
90
  contents = fh.read(S3_PART_SIZE)
87
91
 
88
- return f'{algorithm}${hasher.hexdigest()}'
92
+ return f"{algorithm}${hasher.hexdigest()}"
toil/lib/compatibility.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import functools
2
2
  import warnings
3
- from typing import Any, Callable, Union
3
+ from collections.abc import Callable
4
+ from typing import Any
4
5
 
5
6
 
6
7
  def deprecated(new_function_name: str) -> Callable[..., Any]:
@@ -18,7 +19,7 @@ def deprecated(new_function_name: str) -> Callable[..., Any]:
18
19
  return decorate
19
20
 
20
21
 
21
- def compat_bytes(s: Union[bytes, str]) -> str:
22
+ def compat_bytes(s: bytes | str) -> str:
22
23
  return s.decode("utf-8") if isinstance(s, bytes) else s
23
24
 
24
25