toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl

This diff compares the contents of two package versions as published to their public registry. It is provided for informational purposes only.
Files changed (197)
  1. toil/__init__.py +124 -86
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +137 -77
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
  5. toil/batchSystems/awsBatch.py +237 -128
  6. toil/batchSystems/cleanup_support.py +22 -16
  7. toil/batchSystems/contained_executor.py +30 -26
  8. toil/batchSystems/gridengine.py +85 -49
  9. toil/batchSystems/htcondor.py +164 -87
  10. toil/batchSystems/kubernetes.py +622 -386
  11. toil/batchSystems/local_support.py +17 -12
  12. toil/batchSystems/lsf.py +132 -79
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +288 -149
  16. toil/batchSystems/mesos/executor.py +77 -49
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +39 -29
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +293 -123
  21. toil/batchSystems/slurm.py +651 -155
  22. toil/batchSystems/torque.py +46 -32
  23. toil/bus.py +141 -73
  24. toil/common.py +784 -397
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1137 -534
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +62 -41
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +88 -57
  32. toil/fileStores/cachingFileStore.py +711 -247
  33. toil/fileStores/nonCachingFileStore.py +113 -75
  34. toil/job.py +1031 -349
  35. toil/jobStores/abstractJobStore.py +387 -243
  36. toil/jobStores/aws/jobStore.py +772 -412
  37. toil/jobStores/aws/utils.py +161 -109
  38. toil/jobStores/conftest.py +1 -0
  39. toil/jobStores/fileJobStore.py +289 -151
  40. toil/jobStores/googleJobStore.py +137 -70
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +614 -269
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +55 -28
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +204 -58
  49. toil/lib/aws/utils.py +290 -213
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +83 -49
  53. toil/lib/docker.py +131 -103
  54. toil/lib/dockstore.py +379 -0
  55. toil/lib/ec2.py +322 -209
  56. toil/lib/ec2nodes.py +174 -105
  57. toil/lib/encryption/_dummy.py +5 -3
  58. toil/lib/encryption/_nacl.py +10 -6
  59. toil/lib/encryption/conftest.py +1 -0
  60. toil/lib/exceptions.py +26 -7
  61. toil/lib/expando.py +4 -2
  62. toil/lib/ftp_utils.py +217 -0
  63. toil/lib/generatedEC2Lists.py +127 -19
  64. toil/lib/history.py +1271 -0
  65. toil/lib/history_submission.py +681 -0
  66. toil/lib/humanize.py +6 -2
  67. toil/lib/io.py +121 -12
  68. toil/lib/iterables.py +4 -2
  69. toil/lib/memoize.py +12 -8
  70. toil/lib/misc.py +83 -18
  71. toil/lib/objects.py +2 -2
  72. toil/lib/resources.py +19 -7
  73. toil/lib/retry.py +125 -87
  74. toil/lib/threading.py +282 -80
  75. toil/lib/throttle.py +15 -14
  76. toil/lib/trs.py +390 -0
  77. toil/lib/web.py +38 -0
  78. toil/options/common.py +850 -402
  79. toil/options/cwl.py +185 -90
  80. toil/options/runner.py +50 -0
  81. toil/options/wdl.py +70 -19
  82. toil/provisioners/__init__.py +111 -46
  83. toil/provisioners/abstractProvisioner.py +322 -157
  84. toil/provisioners/aws/__init__.py +62 -30
  85. toil/provisioners/aws/awsProvisioner.py +980 -627
  86. toil/provisioners/clusterScaler.py +541 -279
  87. toil/provisioners/gceProvisioner.py +283 -180
  88. toil/provisioners/node.py +147 -79
  89. toil/realtimeLogger.py +34 -22
  90. toil/resource.py +137 -75
  91. toil/server/app.py +127 -61
  92. toil/server/celery_app.py +3 -1
  93. toil/server/cli/wes_cwl_runner.py +84 -55
  94. toil/server/utils.py +56 -31
  95. toil/server/wes/abstract_backend.py +64 -26
  96. toil/server/wes/amazon_wes_utils.py +21 -15
  97. toil/server/wes/tasks.py +121 -63
  98. toil/server/wes/toil_backend.py +142 -107
  99. toil/server/wsgi_app.py +4 -3
  100. toil/serviceManager.py +58 -22
  101. toil/statsAndLogging.py +183 -65
  102. toil/test/__init__.py +263 -179
  103. toil/test/batchSystems/batchSystemTest.py +438 -195
  104. toil/test/batchSystems/batch_system_plugin_test.py +18 -7
  105. toil/test/batchSystems/test_gridengine.py +173 -0
  106. toil/test/batchSystems/test_lsf_helper.py +67 -58
  107. toil/test/batchSystems/test_slurm.py +265 -49
  108. toil/test/cactus/test_cactus_integration.py +20 -22
  109. toil/test/cwl/conftest.py +39 -0
  110. toil/test/cwl/cwlTest.py +375 -72
  111. toil/test/cwl/measure_default_memory.cwl +12 -0
  112. toil/test/cwl/not_run_required_input.cwl +29 -0
  113. toil/test/cwl/optional-file.cwl +18 -0
  114. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  115. toil/test/docs/scriptsTest.py +60 -34
  116. toil/test/jobStores/jobStoreTest.py +412 -235
  117. toil/test/lib/aws/test_iam.py +116 -48
  118. toil/test/lib/aws/test_s3.py +16 -9
  119. toil/test/lib/aws/test_utils.py +5 -6
  120. toil/test/lib/dockerTest.py +118 -141
  121. toil/test/lib/test_conversions.py +113 -115
  122. toil/test/lib/test_ec2.py +57 -49
  123. toil/test/lib/test_history.py +212 -0
  124. toil/test/lib/test_misc.py +12 -5
  125. toil/test/lib/test_trs.py +161 -0
  126. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  127. toil/test/mesos/helloWorld.py +7 -6
  128. toil/test/mesos/stress.py +25 -20
  129. toil/test/options/options.py +7 -2
  130. toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
  131. toil/test/provisioners/clusterScalerTest.py +440 -250
  132. toil/test/provisioners/clusterTest.py +81 -42
  133. toil/test/provisioners/gceProvisionerTest.py +174 -100
  134. toil/test/provisioners/provisionerTest.py +25 -13
  135. toil/test/provisioners/restartScript.py +5 -4
  136. toil/test/server/serverTest.py +188 -141
  137. toil/test/sort/restart_sort.py +137 -68
  138. toil/test/sort/sort.py +134 -66
  139. toil/test/sort/sortTest.py +91 -49
  140. toil/test/src/autoDeploymentTest.py +140 -100
  141. toil/test/src/busTest.py +20 -18
  142. toil/test/src/checkpointTest.py +8 -2
  143. toil/test/src/deferredFunctionTest.py +49 -35
  144. toil/test/src/dockerCheckTest.py +33 -26
  145. toil/test/src/environmentTest.py +20 -10
  146. toil/test/src/fileStoreTest.py +538 -271
  147. toil/test/src/helloWorldTest.py +7 -4
  148. toil/test/src/importExportFileTest.py +61 -31
  149. toil/test/src/jobDescriptionTest.py +32 -17
  150. toil/test/src/jobEncapsulationTest.py +2 -0
  151. toil/test/src/jobFileStoreTest.py +74 -50
  152. toil/test/src/jobServiceTest.py +187 -73
  153. toil/test/src/jobTest.py +120 -70
  154. toil/test/src/miscTests.py +19 -18
  155. toil/test/src/promisedRequirementTest.py +82 -36
  156. toil/test/src/promisesTest.py +7 -6
  157. toil/test/src/realtimeLoggerTest.py +6 -6
  158. toil/test/src/regularLogTest.py +71 -37
  159. toil/test/src/resourceTest.py +80 -49
  160. toil/test/src/restartDAGTest.py +36 -22
  161. toil/test/src/resumabilityTest.py +9 -2
  162. toil/test/src/retainTempDirTest.py +45 -14
  163. toil/test/src/systemTest.py +12 -8
  164. toil/test/src/threadingTest.py +44 -25
  165. toil/test/src/toilContextManagerTest.py +10 -7
  166. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  167. toil/test/src/workerTest.py +33 -16
  168. toil/test/utils/toilDebugTest.py +70 -58
  169. toil/test/utils/toilKillTest.py +4 -5
  170. toil/test/utils/utilsTest.py +239 -102
  171. toil/test/wdl/wdltoil_test.py +789 -148
  172. toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
  173. toil/toilState.py +52 -26
  174. toil/utils/toilConfig.py +13 -4
  175. toil/utils/toilDebugFile.py +44 -27
  176. toil/utils/toilDebugJob.py +85 -25
  177. toil/utils/toilDestroyCluster.py +11 -6
  178. toil/utils/toilKill.py +8 -3
  179. toil/utils/toilLaunchCluster.py +251 -145
  180. toil/utils/toilMain.py +37 -16
  181. toil/utils/toilRsyncCluster.py +27 -14
  182. toil/utils/toilSshCluster.py +45 -22
  183. toil/utils/toilStats.py +75 -36
  184. toil/utils/toilStatus.py +226 -119
  185. toil/utils/toilUpdateEC2Instances.py +3 -1
  186. toil/version.py +6 -6
  187. toil/wdl/utils.py +5 -5
  188. toil/wdl/wdltoil.py +3528 -1053
  189. toil/worker.py +370 -149
  190. toil-8.1.0b1.dist-info/METADATA +178 -0
  191. toil-8.1.0b1.dist-info/RECORD +259 -0
  192. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
  193. toil-7.0.0.dist-info/METADATA +0 -158
  194. toil-7.0.0.dist-info/RECORD +0 -244
  195. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
  196. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
  197. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/batchSystems/awsBatch.py +237 -128

@@ -34,15 +34,18 @@ import tempfile
 import time
 import uuid
 from argparse import ArgumentParser, _ArgumentGroup
-from typing import Any, Dict, Iterator, List, Optional, Set, Union
+from collections.abc import Iterator
+from typing import Any, Optional, Union
 
 from botocore.exceptions import ClientError
 
 from toil import applianceSelf
-from toil.batchSystems.abstractBatchSystem import (EXIT_STATUS_UNAVAILABLE_VALUE,
-                                                   BatchJobExitReason,
-                                                   InsufficientSystemResources,
-                                                   UpdatedBatchJobInfo)
+from toil.batchSystems.abstractBatchSystem import (
+    EXIT_STATUS_UNAVAILABLE_VALUE,
+    BatchJobExitReason,
+    InsufficientSystemResources,
+    UpdatedBatchJobInfo,
+)
 from toil.batchSystems.cleanup_support import BatchSystemCleanupSupport
 from toil.batchSystems.contained_executor import pack_job
 from toil.batchSystems.options import OptionSetter
@@ -60,9 +63,9 @@ logger = logging.getLogger(__name__)
 
 
 # Map from AWS Batch terminal states to Toil batch job exit reasons
-STATE_TO_EXIT_REASON: Dict[str, BatchJobExitReason] = {
-    'SUCCEEDED': BatchJobExitReason.FINISHED,
-    'FAILED': BatchJobExitReason.FAILED
+STATE_TO_EXIT_REASON: dict[str, BatchJobExitReason] = {
+    "SUCCEEDED": BatchJobExitReason.FINISHED,
+    "FAILED": BatchJobExitReason.FAILED,
 }
 
 # What's the max polling list size?
@@ -73,53 +76,62 @@ MIN_REQUESTABLE_MIB = 4
 # AWS batch won't accept API requests asking for less than this many CPUs.
 MIN_REQUESTABLE_CORES = 1
 
+
 class AWSBatchBatchSystem(BatchSystemCleanupSupport):
     @classmethod
     def supportsAutoDeployment(cls) -> bool:
         return True
 
-    def __init__(self, config: Config, maxCores: float, maxMemory: int, maxDisk: int) -> None:
+    def __init__(
+        self, config: Config, maxCores: float, maxMemory: int, maxDisk: int
+    ) -> None:
         super().__init__(config, maxCores, maxMemory, maxDisk)
 
         # Determine region to use.
         # Either it's set specifically or maybe we can get it from the "best" zone.
         # TODO: Parse it from a full queue ARN?
-        self.region = getattr(config, 'aws_batch_region')
+        self.region = getattr(config, "aws_batch_region")
         if self.region is None:
             self.region = get_current_aws_region()
             if self.region is None:
                 # Can't proceed without a real region
-                raise RuntimeError('To use AWS Batch, specify --awsBatchRegion or '
-                                   'TOIL_AWS_REGION or TOIL_AWS_ZONE, or configure '
-                                   'a default zone in boto')
+                raise RuntimeError(
+                    "To use AWS Batch, specify --awsBatchRegion or "
+                    "TOIL_AWS_REGION or TOIL_AWS_ZONE, or configure "
+                    "a default zone in boto"
+                )
 
         # Connect to AWS Batch.
         # TODO: Use a global AWSConnectionManager so we can share a client
         # cache with provisioners, etc.
-        self.client = establish_boto3_session(self.region).client('batch')
+        self.client = establish_boto3_session(self.region).client("batch")
 
         # Determine our batch queue
-        self.queue = getattr(config, 'aws_batch_queue')
+        self.queue = getattr(config, "aws_batch_queue")
         if self.queue is None:
             # Make sure we actually have a queue
-            raise RuntimeError("To use AWS Batch, --awsBatchQueue or TOIL_AWS_BATCH_QUEUE must be set")
+            raise RuntimeError(
+                "To use AWS Batch, --awsBatchQueue or TOIL_AWS_BATCH_QUEUE must be set"
+            )
         # And the role, if any, jobs should assume
-        self.job_role_arn = getattr(config, 'aws_batch_job_role_arn')
+        self.job_role_arn = getattr(config, "aws_batch_job_role_arn")
         # And the Owner tag value, if any, to apply to things we create
-        self.owner_tag = os.environ.get('TOIL_OWNER_TAG')
+        self.owner_tag = os.environ.get("TOIL_OWNER_TAG")
 
         # Try and guess what Toil work dir the workers will use.
         # We need to be able to provision (possibly shared) space there.
         # TODO: Deduplicate with Kubernetes batch system.
        self.worker_work_dir = Toil.getToilWorkDir(config.workDir)
-        if (config.workDir is None and
-            os.getenv('TOIL_WORKDIR') is None and
-            self.worker_work_dir == tempfile.gettempdir()):
+        if (
+            config.workDir is None
+            and os.getenv("TOIL_WORKDIR") is None
+            and self.worker_work_dir == tempfile.gettempdir()
+        ):
 
             # We defaulted to the system temp directory. But we think the
             # worker Dockerfiles will make them use /var/lib/toil instead.
             # TODO: Keep this in sync with the Dockerfile.
-            self.worker_work_dir = '/var/lib/toil'
+            self.worker_work_dir = "/var/lib/toil"
 
         # We assign job names based on a numerical job ID. This functionality
         # is managed by the BatchSystemLocalSupport.
@@ -136,27 +148,39 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
         self.job_definition: Optional[str] = None
 
         # We need a way to map between our batch system ID numbers, and AWS Batch job IDs from the server.
-        self.bs_id_to_aws_id: Dict[int, str] = {}
-        self.aws_id_to_bs_id: Dict[str, int] = {}
+        self.bs_id_to_aws_id: dict[int, str] = {}
+        self.aws_id_to_bs_id: dict[str, int] = {}
         # We need to track if jobs were killed so they don't come out as updated
-        self.killed_job_aws_ids: Set[str] = set()
+        self.killed_job_aws_ids: set[str] = set()
 
     def setUserScript(self, user_script: Resource) -> None:
-        logger.debug(f'Setting user script for deployment: {user_script}')
+        logger.debug(f"Setting user script for deployment: {user_script}")
         self.user_script = user_script
 
     # setEnv is provided by BatchSystemSupport, updates self.environment
 
     def _check_accelerator_request(self, requirer: Requirer) -> None:
         for accelerator in requirer.accelerators:
-            if accelerator['kind'] != 'gpu' or accelerator.get('brand', 'nvidia') != 'nvidia':
+            if (
+                accelerator["kind"] != "gpu"
+                or accelerator.get("brand", "nvidia") != "nvidia"
+            ):
                 # We can only provide GPUs, and of those only nvidia ones.
-                raise InsufficientSystemResources(requirer, 'accelerators', details=[
-                    f'The accelerator {accelerator} could not be provided.',
-                    'AWS Batch can only provide nvidia gpu accelerators.'
-                ])
-
-    def issueBatchJob(self, command: str, job_desc: JobDescription, job_environment: Optional[Dict[str, str]] = None) -> int:
+                raise InsufficientSystemResources(
+                    requirer,
+                    "accelerators",
+                    details=[
+                        f"The accelerator {accelerator} could not be provided.",
+                        "AWS Batch can only provide nvidia gpu accelerators.",
+                    ],
+                )
+
+    def issueBatchJob(
+        self,
+        command: str,
+        job_desc: JobDescription,
+        job_environment: Optional[dict[str, str]] = None,
+    ) -> int:
         # Try the job as local
         local_id = self.handleLocalJob(command, job_desc)
         if local_id is not None:
@@ -188,41 +212,54 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
 
             # Compose a job spec to submit
             job_spec = {
-                'jobName': job_name,
-                'jobQueue': self.queue,
-                'jobDefinition': self._get_or_create_job_definition(),
-                'containerOverrides': {
-                    'command': command_list,
-                    'environment': [{'name': k, 'value': v} for k, v in environment.items()],
-                    'resourceRequirements': [
-                        {'type': 'MEMORY', 'value': str(max(MIN_REQUESTABLE_MIB, math.ceil(b_to_mib(job_desc.memory))))},
-                        {'type': 'VCPU', 'value': str(max(MIN_REQUESTABLE_CORES, math.ceil(job_desc.cores)))}
-                    ]
-                }
+                "jobName": job_name,
+                "jobQueue": self.queue,
+                "jobDefinition": self._get_or_create_job_definition(),
+                "containerOverrides": {
+                    "command": command_list,
+                    "environment": [
+                        {"name": k, "value": v} for k, v in environment.items()
+                    ],
+                    "resourceRequirements": [
+                        {
+                            "type": "MEMORY",
+                            "value": str(
+                                max(
+                                    MIN_REQUESTABLE_MIB,
+                                    math.ceil(b_to_mib(job_desc.memory)),
+                                )
+                            ),
+                        },
+                        {
+                            "type": "VCPU",
+                            "value": str(
+                                max(MIN_REQUESTABLE_CORES, math.ceil(job_desc.cores))
+                            ),
+                        },
+                    ],
+                },
             }
             gpus_needed = 0
             for accelerator in job_desc.accelerators:
-                if accelerator['kind'] == 'gpu':
+                if accelerator["kind"] == "gpu":
                     # We just assume that all GPUs are equivalent when running
                     # on AWS Batch because there's no way to tell AWS Batch to
                     # send us to one or another.
-                    gpus_needed += accelerator['count']
+                    gpus_needed += accelerator["count"]
                # Other accelerators are rejected by check_resource_request
             if gpus_needed > 0:
                 # We need some GPUs so ask for them.
-                job_spec['containerOverrides']['resourceRequirements'].append({
-                    'type': 'GPU',
-                    'value': gpus_needed
-                })
+                job_spec["containerOverrides"]["resourceRequirements"].append(
+                    {"type": "GPU", "value": gpus_needed}
+                )
             if self.owner_tag:
                 # We are meant to tag everything with an owner
-                job_spec['tags'] = {'Owner': self.owner_tag}
-
+                job_spec["tags"] = {"Owner": self.owner_tag}
 
             # Launch it and get back the AWS ID that we can use to poll the task.
             # TODO: retry!
             response = self.client.submit_job(**job_spec)
-            aws_id = response['jobId']
+            aws_id = response["jobId"]
 
             # Tie it to the numeric ID
             self.bs_id_to_aws_id[bs_id] = aws_id
@@ -230,8 +267,10 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
 
             if self._outbox is not None:
                 # Specify relationship between toil batch ID and aws ID in message bus
-                self._outbox.publish(ExternalBatchIdMessage(bs_id, aws_id, self.__class__.__name__))
-            logger.debug('Launched job: %s', job_name)
+                self._outbox.publish(
+                    ExternalBatchIdMessage(bs_id, aws_id, self.__class__.__name__)
+                )
+            logger.debug("Launched job: %s", job_name)
 
             return bs_id
 
@@ -250,16 +289,16 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
         # Do replacements to enhance readability
         input_name = input_name.replace(" ", "-")
         # Keep only acceptable characters
-        kept_chars = [c for c in input_name if c.isalnum() or c == '-' or c == '_']
+        kept_chars = [c for c in input_name if c.isalnum() or c == "-" or c == "_"]
         if len(kept_chars) == 0 or not kept_chars[0].isalnum():
             # Make sure we start with something alphanumeric
-            kept_chars = ['j'] + kept_chars
+            kept_chars = ["j"] + kept_chars
         # Keep no more than the limit of them
         kept_chars = kept_chars[:128]
         # And re-compose them into a string
-        return ''.join(kept_chars)
+        return "".join(kept_chars)
 
-    def _get_runtime(self, job_detail: Dict[str, Any]) -> Optional[float]:
+    def _get_runtime(self, job_detail: dict[str, Any]) -> Optional[float]:
         """
         Internal function. Should not be called outside this class.
 
@@ -269,20 +308,25 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
         Takes an AWS JobDetail as a dict.
         """
 
-        if 'status' not in job_detail or job_detail['status'] not in ['STARTING', 'RUNNING', 'SUCCEEDED', 'FAILED']:
+        if "status" not in job_detail or job_detail["status"] not in [
+            "STARTING",
+            "RUNNING",
+            "SUCCEEDED",
+            "FAILED",
+        ]:
             # Job is not running yet.
             logger.info("Runtime unavailable because job is still waiting")
             return None
 
-        if 'startedAt' not in job_detail:
+        if "startedAt" not in job_detail:
             # Job has no known start time
             logger.info("Runtime unavailable because job has no start time")
             return None
 
-        start_ms = job_detail['startedAt']
+        start_ms = job_detail["startedAt"]
 
-        if 'stoppedAt' in job_detail:
-            end_ms = job_detail['stoppedAt']
+        if "stoppedAt" in job_detail:
+            end_ms = job_detail["stoppedAt"]
         else:
             end_ms = unix_now_ms()
 
@@ -291,7 +335,7 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
 
         # Return the time it has been running for.
         return runtime
 
-    def _get_exit_code(self, job_detail: Dict[str, Any]) -> int:
+    def _get_exit_code(self, job_detail: dict[str, Any]) -> int:
         """
         Internal function. Should not be called outside this class.
 
@@ -299,12 +343,18 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
         EXIT_STATUS_UNAVAILABLE_VALUE if it cannot be gotten.
         """
 
-        return int(job_detail.get('container', {}).get('exitCode', EXIT_STATUS_UNAVAILABLE_VALUE))
+        return int(
+            job_detail.get("container", {}).get(
+                "exitCode", EXIT_STATUS_UNAVAILABLE_VALUE
+            )
+        )
 
     def getUpdatedBatchJob(self, maxWait: int) -> Optional[UpdatedBatchJobInfo]:
         # Remember when we started, for respecting the timeout
         entry = datetime.datetime.now()
-        while ((datetime.datetime.now() - entry).total_seconds() < maxWait or not maxWait):
+        while (
+            datetime.datetime.now() - entry
+        ).total_seconds() < maxWait or not maxWait:
             result = self.getUpdatedLocalJob(0)
             if result:
                 return result
@@ -315,9 +365,9 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
                 acknowledged = []
 
                 for job_detail in self._describe_jobs_in_batches():
-                    if job_detail.get('status') in ['SUCCEEDED', 'FAILED']:
+                    if job_detail.get("status") in ["SUCCEEDED", "FAILED"]:
                         # This job is done!
-                        aws_id = job_detail['jobId']
+                        aws_id = job_detail["jobId"]
                         bs_id = self.aws_id_to_bs_id[aws_id]
 
                         # Acknowledge it
@@ -325,7 +375,7 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
 
                         if aws_id in self.killed_job_aws_ids:
                             # Killed jobs aren't allowed to appear as updated.
-                            logger.debug('Job %s was killed so skipping it', bs_id)
+                            logger.debug("Job %s was killed so skipping it", bs_id)
                             continue
 
                         # Otherwise, it stopped running and it wasn't our fault.
@@ -334,21 +384,33 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
                         runtime = self._get_runtime(job_detail)
 
                         # Determine if it succeeded
-                        exit_reason = STATE_TO_EXIT_REASON[job_detail['status']]
+                        exit_reason = STATE_TO_EXIT_REASON[job_detail["status"]]
 
                         # Get its exit code
                         exit_code = self._get_exit_code(job_detail)
 
-                        if job_detail['status'] == 'FAILED' and 'statusReason' in job_detail:
+                        if (
+                            job_detail["status"] == "FAILED"
+                            and "statusReason" in job_detail
+                        ):
                             # AWS knows why the job failed, so log the error
-                            logger.error('Job %s failed because: %s', bs_id, job_detail['statusReason'])
+                            logger.error(
+                                "Job %s failed because: %s",
+                                bs_id,
+                                job_detail["statusReason"],
+                            )
 
                         # Compose a result
-                        return UpdatedBatchJobInfo(jobID=bs_id, exitStatus=exit_code, wallTime=runtime, exitReason=exit_reason)
+                        return UpdatedBatchJobInfo(
+                            jobID=bs_id,
+                            exitStatus=exit_code,
+                            wallTime=runtime,
+                            exitReason=exit_reason,
+                        )
 
             finally:
                 # Drop all the records for tasks we acknowledged
-                for (aws_id, bs_id) in acknowledged:
+                for aws_id, bs_id in acknowledged:
                     del self.aws_id_to_bs_id[aws_id]
                     del self.bs_id_to_aws_id[bs_id]
                     if aws_id in self.killed_job_aws_ids:
@@ -357,7 +419,7 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
 
             if maxWait:
                 # Wait a bit and poll again
-                time.sleep(min(maxWait/2, 1.0))
+                time.sleep(min(maxWait / 2, 1.0))
             else:
                 # Only poll once
                 break
@@ -390,7 +452,7 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
        # later.
        self.killed_job_aws_ids.add(aws_id)
        # Kill the AWS Batch job
-        self.client.terminate_job(jobId=aws_id, reason='Killed by Toil')
+        self.client.terminate_job(jobId=aws_id, reason="Killed by Toil")
 
    @retry(errors=[ClientError])
    def _wait_until_stopped(self, aws_id: str) -> None:
@@ -406,16 +468,19 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
         while True:
             # Poll the job
             response = self.client.describe_jobs(jobs=[aws_id])
-            jobs = response.get('jobs', [])
+            jobs = response.get("jobs", [])
             if len(jobs) == 0:
                 # Job no longer exists at all
                 return
             job = jobs[0]
-            if job.get('status') and job['status'] in ['SUCCEEDED', 'FAILED']:
+            if job.get("status") and job["status"] in ["SUCCEEDED", "FAILED"]:
                 # The job has stopped
                 return
             # Otherwise the job is still going. Wait for it to stop.
-            logger.info('Waiting for killed job %s to stop', self.aws_id_to_bs_id.get(aws_id, aws_id))
+            logger.info(
+                "Waiting for killed job %s to stop",
+                self.aws_id_to_bs_id.get(aws_id, aws_id),
+            )
             time.sleep(2)
 
     @retry(errors=[ClientError])
@@ -429,56 +494,76 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
         if self.job_definition is None:
             # First work out what volume mounts to make, because the type
             # system is happiest this way
-            volumes: List[Dict[str, Union[str, Dict[str, str]]]] = []
-            mount_points: List[Dict[str, str]] = []
-            for i, shared_path in enumerate({
-                '/var/lib/toil',
-                '/var/lib/docker',
-                '/var/lib/cwl',
-                '/var/run/docker.sock',
-                '/var/run/user',
-                '/tmp',
-                self.worker_work_dir
-            }):
+            volumes: list[dict[str, Union[str, dict[str, str]]]] = []
+            mount_points: list[dict[str, str]] = []
+            for i, shared_path in enumerate(
+                {
+                    "/var/lib/toil",
+                    "/var/lib/docker",
+                    "/var/lib/cwl",
+                    "/var/run/docker.sock",
+                    "/var/run/user",
+                    "/tmp",
+                    self.worker_work_dir,
+                }
+            ):
                 # For every path we want to be the same on the host and the
                 # container, choose a name
-                vol_name = f'mnt{i}'
+                vol_name = f"mnt{i}"
                 # Make a volume for that path
-                volumes.append({'name': vol_name, 'host': {'sourcePath': shared_path}})
+                volumes.append({"name": vol_name, "host": {"sourcePath": shared_path}})
                 # Mount the volume at that path
-                mount_points.append({'containerPath': shared_path, 'sourceVolume': vol_name})
+                mount_points.append(
+                    {"containerPath": shared_path, "sourceVolume": vol_name}
+                )
 
             job_def_spec = {
-                'jobDefinitionName': 'toil-' + str(uuid.uuid4()),
-                'type': 'container',
-                'containerProperties': {
-                    'image': self.docker_image,
-                    'volumes': volumes,
-                    'mountPoints': mount_points,
+                "jobDefinitionName": "toil-" + str(uuid.uuid4()),
+                "type": "container",
+                "containerProperties": {
+                    "image": self.docker_image,
+                    "volumes": volumes,
+                    "mountPoints": mount_points,
                     # Requirements will always be overridden but must be present anyway
-                    'resourceRequirements': [
-                        {'type': 'MEMORY', 'value': str(max(MIN_REQUESTABLE_MIB, math.ceil(b_to_mib(self.config.defaultMemory))))},
-                        {'type': 'VCPU', 'value': str(max(MIN_REQUESTABLE_CORES, math.ceil(self.config.defaultCores)))}
+                    "resourceRequirements": [
+                        {
+                            "type": "MEMORY",
+                            "value": str(
+                                max(
+                                    MIN_REQUESTABLE_MIB,
+                                    math.ceil(b_to_mib(self.config.defaultMemory)),
+                                )
+                            ),
+                        },
+                        {
+                            "type": "VCPU",
+                            "value": str(
+                                max(
+                                    MIN_REQUESTABLE_CORES,
+                                    math.ceil(self.config.defaultCores),
+                                )
+                            ),
+                        },
                     ],
                     # Be privileged because we can. And we'd like Singularity
                     # to work even if we do have the Docker socket. See
                     # <https://github.com/moby/moby/issues/42441>.
-                    'privileged': True
+                    "privileged": True,
                 },
-                'retryStrategy': {'attempts': 1},
-                'propagateTags': True  # This will propagate to ECS task but not to job!
+                "retryStrategy": {"attempts": 1},
+                "propagateTags": True,  # This will propagate to ECS task but not to job!
             }
             if self.job_role_arn:
                 # We need to give the job a role.
                 # We might not be able to do much job store access without this!
-                container_properties = job_def_spec['containerProperties']
+                container_properties = job_def_spec["containerProperties"]
                 assert isinstance(container_properties, dict)
-                container_properties['jobRoleArn'] = self.job_role_arn
+                container_properties["jobRoleArn"] = self.job_role_arn
             if self.owner_tag:
                 # We are meant to tag everything with an owner
-                job_def_spec['tags'] = {'Owner': self.owner_tag}
+                job_def_spec["tags"] = {"Owner": self.owner_tag}
             response = self.client.register_job_definition(**job_def_spec)
-            self.job_definition = response['jobDefinitionArn']
+            self.job_definition = response["jobDefinitionArn"]
 
         return self.job_definition
 
@@ -494,10 +579,10 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
         # TODO: How do we tolerate it not existing anymore?
         self.job_definition = None
 
-    def getIssuedBatchJobIDs(self) -> List[int]:
+    def getIssuedBatchJobIDs(self) -> list[int]:
         return self.getIssuedLocalJobIDs() + list(self.bs_id_to_aws_id.keys())
 
-    def _describe_jobs_in_batches(self) -> Iterator[Dict[str, Any]]:
+    def _describe_jobs_in_batches(self) -> Iterator[dict[str, Any]]:
         """
         Internal function. Should not be called outside this class.
 
@@ -506,28 +591,30 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
         """
 
         # Get all the AWS IDs to poll
-        to_check = list(aws_and_bs_id[0] for aws_and_bs_id in self.aws_id_to_bs_id.items())
+        to_check = list(
+            aws_and_bs_id[0] for aws_and_bs_id in self.aws_id_to_bs_id.items()
+        )
 
         while len(to_check) > 0:
             # Go through jobs we want to poll in batches of the max size
             check_batch = to_check[-MAX_POLL_COUNT:]
             # And pop them off the end of the list of jobs to check
-            to_check = to_check[:-len(check_batch)]
+            to_check = to_check[: -len(check_batch)]
 
             # TODO: retry
             response = self.client.describe_jobs(jobs=check_batch)
 
             # Yield each returned JobDetail
-            yield from response.get('jobs', [])
+            yield from response.get("jobs", [])
 
-    def getRunningBatchJobIDs(self) -> Dict[int, float]:
+    def getRunningBatchJobIDs(self) -> dict[int, float]:
         # We need a dict from job_id (integer) to seconds it has been running
         bs_id_to_runtime = {}
 
         for job_detail in self._describe_jobs_in_batches():
-            if job_detail.get('status') == 'RUNNING':
+            if job_detail.get("status") == "RUNNING":
                 runtime = self._get_runtime(job_detail)
-                aws_id = job_detail['jobId']
+                aws_id = job_detail["jobId"]
                 bs_id = self.aws_id_to_bs_id[aws_id]
                 if runtime:
                     # We can measure a runtime
@@ -535,12 +622,17 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
                 else:
                     # If we can't find a runtime, we can't say it's running
                     # because we can't say how long it has been running for.
-                    logger.warning("Job %s is %s but has no runtime: %s", bs_id, job_detail['status'], job_detail)
+                    logger.warning(
+                        "Job %s is %s but has no runtime: %s",
+                        bs_id,
+                        job_detail["status"],
+                        job_detail,
+                    )
 
         # Give back the times all our running jobs have been running for.
         return bs_id_to_runtime
 
-    def killBatchJobs(self, job_ids: List[int]) -> None:
+    def killBatchJobs(self, job_ids: list[int]) -> None:
         # Kill all the ones that are local
         self.killLocalJobs(job_ids)
 
@@ -559,14 +651,31 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
 
     @classmethod
     def add_options(cls, parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
-        parser.add_argument("--awsBatchRegion", dest="aws_batch_region", default=None, env_var="TOIL_AWS_REGION",
-                            help="The AWS region containing the AWS Batch queue to submit to.")
-        parser.add_argument("--awsBatchQueue", dest="aws_batch_queue", default=None, env_var="TOIL_AWS_BATCH_QUEUE",
-                            help="The name or ARN of the AWS Batch queue to submit to.")
-        parser.add_argument("--awsBatchJobRoleArn", dest="aws_batch_job_role_arn", default=None, env_var="TOIL_AWS_BATCH_JOB_ROLE_ARN",
-                            help=("The ARN of an IAM role to run AWS Batch jobs as, so they "
-                                  "can e.g. access a job store. Must be assumable by "
-                                  "ecs-tasks.amazonaws.com."))
+        parser.add_argument(
+            "--awsBatchRegion",
+            dest="aws_batch_region",
+            default=None,
+            env_var="TOIL_AWS_REGION",
+            help="The AWS region containing the AWS Batch queue to submit to.",
+        )
+        parser.add_argument(
+            "--awsBatchQueue",
+            dest="aws_batch_queue",
+            default=None,
+            env_var="TOIL_AWS_BATCH_QUEUE",
+            help="The name or ARN of the AWS Batch queue to submit to.",
+        )
+        parser.add_argument(
+            "--awsBatchJobRoleArn",
+            dest="aws_batch_job_role_arn",
+            default=None,
+            env_var="TOIL_AWS_BATCH_JOB_ROLE_ARN",
+            help=(
+                "The ARN of an IAM role to run AWS Batch jobs as, so they "
+                "can e.g. access a job store. Must be assumable by "
+                "ecs-tasks.amazonaws.com."
+            ),
        )
 
     @classmethod
     def setOptions(cls, setOption: OptionSetter) -> None: