toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
@@ -21,62 +21,55 @@ from argparse import Namespace
21
21
  from collections import defaultdict
22
22
  from queue import Empty, Queue
23
23
  from threading import Event, Thread
24
- from typing import List, Optional, Set, Tuple
24
+ from typing import Optional
25
25
  from unittest.mock import MagicMock
26
26
 
27
- from toil.batchSystems.abstractBatchSystem import (AbstractBatchSystem,
28
- AbstractScalableBatchSystem,
29
- NodeInfo)
27
+ from toil.batchSystems.abstractBatchSystem import (
28
+ AbstractBatchSystem,
29
+ AbstractScalableBatchSystem,
30
+ NodeInfo,
31
+ )
30
32
  from toil.common import Config
31
- from toil.options.common import defaultTargetTime
32
33
  from toil.job import JobDescription
33
34
  from toil.lib.conversions import human2bytes as h2b
35
+ from toil.options.common import defaultTargetTime
34
36
  from toil.provisioners.abstractProvisioner import AbstractProvisioner, Shape
35
- from toil.provisioners.clusterScaler import (BinPackedFit,
36
- ClusterScaler,
37
- NodeReservation,
38
- ScalerThread)
37
+ from toil.provisioners.clusterScaler import (
38
+ BinPackedFit,
39
+ ClusterScaler,
40
+ NodeReservation,
41
+ ScalerThread,
42
+ )
39
43
  from toil.provisioners.node import Node
40
44
  from toil.test import ToilTest, slow
41
45
 
42
46
  logger = logging.getLogger(__name__)
43
47
 
44
48
  # simplified c4.8xlarge (preemptible)
45
- c4_8xlarge_preemptible = Shape(wallTime=3600,
46
- memory=h2b('60G'),
47
- cores=36,
48
- disk=h2b('100G'),
49
- preemptible=True)
49
+ c4_8xlarge_preemptible = Shape(
50
+ wallTime=3600, memory=h2b("60G"), cores=36, disk=h2b("100G"), preemptible=True
51
+ )
50
52
  # simplified c4.8xlarge (non-preemptible)
51
- c4_8xlarge = Shape(wallTime=3600,
52
- memory=h2b('60G'),
53
- cores=36,
54
- disk=h2b('100G'),
55
- preemptible=False)
53
+ c4_8xlarge = Shape(
54
+ wallTime=3600, memory=h2b("60G"), cores=36, disk=h2b("100G"), preemptible=False
55
+ )
56
56
  # simplified r3.8xlarge (non-preemptible)
57
- r3_8xlarge = Shape(wallTime=3600,
58
- memory=h2b('260G'),
59
- cores=32,
60
- disk=h2b('600G'),
61
- preemptible=False)
57
+ r3_8xlarge = Shape(
58
+ wallTime=3600, memory=h2b("260G"), cores=32, disk=h2b("600G"), preemptible=False
59
+ )
62
60
  # simplified r5.2xlarge (non-preemptible)
63
- r5_2xlarge = Shape(wallTime=3600,
64
- memory=h2b('64Gi'),
65
- cores=8,
66
- disk=h2b('50G'),
67
- preemptible=False)
61
+ r5_2xlarge = Shape(
62
+ wallTime=3600, memory=h2b("64Gi"), cores=8, disk=h2b("50G"), preemptible=False
63
+ )
68
64
  # simplified r5.4xlarge (non-preemptible)
69
- r5_4xlarge = Shape(wallTime=3600,
70
- memory=h2b('128Gi'),
71
- cores=16,
72
- disk=h2b('50G'),
73
- preemptible=False)
65
+ r5_4xlarge = Shape(
66
+ wallTime=3600, memory=h2b("128Gi"), cores=16, disk=h2b("50G"), preemptible=False
67
+ )
74
68
  # simplified t2.micro (non-preemptible)
75
- t2_micro = Shape(wallTime=3600,
76
- memory=h2b('1G'),
77
- cores=1,
78
- disk=h2b('8G'),
79
- preemptible=False)
69
+ t2_micro = Shape(
70
+ wallTime=3600, memory=h2b("1G"), cores=1, disk=h2b("8G"), preemptible=False
71
+ )
72
+
80
73
 
81
74
  class BinPackingTest(ToilTest):
82
75
  def setUp(self):
@@ -85,56 +78,104 @@ class BinPackingTest(ToilTest):
85
78
 
86
79
  def testPackingOneShape(self):
87
80
  """Pack one shape and check that the resulting reservations look sane."""
88
- self.bpf.nodeReservations[c4_8xlarge_preemptible] = [NodeReservation(c4_8xlarge_preemptible)]
89
- self.bpf.addJobShape(Shape(wallTime=1000,
90
- cores=2,
91
- memory=h2b('1G'),
92
- disk=h2b('2G'),
93
- preemptible=True))
81
+ self.bpf.nodeReservations[c4_8xlarge_preemptible] = [
82
+ NodeReservation(c4_8xlarge_preemptible)
83
+ ]
84
+ self.bpf.addJobShape(
85
+ Shape(
86
+ wallTime=1000,
87
+ cores=2,
88
+ memory=h2b("1G"),
89
+ disk=h2b("2G"),
90
+ preemptible=True,
91
+ )
92
+ )
94
93
  self.assertEqual(self.bpf.nodeReservations[r3_8xlarge], [])
95
- self.assertEqual([x.shapes() for x in self.bpf.nodeReservations[c4_8xlarge_preemptible]],
96
- [[Shape(wallTime=1000,
97
- memory=h2b('59G'),
98
- cores=34,
99
- disk=h2b('98G'),
100
- preemptible=True),
101
- Shape(wallTime=2600,
102
- memory=h2b('60G'),
103
- cores=36,
104
- disk=h2b('100G'),
105
- preemptible=True)]])
94
+ self.assertEqual(
95
+ [x.shapes() for x in self.bpf.nodeReservations[c4_8xlarge_preemptible]],
96
+ [
97
+ [
98
+ Shape(
99
+ wallTime=1000,
100
+ memory=h2b("59G"),
101
+ cores=34,
102
+ disk=h2b("98G"),
103
+ preemptible=True,
104
+ ),
105
+ Shape(
106
+ wallTime=2600,
107
+ memory=h2b("60G"),
108
+ cores=36,
109
+ disk=h2b("100G"),
110
+ preemptible=True,
111
+ ),
112
+ ]
113
+ ],
114
+ )
106
115
 
107
116
  def testSorting(self):
108
117
  """
109
118
  Test that sorting is correct: preemptible, then memory, then cores, then disk,
110
119
  then wallTime.
111
120
  """
112
- shapeList = [c4_8xlarge_preemptible, r3_8xlarge, c4_8xlarge, c4_8xlarge,
113
- t2_micro, t2_micro, c4_8xlarge, r3_8xlarge, r3_8xlarge, t2_micro]
121
+ shapeList = [
122
+ c4_8xlarge_preemptible,
123
+ r3_8xlarge,
124
+ c4_8xlarge,
125
+ c4_8xlarge,
126
+ t2_micro,
127
+ t2_micro,
128
+ c4_8xlarge,
129
+ r3_8xlarge,
130
+ r3_8xlarge,
131
+ t2_micro,
132
+ ]
114
133
  shapeList.sort()
115
- assert shapeList == [c4_8xlarge_preemptible,
116
- t2_micro, t2_micro, t2_micro,
117
- c4_8xlarge, c4_8xlarge, c4_8xlarge,
118
- r3_8xlarge, r3_8xlarge, r3_8xlarge]
134
+ assert shapeList == [
135
+ c4_8xlarge_preemptible,
136
+ t2_micro,
137
+ t2_micro,
138
+ t2_micro,
139
+ c4_8xlarge,
140
+ c4_8xlarge,
141
+ c4_8xlarge,
142
+ r3_8xlarge,
143
+ r3_8xlarge,
144
+ r3_8xlarge,
145
+ ]
119
146
 
120
147
  def testAddingInitialNode(self):
121
148
  """Pack one shape when no nodes are available and confirm that we fit one node properly."""
122
- self.bpf.addJobShape(Shape(wallTime=1000,
123
- cores=2,
124
- memory=h2b('1G'),
125
- disk=h2b('2G'),
126
- preemptible=True))
127
- self.assertEqual([x.shapes() for x in self.bpf.nodeReservations[c4_8xlarge_preemptible]],
128
- [[Shape(wallTime=1000,
129
- memory=h2b('59G'),
130
- cores=34,
131
- disk=h2b('98G'),
132
- preemptible=True),
133
- Shape(wallTime=2600,
134
- memory=h2b('60G'),
135
- cores=36,
136
- disk=h2b('100G'),
137
- preemptible=True)]])
149
+ self.bpf.addJobShape(
150
+ Shape(
151
+ wallTime=1000,
152
+ cores=2,
153
+ memory=h2b("1G"),
154
+ disk=h2b("2G"),
155
+ preemptible=True,
156
+ )
157
+ )
158
+ self.assertEqual(
159
+ [x.shapes() for x in self.bpf.nodeReservations[c4_8xlarge_preemptible]],
160
+ [
161
+ [
162
+ Shape(
163
+ wallTime=1000,
164
+ memory=h2b("59G"),
165
+ cores=34,
166
+ disk=h2b("98G"),
167
+ preemptible=True,
168
+ ),
169
+ Shape(
170
+ wallTime=2600,
171
+ memory=h2b("60G"),
172
+ cores=36,
173
+ disk=h2b("100G"),
174
+ preemptible=True,
175
+ ),
176
+ ]
177
+ ],
178
+ )
138
179
 
139
180
  def testLowTargetTime(self):
140
181
  """
@@ -150,11 +191,13 @@ class BinPackingTest(ToilTest):
150
191
  Each job is parametrized to take 300 seconds, so (the minimum of) 1 of them should fit into
151
192
  each node's 0 second window, so we expect 1000 nodes.
152
193
  """
153
- allocation = self.run1000JobsOnMicros(jobCores=1,
154
- jobMem=h2b('1G'),
155
- jobDisk=h2b('1G'),
156
- jobTime=300,
157
- globalTargetTime=0)
194
+ allocation = self.run1000JobsOnMicros(
195
+ jobCores=1,
196
+ jobMem=h2b("1G"),
197
+ jobDisk=h2b("1G"),
198
+ jobTime=300,
199
+ globalTargetTime=0,
200
+ )
158
201
  self.assertEqual(allocation, {t2_micro: 1000})
159
202
 
160
203
  def testHighTargetTime(self):
@@ -170,11 +213,13 @@ class BinPackingTest(ToilTest):
170
213
  Each job is parametrized to take 300 seconds, so 12 of them should fit into each node's
171
214
  3600 second window. 1000/12 = 83.33, so we expect 84 nodes.
172
215
  """
173
- allocation = self.run1000JobsOnMicros(jobCores=1,
174
- jobMem=h2b('1G'),
175
- jobDisk=h2b('1G'),
176
- jobTime=300,
177
- globalTargetTime=3600)
216
+ allocation = self.run1000JobsOnMicros(
217
+ jobCores=1,
218
+ jobMem=h2b("1G"),
219
+ jobDisk=h2b("1G"),
220
+ jobTime=300,
221
+ globalTargetTime=3600,
222
+ )
178
223
  self.assertEqual(allocation, {t2_micro: 84})
179
224
 
180
225
  def testZeroResourceJobs(self):
@@ -188,11 +233,9 @@ class BinPackingTest(ToilTest):
188
233
  Since all jobs should pack cpu/disk/mem-wise on a t2.micro, we expect only one t2.micro to
189
234
  be provisioned. If we raise this, as in testLowTargetTime, it will launch 1000 t2.micros.
190
235
  """
191
- allocation = self.run1000JobsOnMicros(jobCores=0,
192
- jobMem=0,
193
- jobDisk=0,
194
- jobTime=300,
195
- globalTargetTime=0)
236
+ allocation = self.run1000JobsOnMicros(
237
+ jobCores=0, jobMem=0, jobDisk=0, jobTime=300, globalTargetTime=0
238
+ )
196
239
  self.assertEqual(allocation, {t2_micro: 1})
197
240
 
198
241
  def testLongRunningJobs(self):
@@ -206,11 +249,13 @@ class BinPackingTest(ToilTest):
206
249
  Despite setting globalTargetTime=3600, this should launch 1000 t2.micros because each job's
207
250
  estimated runtime (30000 seconds) extends well beyond 3600 seconds.
208
251
  """
209
- allocation = self.run1000JobsOnMicros(jobCores=1,
210
- jobMem=h2b('1G'),
211
- jobDisk=h2b('1G'),
212
- jobTime=30000,
213
- globalTargetTime=3600)
252
+ allocation = self.run1000JobsOnMicros(
253
+ jobCores=1,
254
+ jobMem=h2b("1G"),
255
+ jobDisk=h2b("1G"),
256
+ jobTime=30000,
257
+ globalTargetTime=3600,
258
+ )
214
259
  self.assertEqual(allocation, {t2_micro: 1000})
215
260
 
216
261
  def run1000JobsOnMicros(self, jobCores, jobMem, jobDisk, jobTime, globalTargetTime):
@@ -221,11 +266,15 @@ class BinPackingTest(ToilTest):
221
266
  bpf = BinPackedFit(node_shapes_for_testing, targetTime=globalTargetTime)
222
267
 
223
268
  for _ in range(1000):
224
- bpf.addJobShape(Shape(wallTime=jobTime,
225
- memory=jobMem,
226
- cores=jobCores,
227
- disk=jobDisk,
228
- preemptible=False))
269
+ bpf.addJobShape(
270
+ Shape(
271
+ wallTime=jobTime,
272
+ memory=jobMem,
273
+ cores=jobCores,
274
+ disk=jobDisk,
275
+ preemptible=False,
276
+ )
277
+ )
229
278
  return bpf.getRequiredNodes()
230
279
 
231
280
  def testPathologicalCase(self):
@@ -238,20 +287,30 @@ class BinPackingTest(ToilTest):
238
287
  the future.
239
288
  """
240
289
  # Add one job that partially fills an r3.8xlarge for 1000 hours
241
- self.bpf.addJobShape(Shape(wallTime=3600000,
242
- memory=h2b('10G'),
243
- cores=0,
244
- disk=h2b('10G'),
245
- preemptible=False))
290
+ self.bpf.addJobShape(
291
+ Shape(
292
+ wallTime=3600000,
293
+ memory=h2b("10G"),
294
+ cores=0,
295
+ disk=h2b("10G"),
296
+ preemptible=False,
297
+ )
298
+ )
246
299
  for _ in range(500):
247
300
  # Add 500 CPU-hours worth of jobs that fill an r3.8xlarge
248
- self.bpf.addJobShape(Shape(wallTime=3600,
249
- memory=h2b('26G'),
250
- cores=32,
251
- disk=h2b('60G'),
252
- preemptible=False))
301
+ self.bpf.addJobShape(
302
+ Shape(
303
+ wallTime=3600,
304
+ memory=h2b("26G"),
305
+ cores=32,
306
+ disk=h2b("60G"),
307
+ preemptible=False,
308
+ )
309
+ )
253
310
  # Hopefully we didn't assign just one node to cover all those jobs.
254
- self.assertNotEqual(self.bpf.getRequiredNodes(), {r3_8xlarge: 1, c4_8xlarge_preemptible: 0})
311
+ self.assertNotEqual(
312
+ self.bpf.getRequiredNodes(), {r3_8xlarge: 1, c4_8xlarge_preemptible: 0}
313
+ )
255
314
 
256
315
  def testJobTooLargeForAllNodes(self):
257
316
  """
@@ -259,14 +318,17 @@ class BinPackingTest(ToilTest):
259
318
  warning, but definitely not crash.
260
319
  """
261
320
  # Takes more RAM than an r3.8xlarge
262
- largerThanR3 = Shape(wallTime=3600,
263
- memory=h2b('360G'),
264
- cores=32,
265
- disk=h2b('600G'),
266
- preemptible=False)
321
+ largerThanR3 = Shape(
322
+ wallTime=3600,
323
+ memory=h2b("360G"),
324
+ cores=32,
325
+ disk=h2b("600G"),
326
+ preemptible=False,
327
+ )
267
328
  self.bpf.addJobShape(largerThanR3)
268
329
  # If we got here we didn't crash.
269
330
 
331
+
270
332
  class ClusterScalerTest(ToilTest):
271
333
  def setUp(self):
272
334
  super().setUp()
@@ -279,7 +341,9 @@ class ClusterScalerTest(ToilTest):
279
341
  # It is also a full mock provisioner, so configure it to be that as well
280
342
  self.provisioner = self.leader
281
343
  # Pretend that Shapes are actually strings we can use for instance type names.
282
- self.provisioner.setAutoscaledNodeTypes([({t}, None) for t in self.config.nodeTypes])
344
+ self.provisioner.setAutoscaledNodeTypes(
345
+ [({t}, None) for t in self.config.nodeTypes]
346
+ )
283
347
 
284
348
  def testRounding(self):
285
349
  """
@@ -299,8 +363,8 @@ class ClusterScalerTest(ToilTest):
299
363
  self.assertEqual(scaler._round(123456789101112.13), 123456789101112)
300
364
 
301
365
  # Decimals other than X.5 round to the side they are closer to
302
- self.assertEqual(scaler._round(1E-10), 0)
303
- self.assertEqual(scaler._round(0.5 + 1E-15), 1)
366
+ self.assertEqual(scaler._round(1e-10), 0)
367
+ self.assertEqual(scaler._round(0.5 + 1e-15), 1)
304
368
  self.assertEqual(scaler._round(-0.9), -1)
305
369
  self.assertEqual(scaler._round(-0.4), 0)
306
370
 
@@ -322,17 +386,30 @@ class ClusterScalerTest(ToilTest):
322
386
  self.config.betaInertia = 0.0
323
387
  self.config.maxNodes = [2, 3]
324
388
  scaler = ClusterScaler(self.provisioner, self.leader, self.config)
325
- jobShapes = [Shape(wallTime=3600,
326
- cores=2,
327
- memory=h2b('1G'),
328
- disk=h2b('2G'),
329
- preemptible=True)] * 1000
330
- jobShapes.extend([Shape(wallTime=3600,
331
- cores=2,
332
- memory=h2b('1G'),
333
- disk=h2b('2G'),
334
- preemptible=False)] * 1000)
335
- estimatedNodeCounts, could_not_fit = scaler.getEstimatedNodeCounts(jobShapes, defaultdict(int))
389
+ jobShapes = [
390
+ Shape(
391
+ wallTime=3600,
392
+ cores=2,
393
+ memory=h2b("1G"),
394
+ disk=h2b("2G"),
395
+ preemptible=True,
396
+ )
397
+ ] * 1000
398
+ jobShapes.extend(
399
+ [
400
+ Shape(
401
+ wallTime=3600,
402
+ cores=2,
403
+ memory=h2b("1G"),
404
+ disk=h2b("2G"),
405
+ preemptible=False,
406
+ )
407
+ ]
408
+ * 1000
409
+ )
410
+ estimatedNodeCounts, could_not_fit = scaler.getEstimatedNodeCounts(
411
+ jobShapes, defaultdict(int)
412
+ )
336
413
  self.assertEqual(estimatedNodeCounts[r3_8xlarge], 2)
337
414
  self.assertEqual(estimatedNodeCounts[c4_8xlarge_preemptible], 3)
338
415
  self.assertEqual(len(could_not_fit), 0)
@@ -345,7 +422,9 @@ class ClusterScalerTest(ToilTest):
345
422
  self.config.minNodes = [2, 3]
346
423
  scaler = ClusterScaler(self.provisioner, self.leader, self.config)
347
424
  jobShapes = []
348
- estimatedNodeCounts, could_not_fit = scaler.getEstimatedNodeCounts(jobShapes, defaultdict(int))
425
+ estimatedNodeCounts, could_not_fit = scaler.getEstimatedNodeCounts(
426
+ jobShapes, defaultdict(int)
427
+ )
349
428
  self.assertEqual(estimatedNodeCounts[r3_8xlarge], 2)
350
429
  self.assertEqual(estimatedNodeCounts[c4_8xlarge_preemptible], 3)
351
430
  self.assertEqual(len(could_not_fit), 0)
@@ -367,7 +446,9 @@ class ClusterScalerTest(ToilTest):
367
446
  # the same type. That is the only situation where
368
447
  # preemptibleCompensation applies.
369
448
  self.config.nodeTypes = [c4_8xlarge_preemptible, c4_8xlarge]
370
- self.provisioner.setAutoscaledNodeTypes([({t}, None) for t in self.config.nodeTypes])
449
+ self.provisioner.setAutoscaledNodeTypes(
450
+ [({t}, None) for t in self.config.nodeTypes]
451
+ )
371
452
 
372
453
  scaler = ClusterScaler(self.provisioner, self.leader, self.config)
373
454
  # Simulate a situation where a previous run caused a
@@ -375,16 +456,24 @@ class ClusterScalerTest(ToilTest):
375
456
  scaler.preemptibleNodeDeficit[c4_8xlarge] = 5
376
457
  # Add a bunch of preemptible jobs (so the bin-packing
377
458
  # estimate for the non-preemptible node should still be 0)
378
- jobShapes = [Shape(wallTime=3600,
379
- cores=2,
380
- memory=h2b('1G'),
381
- disk=h2b('2G'),
382
- preemptible=True)] * 1000
383
- estimatedNodeCounts, could_not_fit = scaler.getEstimatedNodeCounts(jobShapes, defaultdict(int))
459
+ jobShapes = [
460
+ Shape(
461
+ wallTime=3600,
462
+ cores=2,
463
+ memory=h2b("1G"),
464
+ disk=h2b("2G"),
465
+ preemptible=True,
466
+ )
467
+ ] * 1000
468
+ estimatedNodeCounts, could_not_fit = scaler.getEstimatedNodeCounts(
469
+ jobShapes, defaultdict(int)
470
+ )
384
471
  # We don't care about the estimated size of the preemptible
385
472
  # nodes. All we want to know is if we responded to the deficit
386
473
  # properly: 0.5 * 5 (preemptibleCompensation * the deficit) = 3 (rounded up).
387
- self.assertEqual(estimatedNodeCounts[self.provisioner.node_shapes_for_testing[1]], 3)
474
+ self.assertEqual(
475
+ estimatedNodeCounts[self.provisioner.node_shapes_for_testing[1]], 3
476
+ )
388
477
  self.assertEqual(len(could_not_fit), 0)
389
478
 
390
479
  def testPreemptibleDeficitIsSet(self):
@@ -404,7 +493,9 @@ class ClusterScalerTest(ToilTest):
404
493
  # the same type. That is the only situation where
405
494
  # preemptibleCompensation applies.
406
495
  self.config.nodeTypes = [c4_8xlarge_preemptible, c4_8xlarge]
407
- self.provisioner.setAutoscaledNodeTypes([({t}, None) for t in self.config.nodeTypes])
496
+ self.provisioner.setAutoscaledNodeTypes(
497
+ [({t}, None) for t in self.config.nodeTypes]
498
+ )
408
499
  scaler = ClusterScaler(self.provisioner, self.leader, self.config)
409
500
  estimatedNodeCounts = {c4_8xlarge_preemptible: 5, c4_8xlarge: 0}
410
501
  scaler.updateClusterSize(estimatedNodeCounts)
@@ -427,18 +518,30 @@ class ClusterScalerTest(ToilTest):
427
518
  scaler = ClusterScaler(self.provisioner, self.leader, self.config)
428
519
  # Pretend there is one ignored worker in the cluster
429
520
  self.provisioner.getProvisionedWorkers = MagicMock(
430
- return_value=[Node('127.0.0.1', '127.0.0.1', 'testNode',
431
- datetime.datetime.now().isoformat(),
432
- nodeType=c4_8xlarge, preemptible=True)])
433
- scaler.ignoredNodes.add('127.0.0.1')
521
+ return_value=[
522
+ Node(
523
+ "127.0.0.1",
524
+ "127.0.0.1",
525
+ "testNode",
526
+ datetime.datetime.now().isoformat(),
527
+ nodeType=c4_8xlarge,
528
+ preemptible=True,
529
+ )
530
+ ]
531
+ )
532
+ scaler.ignoredNodes.add("127.0.0.1")
434
533
  # Exercise the updateClusterSize logic
435
534
  self.provisioner.addNodes = MagicMock()
436
535
  scaler.updateClusterSize({c4_8xlarge: 1})
437
- self.assertFalse(self.provisioner.addNodes.called,
438
- "addNodes was called when no new nodes were needed")
439
- self.assertEqual(len(scaler.ignoredNodes), 0,
440
- "The scaler didn't unignore an ignored node when "
441
- "scaling up")
536
+ self.assertFalse(
537
+ self.provisioner.addNodes.called,
538
+ "addNodes was called when no new nodes were needed",
539
+ )
540
+ self.assertEqual(
541
+ len(scaler.ignoredNodes),
542
+ 0,
543
+ "The scaler didn't unignore an ignored node when " "scaling up",
544
+ )
442
545
 
443
546
  def testBetaInertia(self):
444
547
  # This is really high, but makes things easy to calculate.
@@ -466,25 +569,29 @@ class ClusterScalerTest(ToilTest):
466
569
 
467
570
  # If the job needs 100% of the memory of the instance type, it won't
468
571
  # fit and will need a bigger node.
469
- self._check_job_estimate([(c4_8xlarge, 0), (r3_8xlarge, 1)], memory=h2b('60G'))
572
+ self._check_job_estimate([(c4_8xlarge, 0), (r3_8xlarge, 1)], memory=h2b("60G"))
470
573
 
471
574
  # If the job needs 98% of the memory of the instance type, it won't
472
575
  # fit and will need a bigger node.
473
- self._check_job_estimate([(c4_8xlarge, 0), (r3_8xlarge, 1)], memory=int(h2b('60G') * 0.98))
576
+ self._check_job_estimate(
577
+ [(c4_8xlarge, 0), (r3_8xlarge, 1)], memory=int(h2b("60G") * 0.98)
578
+ )
474
579
 
475
580
  # If the job needs 90% of the memory of the instance type, it will fit.
476
- self._check_job_estimate([(c4_8xlarge, 1), (r3_8xlarge, 0)], memory=int(h2b('60G') * 0.90))
581
+ self._check_job_estimate(
582
+ [(c4_8xlarge, 1), (r3_8xlarge, 0)], memory=int(h2b("60G") * 0.90)
583
+ )
477
584
 
478
585
  # If the job needs 100% of the disk of the instance type, it won't
479
586
  # fit and will need a bigger node.
480
- self._check_job_estimate([(c4_8xlarge, 0), (r3_8xlarge, 1)], disk=h2b('100G'))
587
+ self._check_job_estimate([(c4_8xlarge, 0), (r3_8xlarge, 1)], disk=h2b("100G"))
481
588
 
482
589
  # If the job needs all but 7G of the disk of the instance type, it won't
483
590
  # fit and will need a bigger node.
484
- self._check_job_estimate([(c4_8xlarge, 0), (r3_8xlarge, 1)], disk=h2b('93G'))
591
+ self._check_job_estimate([(c4_8xlarge, 0), (r3_8xlarge, 1)], disk=h2b("93G"))
485
592
 
486
593
  # If the job leaves 10% and 10G of the disk free, it fits
487
- self._check_job_estimate([(c4_8xlarge, 1), (r3_8xlarge, 0)], disk=h2b('90G'))
594
+ self._check_job_estimate([(c4_8xlarge, 1), (r3_8xlarge, 0)], disk=h2b("90G"))
488
595
 
489
596
  def test_overhead_accounting_small(self):
490
597
  """
@@ -499,11 +606,13 @@ class ClusterScalerTest(ToilTest):
499
606
 
500
607
  # If the job needs 100% of the memory of the instance type, it won't
501
608
  # fit and will need a bigger node.
502
- self._check_job_estimate([(t2_micro, 0), (r3_8xlarge, 1)], memory=h2b('1G'))
609
+ self._check_job_estimate([(t2_micro, 0), (r3_8xlarge, 1)], memory=h2b("1G"))
503
610
 
504
611
  # If the job needs all but 100M of the memory of the instance type, it
505
612
  # won't fit and will need a bigger node.
506
- self._check_job_estimate([(t2_micro, 0), (r3_8xlarge, 1)], memory=h2b('1G') - h2b('100M'))
613
+ self._check_job_estimate(
614
+ [(t2_micro, 0), (r3_8xlarge, 1)], memory=h2b("1G") - h2b("100M")
615
+ )
507
616
 
508
617
  # If the job needs no more than 90% of the memory on the node *and*
509
618
  # leaves at least 384M free for overhead, we can rely on it fitting on a 1G
@@ -512,12 +621,14 @@ class ClusterScalerTest(ToilTest):
512
621
  Shape(
513
622
  wallTime=3600,
514
623
  cores=1,
515
- memory=h2b('1G') - h2b('384M'),
516
- disk=h2b('2G'),
517
- preemptible=True
624
+ memory=h2b("1G") - h2b("384M"),
625
+ disk=h2b("2G"),
626
+ preemptible=True,
518
627
  )
519
628
  ]
520
- self._check_job_estimate([(t2_micro, 1), (r3_8xlarge, 0)], memory=h2b('1G') - h2b('384M'))
629
+ self._check_job_estimate(
630
+ [(t2_micro, 1), (r3_8xlarge, 0)], memory=h2b("1G") - h2b("384M")
631
+ )
521
632
 
522
633
  def test_overhead_accounting_observed(self):
523
634
  """
@@ -536,9 +647,13 @@ class ClusterScalerTest(ToilTest):
536
647
  # not clear if Mesos is thinking in actual GB or GiB here.
537
648
 
538
649
  # A 62.5Gi job is sent to the larger node
539
- self._check_job_estimate([(r5_2xlarge, 0), (r5_4xlarge, 1)], memory=h2b('62.5 Gi'))
650
+ self._check_job_estimate(
651
+ [(r5_2xlarge, 0), (r5_4xlarge, 1)], memory=h2b("62.5 Gi")
652
+ )
540
653
 
541
- def _check_job_estimate(self, nodes: List[Tuple[Shape, int]], cores=1, memory=1, disk=1) -> None:
654
+ def _check_job_estimate(
655
+ self, nodes: list[tuple[Shape, int]], cores=1, memory=1, disk=1
656
+ ) -> None:
542
657
  """
543
658
  Make sure that a job with the given requirements, when run on the given
544
659
  nodes, produces the given numbers of them.
@@ -553,23 +668,20 @@ class ClusterScalerTest(ToilTest):
553
668
 
554
669
  jobs = [
555
670
  Shape(
556
- wallTime=3600,
557
- cores=cores,
558
- memory=memory,
559
- disk=disk,
560
- preemptible=True
671
+ wallTime=3600, cores=cores, memory=memory, disk=disk, preemptible=True
561
672
  )
562
673
  ]
563
674
 
564
- logger.debug('Try and fit jobs: %s', jobs)
675
+ logger.debug("Try and fit jobs: %s", jobs)
565
676
  counts, could_not_fit = scaler.getEstimatedNodeCounts(jobs, defaultdict(int))
566
677
  for node, count in nodes:
567
678
  seen_count = counts.get(node, 0)
568
679
  if seen_count != count:
569
- logger.error('Saw %s/%s instances of node %s', seen_count, count, node)
680
+ logger.error("Saw %s/%s instances of node %s", seen_count, count, node)
570
681
  self.assertEqual(seen_count, count)
571
682
  self.assertEqual(len(could_not_fit), 0)
572
683
 
684
+
573
685
  class ScalerThreadTest(ToilTest):
574
686
  def _testClusterScaling(self, config, numJobs, numPreemptibleJobs, jobShape):
575
687
  """
@@ -587,49 +699,77 @@ class ScalerThreadTest(ToilTest):
587
699
  clusterScaler.start()
588
700
  try:
589
701
  # Add 100 jobs to complete
590
- list(map(lambda x: mock.addJob(jobShape=jobShape),
591
- list(range(numJobs))))
592
- list(map(lambda x: mock.addJob(jobShape=jobShape, preemptible=True),
593
- list(range(numPreemptibleJobs))))
702
+ list(map(lambda x: mock.addJob(jobShape=jobShape), list(range(numJobs))))
703
+ list(
704
+ map(
705
+ lambda x: mock.addJob(jobShape=jobShape, preemptible=True),
706
+ list(range(numPreemptibleJobs)),
707
+ )
708
+ )
594
709
 
595
710
  # Add some completed jobs
596
711
  for preemptible in (True, False):
597
- if preemptible and numPreemptibleJobs > 0 or not preemptible and numJobs > 0:
712
+ if (
713
+ preemptible
714
+ and numPreemptibleJobs > 0
715
+ or not preemptible
716
+ and numJobs > 0
717
+ ):
598
718
  # Add 1000 random jobs
599
719
  for _ in range(1000):
600
720
  x = mock.getNodeShape(nodeType=jobShape)
601
- iJ = JobDescription(requirements=dict(
602
- memory=random.randrange(1, x.memory),
603
- cores=random.randrange(1, x.cores),
604
- disk=random.randrange(1, x.disk),
605
- preemptible=preemptible),
606
- jobName='testClusterScaling', unitName='')
607
- clusterScaler.addCompletedJob(iJ, random.choice(list(range(1, x.wallTime))))
721
+ iJ = JobDescription(
722
+ requirements=dict(
723
+ memory=random.randrange(1, x.memory),
724
+ cores=random.randrange(1, x.cores),
725
+ disk=random.randrange(1, x.disk),
726
+ preemptible=preemptible,
727
+ ),
728
+ jobName="testClusterScaling",
729
+ unitName="",
730
+ )
731
+ clusterScaler.addCompletedJob(
732
+ iJ, random.choice(list(range(1, x.wallTime)))
733
+ )
608
734
 
609
735
  startTime = time.time()
610
736
  # Wait while the cluster processes the jobs
611
- while (mock.getNumberOfJobsIssued(preemptible=False) > 0
612
- or mock.getNumberOfJobsIssued(preemptible=True) > 0
613
- or mock.getNumberOfNodes() > 0 or mock.getNumberOfNodes(preemptible=True) > 0):
614
- logger.debug("Running, non-preemptible queue size: %s, non-preemptible workers: %s, "
615
- "preemptible queue size: %s, preemptible workers: %s" %
616
- (mock.getNumberOfJobsIssued(preemptible=False),
617
- mock.getNumberOfNodes(preemptible=False),
618
- mock.getNumberOfJobsIssued(preemptible=True),
619
- mock.getNumberOfNodes(preemptible=True)))
737
+ while (
738
+ mock.getNumberOfJobsIssued(preemptible=False) > 0
739
+ or mock.getNumberOfJobsIssued(preemptible=True) > 0
740
+ or mock.getNumberOfNodes() > 0
741
+ or mock.getNumberOfNodes(preemptible=True) > 0
742
+ ):
743
+ logger.debug(
744
+ "Running, non-preemptible queue size: %s, non-preemptible workers: %s, "
745
+ "preemptible queue size: %s, preemptible workers: %s"
746
+ % (
747
+ mock.getNumberOfJobsIssued(preemptible=False),
748
+ mock.getNumberOfNodes(preemptible=False),
749
+ mock.getNumberOfJobsIssued(preemptible=True),
750
+ mock.getNumberOfNodes(preemptible=True),
751
+ )
752
+ )
620
753
  clusterScaler.check()
621
754
  time.sleep(0.5)
622
- logger.debug("We waited %s for cluster to finish" % (time.time() - startTime))
755
+ logger.debug(
756
+ "We waited %s for cluster to finish" % (time.time() - startTime)
757
+ )
623
758
  finally:
624
759
  clusterScaler.shutdown()
625
760
  mock.shutDown()
626
761
 
627
762
  # Print some info about the autoscaling
628
- logger.debug("Total-jobs: %s: Max-workers: %s, "
629
- "Total-worker-time: %s, Worker-time-per-job: %s" %
630
- (mock.totalJobs, sum(mock.maxWorkers.values()),
631
- mock.totalWorkerTime,
632
- mock.totalWorkerTime // mock.totalJobs if mock.totalJobs > 0 else 0.0))
763
+ logger.debug(
764
+ "Total-jobs: %s: Max-workers: %s, "
765
+ "Total-worker-time: %s, Worker-time-per-job: %s"
766
+ % (
767
+ mock.totalJobs,
768
+ sum(mock.maxWorkers.values()),
769
+ mock.totalWorkerTime,
770
+ mock.totalWorkerTime // mock.totalJobs if mock.totalJobs > 0 else 0.0,
771
+ )
772
+ )
633
773
 
634
774
  @slow
635
775
  def testClusterScaling(self):
@@ -640,15 +780,15 @@ class ScalerThreadTest(ToilTest):
640
780
  config = Config()
641
781
 
642
782
  # Make defaults dummy values
643
- config.defaultMemory = h2b('1Gi')
783
+ config.defaultMemory = h2b("1Gi")
644
784
  config.defaultCores = 1
645
- config.defaultDisk = h2b('1Gi')
785
+ config.defaultDisk = h2b("1Gi")
646
786
 
647
787
  # No preemptible nodes/jobs
648
788
  config.maxPreemptibleNodes = [] # No preemptible nodes
649
789
 
650
790
  # Non-preemptible parameters
651
- config.nodeTypes = [Shape(20, h2b('10Gi'), 10, h2b('100Gi'), False)]
791
+ config.nodeTypes = [Shape(20, h2b("10Gi"), 10, h2b("100Gi"), False)]
652
792
  config.minNodes = [0]
653
793
  config.maxNodes = [10]
654
794
 
@@ -657,27 +797,31 @@ class ScalerThreadTest(ToilTest):
657
797
  config.betaInertia = 0.1
658
798
  config.scaleInterval = 3
659
799
 
660
- self._testClusterScaling(config, numJobs=100, numPreemptibleJobs=0,
661
- jobShape=Shape(20, h2b('7Gi'), 10, h2b('80Gi'), False))
800
+ self._testClusterScaling(
801
+ config,
802
+ numJobs=100,
803
+ numPreemptibleJobs=0,
804
+ jobShape=Shape(20, h2b("7Gi"), 10, h2b("80Gi"), False),
805
+ )
662
806
 
663
807
  @slow
664
808
  def testClusterScalingMultipleNodeTypes(self):
665
809
 
666
- small_node = Shape(20, h2b('5Gi'), 10, h2b('20Gi'), False)
667
- small_job = Shape(20, h2b('3Gi'), 10, h2b('4Gi'), False)
668
- medium_node = Shape(20, h2b('10Gi'), 10, h2b('20Gi'), False)
669
- medium_job = Shape(20, h2b('7Gi'), 10, h2b('4Gi'), False)
670
- large_node = Shape(20, h2b('20Gi'), 10, h2b('20Gi'), False)
671
- large_job = Shape(20, h2b('16Gi'), 10, h2b('4Gi'), False)
810
+ small_node = Shape(20, h2b("5Gi"), 10, h2b("20Gi"), False)
811
+ small_job = Shape(20, h2b("3Gi"), 10, h2b("4Gi"), False)
812
+ medium_node = Shape(20, h2b("10Gi"), 10, h2b("20Gi"), False)
813
+ medium_job = Shape(20, h2b("7Gi"), 10, h2b("4Gi"), False)
814
+ large_node = Shape(20, h2b("20Gi"), 10, h2b("20Gi"), False)
815
+ large_job = Shape(20, h2b("16Gi"), 10, h2b("4Gi"), False)
672
816
 
673
817
  numJobs = 100
674
818
 
675
819
  config = Config()
676
820
 
677
821
  # Make defaults dummy values
678
- config.defaultMemory = h2b('1Gi')
822
+ config.defaultMemory = h2b("1Gi")
679
823
  config.defaultCores = 1
680
- config.defaultDisk = h2b('1Gi')
824
+ config.defaultDisk = h2b("1Gi")
681
825
 
682
826
  # No preemptible nodes/jobs
683
827
  config.preemptibleNodeTypes = []
@@ -707,12 +851,18 @@ class ScalerThreadTest(ToilTest):
707
851
 
708
852
  # Add medium completed jobs
709
853
  for i in range(1000):
710
- iJ = JobDescription(requirements=dict(
711
- memory=random.choice(range(small_job.memory, medium_job.memory)),
712
- cores=medium_job.cores,
713
- disk=large_job.disk,
714
- preemptible=False),
715
- jobName='testClusterScaling', unitName='')
854
+ iJ = JobDescription(
855
+ requirements=dict(
856
+ memory=random.choice(
857
+ range(small_job.memory, medium_job.memory)
858
+ ),
859
+ cores=medium_job.cores,
860
+ disk=large_job.disk,
861
+ preemptible=False,
862
+ ),
863
+ jobName="testClusterScaling",
864
+ unitName="",
865
+ )
716
866
  clusterScaler.addCompletedJob(iJ, random.choice(range(1, 10)))
717
867
 
718
868
  while mock.getNumberOfJobsIssued() > 0 or mock.getNumberOfNodes() > 0:
@@ -739,15 +889,15 @@ class ScalerThreadTest(ToilTest):
739
889
  """
740
890
  config = Config()
741
891
 
742
- node_shape = Shape(20, h2b('10Gi'), 10, h2b('20Gi'), False)
743
- preemptible_node_shape = Shape(20, h2b('10Gi'), 10, h2b('20Gi'), True)
744
- job_shape = Shape(20, h2b('7Gi'), 10, h2b('2Gi'), False)
745
- preemptible_job_shape = Shape(20, h2b('7Gi'), 10, h2b('2Gi'), True)
892
+ node_shape = Shape(20, h2b("10Gi"), 10, h2b("20Gi"), False)
893
+ preemptible_node_shape = Shape(20, h2b("10Gi"), 10, h2b("20Gi"), True)
894
+ job_shape = Shape(20, h2b("7Gi"), 10, h2b("2Gi"), False)
895
+ preemptible_job_shape = Shape(20, h2b("7Gi"), 10, h2b("2Gi"), True)
746
896
 
747
897
  # Make defaults dummy values
748
- config.defaultMemory = h2b('1Gi')
898
+ config.defaultMemory = h2b("1Gi")
749
899
  config.defaultCores = 1
750
- config.defaultDisk = h2b('1Gi')
900
+ config.defaultDisk = h2b("1Gi")
751
901
 
752
902
  # non-preemptible node parameters
753
903
  config.nodeTypes = [node_shape, preemptible_node_shape]
@@ -759,13 +909,16 @@ class ScalerThreadTest(ToilTest):
759
909
  config.betaInertia = 0.9
760
910
  config.scaleInterval = 3
761
911
 
762
- self._testClusterScaling(config, numJobs=100, numPreemptibleJobs=100, jobShape=job_shape)
912
+ self._testClusterScaling(
913
+ config, numJobs=100, numPreemptibleJobs=100, jobShape=job_shape
914
+ )
763
915
 
764
916
 
765
917
  class MockBatchSystemAndProvisioner(AbstractScalableBatchSystem, AbstractProvisioner):
766
918
  """Mimics a leader, job batcher, provisioner and scalable batch system."""
919
+
767
920
  def __init__(self, config, secondsPerJob):
768
- super().__init__(clusterName='clusterName', clusterType='mesos')
921
+ super().__init__(clusterName="clusterName", clusterType="mesos")
769
922
  # To mimic parallel preemptible and non-preemptible queues
770
923
  # for jobs we create two parallel instances of the following class
771
924
  self.config = config
@@ -797,8 +950,8 @@ class MockBatchSystemAndProvisioner(AbstractScalableBatchSystem, AbstractProvisi
797
950
 
798
951
  # Stub out all AbstractBatchSystem methods since they are never called
799
952
  for name, value in AbstractBatchSystem.__dict__.items():
800
- if getattr(value, '__isabstractmethod__', False):
801
- exec('def %s(): pass' % name)
953
+ if getattr(value, "__isabstractmethod__", False):
954
+ exec("def %s(): pass" % name)
802
955
  # Without this, the class would end up with .name and .value attributes
803
956
  del name, value
804
957
 
@@ -813,7 +966,7 @@ class MockBatchSystemAndProvisioner(AbstractScalableBatchSystem, AbstractProvisi
813
966
  pass
814
967
 
815
968
  def supportedClusterTypes(self):
816
- return {'mesos'}
969
+ return {"mesos"}
817
970
 
818
971
  def createClusterSettings(self):
819
972
  pass
@@ -822,7 +975,9 @@ class MockBatchSystemAndProvisioner(AbstractScalableBatchSystem, AbstractProvisi
822
975
  pass
823
976
 
824
977
  # AbstractProvisioner methods
825
- def setAutoscaledNodeTypes(self, node_types: List[Tuple[Set[Shape], Optional[float]]]):
978
+ def setAutoscaledNodeTypes(
979
+ self, node_types: list[tuple[set[Shape], Optional[float]]]
980
+ ):
826
981
  self.node_shapes_for_testing = sorted(it for t in node_types for it in t[0])
827
982
  super().setAutoscaledNodeTypes(node_types)
828
983
 
@@ -856,18 +1011,25 @@ class MockBatchSystemAndProvisioner(AbstractScalableBatchSystem, AbstractProvisi
856
1011
  """
857
1012
  self.totalJobs += 1
858
1013
  jobID = uuid.uuid4()
859
- self.jobBatchSystemIDToIssuedJob[jobID] = JobDescription(requirements={"memory": jobShape.memory,
860
- "cores": jobShape.cores,
861
- "disk": jobShape.disk,
862
- "preemptible": preemptible},
863
- jobName=f'job{self.totalJobs}')
1014
+ self.jobBatchSystemIDToIssuedJob[jobID] = JobDescription(
1015
+ requirements={
1016
+ "memory": jobShape.memory,
1017
+ "cores": jobShape.cores,
1018
+ "disk": jobShape.disk,
1019
+ "preemptible": preemptible,
1020
+ },
1021
+ jobName=f"job{self.totalJobs}",
1022
+ )
864
1023
  self.jobQueue.put(jobID)
865
1024
 
866
1025
  # JobBatcher functionality
867
1026
  def getNumberOfJobsIssued(self, preemptible=None):
868
1027
  if preemptible is not None:
869
- jobList = [job for job in list(self.jobQueue.queue) if
870
- self.jobBatchSystemIDToIssuedJob[job].preemptible == preemptible]
1028
+ jobList = [
1029
+ job
1030
+ for job in list(self.jobQueue.queue)
1031
+ if self.jobBatchSystemIDToIssuedJob[job].preemptible == preemptible
1032
+ ]
871
1033
  return len(jobList)
872
1034
  else:
873
1035
  return self.jobQueue.qsize()
@@ -883,13 +1045,19 @@ class MockBatchSystemAndProvisioner(AbstractScalableBatchSystem, AbstractProvisi
883
1045
  for node in self.nodesToWorker:
884
1046
  if node.preemptible == preemptible:
885
1047
  worker = self.nodesToWorker[node]
886
- nodes[node.privateIP] = NodeInfo(coresTotal=0, coresUsed=0, requestedCores=1,
887
- memoryTotal=0, memoryUsed=0, requestedMemory=1,
888
- workers=1 if worker.busyEvent.is_set() else 0)
1048
+ nodes[node.privateIP] = NodeInfo(
1049
+ coresTotal=0,
1050
+ coresUsed=0,
1051
+ requestedCores=1,
1052
+ memoryTotal=0,
1053
+ memoryUsed=0,
1054
+ requestedMemory=1,
1055
+ workers=1 if worker.busyEvent.is_set() else 0,
1056
+ )
889
1057
  return nodes
890
1058
 
891
1059
  # AbstractProvisioner functionality
892
- def addNodes(self, nodeTypes: Set[str], numNodes, preemptible) -> int:
1060
+ def addNodes(self, nodeTypes: set[str], numNodes, preemptible) -> int:
893
1061
  nodeType = next(iter(nodeTypes))
894
1062
  self._addNodes(numNodes=numNodes, nodeType=nodeType, preemptible=preemptible)
895
1063
  return self.getNumberOfNodes(nodeType=nodeType, preemptible=preemptible)
@@ -902,8 +1070,17 @@ class MockBatchSystemAndProvisioner(AbstractScalableBatchSystem, AbstractProvisi
902
1070
  def getWorkersInCluster(self, nodeShape):
903
1071
  return self.workers[nodeShape]
904
1072
 
905
- def launchCluster(self, leaderNodeType, keyName, userTags=None,
906
- vpcSubnet=None, leaderStorage=50, nodeStorage=50, botoPath=None, **kwargs):
1073
+ def launchCluster(
1074
+ self,
1075
+ leaderNodeType,
1076
+ keyName,
1077
+ userTags=None,
1078
+ vpcSubnet=None,
1079
+ leaderStorage=50,
1080
+ nodeStorage=50,
1081
+ botoPath=None,
1082
+ **kwargs,
1083
+ ):
907
1084
  pass
908
1085
 
909
1086
  def destroyCluster(self) -> None:
@@ -912,7 +1089,6 @@ class MockBatchSystemAndProvisioner(AbstractScalableBatchSystem, AbstractProvisi
912
1089
  def getLeader(self):
913
1090
  pass
914
1091
 
915
-
916
1092
  def _leaderFn(self):
917
1093
  while self.running:
918
1094
  updatedJobID = None
@@ -955,14 +1131,28 @@ class MockBatchSystemAndProvisioner(AbstractScalableBatchSystem, AbstractProvisi
955
1131
  return time.time() - self.startTime
956
1132
 
957
1133
  for _ in range(numNodes):
958
- node = Node('127.0.0.1', uuid.uuid4(), 'testNode', datetime.datetime.now().isoformat()+'Z', nodeType=nodeType,
959
- preemptible=preemptible)
960
- self.nodesToWorker[node] = Worker(self.jobQueue, self.updatedJobsQueue, self.secondsPerJob)
1134
+ node = Node(
1135
+ "127.0.0.1",
1136
+ uuid.uuid4(),
1137
+ "testNode",
1138
+ datetime.datetime.now().isoformat() + "Z",
1139
+ nodeType=nodeType,
1140
+ preemptible=preemptible,
1141
+ )
1142
+ self.nodesToWorker[node] = Worker(
1143
+ self.jobQueue, self.updatedJobsQueue, self.secondsPerJob
1144
+ )
961
1145
  self.workers[nodeShape].append(self.nodesToWorker[node])
962
- self.maxWorkers[nodeShape] = max(self.maxWorkers[nodeShape], len(self.workers[nodeShape]))
1146
+ self.maxWorkers[nodeShape] = max(
1147
+ self.maxWorkers[nodeShape], len(self.workers[nodeShape])
1148
+ )
963
1149
 
964
1150
  def _removeNodes(self, nodes):
965
- logger.debug("Removing nodes. %s workers and %s to terminate.", len(self.nodesToWorker), len(nodes))
1151
+ logger.debug(
1152
+ "Removing nodes. %s workers and %s to terminate.",
1153
+ len(self.nodesToWorker),
1154
+ len(nodes),
1155
+ )
966
1156
  for node in nodes:
967
1157
  try:
968
1158
  nodeShape = self.getNodeShape(node.nodeType, node.preemptible)