toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
@@ -51,14 +51,14 @@ class MesosExecutor(Executor):
51
51
  self.popenLock = threading.Lock()
52
52
  self.runningTasks = {}
53
53
  self.workerCleanupInfo = None
54
- log.debug('Preparing system for resource download')
54
+ log.debug("Preparing system for resource download")
55
55
  Resource.prepareSystem()
56
56
  self.address = None
57
57
  self.id = None
58
58
  # Setting this value at this point will ensure that the toil workflow directory will go to
59
59
  # the mesos sandbox if the user hasn't specified --workDir on the command line.
60
- if not os.getenv('TOIL_WORKDIR'):
61
- os.environ['TOIL_WORKDIR'] = os.getcwd()
60
+ if not os.getenv("TOIL_WORKDIR"):
61
+ os.environ["TOIL_WORKDIR"] = os.getcwd()
62
62
 
63
63
  def registered(self, driver, executorInfo, frameworkInfo, agentInfo):
64
64
  """
@@ -66,11 +66,13 @@ class MesosExecutor(Executor):
66
66
  """
67
67
 
68
68
  # Get the ID we have been assigned, if we have it
69
- self.id = executorInfo.executor_id.get('value', None)
69
+ self.id = executorInfo.executor_id.get("value", None)
70
70
 
71
71
  log.debug("Registered executor %s with framework", self.id)
72
72
  self.address = socket.gethostbyname(agentInfo.hostname)
73
- nodeInfoThread = threading.Thread(target=self._sendFrameworkMessage, args=[driver], daemon=True)
73
+ nodeInfoThread = threading.Thread(
74
+ target=self._sendFrameworkMessage, args=[driver], daemon=True
75
+ )
74
76
  nodeInfoThread.start()
75
77
 
76
78
  def reregistered(self, driver, agentInfo):
@@ -99,12 +101,12 @@ class MesosExecutor(Executor):
99
101
  os.killpg(pgid, signal.SIGKILL)
100
102
 
101
103
  def shutdown(self, driver):
102
- log.critical('Shutting down executor ...')
104
+ log.critical("Shutting down executor ...")
103
105
  for taskId in list(self.runningTasks.keys()):
104
106
  self.killTask(driver, taskId)
105
107
  Resource.cleanSystem()
106
108
  BatchSystemSupport.workerCleanup(self.workerCleanupInfo)
107
- log.critical('... executor shut down.')
109
+ log.critical("... executor shut down.")
108
110
 
109
111
  def error(self, driver, message):
110
112
  """
@@ -123,13 +125,15 @@ class MesosExecutor(Executor):
123
125
  message = Expando(address=self.address)
124
126
  psutil.cpu_percent()
125
127
  else:
126
- message.nodeInfo = dict(coresUsed=float(psutil.cpu_percent()) * .01,
127
- memoryUsed=float(psutil.virtual_memory().percent) * .01,
128
- coresTotal=cpu_count(),
129
- memoryTotal=psutil.virtual_memory().total,
130
- workers=len(self.runningTasks))
128
+ message.nodeInfo = dict(
129
+ coresUsed=float(psutil.cpu_percent()) * 0.01,
130
+ memoryUsed=float(psutil.virtual_memory().percent) * 0.01,
131
+ coresTotal=cpu_count(),
132
+ memoryTotal=psutil.virtual_memory().total,
133
+ workers=len(self.runningTasks),
134
+ )
131
135
  log.debug("Send framework message: %s", message)
132
- driver.sendFrameworkMessage(encode_data(repr(message).encode('utf-8')))
136
+ driver.sendFrameworkMessage(encode_data(repr(message).encode("utf-8")))
133
137
  # Prevent workers launched together from repeatedly hitting the leader at the same time
134
138
  time.sleep(random.randint(45, 75))
135
139
 
@@ -144,16 +148,21 @@ class MesosExecutor(Executor):
144
148
 
145
149
  log.debug("Running task %s", task.task_id.value)
146
150
  startTime = time.time()
147
- sendUpdate(task, 'TASK_RUNNING', wallTime=0)
151
+ sendUpdate(task, "TASK_RUNNING", wallTime=0)
148
152
 
149
153
  # try to unpickle the task
150
154
  try:
151
155
  taskData = pickle.loads(decode_data(task.data))
152
156
  except:
153
157
  exc_info = sys.exc_info()
154
- log.error('Exception while unpickling task: ', exc_info=exc_info)
158
+ log.error("Exception while unpickling task: ", exc_info=exc_info)
155
159
  exc_type, exc_value, exc_trace = exc_info
156
- sendUpdate(task, 'TASK_FAILED', wallTime=0, msg=''.join(traceback.format_exception_only(exc_type, exc_value)))
160
+ sendUpdate(
161
+ task,
162
+ "TASK_FAILED",
163
+ wallTime=0,
164
+ msg="".join(traceback.format_exception_only(exc_type, exc_value)),
165
+ )
157
166
  return
158
167
 
159
168
  # This is where task.data is first invoked. Using this position to setup cleanupInfo
@@ -170,23 +179,27 @@ class MesosExecutor(Executor):
170
179
  exitStatus = process.wait()
171
180
  wallTime = time.time() - startTime
172
181
  if 0 == exitStatus:
173
- sendUpdate(task, 'TASK_FINISHED', wallTime)
182
+ sendUpdate(task, "TASK_FINISHED", wallTime)
174
183
  elif -9 == exitStatus:
175
- sendUpdate(task, 'TASK_KILLED', wallTime)
184
+ sendUpdate(task, "TASK_KILLED", wallTime)
176
185
  else:
177
- sendUpdate(task, 'TASK_FAILED', wallTime, msg=str(exitStatus))
186
+ sendUpdate(task, "TASK_FAILED", wallTime, msg=str(exitStatus))
178
187
  finally:
179
188
  del self.runningTasks[task.task_id.value]
180
189
  except:
181
190
  wallTime = time.time() - startTime
182
191
  exc_info = sys.exc_info()
183
- log.error('Exception while running task:', exc_info=exc_info)
192
+ log.error("Exception while running task:", exc_info=exc_info)
184
193
  exc_type, exc_value, exc_trace = exc_info
185
- sendUpdate(task, 'TASK_FAILED', wallTime=wallTime, msg=''.join(traceback.format_exception_only(exc_type, exc_value)))
194
+ sendUpdate(
195
+ task,
196
+ "TASK_FAILED",
197
+ wallTime=wallTime,
198
+ msg="".join(traceback.format_exception_only(exc_type, exc_value)),
199
+ )
186
200
 
187
201
  wallTime = time.time() - startTime
188
- sendUpdate(task, 'TASK_FINISHED', wallTime)
189
-
202
+ sendUpdate(task, "TASK_FINISHED", wallTime)
190
203
 
191
204
  def runJob(job):
192
205
  """
@@ -196,16 +209,17 @@ class MesosExecutor(Executor):
196
209
  """
197
210
  if job.userScript:
198
211
  job.userScript.register()
199
- log.debug("Invoking command: '%s'", job.command)
212
+ command = job.command
213
+ log.debug("Invoking command: '%s'", command)
200
214
  # Construct the job's environment
201
215
  jobEnv = dict(os.environ, **job.environment)
202
- log.debug('Using environment variables: %s', jobEnv.keys())
216
+ log.debug("Using environment variables: %s", jobEnv.keys())
203
217
  with self.popenLock:
204
- return subprocess.Popen(job.command,
205
- preexec_fn=lambda: os.setpgrp(),
206
- shell=True, env=jobEnv)
218
+ return subprocess.Popen(
219
+ command, preexec_fn=lambda: os.setpgrp(), shell=True, env=jobEnv
220
+ )
207
221
 
208
- def sendUpdate(task, taskState, wallTime, msg=''):
222
+ def sendUpdate(task, taskState, wallTime, msg=""):
209
223
  update = addict.Dict()
210
224
  update.task_id.value = task.task_id.value
211
225
  if self.id is not None:
@@ -216,7 +230,7 @@ class MesosExecutor(Executor):
216
230
 
217
231
  # Add wallTime as a label.
218
232
  labels = addict.Dict()
219
- labels.labels = [{'key': 'wallTime', 'value': str(wallTime)}]
233
+ labels.labels = [{"key": "wallTime", "value": str(wallTime)}]
220
234
  update.labels = labels
221
235
 
222
236
  driver.sendStatusUpdate(update)
@@ -238,34 +252,48 @@ def main():
238
252
  if not os.environ.get("MESOS_AGENT_ENDPOINT"):
239
253
  # Some Mesos setups in our tests somehow lack this variable. Provide a
240
254
  # fake one to maybe convince the executor driver to work.
241
- os.environ["MESOS_AGENT_ENDPOINT"] = os.environ.get("MESOS_SLAVE_ENDPOINT", "127.0.0.1:5051")
242
- log.warning("Had to fake MESOS_AGENT_ENDPOINT as %s" % os.environ["MESOS_AGENT_ENDPOINT"])
255
+ os.environ["MESOS_AGENT_ENDPOINT"] = os.environ.get(
256
+ "MESOS_SLAVE_ENDPOINT", "127.0.0.1:5051"
257
+ )
258
+ log.warning(
259
+ "Had to fake MESOS_AGENT_ENDPOINT as %s"
260
+ % os.environ["MESOS_AGENT_ENDPOINT"]
261
+ )
243
262
 
244
263
  # must be set manually to enable toggling of the mesos log level for debugging jenkins
245
264
  # may be useful: https://github.com/DataBiosphere/toil/pull/2338#discussion_r223854931
246
265
  if False:
247
266
  try:
248
- urlopen("http://%s/logging/toggle?level=1&duration=15mins" % os.environ["MESOS_AGENT_ENDPOINT"]).read()
267
+ urlopen(
268
+ "http://%s/logging/toggle?level=1&duration=15mins"
269
+ % os.environ["MESOS_AGENT_ENDPOINT"]
270
+ ).read()
249
271
  log.debug("Toggled agent log level")
250
272
  except Exception:
251
273
  log.debug("Failed to toggle agent log level")
252
274
 
253
275
  # Parse the agent state
254
- agent_state = json.loads(urlopen("http://%s/state" % os.environ["MESOS_AGENT_ENDPOINT"]).read())
255
- if 'completed_frameworks' in agent_state:
276
+ agent_state = json.loads(
277
+ urlopen("http://%s/state" % os.environ["MESOS_AGENT_ENDPOINT"]).read()
278
+ )
279
+ if "completed_frameworks" in agent_state:
256
280
  # Drop the completed frameworks which grow over time
257
- del agent_state['completed_frameworks']
281
+ del agent_state["completed_frameworks"]
258
282
  log.debug("Agent state: %s", str(agent_state))
259
283
 
260
284
  log.debug("Virtual memory info in executor: %s" % repr(psutil.virtual_memory()))
261
285
 
262
- if os.path.exists('/sys/fs/cgroup/memory'):
286
+ if os.path.exists("/sys/fs/cgroup/memory"):
263
287
  # Mesos can limit memory with a cgroup, so we should report on that.
264
- for (dirpath, dirnames, filenames) in os.walk('/sys/fs/cgroup/memory', followlinks=True):
288
+ for dirpath, dirnames, filenames in os.walk(
289
+ "/sys/fs/cgroup/memory", followlinks=True
290
+ ):
265
291
  for filename in filenames:
266
- if 'limit_in_bytes' not in filename:
292
+ if "limit_in_bytes" not in filename:
267
293
  continue
268
- log.debug('cgroup memory info from %s:' % os.path.join(dirpath, filename))
294
+ log.debug(
295
+ "cgroup memory info from %s:" % os.path.join(dirpath, filename)
296
+ )
269
297
  try:
270
298
  for line in open(os.path.join(dirpath, filename)):
271
299
  log.debug(line.rstrip())
@@ -274,14 +302,13 @@ def main():
274
302
 
275
303
  # Mesos can also impose rlimit limits, including on things that really
276
304
  # ought to not be limited, like virtual address space size.
277
- log.debug('DATA rlimit: %s', str(resource.getrlimit(resource.RLIMIT_DATA)))
278
- log.debug('STACK rlimit: %s', str(resource.getrlimit(resource.RLIMIT_STACK)))
279
- log.debug('RSS rlimit: %s', str(resource.getrlimit(resource.RLIMIT_RSS)))
280
- log.debug('AS rlimit: %s', str(resource.getrlimit(resource.RLIMIT_AS)))
281
-
305
+ log.debug("DATA rlimit: %s", str(resource.getrlimit(resource.RLIMIT_DATA)))
306
+ log.debug("STACK rlimit: %s", str(resource.getrlimit(resource.RLIMIT_STACK)))
307
+ log.debug("RSS rlimit: %s", str(resource.getrlimit(resource.RLIMIT_RSS)))
308
+ log.debug("AS rlimit: %s", str(resource.getrlimit(resource.RLIMIT_AS)))
282
309
 
283
310
  executor = MesosExecutor()
284
- log.debug('Made executor')
311
+ log.debug("Made executor")
285
312
  driver = MesosExecutorDriver(executor, use_addict=True)
286
313
 
287
314
  old_on_event = driver.on_event
@@ -295,13 +322,15 @@ def main():
295
322
 
296
323
  driver.on_event = patched_on_event
297
324
 
298
- log.debug('Made driver')
325
+ log.debug("Made driver")
299
326
  driver.start()
300
- log.debug('Started driver')
327
+ log.debug("Started driver")
301
328
  driver_result = driver.join()
302
- log.debug('Joined driver')
329
+ log.debug("Joined driver")
303
330
 
304
331
  # Tolerate a None in addition to the code the docs suggest we should receive from join()
305
- exit_value = 0 if (driver_result is None or driver_result == 'DRIVER_STOPPED') else 1
332
+ exit_value = (
333
+ 0 if (driver_result is None or driver_result == "DRIVER_STOPPED") else 1
334
+ )
306
335
  assert len(executor.runningTasks) == 0
307
336
  sys.exit(exit_value)
@@ -17,16 +17,18 @@ log = logging.getLogger(__name__)
17
17
  class MesosTestSupport:
18
18
  """Mixin for test cases that need a running Mesos master and agent on the local host."""
19
19
 
20
- @retry(intervals=[1, 1, 2, 4, 8, 16, 32, 64, 128],
21
- log_message=(log.info, 'Checking if Mesos is ready...'))
20
+ @retry(
21
+ intervals=[1, 1, 2, 4, 8, 16, 32, 64, 128],
22
+ log_message=(log.info, "Checking if Mesos is ready..."),
23
+ )
22
24
  def wait_for_master(self):
23
- with closing(urlopen('http://127.0.0.1:5050/version')) as content:
25
+ with closing(urlopen("http://127.0.0.1:5050/version")) as content:
24
26
  content.read()
25
27
 
26
28
  def _startMesos(self, numCores=None):
27
29
  if numCores is None:
28
30
  numCores = cpu_count()
29
- shutil.rmtree('/tmp/mesos', ignore_errors=True)
31
+ shutil.rmtree("/tmp/mesos", ignore_errors=True)
30
32
  self.master = self.MesosMasterThread(numCores)
31
33
  self.master.start()
32
34
  self.agent = self.MesosAgentThread(numCores)
@@ -35,7 +37,7 @@ class MesosTestSupport:
35
37
  # Bad Things will happen if the master is not yet ready when Toil tries to use it.
36
38
  self.wait_for_master()
37
39
 
38
- log.info('Mesos is ready! Running test.')
40
+ log.info("Mesos is ready! Running test.")
39
41
 
40
42
  def _stopProcess(self, process, timeout=10) -> None:
41
43
  """Gracefully stop a process on a timeout, given the Popen object for the process."""
@@ -47,7 +49,7 @@ class MesosTestSupport:
47
49
  waited += 1
48
50
  if process.poll() is None:
49
51
  # It didn't shut down gracefully
50
- log.warning('Forcibly killing child which ignored SIGTERM')
52
+ log.warning("Forcibly killing child which ignored SIGTERM")
51
53
  process.kill()
52
54
 
53
55
  def _stopMesos(self):
@@ -71,7 +73,7 @@ class MesosTestSupport:
71
73
 
72
74
  def tryRun(self):
73
75
  self.popen.wait()
74
- log.info('Exiting %s', self.__class__.__name__)
76
+ log.info("Exiting %s", self.__class__.__name__)
75
77
 
76
78
  def findMesosBinary(self, names):
77
79
  if isinstance(names, str):
@@ -86,7 +88,7 @@ class MesosTestSupport:
86
88
  # Special case for users of PyCharm on OS X. This is where Homebrew installs
87
89
  # it. It's hard to set PATH for PyCharm (or any GUI app) on OS X so let's
88
90
  # make it easy for those poor souls.
89
- return which(name, path='/usr/local/sbin')
91
+ return which(name, path="/usr/local/sbin")
90
92
  except StopIteration:
91
93
  pass
92
94
 
@@ -94,18 +96,22 @@ class MesosTestSupport:
94
96
  if len(names) == 1:
95
97
  sought = "binary '%s'" % names[0]
96
98
  else:
97
- sought = 'any binary in %s' % str(names)
99
+ sought = "any binary in %s" % str(names)
98
100
 
99
- raise RuntimeError("Cannot find %s. Make sure Mesos is installed "
100
- "and it's 'bin' directory is present on the PATH." % sought)
101
+ raise RuntimeError(
102
+ "Cannot find %s. Make sure Mesos is installed "
103
+ "and it's 'bin' directory is present on the PATH." % sought
104
+ )
101
105
 
102
106
  class MesosMasterThread(MesosThread):
103
107
  def mesosCommand(self):
104
- return [self.findMesosBinary('mesos-master'),
105
- '--registry=in_memory',
106
- '--ip=127.0.0.1',
107
- '--port=5050',
108
- '--allocation_interval=500ms']
108
+ return [
109
+ self.findMesosBinary("mesos-master"),
110
+ "--registry=in_memory",
111
+ "--ip=127.0.0.1",
112
+ "--port=5050",
113
+ "--allocation_interval=500ms",
114
+ ]
109
115
 
110
116
  class MesosAgentThread(MesosThread):
111
117
  def mesosCommand(self):
@@ -114,10 +120,12 @@ class MesosTestSupport:
114
120
  # We also make sure to point it explicitly at the right temp work directory, and
115
121
  # to disable systemd support because we have to be root to make systemd make us
116
122
  # things and we probably aren't when testing.
117
- return [self.findMesosBinary(['mesos-agent']),
118
- '--ip=127.0.0.1',
119
- '--master=127.0.0.1:5050',
120
- '--attributes=preemptible:False',
121
- '--resources=cpus(*):%i' % self.numCores,
122
- '--work_dir=/tmp/mesos',
123
- '--no-systemd_enable_support']
123
+ return [
124
+ self.findMesosBinary(["mesos-agent"]),
125
+ "--ip=127.0.0.1",
126
+ "--master=127.0.0.1:5050",
127
+ "--attributes=preemptible:False",
128
+ "--resources=cpus(*):%i" % self.numCores,
129
+ "--work_dir=/tmp/mesos",
130
+ "--no-systemd_enable_support",
131
+ ]
@@ -12,22 +12,19 @@
12
12
  # See the License for the specific language governing permissions and
13
13
 
14
14
  import logging
15
- import sys
16
15
  from argparse import ArgumentParser, _ArgumentGroup
17
- from typing import Any, Callable, List, Optional, TypeVar, Union
16
+ from typing import Any, Callable, Optional, Protocol, TypeVar, Union
18
17
 
19
- if sys.version_info >= (3, 8):
20
- from typing import Protocol
21
- else:
22
- from typing_extensions import Protocol
23
-
24
- from toil.batchSystems.registry import (DEFAULT_BATCH_SYSTEM,
25
- get_batch_system,
26
- get_batch_systems)
18
+ from toil.batchSystems.registry import (
19
+ DEFAULT_BATCH_SYSTEM,
20
+ get_batch_system,
21
+ get_batch_systems,
22
+ )
27
23
  from toil.lib.threading import cpu_count
28
24
 
29
25
  logger = logging.getLogger(__name__)
30
26
 
27
+
31
28
  class OptionSetter(Protocol):
32
29
  """
33
30
  Protocol for the setOption function we get to let us set up CLI options for
@@ -36,19 +33,22 @@ class OptionSetter(Protocol):
36
33
  Actual functionality is defined in the Config class.
37
34
  """
38
35
 
39
- OptionType = TypeVar('OptionType')
36
+ OptionType = TypeVar("OptionType")
37
+
40
38
  def __call__(
41
39
  self,
42
40
  option_name: str,
43
41
  parsing_function: Optional[Callable[[Any], OptionType]] = None,
44
42
  check_function: Optional[Callable[[OptionType], Union[None, bool]]] = None,
45
43
  default: Optional[OptionType] = None,
46
- env: Optional[List[str]] = None,
47
- old_names: Optional[List[str]] = None
48
- ) -> bool:
49
- ...
44
+ env: Optional[list[str]] = None,
45
+ old_names: Optional[list[str]] = None,
46
+ ) -> bool: ...
50
47
 
51
- def set_batchsystem_options(batch_system: Optional[str], set_option: OptionSetter) -> None:
48
+
49
+ def set_batchsystem_options(
50
+ batch_system: Optional[str], set_option: OptionSetter
51
+ ) -> None:
52
52
  """
53
53
  Call set_option for all the options for the given named batch system, or
54
54
  all batch systems if no name is provided.
@@ -76,6 +76,7 @@ def set_batchsystem_options(batch_system: Optional[str], set_option: OptionSette
76
76
  set_option("manualMemArgs")
77
77
  set_option("run_local_jobs_on_workers")
78
78
  set_option("statePollingWait")
79
+ set_option("state_polling_timeout")
79
80
  set_option("batch_logs_dir")
80
81
 
81
82
 
@@ -109,11 +110,11 @@ def add_all_batchsystem_options(parser: Union[ArgumentParser, _ArgumentGroup]) -
109
110
  parser.add_argument(
110
111
  "--maxJobs",
111
112
  dest="max_jobs",
112
- default=SYS_MAX_SIZE, # This is *basically* unlimited and saves a lot of Optional[]
113
+ default=SYS_MAX_SIZE, # This is *basically* unlimited and saves a lot of Optional[]
113
114
  type=lambda x: int(x) or SYS_MAX_SIZE,
114
115
  help="Specifies the maximum number of jobs to submit to the "
115
- "backing scheduler at once. Not supported on Mesos or "
116
- "AWS Batch. Use 0 for unlimited. Defaults to unlimited.",
116
+ "backing scheduler at once. Not supported on Mesos or "
117
+ "AWS Batch. Use 0 for unlimited. Defaults to unlimited.",
117
118
  )
118
119
  parser.add_argument(
119
120
  "--maxLocalJobs",
@@ -121,8 +122,8 @@ def add_all_batchsystem_options(parser: Union[ArgumentParser, _ArgumentGroup]) -
121
122
  default=None,
122
123
  type=lambda x: int(x) or 0,
123
124
  help=f"Specifies the maximum number of housekeeping jobs to "
124
- f"run sumultaneously on the local system. Use 0 for "
125
- f"unlimited. Defaults to the number of local cores ({cpu_count()}).",
125
+ f"run sumultaneously on the local system. Use 0 for "
126
+ f"unlimited. Defaults to the number of local cores ({cpu_count()}).",
126
127
  )
127
128
  parser.add_argument(
128
129
  "--manualMemArgs",
@@ -161,8 +162,16 @@ def add_all_batchsystem_options(parser: Union[ArgumentParser, _ArgumentGroup]) -
161
162
  type=int,
162
163
  default=None,
163
164
  help="Time, in seconds, to wait before doing a scheduler query for job state. "
164
- "Return cached results if within the waiting period. Only works for grid "
165
- "engine batch systems such as gridengine, htcondor, torque, slurm, and lsf."
165
+ "Return cached results if within the waiting period. Only works for grid "
166
+ "engine batch systems such as gridengine, htcondor, torque, slurm, and lsf.",
167
+ )
168
+ parser.add_argument(
169
+ "--statePollingTimeout",
170
+ dest="state_polling_timeout",
171
+ type=int,
172
+ default=1200,
173
+ help="Time, in seconds, to retry against a broken scheduler. Only works for grid "
174
+ "engine batch systems such as gridengine, htcondor, torque, slurm, and lsf.",
166
175
  )
167
176
  parser.add_argument(
168
177
  "--batchLogsDir",
@@ -170,10 +179,19 @@ def add_all_batchsystem_options(parser: Union[ArgumentParser, _ArgumentGroup]) -
170
179
  default=None,
171
180
  env_var="TOIL_BATCH_LOGS_DIR",
172
181
  help="Directory to tell the backing batch system to log into. Should be available "
173
- "on both the leader and the workers, if the backing batch system writes logs "
174
- "to the worker machines' filesystems, as many HPC schedulers do. If unset, "
175
- "the Toil work directory will be used. Only works for grid engine batch "
176
- "systems such as gridengine, htcondor, torque, slurm, and lsf."
182
+ "on both the leader and the workers, if the backing batch system writes logs "
183
+ "to the worker machines' filesystems, as many HPC schedulers do. If unset, "
184
+ "the Toil work directory will be used. Only works for grid engine batch "
185
+ "systems such as gridengine, htcondor, torque, slurm, and lsf.",
186
+ )
187
+
188
+ parser.add_argument(
189
+ "--memoryIsProduct",
190
+ dest="memory_is_product",
191
+ default=False,
192
+ action="store_true",
193
+ help="If the batch system understands requested memory as a product of the requested memory and the number"
194
+ "of cores, set this flag to properly allocate memory.",
177
195
  )
178
196
 
179
197
  for name in get_batch_systems():
@@ -185,5 +203,5 @@ def add_all_batchsystem_options(parser: Union[ArgumentParser, _ArgumentGroup]) -
185
203
  # Skip anything we can't import
186
204
  continue
187
205
  # Ask the batch system to create its options in the parser
188
- logger.debug('Add options for %s batch system', name)
206
+ logger.debug("Add options for %s batch system", name)
189
207
  batch_system_type.add_options(parser)