toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. toil/__init__.py +124 -86
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +137 -77
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
  5. toil/batchSystems/awsBatch.py +237 -128
  6. toil/batchSystems/cleanup_support.py +22 -16
  7. toil/batchSystems/contained_executor.py +30 -26
  8. toil/batchSystems/gridengine.py +85 -49
  9. toil/batchSystems/htcondor.py +164 -87
  10. toil/batchSystems/kubernetes.py +622 -386
  11. toil/batchSystems/local_support.py +17 -12
  12. toil/batchSystems/lsf.py +132 -79
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +288 -149
  16. toil/batchSystems/mesos/executor.py +77 -49
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +39 -29
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +293 -123
  21. toil/batchSystems/slurm.py +651 -155
  22. toil/batchSystems/torque.py +46 -32
  23. toil/bus.py +141 -73
  24. toil/common.py +784 -397
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1137 -534
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +62 -41
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +88 -57
  32. toil/fileStores/cachingFileStore.py +711 -247
  33. toil/fileStores/nonCachingFileStore.py +113 -75
  34. toil/job.py +1031 -349
  35. toil/jobStores/abstractJobStore.py +387 -243
  36. toil/jobStores/aws/jobStore.py +772 -412
  37. toil/jobStores/aws/utils.py +161 -109
  38. toil/jobStores/conftest.py +1 -0
  39. toil/jobStores/fileJobStore.py +289 -151
  40. toil/jobStores/googleJobStore.py +137 -70
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +614 -269
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +55 -28
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +204 -58
  49. toil/lib/aws/utils.py +290 -213
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +83 -49
  53. toil/lib/docker.py +131 -103
  54. toil/lib/dockstore.py +379 -0
  55. toil/lib/ec2.py +322 -209
  56. toil/lib/ec2nodes.py +174 -105
  57. toil/lib/encryption/_dummy.py +5 -3
  58. toil/lib/encryption/_nacl.py +10 -6
  59. toil/lib/encryption/conftest.py +1 -0
  60. toil/lib/exceptions.py +26 -7
  61. toil/lib/expando.py +4 -2
  62. toil/lib/ftp_utils.py +217 -0
  63. toil/lib/generatedEC2Lists.py +127 -19
  64. toil/lib/history.py +1271 -0
  65. toil/lib/history_submission.py +681 -0
  66. toil/lib/humanize.py +6 -2
  67. toil/lib/io.py +121 -12
  68. toil/lib/iterables.py +4 -2
  69. toil/lib/memoize.py +12 -8
  70. toil/lib/misc.py +83 -18
  71. toil/lib/objects.py +2 -2
  72. toil/lib/resources.py +19 -7
  73. toil/lib/retry.py +125 -87
  74. toil/lib/threading.py +282 -80
  75. toil/lib/throttle.py +15 -14
  76. toil/lib/trs.py +390 -0
  77. toil/lib/web.py +38 -0
  78. toil/options/common.py +850 -402
  79. toil/options/cwl.py +185 -90
  80. toil/options/runner.py +50 -0
  81. toil/options/wdl.py +70 -19
  82. toil/provisioners/__init__.py +111 -46
  83. toil/provisioners/abstractProvisioner.py +322 -157
  84. toil/provisioners/aws/__init__.py +62 -30
  85. toil/provisioners/aws/awsProvisioner.py +980 -627
  86. toil/provisioners/clusterScaler.py +541 -279
  87. toil/provisioners/gceProvisioner.py +283 -180
  88. toil/provisioners/node.py +147 -79
  89. toil/realtimeLogger.py +34 -22
  90. toil/resource.py +137 -75
  91. toil/server/app.py +127 -61
  92. toil/server/celery_app.py +3 -1
  93. toil/server/cli/wes_cwl_runner.py +84 -55
  94. toil/server/utils.py +56 -31
  95. toil/server/wes/abstract_backend.py +64 -26
  96. toil/server/wes/amazon_wes_utils.py +21 -15
  97. toil/server/wes/tasks.py +121 -63
  98. toil/server/wes/toil_backend.py +142 -107
  99. toil/server/wsgi_app.py +4 -3
  100. toil/serviceManager.py +58 -22
  101. toil/statsAndLogging.py +183 -65
  102. toil/test/__init__.py +263 -179
  103. toil/test/batchSystems/batchSystemTest.py +438 -195
  104. toil/test/batchSystems/batch_system_plugin_test.py +18 -7
  105. toil/test/batchSystems/test_gridengine.py +173 -0
  106. toil/test/batchSystems/test_lsf_helper.py +67 -58
  107. toil/test/batchSystems/test_slurm.py +265 -49
  108. toil/test/cactus/test_cactus_integration.py +20 -22
  109. toil/test/cwl/conftest.py +39 -0
  110. toil/test/cwl/cwlTest.py +375 -72
  111. toil/test/cwl/measure_default_memory.cwl +12 -0
  112. toil/test/cwl/not_run_required_input.cwl +29 -0
  113. toil/test/cwl/optional-file.cwl +18 -0
  114. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  115. toil/test/docs/scriptsTest.py +60 -34
  116. toil/test/jobStores/jobStoreTest.py +412 -235
  117. toil/test/lib/aws/test_iam.py +116 -48
  118. toil/test/lib/aws/test_s3.py +16 -9
  119. toil/test/lib/aws/test_utils.py +5 -6
  120. toil/test/lib/dockerTest.py +118 -141
  121. toil/test/lib/test_conversions.py +113 -115
  122. toil/test/lib/test_ec2.py +57 -49
  123. toil/test/lib/test_history.py +212 -0
  124. toil/test/lib/test_misc.py +12 -5
  125. toil/test/lib/test_trs.py +161 -0
  126. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  127. toil/test/mesos/helloWorld.py +7 -6
  128. toil/test/mesos/stress.py +25 -20
  129. toil/test/options/options.py +7 -2
  130. toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
  131. toil/test/provisioners/clusterScalerTest.py +440 -250
  132. toil/test/provisioners/clusterTest.py +81 -42
  133. toil/test/provisioners/gceProvisionerTest.py +174 -100
  134. toil/test/provisioners/provisionerTest.py +25 -13
  135. toil/test/provisioners/restartScript.py +5 -4
  136. toil/test/server/serverTest.py +188 -141
  137. toil/test/sort/restart_sort.py +137 -68
  138. toil/test/sort/sort.py +134 -66
  139. toil/test/sort/sortTest.py +91 -49
  140. toil/test/src/autoDeploymentTest.py +140 -100
  141. toil/test/src/busTest.py +20 -18
  142. toil/test/src/checkpointTest.py +8 -2
  143. toil/test/src/deferredFunctionTest.py +49 -35
  144. toil/test/src/dockerCheckTest.py +33 -26
  145. toil/test/src/environmentTest.py +20 -10
  146. toil/test/src/fileStoreTest.py +538 -271
  147. toil/test/src/helloWorldTest.py +7 -4
  148. toil/test/src/importExportFileTest.py +61 -31
  149. toil/test/src/jobDescriptionTest.py +32 -17
  150. toil/test/src/jobEncapsulationTest.py +2 -0
  151. toil/test/src/jobFileStoreTest.py +74 -50
  152. toil/test/src/jobServiceTest.py +187 -73
  153. toil/test/src/jobTest.py +120 -70
  154. toil/test/src/miscTests.py +19 -18
  155. toil/test/src/promisedRequirementTest.py +82 -36
  156. toil/test/src/promisesTest.py +7 -6
  157. toil/test/src/realtimeLoggerTest.py +6 -6
  158. toil/test/src/regularLogTest.py +71 -37
  159. toil/test/src/resourceTest.py +80 -49
  160. toil/test/src/restartDAGTest.py +36 -22
  161. toil/test/src/resumabilityTest.py +9 -2
  162. toil/test/src/retainTempDirTest.py +45 -14
  163. toil/test/src/systemTest.py +12 -8
  164. toil/test/src/threadingTest.py +44 -25
  165. toil/test/src/toilContextManagerTest.py +10 -7
  166. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  167. toil/test/src/workerTest.py +33 -16
  168. toil/test/utils/toilDebugTest.py +70 -58
  169. toil/test/utils/toilKillTest.py +4 -5
  170. toil/test/utils/utilsTest.py +239 -102
  171. toil/test/wdl/wdltoil_test.py +789 -148
  172. toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
  173. toil/toilState.py +52 -26
  174. toil/utils/toilConfig.py +13 -4
  175. toil/utils/toilDebugFile.py +44 -27
  176. toil/utils/toilDebugJob.py +85 -25
  177. toil/utils/toilDestroyCluster.py +11 -6
  178. toil/utils/toilKill.py +8 -3
  179. toil/utils/toilLaunchCluster.py +251 -145
  180. toil/utils/toilMain.py +37 -16
  181. toil/utils/toilRsyncCluster.py +27 -14
  182. toil/utils/toilSshCluster.py +45 -22
  183. toil/utils/toilStats.py +75 -36
  184. toil/utils/toilStatus.py +226 -119
  185. toil/utils/toilUpdateEC2Instances.py +3 -1
  186. toil/version.py +6 -6
  187. toil/wdl/utils.py +5 -5
  188. toil/wdl/wdltoil.py +3528 -1053
  189. toil/worker.py +370 -149
  190. toil-8.1.0b1.dist-info/METADATA +178 -0
  191. toil-8.1.0b1.dist-info/RECORD +259 -0
  192. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
  193. toil-7.0.0.dist-info/METADATA +0 -158
  194. toil-7.0.0.dist-info/RECORD +0 -244
  195. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
  196. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
  197. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/serviceManager.py CHANGED
@@ -15,9 +15,10 @@
 
 import logging
 import time
+from collections.abc import Iterable
 from queue import Empty, Queue
 from threading import Event, Thread
-from typing import Iterable, Optional, Set
+from typing import Optional
 
 from toil.job import ServiceJobDescription
 from toil.jobStores.abstractJobStore import AbstractJobStore
@@ -40,7 +41,7 @@ class ServiceManager:
 
         # These are all the client jobs that are waiting for their services to
         # start.
-        self.__waiting_clients: Set[str] = set()
+        self.__waiting_clients: set[str] = set()
 
         # This is used to terminate the thread associated with the service
         # manager
@@ -123,7 +124,9 @@ class ServiceManager:
             client_id = self.__clients_out.get(timeout=maxWait)
             self.__waiting_clients.remove(client_id)
             if self.__service_manager_jobs < 0:
-                raise RuntimeError("The number of jobs scheduled by the service manager cannot be negative.")
+                raise RuntimeError(
+                    "The number of jobs scheduled by the service manager cannot be negative."
+                )
             self.__service_manager_jobs -= 1
             return client_id
         except Empty:
@@ -141,7 +144,9 @@ class ServiceManager:
             client_id = self.__failed_clients_out.get(timeout=maxWait)
             self.__waiting_clients.remove(client_id)
             if self.__service_manager_jobs < 0:
-                raise RuntimeError("The number of jobs scheduled by the service manager cannot be negative.")
+                raise RuntimeError(
+                    "The number of jobs scheduled by the service manager cannot be negative."
+                )
             self.__service_manager_jobs -= 1
             return client_id
         except Empty:
@@ -157,7 +162,9 @@ class ServiceManager:
         try:
             service_id = self.__services_out.get(timeout=maxWait)
             if self.__service_manager_jobs < 0:
-                raise RuntimeError("The number of jobs scheduled by the service manager cannot be negative.")
+                raise RuntimeError(
+                    "The number of jobs scheduled by the service manager cannot be negative."
+                )
             self.__service_manager_jobs -= 1
             return service_id
         except Empty:
@@ -226,7 +233,7 @@ class ServiceManager:
 
         Will block until all services are started and blocked.
         """
-        logger.debug('Waiting for service manager thread to finish ...')
+        logger.debug("Waiting for service manager thread to finish ...")
         start_time = time.time()
         self.__terminate.set()
         self.__service_starter.join()
@@ -251,13 +258,17 @@ class ServiceManager:
         while True:
             with throttle(1.0):
                 if self.__terminate.is_set():
-                    logger.debug('Received signal to quit starting services.')
+                    logger.debug("Received signal to quit starting services.")
                     break
                 try:
                     client_id = self.__clients_in.get_nowait()
                     client = self.__toil_state.get_job(client_id)
                     host_id_batches = list(client.serviceHostIDsInBatches())
-                    logger.debug("Service manager processing client %s with %d batches of services", client, len(host_id_batches))
+                    logger.debug(
+                        "Service manager processing client %s with %d batches of services",
+                        client,
+                        len(host_id_batches),
+                    )
                     if len(host_id_batches) > 1:
                         # Have to fall back to the old blocking behavior to
                         # ensure entire service "groups" are issued as a whole.
@@ -288,7 +299,7 @@ class ServiceManager:
 
                 pending_service_count = len(starting_services)
                 if pending_service_count > 0 and log_limiter.throttle(False):
-                    logger.debug('%d services are starting...', pending_service_count)
+                    logger.debug("%d services are starting...", pending_service_count)
 
                 for service_id in list(starting_services):
                     service_job_desc = self._get_service_job(service_id)
@@ -297,7 +308,9 @@ class ServiceManager:
                         or service_job_desc.errorJobStoreID is None
                     ):
                         raise Exception("Must be a registered ServiceJobDescription")
-                    if not self.__job_store.file_exists(service_job_desc.startJobStoreID):
+                    if not self.__job_store.file_exists(
+                        service_job_desc.startJobStoreID
+                    ):
                         # Service has started (or failed)
                         logger.debug(
                             "Service %s has removed %s and is therefore started",
@@ -308,9 +321,13 @@
                         client_id = service_to_client[service_id]
                         remaining_services_by_client[client_id] -= 1
                         if remaining_services_by_client[client_id] < 0:
-                            raise RuntimeError("The number of remaining services cannot be negative.")
+                            raise RuntimeError(
+                                "The number of remaining services cannot be negative."
+                            )
                         del service_to_client[service_id]
-                        if not self.__job_store.file_exists(service_job_desc.errorJobStoreID):
+                        if not self.__job_store.file_exists(
+                            service_job_desc.errorJobStoreID
+                        ):
                             logger.error(
                                 "Service %s has immediately failed before it could be used",
                                 service_job_desc,
@@ -321,13 +338,22 @@
 
                 # Find if any clients have had *all* their services started.
                 ready_clients = set()
-                for client_id, remainingServices in remaining_services_by_client.items():
+                for (
+                    client_id,
+                    remainingServices,
+                ) in remaining_services_by_client.items():
                     if remainingServices == 0:
                         if client_id in clients_with_failed_services:
-                            logger.error('Job %s has had all its services try to start, but at least one failed', self.__toil_state.get_job(client_id))
+                            logger.error(
+                                "Job %s has had all its services try to start, but at least one failed",
+                                self.__toil_state.get_job(client_id),
+                            )
                             self.__failed_clients_out.put(client_id)
                         else:
-                            logger.debug('Job %s has all its services started', self.__toil_state.get_job(client_id))
+                            logger.debug(
+                                "Job %s has all its services started",
+                                self.__toil_state.get_job(client_id),
+                            )
                             self.__clients_out.put(client_id)
                         ready_clients.add(client_id)
                 for client_id in ready_clients:
@@ -344,7 +370,9 @@
 
         # Start the service jobs in batches, waiting for each batch
        # to become established before starting the next batch
-        for service_job_list in self.__toil_state.get_job(client_id).serviceHostIDsInBatches():
+        for service_job_list in self.__toil_state.get_job(
+            client_id
+        ).serviceHostIDsInBatches():
             # When we get the job descriptions we store them here to go over them again.
             wait_on = []
             for service_id in service_job_list:
@@ -361,9 +389,13 @@
                     service_job_desc.startJobStoreID,
                 )
                 if not self.__job_store.file_exists(service_job_desc.startJobStoreID):
-                    raise RuntimeError(f"Service manager attempted to start service {service_job_desc} that has already started")
+                    raise RuntimeError(
+                        f"Service manager attempted to start service {service_job_desc} that has already started"
+                    )
                 if not self.__toil_state.job_exists(str(service_job_desc.jobStoreID)):
-                    raise RuntimeError(f"Service manager attempted to start service {service_job_desc} that is not in the job store")
+                    raise RuntimeError(
+                        f"Service manager attempted to start service {service_job_desc} that is not in the job store"
+                    )
                 # At this point the terminateJobStoreID and errorJobStoreID
                 # could have been deleted, since the service can be killed at
                 # any time! So we can't assert their presence here.
@@ -382,7 +414,7 @@
                 time.sleep(1.0)
 
                 if log_limiter.throttle(False):
-                    logger.info('Service %s is starting...', service_job_desc)
+                    logger.info("Service %s is starting...", service_job_desc)
 
                 # Check if the thread should quit
                 if self.__terminate.is_set():
@@ -395,9 +427,14 @@
                 ):
                     # The service job has gone away but the service never flipped its start flag.
                     # That's not what the worker is supposed to do when running a service at all.
-                    logger.error('Service %s has completed and been removed without ever starting', service_job_desc)
+                    logger.error(
+                        "Service %s has completed and been removed without ever starting",
+                        service_job_desc,
+                    )
                     # Stop everything.
-                    raise RuntimeError(f"Service {service_job_desc} is in an inconsistent state")
+                    raise RuntimeError(
+                        f"Service {service_job_desc} is in an inconsistent state"
+                    )
 
                 # We don't bail out early here.
@@ -409,6 +446,5 @@
         # though, so they should stop immediately when we run them. TODO:
         # this is a bad design!
 
-
         # Add the JobDescription to the output queue of jobs whose services have been started
         self.__clients_out.put(client_id)
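
Most of the churn above is mechanical reformatting, but the import hunk also modernizes the type hints: abstract collection types now come from collections.abc, and built-in generics replace the deprecated typing aliases (PEP 585, Python 3.9+). A minimal sketch of the before/after pattern, with illustrative names not taken from the package:

    # Old style (deprecated since Python 3.9):
    #   from typing import Iterable, Optional, Set
    #   waiting: Set[str] = set()

    # New style, matching the hunks above:
    from collections.abc import Iterable
    from typing import Optional

    def first_waiting(clients: Iterable[str]) -> Optional[str]:
        # Built-in set[str] replaces typing.Set[str].
        waiting: set[str] = set(clients)
        return min(waiting) if waiting else None
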
toil/statsAndLogging.py CHANGED
@@ -20,10 +20,11 @@ import time
 from argparse import ArgumentParser, Namespace
 from logging.handlers import RotatingFileHandler
 from threading import Event, Thread
-from typing import IO, TYPE_CHECKING, Any, Callable, List, Optional, Union
+from typing import IO, TYPE_CHECKING, Any, Callable, Optional, Union
 
 from toil.lib.conversions import strtobool
 from toil.lib.expando import Expando
+from toil.lib.history import HistoryManager
 from toil.lib.resources import ResourceMonitor
 
 if TYPE_CHECKING:
@@ -32,27 +33,36 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 root_logger = logging.getLogger()
-toil_logger = logging.getLogger('toil')
+toil_logger = logging.getLogger("toil")
 
 DEFAULT_LOGLEVEL = logging.INFO
 __loggingFiles = []
 
+# We have some logging that belongs at a TRACE level, below DEBUG
+TRACE = logging.DEBUG - 5
+
+logging.addLevelName(TRACE, "TRACE")
+
 
 class StatsAndLogging:
     """A thread to aggregate statistics and logging."""
 
-    def __init__(self, jobStore: 'AbstractJobStore', config: 'Config') -> None:
+    def __init__(self, jobStore: "AbstractJobStore", config: "Config") -> None:
         self._stop = Event()
-        self._worker = Thread(target=self.statsAndLoggingAggregator,
-                              args=(jobStore, self._stop, config),
-                              daemon=True)
+        self._worker = Thread(
+            target=self.statsAndLoggingAggregator,
+            args=(jobStore, self._stop, config),
+            daemon=True,
+        )
 
     def start(self) -> None:
         """Start the stats and logging thread."""
         self._worker.start()
 
     @classmethod
-    def formatLogStream(cls, stream: Union[IO[str], IO[bytes]], stream_name: str) -> str:
+    def formatLogStream(
+        cls, stream: Union[IO[str], IO[bytes]], stream_name: str
+    ) -> str:
         """
         Given a stream of text or bytes, and the job name, job itself, or some
         other optional stringifyable identity info for the job, return a big
@@ -65,21 +75,25 @@
 
         :param stream: The stream of text or bytes to print for the user.
         """
-        lines = [f'{stream_name} follows:', '=========>']
+        lines = [f"{stream_name} follows:", "=========>"]
 
         for line in stream:
             if isinstance(line, bytes):
-                line = line.decode('utf-8', errors='replace')
-            lines.append('\t' + line.rstrip('\n'))
+                line = line.decode("utf-8", errors="replace")
+            lines.append("\t" + line.rstrip("\n"))
 
-        lines.append('<=========')
+        lines.append("<=========")
 
-        return '\n'.join(lines)
+        return "\n".join(lines)
 
     @classmethod
-    def logWithFormatting(cls, stream_name: str, jobLogs: Union[IO[str], IO[bytes]],
-                          method: Callable[[str], None] = logger.debug,
-                          message: Optional[str] = None) -> None:
+    def logWithFormatting(
+        cls,
+        stream_name: str,
+        jobLogs: Union[IO[str], IO[bytes]],
+        method: Callable[[str], None] = logger.debug,
+        message: Optional[str] = None,
+    ) -> None:
         if message is not None:
             method(message)
 
@@ -87,28 +101,36 @@
             method(cls.formatLogStream(jobLogs, stream_name))
 
     @classmethod
-    def writeLogFiles(cls, jobNames: List[str], jobLogList: List[str], config: 'Config', failed: bool = False) -> None:
-        def createName(logPath: str, jobName: str, logExtension: str, failed: bool = False) -> str:
-            logName = jobName.replace('-', '--')
-            logName = logName.replace('/', '-')
-            logName = logName.replace(' ', '_')
-            logName = logName.replace("'", '')
-            logName = logName.replace('"', '')
+    def writeLogFiles(
+        cls,
+        jobNames: list[str],
+        jobLogList: list[str],
+        config: "Config",
+        failed: bool = False,
+    ) -> None:
+        def createName(
+            logPath: str, jobName: str, logExtension: str, failed: bool = False
+        ) -> str:
+            logName = jobName.replace("-", "--")
+            logName = logName.replace("/", "-")
+            logName = logName.replace(" ", "_")
+            logName = logName.replace("'", "")
+            logName = logName.replace('"', "")
             # Add a "failed_" prefix to logs from failed jobs.
-            logName = ('failed_' if failed else '') + logName
+            logName = ("failed_" if failed else "") + logName
             counter = 0
             while True:
-                suffix = '_' + str(counter).zfill(3) + logExtension
+                suffix = "_" + str(counter).zfill(3) + logExtension
                 fullName = os.path.join(logPath, logName + suffix)
                 # The maximum file name size in the default HFS+ file system is 255 UTF-16 encoding units, so basically 255 characters
                 if len(fullName) >= 255:
-                    return fullName[:(255 - len(suffix))] + suffix
+                    return fullName[: (255 - len(suffix))] + suffix
                 if not os.path.exists(fullName):
                     return fullName
                 counter += 1
 
         mainFileName = jobNames[0]
-        extension = '.log'
+        extension = ".log"
         writeFn: Callable[..., Any]
         if config.writeLogs:
             path = config.writeLogs
@@ -116,7 +138,7 @@ class StatsAndLogging:
         elif config.writeLogsGzip:
             path = config.writeLogsGzip
             writeFn = gzip.open
-            extension += '.gz'
+            extension += ".gz"
         else:
             # we don't have anywhere to write the logs, return now
             return
@@ -125,13 +147,13 @@
         os.makedirs(path, exist_ok=True)
 
         fullName = createName(path, mainFileName, extension, failed)
-        with writeFn(fullName, 'wb') as f:
+        with writeFn(fullName, "wb") as f:
             for l in jobLogList:
                 if isinstance(l, bytes):
-                    l = l.decode('utf-8')
-                if not l.endswith('\n'):
-                    l += '\n'
-                f.write(l.encode('utf-8'))
+                    l = l.decode("utf-8")
+                if not l.endswith("\n"):
+                    l += "\n"
+                f.write(l.encode("utf-8"))
         for alternateName in jobNames[1:]:
             # There are chained jobs in this output - indicate this with a symlink
             # of the job's name to this file
@@ -140,11 +162,14 @@
                 os.symlink(os.path.relpath(fullName, path), name)
 
     @classmethod
-    def statsAndLoggingAggregator(cls, jobStore: 'AbstractJobStore', stop: Event, config: 'Config') -> None:
+    def statsAndLoggingAggregator(
+        cls, jobStore: "AbstractJobStore", stop: Event, config: "Config"
+    ) -> None:
         """
         The following function is used for collating stats/reporting log messages from the workers.
         Works inside of a thread, collates as long as the stop flag is not True.
         """
+
         # Overall timing
         startTime = time.time()
         startClock = ResourceMonitor.get_total_cpu_time()
@@ -165,9 +190,12 @@
                 pass
             else:
                 for message in logs:
-                    logger.log(int(message.level),
-                               'Got message from job at time %s: %s',
-                               time.strftime('%m-%d-%Y %H:%M:%S'), message.text)
+                    logger.log(
+                        int(message.level),
+                        "Got message from job at time %s: %s",
+                        time.strftime("%m-%d-%Y %H:%M:%S"),
+                        message.text,
+                    )
 
             try:
                 # Handle all the user-level text streams reported back (command output, etc.)
@@ -198,12 +226,47 @@
                 # we may have multiple jobs per worker
                 jobNames = logs.names
                 messages = logs.messages
-                cls.logWithFormatting(f'Log from job "{jobNames[0]}"', messages,
-                                      message='Received Toil worker log. Disable debug level logging to hide this output')
+                cls.logWithFormatting(
+                    f'Log from job "{jobNames[0]}"',
+                    messages,
+                    message="Received Toil worker log. Disable debug level logging to hide this output",
+                )
                 cls.writeLogFiles(jobNames, messages, config=config)
 
+            try:
+                jobs = stats.jobs
+            except AttributeError:
+                pass
+            else:
+                for job in jobs:
+                    try:
+                        # Here we're talking to job._executor which fills in these stats.
+
+                        # Convince MyPy we won't be sent any job stats without
+                        # a workflow ID. You can't set up the job store without
+                        # one, but if we're somehow missing one, keep the stats
+                        # and logging thread up.
+                        assert config.workflowID is not None
+
+                        # TODO: Use better job names!
+                        HistoryManager.record_job_attempt(
+                            config.workflowID,
+                            config.workflowAttemptNumber,
+                            job.class_name,
+                            job.succeeded == "True",
+                            float(job.start),
+                            float(job.time),
+                            cores=float(job.requested_cores),
+                            cpu_seconds=float(job.clock),
+                            memory_bytes=int(job.memory) * 1024,
+                            disk_bytes=int(job.disk)
+                        )
+                    except:
+                        logger.exception("Could not record job attempt in history!")
+                        # Keep going. Don't fail the workflow for history-related issues.
+
         while True:
-            # This is a indirect way of getting a message to the thread to exit
+            # This is an indirect way of getting a message to the thread to exit
             if stop.is_set():
                 jobStore.read_logs(callback)
                 break
@@ -211,8 +274,13 @@
                 time.sleep(0.5)  # Avoid cycling too fast
 
         # Finish the stats file
-        text = json.dumps(dict(total_time=str(time.time() - startTime),
-                               total_clock=str(ResourceMonitor.get_total_cpu_time() - startClock)), ensure_ascii=True)
+        text = json.dumps(
+            dict(
+                total_time=str(time.time() - startTime),
+                total_clock=str(ResourceMonitor.get_total_cpu_time() - startClock),
+            ),
+            ensure_ascii=True,
+        )
         jobStore.write_logs(text)
 
     def check(self) -> None:
@@ -225,11 +293,14 @@
 
     def shutdown(self) -> None:
         """Finish up the stats/logging aggregation thread."""
-        logger.debug('Waiting for stats and logging collator thread to finish ...')
+        logger.debug("Waiting for stats and logging collator thread to finish ...")
         startTime = time.time()
         self._stop.set()
         self._worker.join()
-        logger.debug('... finished collating stats and logs. Took %s seconds', time.time() - startTime)
+        logger.debug(
+            "... finished collating stats and logs. Took %s seconds",
+            time.time() - startTime,
+        )
         # in addition to cleaning on exceptions, onError should clean if there are any failed jobs
 
 
@@ -250,6 +321,11 @@ def install_log_color(set_logger: Optional[logging.Logger] = None) -> None:
     import coloredlogs  # type: ignore[import-untyped]
 
     level_styles = dict(coloredlogs.DEFAULT_LEVEL_STYLES)
+    level_styles["trace"] = dict(level_styles["debug"])
+
+    # TODO: What if these fixed colors aren't right for the terminal background?
+    # It might be light or dark or even grey.
+    level_styles["trace"]["color"] = 242
     level_styles["debug"]["color"] = 242
     level_styles["notice"] = {"color": "green", "bold": True}
     level_styles["error"]["bold"] = True
@@ -272,7 +348,9 @@
     )
 
 
-def add_logging_options(parser: ArgumentParser, default_level: Optional[int] = None) -> None:
+def add_logging_options(
+    parser: ArgumentParser, default_level: Optional[int] = None
+) -> None:
     """
     Add logging options to set the global log level.
 
@@ -285,23 +363,51 @@ def add_logging_options(parser: ArgumentParser, default_level: Optional[int] = N
 
     group = parser.add_argument_group("Logging Options")
 
-    levels = ['Critical', 'Error', 'Warning', 'Debug', 'Info']
+    levels = ["Critical", "Error", "Warning", "Info", "Debug", "Trace"]
     for level in levels:
-        group.add_argument(f"--log{level}", dest="logLevel", default=default_level_name, action="store_const",
-                           const=level, help=f"Turn on loglevel {level}. Default: {default_level_name}.")
+        group.add_argument(
+            f"--log{level}",
+            dest="logLevel",
+            default=default_level_name,
+            action="store_const",
+            const=level,
+            help=f"Set logging level to {level}. Default: {default_level_name}.",
+        )
 
     levels += [l.lower() for l in levels] + [l.upper() for l in levels]
-    group.add_argument("--logOff", dest="logLevel", default=default_level_name,
-                       action="store_const", const="CRITICAL", help="Same as --logCRITICAL.")
+    group.add_argument(
+        "--logOff",
+        dest="logLevel",
+        default=default_level_name,
+        action="store_const",
+        const="CRITICAL",
+        help="Same as --logCritical.",
+    )
     # Maybe deprecate the above in favor of --logLevel?
 
-    group.add_argument("--logLevel", dest="logLevel", default=default_level_name, choices=levels,
-                       help=f"Set the log level. Default: {default_level_name}. Options: {levels}.")
+    group.add_argument(
+        "--logLevel",
+        dest="logLevel",
+        default=default_level_name,
+        choices=levels,
+        help=f"Set the log level. Default: {default_level_name}. Options: {levels}.",
+    )
     group.add_argument("--logFile", dest="logFile", help="File to log in.")
-    group.add_argument("--rotatingLogging", dest="logRotating", action="store_true", default=False,
-                       help="Turn on rotating logging, which prevents log files from getting too big.")
-    group.add_argument("--logColors", dest="colored_logs", default=True, type=strtobool, metavar="BOOL",
-                       help="Enable or disable colored logging. Default: %(default)s")
+    group.add_argument(
+        "--rotatingLogging",
+        dest="logRotating",
+        action="store_true",
+        default=False,
+        help="Turn on rotating logging, which prevents log files from getting too big.",
+    )
+    group.add_argument(
+        "--logColors",
+        dest="colored_logs",
+        default=True,
+        type=strtobool,
+        metavar="BOOL",
+        help="Enable or disable colored logging. Default: %(default)s",
    )
 
 
 def configure_root_logger() -> None:
@@ -311,8 +417,10 @@ def configure_root_logger() -> None:
     Should be called before any entry point tries to log anything,
     to ensure consistent formatting.
     """
-    logging.basicConfig(format='[%(asctime)s] [%(threadName)-10s] [%(levelname).1s] [%(name)s] %(message)s',
-                        datefmt='%Y-%m-%dT%H:%M:%S%z')
+    logging.basicConfig(
+        format="[%(asctime)s] [%(threadName)-10s] [%(levelname).1s] [%(name)s] %(message)s",
+        datefmt="%Y-%m-%dT%H:%M:%S%z",
+    )
     root_logger.setLevel(DEFAULT_LOGLEVEL)
 
 
@@ -330,12 +438,16 @@ def log_to_file(log_file: Optional[str], log_rotation: bool) -> None:
 
 def set_logging_from_options(options: Union["Config", Namespace]) -> None:
     configure_root_logger()
-    options.logLevel = options.logLevel or logging.getLevelName(root_logger.getEffectiveLevel())
+    options.logLevel = options.logLevel or logging.getLevelName(
+        root_logger.getEffectiveLevel()
+    )
     set_log_level(options.logLevel)
     if options.colored_logs:
         install_log_color()
-    logger.debug(f"Root logger is at level '{logging.getLevelName(root_logger.getEffectiveLevel())}', "
-                 f"'toil' logger at level '{logging.getLevelName(toil_logger.getEffectiveLevel())}'.")
+    logger.debug(
+        f"Root logger is at level '{logging.getLevelName(root_logger.getEffectiveLevel())}', "
+        f"'toil' logger at level '{logging.getLevelName(toil_logger.getEffectiveLevel())}'."
+    )
 
     # start logging to log file if specified
     log_to_file(options.logFile, options.logRotating)
@@ -353,18 +465,24 @@
     This is important because some packages, particularly boto3, are not always instantiated yet in the
     environment when this is run, and so we create the logger and set the level preemptively.
     """
-    never_suppress = ['toil', '__init__', '__main__', 'toil-rt', 'cwltool']
-    always_suppress = ['boto3', 'boto', 'botocore']  # ensure we suppress even before instantiated
+    never_suppress = ["toil", "__init__", "__main__", "toil-rt", "cwltool"]
+    always_suppress = [
+        "boto3",
+        "boto",
+        "botocore",
+    ]  # ensure we suppress even before instantiated
 
-    top_level_loggers: List[str] = []
+    top_level_loggers: list[str] = []
 
     # Due to https://stackoverflow.com/questions/61683713
     for pkg_logger in list(logging.Logger.manager.loggerDict.keys()) + always_suppress:
         if pkg_logger != local_logger:
             # many sub-loggers may exist, like "boto.a", "boto.b", "boto.c"; we only want the top_level: "boto"
-            top_level_logger = pkg_logger.split('.')[0] if '.' in pkg_logger else pkg_logger
+            top_level_logger = (
+                pkg_logger.split(".")[0] if "." in pkg_logger else pkg_logger
+            )
 
             if top_level_logger not in top_level_loggers + never_suppress:
                 top_level_loggers.append(top_level_logger)
                 logging.getLogger(top_level_logger).setLevel(logging.CRITICAL)
-    logger.debug(f'Suppressing the following loggers: {set(top_level_loggers)}')
+    logger.debug(f"Suppressing the following loggers: {set(top_level_loggers)}")