toil 6.1.0a1-py3-none-any.whl → 8.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/leader.py CHANGED
@@ -21,30 +21,36 @@ import os
  import pickle
  import sys
  import time
- from typing import Any, Dict, List, Optional, Set, Union
+ from typing import Any, Optional, Union

  import enlighten

  from toil import resolveEntryPoint
  from toil.batchSystems import DeadlockException
- from toil.batchSystems.abstractBatchSystem import (AbstractBatchSystem,
- BatchJobExitReason)
- from toil.bus import (JobCompletedMessage,
- JobFailedMessage,
- JobIssuedMessage,
- JobMissingMessage,
- JobUpdatedMessage,
- QueueSizeMessage,
- gen_message_bus_path)
+ from toil.batchSystems.abstractBatchSystem import (
+ EXIT_STATUS_UNAVAILABLE_VALUE,
+ AbstractBatchSystem,
+ BatchJobExitReason,
+ )
+ from toil.bus import (
+ JobCompletedMessage,
+ JobFailedMessage,
+ JobIssuedMessage,
+ JobMissingMessage,
+ JobUpdatedMessage,
+ QueueSizeMessage,
+ get_job_kind,
+ )
  from toil.common import Config, ToilMetrics
  from toil.cwl.utils import CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
  from toil.exceptions import FailedJobsException
- from toil.job import (CheckpointJobDescription,
- JobDescription,
- ServiceJobDescription,
- TemporaryID)
- from toil.jobStores.abstractJobStore import (AbstractJobStore,
- NoSuchJobException)
+ from toil.job import (
+ CheckpointJobDescription,
+ JobDescription,
+ ServiceJobDescription,
+ TemporaryID,
+ )
+ from toil.jobStores.abstractJobStore import AbstractJobStore, NoSuchJobException
  from toil.lib.throttle import LocalThrottle
  from toil.provisioners.abstractProvisioner import AbstractProvisioner
  from toil.provisioners.clusterScaler import ScalerThread
@@ -78,13 +84,15 @@ class Leader:
  consulting the job store, and issuing them in the batch system.
  """

- def __init__(self,
- config: Config,
- batchSystem: AbstractBatchSystem,
- provisioner: Optional[AbstractProvisioner],
- jobStore: AbstractJobStore,
- rootJob: JobDescription,
- jobCache: Optional[Dict[Union[str, TemporaryID], JobDescription]] = None) -> None:
+ def __init__(
+ self,
+ config: Config,
+ batchSystem: AbstractBatchSystem,
+ provisioner: Optional[AbstractProvisioner],
+ jobStore: AbstractJobStore,
+ rootJob: JobDescription,
+ jobCache: Optional[dict[Union[str, TemporaryID], JobDescription]] = None,
+ ) -> None:
  """
  Create a Toil Leader object.

@@ -114,14 +122,11 @@ class Leader:
  # state change information about jobs.
  self.toilState = ToilState(self.jobStore)

- if self.config.write_messages is None:
- # The user hasn't specified a place for the message bus so we
- # should make one.
- self.config.write_messages = gen_message_bus_path()
-
  # Message bus messages need to go to the given file.
  # Keep a reference to the return value so the listener stays alive.
- self._message_subscription = self.toilState.bus.connect_output_file(self.config.write_messages)
+ self._message_subscription = self.toilState.bus.connect_output_file(
+ self.config.write_messages
+ )

  # Connect to the message bus, so we will get all the messages of these
  # types in an inbox.
@@ -136,17 +141,22 @@ class Leader:
  # this, somehow, so they can also see messages from this?
  self.toilState.load_workflow(rootJob, jobCache=jobCache)

- logger.debug("Found %s jobs to start and %i jobs with successors to run",
- self._messages.count(JobUpdatedMessage), len(self.toilState.successorCounts))
+ logger.debug(
+ "Found %s jobs to start and %i jobs with successors to run",
+ self._messages.count(JobUpdatedMessage),
+ len(self.toilState.successorCounts),
+ )

  # Batch system
  self.batchSystem = batchSystem
  if len(self.batchSystem.getIssuedBatchJobIDs()) != 0:
- raise RuntimeError("The initialized batchsystem did not start with 0 active jobs.")
+ raise RuntimeError(
+ "The initialized batchsystem did not start with 0 active jobs."
+ )
  logger.debug("Checked batch system has no running jobs and no updated jobs")

  # Map of batch system IDs to job store IDs
- self.issued_jobs_by_batch_system_id: Dict[int, str] = {}
+ self.issued_jobs_by_batch_system_id: dict[int, str] = {}

  # Number of preemptible jobs currently being run by batch system
  self.preemptibleJobsIssued = 0
@@ -154,10 +164,12 @@ class Leader:
  # Tracking the number service jobs issued,
  # this is used limit the number of services issued to the batch system
  self.serviceJobsIssued = 0
- self.serviceJobsToBeIssued: List[str] = [] # A queue of IDs of service jobs that await scheduling
+ self.serviceJobsToBeIssued: list[str] = (
+ []
+ ) # A queue of IDs of service jobs that await scheduling
  # Equivalents for service jobs to be run on preemptible nodes
  self.preemptibleServiceJobsIssued = 0
- self.preemptibleServiceJobsToBeIssued: List[str] = []
+ self.preemptibleServiceJobsToBeIssued: list[str] = []

  # Timing of the rescuing method
  self.timeSinceJobsLastRescued = None
@@ -165,7 +177,7 @@ class Leader:
  # For each issued job's batch system ID, how many times did we not see
  # it when we should have? If this hits a threshold, the job is declared
  # missing and killed and possibly retried.
- self.reissueMissingJobs_missingHash: Dict[int, int] = {}
+ self.reissueMissingJobs_missingHash: dict[int, int] = {}

  # Class used to create/destroy nodes in the cluster, may be None if
  # using a statically defined cluster
@@ -183,7 +195,7 @@ class Leader:
  self.statsAndLogging = StatsAndLogging(self.jobStore, self.config)

  # Set used to monitor deadlocked jobs
- self.potentialDeadlockedJobs: Set[str] = set()
+ self.potentialDeadlockedJobs: set[str] = set()
  self.potentialDeadlockTime = 0

  # A dashboard that runs on the leader node in AWS clusters to track the state
@@ -191,8 +203,13 @@ class Leader:
  self.toilMetrics: Optional[ToilMetrics] = None

  # internal jobs we should not expose at top level debugging
- self.debugJobNames = ("CWLJob", "CWLWorkflow", "CWLScatter", "CWLGather",
- "ResolveIndirect")
+ self.debugJobNames = (
+ "CWLJob",
+ "CWLWorkflow",
+ "CWLScatter",
+ "CWLGather",
+ "ResolveIndirect",
+ )

  self.deadlockThrottler = LocalThrottle(self.config.deadlockCheckInterval)

@@ -210,8 +227,10 @@ class Leader:
  self.GOOD_COLOR = (0, 60, 108)
  self.BAD_COLOR = (253, 199, 0)
  # And set a format that shows failures
- self.PROGRESS_BAR_FORMAT = ('{desc}{desc_pad}{percentage:3.0f}%|{bar}| {count:{len_total}d}/{total:d} '
- '({count_1:d} failures) [{elapsed}<{eta}, {rate:.2f}{unit_pad}{unit}/s]')
+ self.PROGRESS_BAR_FORMAT = (
+ "{desc}{desc_pad}{percentage:3.0f}%|{bar}| {count:{len_total}d}/{total:d} "
+ "({count_1:d} failures) [{elapsed}<{eta}, {rate:.2f}{unit_pad}{unit}/s]"
+ )
  # TODO: No way to set background color on the terminal for the bar.

  # What exit code should the process use if the workflow failed?
@@ -229,16 +248,25 @@ class Leader:
  """
  self.jobStore.write_kill_flag(kill=False)

- with enlighten.get_manager(stream=sys.stderr, enabled=not self.config.disableProgress) as manager:
+ with enlighten.get_manager(
+ stream=sys.stderr, enabled=not self.config.disableProgress
+ ) as manager:
  # Set up the fancy console UI if desirable
- self.progress_overall = manager.counter(total=0, desc='Workflow Progress', unit='jobs',
- color=self.GOOD_COLOR, bar_format=self.PROGRESS_BAR_FORMAT)
+ self.progress_overall = manager.counter(
+ total=0,
+ desc="Workflow Progress",
+ unit="jobs",
+ color=self.GOOD_COLOR,
+ bar_format=self.PROGRESS_BAR_FORMAT,
+ )
  self.progress_failed = self.progress_overall.add_subcounter(self.BAD_COLOR)

  # Start the stats/logging aggregation thread
  self.statsAndLogging.start()
  if self.config.metrics:
- self.toilMetrics = ToilMetrics(self.toilState.bus, provisioner=self.provisioner)
+ self.toilMetrics = ToilMetrics(
+ self.toilState.bus, provisioner=self.provisioner
+ )

  try:

@@ -255,10 +283,13 @@ class Leader:
  self.innerLoop()
  finally:
  if self.clusterScaler is not None:
- logger.debug('Waiting for workers to shutdown.')
+ logger.debug("Waiting for workers to shutdown.")
  startTime = time.time()
  self.clusterScaler.shutdown()
- logger.debug('Worker shutdown complete in %s seconds.', time.time() - startTime)
+ logger.debug(
+ "Worker shutdown complete in %s seconds.",
+ time.time() - startTime,
+ )

  finally:
  # Ensure service manager thread is properly shutdown
@@ -271,37 +302,59 @@ class Leader:
  self.toilMetrics.shutdown()

  # Filter the failed jobs
- self.toilState.totalFailedJobs = [j for j in self.toilState.totalFailedJobs if self.toilState.job_exists(j)]
+ self.toilState.totalFailedJobs = [
+ j
+ for j in self.toilState.totalFailedJobs
+ if self.toilState.job_exists(j)
+ ]

  try:
  self.create_status_sentinel_file(self.toilState.totalFailedJobs)
  except OSError as e:
- logger.debug(f'Error from importFile with hardlink=True: {e}')
+ logger.debug(f"Error from importFile with hardlink=True: {e}")

- logger.info("Finished toil run %s" %
- ("successfully." if not self.toilState.totalFailedJobs \
- else ("with %s failed jobs." % len(self.toilState.totalFailedJobs))))
+ logger.info(
+ "Finished toil run %s"
+ % (
+ "successfully."
+ if not self.toilState.totalFailedJobs
+ else ("with %s failed jobs." % len(self.toilState.totalFailedJobs))
+ )
+ )

  if len(self.toilState.totalFailedJobs):
  failed_jobs = []
  for job_id in self.toilState.totalFailedJobs:
  # Refresh all the failed jobs to get e.g. the log file IDs that the workers wrote
  self.toilState.reset_job(job_id)
- failed_jobs.append(self.toilState.get_job(job_id))
-
- logger.info("Failed jobs at end of the run: %s", ' '.join(str(j) for j in failed_jobs))
- raise FailedJobsException(self.jobStore, failed_jobs, exit_code=self.recommended_fail_exit_code)
+ try:
+ failed_jobs.append(self.toilState.get_job(job_id))
+ except NoSuchJobException:
+ # Job actually finished and was removed
+ pass
+
+ logger.info(
+ "Failed jobs at end of the run: %s",
+ " ".join(str(j) for j in failed_jobs),
+ )
+ raise FailedJobsException(
+ self.jobStore,
+ failed_jobs,
+ exit_code=self.recommended_fail_exit_code,
+ )

  return self.jobStore.get_root_job_return_value()

  def create_status_sentinel_file(self, fail: bool) -> None:
  """Create a file in the jobstore indicating failure or success."""
- logName = 'failed.log' if fail else 'succeeded.log'
+ logName = "failed.log" if fail else "succeeded.log"
  localLog = os.path.join(os.getcwd(), logName)
- open(localLog, 'w').close()
- self.jobStore.import_file('file://' + localLog, logName, hardlink=True)
+ open(localLog, "w").close()
+ self.jobStore.import_file("file://" + localLog, logName, hardlink=True)

- if os.path.exists(localLog): # Bandaid for Jenkins tests failing stochastically and unexplainably.
+ if os.path.exists(
+ localLog
+ ): # Bandaid for Jenkins tests failing stochastically and unexplainably.
  os.remove(localLog)

  def _handledFailedSuccessor(self, successor_id: str, predecessor_id: str) -> bool:
@@ -313,8 +366,11 @@ class Leader:
  :returns: True if there are still active successors.
  False if all successors have failed and the job is queued to run to handle the failed successors.
  """
- logger.debug("Successor job: %s of job: %s has failed """
- "predecessors", self.toilState.get_job(successor_id), self.toilState.get_job(predecessor_id))
+ logger.debug(
+ "Successor job: %s of job: %s has failed " "" "predecessors",
+ self.toilState.get_job(successor_id),
+ self.toilState.get_job(predecessor_id),
+ )

  # Add the job to the set having failed successors
  self.toilState.hasFailedSuccessors.add(predecessor_id)
@@ -328,9 +384,12 @@ class Leader:
  # If the job now has no active successors, add to active jobs
  # so it can be processed as a job with failed successors.
  if self.toilState.count_pending_successors(predecessor_id) == 0:
- logger.debug("Job: %s has no successors to run "
- "and some are failed, adding to list of jobs "
- "with failed successors", self.toilState.get_job(predecessor_id))
+ logger.debug(
+ "Job: %s has no successors to run "
+ "and some are failed, adding to list of jobs "
+ "with failed successors",
+ self.toilState.get_job(predecessor_id),
+ )
  self._messages.publish(JobUpdatedMessage(predecessor_id, 0))
  # Report no successors are running
  return False
@@ -338,7 +397,9 @@ class Leader:
  # Some successors are still active
  return True

- def _checkSuccessorReadyToRunMultiplePredecessors(self, successor_id: str, predecessor_id: str) -> bool:
+ def _checkSuccessorReadyToRunMultiplePredecessors(
+ self, successor_id: str, predecessor_id: str
+ ) -> bool:
  """
  Check if a successor job is ready to run when there are multiple predecessors.

@@ -359,8 +420,11 @@ class Leader:
  # Grab the predecessor for reporting
  predecessor = self.toilState.get_job(predecessor_id)

- logger.debug("Successor job: %s of job: %s has multiple "
- "predecessors", successor, predecessor)
+ logger.debug(
+ "Successor job: %s of job: %s has multiple " "predecessors",
+ successor,
+ predecessor,
+ )

  # Add the predecessor as a finished predecessor to the successor
  successor.predecessorsFinished.add(predecessor_id)
@@ -379,13 +443,17 @@ class Leader:
  if len(successor.predecessorsFinished) == successor.predecessorNumber:
  # All the successor's predecessors are done now.
  # Remove the successor job from the set of waiting multi-predecessor jobs.
- self.toilState.jobsToBeScheduledWithMultiplePredecessors.remove(successor_id)
+ self.toilState.jobsToBeScheduledWithMultiplePredecessors.remove(
+ successor_id
+ )
  return True
  else:
  # The job is not ready to run
  return False

- def _makeJobSuccessorReadyToRun(self, successor_id: str, predecessor_id: str) -> bool:
+ def _makeJobSuccessorReadyToRun(
+ self, successor_id: str, predecessor_id: str
+ ) -> bool:
  """
  Make a successor job ready to run if possible.

@@ -393,7 +461,7 @@ class Leader:
  :param predecessor_id: The job which the successor comes after.
  :returns: False if the successor job should not yet be run or True otherwise.
  """
- #Build map from successor to predecessors.
+ # Build map from successor to predecessors.
  if successor_id not in self.toilState.successor_to_predecessors:
  self.toilState.successor_to_predecessors[successor_id] = set()
  if not isinstance(successor_id, str):
@@ -404,9 +472,15 @@ class Leader:

  # Grab the successor
  successor = self.toilState.get_job(successor_id)
- logger.debug("Added job %s as coming after job %s", successor, self.toilState.get_job(predecessor_id))
+ logger.debug(
+ "Added job %s as coming after job %s",
+ successor,
+ self.toilState.get_job(predecessor_id),
+ )
  if successor.predecessorNumber > 1:
- return self._checkSuccessorReadyToRunMultiplePredecessors(successor_id, predecessor_id)
+ return self._checkSuccessorReadyToRunMultiplePredecessors(
+ successor_id, predecessor_id
+ )
  else:
  return True

@@ -425,13 +499,20 @@ class Leader:
  next_successors = predecessor.nextSuccessors()

  if next_successors is None or len(next_successors) == 0:
- raise RuntimeError(f"Job {self} trying to run successors, but it doesn't have any")
- logger.debug("Job: %s has %i successors to schedule",
- predecessor_id, len(next_successors))
- #Record the number of successors that must be completed before
- #the job can be considered again
+ raise RuntimeError(
+ f"Job {self} trying to run successors, but it doesn't have any"
+ )
+ logger.debug(
+ "Job: %s has %i successors to schedule",
+ predecessor_id,
+ len(next_successors),
+ )
+ # Record the number of successors that must be completed before
+ # the job can be considered again
  if self.toilState.count_pending_successors(predecessor_id) != 0:
- raise RuntimeError('Attempted to schedule successors of the same job twice!')
+ raise RuntimeError(
+ "Attempted to schedule successors of the same job twice!"
+ )
  self.toilState.successors_pending(predecessor_id, len(next_successors))

  # For each successor schedule if all predecessors have been completed
@@ -442,7 +523,11 @@ class Leader:
  except NoSuchJobException:
  # Job already done and gone, but probably shouldn't be. Or maybe isn't visible yet.
  # TODO: Shouldn't this be an error?
- logger.warning("Job %s is a successor of %s but is already done and gone.", successor_id, predecessor_id)
+ logger.warning(
+ "Job %s is a successor of %s but is already done and gone.",
+ successor_id,
+ predecessor_id,
+ )
  # Don't try and run it
  continue
  if self._makeJobSuccessorReadyToRun(successor_id, predecessor_id):
@@ -464,46 +549,62 @@ class Leader:
  # The job has services running; signal for them to be killed.
  # Once they are killed, then the job will be updated again and then
  # scheduled to be removed.
- logger.warning("Telling job %s to terminate its services due to successor failure",
- predecessor)
- self.serviceManager.kill_services(self.toilState.servicesIssued[predecessor_id],
- error=True)
+ logger.warning(
+ "Telling job %s to terminate its services due to successor failure",
+ predecessor,
+ )
+ self.serviceManager.kill_services(
+ self.toilState.servicesIssued[predecessor_id], error=True
+ )
  elif self.toilState.count_pending_successors(predecessor_id) > 0:
  # The job has non-service jobs running; wait for them to finish.
  # the job will be re-added to the updated jobs when these jobs
  # are done
- logger.debug("Job %s with ID: %s with failed successors still has successor jobs running",
- predecessor, predecessor_id)
- elif (isinstance(predecessor, CheckpointJobDescription) and
- predecessor.checkpoint is not None and
- predecessor.remainingTryCount > 1):
+ logger.debug(
+ "Job %s with ID: %s with failed successors still has successor jobs running",
+ predecessor,
+ predecessor_id,
+ )
+ elif (
+ isinstance(predecessor, CheckpointJobDescription)
+ and predecessor.checkpoint is not None
+ and predecessor.remainingTryCount > 1
+ ):
  # If the job is a checkpoint and has remaining retries...
  # The logic behind using > 1 rather than > 0 here: Since this job has
  # been tried once (without decreasing its try count as the job
  # itself was successful), and its subtree failed, it shouldn't be retried
  # unless it has more than 1 try.
  if predecessor_id in self.toilState.jobs_issued:
- logger.debug('Checkpoint job %s was updated while issued', predecessor_id)
+ logger.debug(
+ "Checkpoint job %s was updated while issued", predecessor_id
+ )
  else:
  # It hasn't already been reissued.
  # This check lets us be robust against repeated job update
  # messages (such as from services starting *and* failing), by
  # making sure that we don't stay in a state that where we
  # reissue the job every time we get one.
- logger.warning('Job: %s is being restarted as a checkpoint after the total '
- 'failure of jobs in its subtree.', predecessor_id)
+ logger.warning(
+ "Job: %s is being restarted as a checkpoint after the total "
+ "failure of jobs in its subtree.",
+ predecessor_id,
+ )
  self.issueJob(predecessor)
  else:
  # Mark it totally failed
- logger.debug("Job %s is being processed as completely failed", predecessor_id)
+ logger.debug(
+ "Job %s is being processed as completely failed", predecessor_id
+ )
  self.processTotallyFailedJob(predecessor_id)

  def _processReadyJob(self, job_id: str, result_status: int):
  # We operate on the JobDescription mostly.
  readyJob = self.toilState.get_job(job_id)

- logger.debug('Updating status of job %s with result status: %s',
- readyJob, result_status)
+ logger.debug(
+ "Updating status of job %s with result status: %s", readyJob, result_status
+ )

  # TODO: Filter out nonexistent successors/services now, so we can tell
  # if they are all done and the job needs deleting?
@@ -516,14 +617,17 @@ class Leader:
  # want to act on it; we want to wait until it gets the update it
  # gets when the service manager is done trying to start its
  # services.
- logger.debug("Got a job to update which is still owned by the service "
- "manager: %s", readyJob.jobStoreID)
+ logger.debug(
+ "Got a job to update which is still owned by the service "
+ "manager: %s",
+ readyJob.jobStoreID,
+ )
  elif readyJob.jobStoreID in self.toilState.hasFailedSuccessors:
  self._processFailedSuccessors(job_id)
- elif readyJob.command is not None or result_status != 0:
- # The job has a command it must be run before any successors.
+ elif readyJob.has_body() or result_status != 0:
+ # The job has a body it must be run before any successors.
  # Similarly, if the job previously failed we rerun it, even if it doesn't have a
- # command to run, to eliminate any parts of the stack now completed.
+ # body to run, to eliminate any parts of the stack now completed.
  isServiceJob = readyJob.jobStoreID in self.toilState.service_to_client

  # We want to run the job, and expend one of its "tries" (possibly
@@ -531,8 +635,9 @@ class Leader:

  # If the job has run out of tries or is a service job whose error flag has
  # been indicated, fail the job.
- if (readyJob.remainingTryCount == 0 or
- (isServiceJob and not self.jobStore.file_exists(readyJob.errorJobStoreID))):
+ if readyJob.remainingTryCount == 0 or (
+ isServiceJob and not self.jobStore.file_exists(readyJob.errorJobStoreID)
+ ):
  self.processTotallyFailedJob(job_id)
  logger.warning("Job %s is completely failed", readyJob)
  else:
@@ -543,28 +648,39 @@ class Leader:
  # Build a map from the service jobs to the job and a map
  # of the services created for the job
  if readyJob.jobStoreID in self.toilState.servicesIssued:
- raise RuntimeError(f"The ready job: {readyJob.jobStoreID} was already issued.")
+ raise RuntimeError(
+ f"The ready job: {readyJob.jobStoreID} was already issued."
+ )
  self.toilState.servicesIssued[readyJob.jobStoreID] = set()
  for serviceJobList in readyJob.serviceHostIDsInBatches():
  for serviceID in serviceJobList:
  if serviceID in self.toilState.service_to_client:
- raise RuntimeError(f"The ready service ID: {serviceID} was already added.")
+ raise RuntimeError(
+ f"The ready service ID: {serviceID} was already added."
+ )
+ # TODO: Why do we refresh here?
  self.toilState.reset_job(serviceID)
  serviceHost = self.toilState.get_job(serviceID)
  self.toilState.service_to_client[serviceID] = readyJob.jobStoreID
  self.toilState.servicesIssued[readyJob.jobStoreID].add(serviceID)

- logger.debug("Giving job: %s to service manager to schedule its jobs", readyJob)
+ logger.debug(
+ "Giving job: %s to service manager to schedule its jobs", readyJob
+ )
  # Use the service manager to start the services
  self.serviceManager.put_client(job_id)
  elif readyJob.nextSuccessors() is not None:
  # There are successors to run
  self._runJobSuccessors(job_id)
  elif readyJob.jobStoreID in self.toilState.servicesIssued:
- logger.debug("Telling job: %s to terminate its services due to the "
- "successful completion of its successor jobs",
- readyJob)
- self.serviceManager.kill_services(self.toilState.servicesIssued[readyJob.jobStoreID], error=False)
+ logger.debug(
+ "Telling job: %s to terminate its services due to the "
+ "successful completion of its successor jobs",
+ readyJob,
+ )
+ self.serviceManager.kill_services(
+ self.toilState.servicesIssued[readyJob.jobStoreID], error=False
+ )
  else:
  # There are no remaining tasks to schedule within the job.
  #
@@ -593,7 +709,10 @@ class Leader:
  try:
  self.toilState.delete_job(readyJob.jobStoreID)
  except Exception as e:
- logger.exception("Re-processing success for job we could not remove: %s", readyJob)
+ logger.exception(
+ "Re-processing success for job we could not remove: %s",
+ readyJob,
+ )
  # Kick it back to being handled as succeeded again. We
  # don't want to have a failure here cause a Toil-level
  # retry which causes more actual jobs to try to run.
@@ -605,12 +724,18 @@ class Leader:
  self.processRemovedJob(readyJob, 0)
  else:
  self.processTotallyFailedJob(job_id)
- logger.error("Job: %s is empty but completely failed - something is very wrong", readyJob.jobStoreID)
+ logger.error(
+ "Job: %s is empty but completely failed - something is very wrong",
+ readyJob.jobStoreID,
+ )

  def _processReadyJobs(self):
  """Process jobs that are ready to be scheduled/have successors to schedule."""
- logger.debug('Built the jobs list, currently have %i jobs to update and %i jobs issued',
- self._messages.count(JobUpdatedMessage), self.getNumberOfJobsIssued())
+ logger.debug(
+ "Built the jobs list, currently have %i jobs to update and %i jobs issued",
+ self._messages.count(JobUpdatedMessage),
+ self.getNumberOfJobsIssued(),
+ )

  # Now go through and, for each job that has updated this tick, process it.

@@ -625,9 +750,13 @@ class Leader:
  if message.job_id in handled_with_status:
  if handled_with_status[message.job_id] == message.result_status:
  # This is a harmless duplicate
- logger.debug("Job %s already updated this tick with status %s and "
- "we've received duplicate message %s", message.job_id,
- handled_with_status[message.job_id], message)
+ logger.debug(
+ "Job %s already updated this tick with status %s and "
+ "we've received duplicate message %s",
+ message.job_id,
+ handled_with_status[message.job_id],
+ message,
+ )
  else:
  # This is a conflicting update. We may have already treated
  # a job as succeeding but now we've heard it's failed, or
@@ -635,9 +764,13 @@ class Leader:
  # This probably shouldn't happen, but does because the
  # scheduler is not correct somehow and hasn't been for a
  # long time. Complain about it.
- logger.warning("Job %s already updated this tick with status %s "
- "but we've now received %s", message.job_id,
- handled_with_status[message.job_id], message)
+ logger.warning(
+ "Job %s already updated this tick with status %s "
+ "but we've now received %s",
+ message.job_id,
+ handled_with_status[message.job_id],
+ message,
+ )
  # Either way, we only want to handle one update per tick, like
  # the old dict-based implementation.
  continue
@@ -655,16 +788,21 @@ class Leader:
  if service_id is None:
  break

- logger.debug('Launching service job: %s', self.toilState.get_job(service_id))
+ logger.debug(
+ "Launching service job: %s", self.toilState.get_job(service_id)
+ )
  self.issueServiceJob(service_id)

  def _processJobsWithRunningServices(self):
  """Get jobs whose services have started."""
  while True:
  client_id = self.serviceManager.get_ready_client(0)
- if client_id is None: # Stop trying to get jobs when function returns None
+ if client_id is None: # Stop trying to get jobs when function returns None
  break
- logger.debug('Job: %s has established its services; all services are running', client_id)
+ logger.debug(
+ "Job: %s has established its services; all services are running",
+ client_id,
+ )

  # Grab the client job description
  client = self.toilState.get_job(client_id)
@@ -677,9 +815,9 @@ class Leader:
  """Get jobs whose services have failed to start."""
  while True:
  client_id = self.serviceManager.get_unservable_client(0)
- if client_id is None: # Stop trying to get jobs when function returns None
+ if client_id is None: # Stop trying to get jobs when function returns None
  break
- logger.debug('Job: %s has failed to establish its services.', client_id)
+ logger.debug("Job: %s has failed to establish its services.", client_id)

  # Grab the client job description
  client = self.toilState.get_job(client_id)
@@ -694,29 +832,56 @@ class Leader:
  def _gatherUpdatedJobs(self, updatedJobTuple):
  """Gather any new, updated JobDescriptions from the batch system."""
  bsID, exitStatus, exitReason, wallTime = (
- updatedJobTuple.jobID, updatedJobTuple.exitStatus, updatedJobTuple.exitReason,
- updatedJobTuple.wallTime)
+ updatedJobTuple.jobID,
+ updatedJobTuple.exitStatus,
+ updatedJobTuple.exitReason,
+ updatedJobTuple.wallTime,
+ )
  # easy, track different state
  try:
- updatedJob = self.toilState.get_job(self.issued_jobs_by_batch_system_id[bsID])
+ updatedJob = self.toilState.get_job(
+ self.issued_jobs_by_batch_system_id[bsID]
+ )
  except KeyError:
- logger.warning("A result seems to already have been processed for job %s", bsID)
+ logger.warning(
+ "A result seems to already have been processed for job %s", bsID
+ )
  else:
  if exitStatus == 0:
- logger.debug('Job ended: %s', updatedJob)
+ logger.debug("Job ended: %s", updatedJob)
  else:
- logger.warning(f'Job failed with exit value {exitStatus}: {updatedJob}\n'
- f'Exit reason: {exitReason}')
+ status_string = (
+ str(exitStatus)
+ if exitStatus != EXIT_STATUS_UNAVAILABLE_VALUE
+ else "<UNAVAILABLE>"
+ )
+ logger.warning(
+ f"Job failed with exit value {status_string}: {updatedJob}\n"
+ f"Exit reason: {BatchJobExitReason.to_string(exitReason)}"
+ )
+ # This logic is undefined for which of the failing jobs will send its exit code
+ # when there are multiple failing jobs with different exit statuses
+ self.recommended_fail_exit_code = exitStatus
  if exitStatus == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
  # This is a CWL job informing us that the workflow is
  # asking things of us that Toil can't do. When we raise an
  # exception because of this, make sure to forward along
  # this exit code.
  logger.warning("This indicates an unsupported CWL requirement!")
- self.recommended_fail_exit_code = CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
+ self.recommended_fail_exit_code = (
+ CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
+ )
  # Tell everyone it stopped running.
- self._messages.publish(JobCompletedMessage(updatedJob.get_job_kind(), updatedJob.jobStoreID, exitStatus))
- self.process_finished_job(bsID, exitStatus, wall_time=wallTime, exit_reason=exitReason)
+ self._messages.publish(
+ JobCompletedMessage(
+ get_job_kind(updatedJob.get_names()),
+ updatedJob.jobStoreID,
+ exitStatus,
+ )
+ )
+ self.process_finished_job(
+ bsID, exitStatus, wall_time=wallTime, exit_reason=exitReason
+ )

  def _processLostJobs(self):
  """Process jobs that have gone awry."""
@@ -724,7 +889,9 @@ class Leader:
  # gather for rescueJobsFrequency seconds) check if there are any jobs
  # that have run too long (see self.reissueOverLongJobs) or which have
  # gone missing from the batch system (see self.reissueMissingJobs)
- if ((time.time() - self.timeSinceJobsLastRescued) >= self.config.rescueJobsFrequency):
+ if (
+ time.time() - self.timeSinceJobsLastRescued
+ ) >= self.config.rescueJobsFrequency:
  # We only rescue jobs every N seconds, and when we have apparently
  # exhausted the current job supply
  self.reissueOverLongJobs()
@@ -744,9 +911,11 @@ class Leader:
  """
  self.timeSinceJobsLastRescued = time.time()

- while self._messages.count(JobUpdatedMessage) > 0 or \
- self.getNumberOfJobsIssued() or \
- self.serviceManager.get_job_count():
+ while (
+ self._messages.count(JobUpdatedMessage) > 0
+ or self.getNumberOfJobsIssued()
+ or self.serviceManager.get_job_count()
+ ):

  if self._messages.count(JobUpdatedMessage) > 0:
  self._processReadyJobs()
@@ -798,13 +967,21 @@ class Leader:
  if not self._messages.empty():
  raise RuntimeError(f"Pending messages at shutdown: {self._messages}")
  if self.toilState.successorCounts != {}:
- raise RuntimeError(f"Jobs waiting on successors at shutdown: {self.toilState.successorCounts}")
+ raise RuntimeError(
+ f"Jobs waiting on successors at shutdown: {self.toilState.successorCounts}"
+ )
  if self.toilState.successor_to_predecessors != {}:
- raise RuntimeError(f"Successors pending for their predecessors at shutdown: {self.toilState.successor_to_predecessors}")
+ raise RuntimeError(
+ f"Successors pending for their predecessors at shutdown: {self.toilState.successor_to_predecessors}"
+ )
  if self.toilState.service_to_client != {}:
- raise RuntimeError(f"Services pending for their clients at shutdown: {self.toilState.service_to_client}")
+ raise RuntimeError(
+ f"Services pending for their clients at shutdown: {self.toilState.service_to_client}"
+ )
  if self.toilState.servicesIssued != {}:
- raise RuntimeError(f"Services running at shutdown: {self.toilState.servicesIssued}")
+ raise RuntimeError(
+ f"Services running at shutdown: {self.toilState.servicesIssued}"
+ )

  def checkForDeadlocks(self):
  """Check if the system is deadlocked running service jobs."""
@@ -814,18 +991,22 @@ class Leader:
  # If there are no updated jobs and at least some jobs running
  if totalServicesIssued >= totalRunningJobs and totalRunningJobs > 0:
  # Collect all running service job store IDs into a set to compare with the deadlock set
- running_service_ids: Set[str] = set()
+ running_service_ids: set[str] = set()
  for js_id in self.issued_jobs_by_batch_system_id.values():
  job = self.toilState.get_job(js_id)
- if isinstance(job, ServiceJobDescription) and self.serviceManager.is_running(js_id):
+ if isinstance(
+ job, ServiceJobDescription
+ ) and self.serviceManager.is_running(js_id):
  running_service_ids.add(js_id)

  if len(running_service_ids) > totalRunningJobs:
  # This is too many services.
  # TODO: couldn't more jobs have started since we polled the
  # running job count?
- raise RuntimeError(f"Supposedly running {len(running_service_ids)} services, which is"
- f"more than the {totalRunningJobs} currently running jobs overall.")
+ raise RuntimeError(
+ f"Supposedly running {len(running_service_ids)} services, which is"
+ f"more than the {totalRunningJobs} currently running jobs overall."
+ )

  # If all the running jobs are active services then we have a potential deadlock
  if len(running_service_ids) == totalRunningJobs:
@@ -839,27 +1020,49 @@ class Leader:
  # Use a generic message if none is available
  message = "Cluster may be too small."

-
  # See if this is a new potential deadlock
  if self.potentialDeadlockedJobs != running_service_ids:
- logger.warning(("Potential deadlock detected! All %s running jobs are service jobs, "
- "with no normal jobs to use them! %s"), totalRunningJobs, message)
+ logger.warning(
+ (
+ "Potential deadlock detected! All %s running jobs are service jobs, "
+ "with no normal jobs to use them! %s"
+ ),
+ totalRunningJobs,
+ message,
+ )
  self.potentialDeadlockedJobs = running_service_ids
  self.potentialDeadlockTime = time.time()
  else:
  # We wait self.config.deadlockWait seconds before declaring the system deadlocked
  stuckFor = time.time() - self.potentialDeadlockTime
  if stuckFor >= self.config.deadlockWait:
- logger.error("We have been deadlocked since %s on these service jobs: %s",
- self.potentialDeadlockTime, self.potentialDeadlockedJobs)
- raise DeadlockException(("The workflow is service deadlocked - all %d running jobs "
- "have been the same active services for at least %s seconds") % (totalRunningJobs, self.config.deadlockWait))
+ logger.error(
+ "We have been deadlocked since %s on these service jobs: %s",
+ self.potentialDeadlockTime,
+ self.potentialDeadlockedJobs,
+ )
+ raise DeadlockException(
+ (
+ "The workflow is service deadlocked - all %d running jobs "
+ "have been the same active services for at least %s seconds"
+ )
+ % (totalRunningJobs, self.config.deadlockWait)
+ )
  else:
  # Complain that we are still stuck.
- waitingNormalJobs = self.getNumberOfJobsIssued() - totalServicesIssued
- logger.warning(("Potentially deadlocked for %.0f seconds. Waiting at most %.0f more seconds "
- "for any of %d issued non-service jobs to schedule and start. %s"),
- stuckFor, self.config.deadlockWait - stuckFor, waitingNormalJobs, message)
+ waitingNormalJobs = (
+ self.getNumberOfJobsIssued() - totalServicesIssued
+ )
+ logger.warning(
+ (
+ "Potentially deadlocked for %.0f seconds. Waiting at most %.0f more seconds "
+ "for any of %d issued non-service jobs to schedule and start. %s"
+ ),
+ stuckFor,
+ self.config.deadlockWait - stuckFor,
+ waitingNormalJobs,
+ message,
+ )
  else:
  # We have observed non-service jobs running, so reset the potential deadlock
  self.feed_deadlock_watchdog()
@@ -880,34 +1083,38 @@ class Leader:
  """Add a job to the queue of jobs currently trying to run."""
  # Never issue the same job multiple times simultaneously
  if jobNode.jobStoreID in self.toilState.jobs_issued:
- raise RuntimeError(f"Attempted to issue {jobNode} multiple times simultaneously!")
+ raise RuntimeError(
+ f"Attempted to issue {jobNode} multiple times simultaneously!"
+ )

- workerCommand = [resolveEntryPoint('_toil_worker'),
- jobNode.jobName,
- self.jobStoreLocator,
- jobNode.jobStoreID]
+ workerCommand = [
+ resolveEntryPoint("_toil_worker"),
+ jobNode.jobName,
+ self.jobStoreLocator,
+ jobNode.jobStoreID,
+ ]

  for context in self.batchSystem.getWorkerContexts():
  # For each context manager hook the batch system wants to run in
  # the worker, serialize and send it.
- workerCommand.append('--context')
- workerCommand.append(base64.b64encode(pickle.dumps(context)).decode('utf-8'))
-
- # We locally override the command. This shouldn't get persisted back to
- # the job store, or we will detach the job body from the job
- # description. TODO: Don't do it this way! It's weird!
- jobNode.command = ' '.join(workerCommand)
+ workerCommand.append("--context")
+ workerCommand.append(
+ base64.b64encode(pickle.dumps(context)).decode("utf-8")
+ )

- omp_threads = os.environ.get('OMP_NUM_THREADS') \
- or str(max(1, int(jobNode.cores))) # make sure OMP_NUM_THREADS is a positive integer
+ omp_threads = os.environ.get("OMP_NUM_THREADS") or str(
+ max(1, int(jobNode.cores))
+ ) # make sure OMP_NUM_THREADS is a positive integer

  job_environment = {
  # Set the number of cores used by OpenMP applications
- 'OMP_NUM_THREADS': omp_threads,
+ "OMP_NUM_THREADS": omp_threads,
  }

  # jobBatchSystemID is an int for each job
- jobBatchSystemID = self.batchSystem.issueBatchJob(jobNode, job_environment=job_environment)
+ jobBatchSystemID = self.batchSystem.issueBatchJob(
+ " ".join(workerCommand), jobNode, job_environment=job_environment
+ )
  # Record the job by the ID the batch system will use to talk about it with us
  self.issued_jobs_by_batch_system_id[jobBatchSystemID] = jobNode.jobStoreID
  # Record that this job is issued right now and shouldn't e.g. be issued again.
@@ -917,11 +1124,18 @@ class Leader:
  # so increment this value after the job is added to the issuedJob dict
  self.preemptibleJobsIssued += 1
  cur_logger = logger.debug if jobNode.local else logger.info
- cur_logger("Issued job %s with job batch system ID: "
- "%s and %s",
- jobNode, str(jobBatchSystemID), jobNode.requirements_string())
+ cur_logger(
+ "Issued job %s with job batch system ID: " "%s and %s",
+ jobNode,
+ str(jobBatchSystemID),
+ jobNode.requirements_string(),
+ )
  # Tell everyone it is issued and the queue size changed
- self._messages.publish(JobIssuedMessage(jobNode.get_job_kind(), jobNode.jobStoreID, jobBatchSystemID))
+ self._messages.publish(
+ JobIssuedMessage(
+ get_job_kind(jobNode.get_names()), jobNode.jobStoreID, jobBatchSystemID
+ )
+ )
  self._messages.publish(QueueSizeMessage(self.getNumberOfJobsIssued()))
  # Tell the user there's another job to do
  self.progress_overall.total += 1
@@ -941,7 +1155,9 @@ class Leader:
  # Grab the service job description
  service = self.toilState.get_job(service_id)
  if not isinstance(service, ServiceJobDescription):
- raise RuntimeError("The grabbed service job description is not the right type.")
+ raise RuntimeError(
+ "The grabbed service job description is not the right type."
+ )

  if service.preemptible:
  self.preemptibleServiceJobsToBeIssued.append(service_id)
@@ -951,14 +1167,23 @@ class Leader:

  def issueQueingServiceJobs(self):
  """Issues any queuing service jobs up to the limit of the maximum allowed."""
- while len(self.serviceJobsToBeIssued) > 0 and self.serviceJobsIssued < self.config.maxServiceJobs:
+ while (
+ len(self.serviceJobsToBeIssued) > 0
+ and self.serviceJobsIssued < self.config.maxServiceJobs
+ ):
  self.issueJob(self.toilState.get_job(self.serviceJobsToBeIssued.pop()))
  self.serviceJobsIssued += 1
- while len(self.preemptibleServiceJobsToBeIssued) > 0 and self.preemptibleServiceJobsIssued < self.config.maxPreemptibleServiceJobs:
- self.issueJob(self.toilState.get_job(self.preemptibleServiceJobsToBeIssued.pop()))
+ while (
+ len(self.preemptibleServiceJobsToBeIssued) > 0
+ and self.preemptibleServiceJobsIssued
+ < self.config.maxPreemptibleServiceJobs
+ ):
+ self.issueJob(
+ self.toilState.get_job(self.preemptibleServiceJobsToBeIssued.pop())
+ )
  self.preemptibleServiceJobsIssued += 1

- def getNumberOfJobsIssued(self, preemptible: Optional[bool]=None) -> int:
+ def getNumberOfJobsIssued(self, preemptible: Optional[bool] = None) -> int:
  """
  Get number of jobs that have been added by issueJob(s) and not removed by removeJob.

@@ -1008,12 +1233,16 @@ class Leader:
1008
1233
  """
1009
1234
  if jobBatchSystemID not in self.issued_jobs_by_batch_system_id:
1010
1235
  raise RuntimeError("Job was already removed or was never issued.")
1011
- issuedDesc = self.toilState.get_job(self.issued_jobs_by_batch_system_id[jobBatchSystemID])
1236
+ issuedDesc = self.toilState.get_job(
1237
+ self.issued_jobs_by_batch_system_id[jobBatchSystemID]
1238
+ )
1012
1239
  if issuedDesc.preemptible:
1013
1240
  # len(issued_jobs_by_batch_system_id) should always be greater than or equal to preemptibleJobsIssued,
1014
1241
  # so decrement this value before removing the job from the issuedJob map
1015
1242
  if self.preemptibleJobsIssued <= 0:
1016
- raise RuntimeError("The number of preemptive issued jobs cannot be negative.")
1243
+ raise RuntimeError(
1244
+ "The number of preemptive issued jobs cannot be negative."
1245
+ )
1017
1246
  self.preemptibleJobsIssued -= 1
1018
1247
  # It's not issued anymore.
1019
1248
  del self.issued_jobs_by_batch_system_id[jobBatchSystemID]
@@ -1033,19 +1262,24 @@ class Leader:
1033
1262
 
1034
1263
  return issuedDesc
1035
1264
 
1036
- def getJobs(self, preemptible: Optional[bool] = None) -> List[JobDescription]:
1265
+ def getJobs(self, preemptible: Optional[bool] = None) -> list[JobDescription]:
1037
1266
  """
1038
1267
  Get all issued jobs.
1039
1268
 
1040
1269
  :param preemptible: If specified, select only preemptible or only non-preemptible jobs.
1041
1270
  """
1042
1271
 
1043
- jobs = [self.toilState.get_job(job_store_id) for job_store_id in self.issued_jobs_by_batch_system_id.values()]
1272
+ jobs = [
1273
+ self.toilState.get_job(job_store_id)
1274
+ for job_store_id in self.issued_jobs_by_batch_system_id.values()
1275
+ ]
1044
1276
  if preemptible is not None:
1045
1277
  jobs = [job for job in jobs if job.preemptible == preemptible]
1046
1278
  return jobs
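Alongside the reformatting, the annotations here (and in getSuccessors below) move from typing.List/typing.Set to the PEP 585 built-in generics. A two-line illustration of the spelling change only:

```python
from typing import List  # only needed for the old spelling

def issued_old() -> List[str]: ...
def issued_new() -> list[str]: ...   # built-in generic, no typing import required on Python 3.9+
```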
1047
1279
 
1048
- def killJobs(self, jobsToKill):
1280
+ def killJobs(
1281
+ self, jobsToKill, exit_reason: BatchJobExitReason = BatchJobExitReason.KILLED
1282
+ ):
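killJobs now takes an exit_reason, defaulting to BatchJobExitReason.KILLED, so callers that kill jobs for different reasons can record why (the later hunks pass MAXJOBDURATION and MISSING). A hedged, self-contained sketch of the defaulted-reason pattern; ExitReason and kill_jobs below are stand-ins, not Toil's actual names:

```python
from enum import Enum

class ExitReason(Enum):            # stand-in for toil's BatchJobExitReason
    KILLED = 1
    MAXJOBDURATION = 2
    MISSING = 3

def kill_jobs(job_ids, exit_reason: ExitReason = ExitReason.KILLED):
    """Kill each job, then hand it on for processing together with the reason it was killed."""
    return [(job_id, exit_reason) for job_id in job_ids]

kill_jobs([1, 2])                                      # default: KILLED
kill_jobs([3], exit_reason=ExitReason.MAXJOBDURATION)  # over-long jobs
kill_jobs([4], exit_reason=ExitReason.MISSING)         # jobs the batch system lost track of
```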
1049
1283
  """
1050
1284
  Kills the given set of jobs and then sends them for processing.
1051
1285
 
@@ -1059,7 +1293,9 @@ class Leader:
1059
1293
  self.batchSystem.killBatchJobs(jobsToKill)
1060
1294
  for jobBatchSystemID in jobsToKill:
1061
1295
  # Reissue immediately, noting that we killed the job
1062
- willRerun = self.process_finished_job(jobBatchSystemID, 1, exit_reason=BatchJobExitReason.KILLED)
1296
+ willRerun = self.process_finished_job(
1297
+ jobBatchSystemID, 1, exit_reason=exit_reason
1298
+ )
1063
1299
 
1064
1300
  if willRerun:
1065
1301
  # Compose a list of all the jobs that will run again
@@ -1067,8 +1303,7 @@ class Leader:
1067
1303
 
1068
1304
  return jobsRerunning
1069
1305
 
1070
-
1071
- #Following functions handle error cases for when jobs have gone awry with the batch system.
1306
+ # Following functions handle error cases for when jobs have gone awry with the batch system.
1072
1307
 
1073
1308
  def reissueOverLongJobs(self) -> None:
1074
1309
  """
@@ -1079,20 +1314,30 @@ class Leader:
1079
1314
  """
1080
1315
  maxJobDuration = self.config.maxJobDuration
1081
1316
  jobsToKill = []
1082
- if maxJobDuration < 10000000: # We won't bother doing anything if rescue time > 16 weeks.
1317
+ if (
1318
+ maxJobDuration < 10000000
1319
+ ): # We won't bother doing anything if rescue time > 16 weeks.
1083
1320
  runningJobs = self.batchSystem.getRunningBatchJobIDs()
1084
1321
  for jobBatchSystemID in list(runningJobs.keys()):
1085
1322
  if runningJobs[jobBatchSystemID] > maxJobDuration:
1086
- logger.warning("The job: %s has been running for: %s seconds, more than the "
1087
- "max job duration: %s, we'll kill it",
1088
- self.issued_jobs_by_batch_system_id[jobBatchSystemID],
1089
- str(runningJobs[jobBatchSystemID]),
1090
- str(maxJobDuration))
1323
+ logger.warning(
1324
+ "The job: %s has been running for: %s seconds, more than the "
1325
+ "max job duration: %s, we'll kill it",
1326
+ self.issued_jobs_by_batch_system_id[jobBatchSystemID],
1327
+ str(runningJobs[jobBatchSystemID]),
1328
+ str(maxJobDuration),
1329
+ )
1091
1330
  jobsToKill.append(jobBatchSystemID)
1092
- reissued = self.killJobs(jobsToKill)
1331
+ reissued = self.killJobs(
1332
+ jobsToKill, exit_reason=BatchJobExitReason.MAXJOBDURATION
1333
+ )
1093
1334
  if len(jobsToKill) > 0:
1094
1335
  # Summarize our actions
1095
- logger.info("Killed %d over long jobs and reissued %d of them", len(jobsToKill), len(reissued))
1336
+ logger.info(
1337
+ "Killed %d over long jobs and reissued %d of them",
1338
+ len(jobsToKill),
1339
+ len(reissued),
1340
+ )
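reissueOverLongJobs compares each running job's elapsed time against config.maxJobDuration and now kills offenders with BatchJobExitReason.MAXJOBDURATION; the whole check is skipped when the limit is at or above 10,000,000 seconds (roughly 16 weeks), which is treated as "no limit". A minimal sketch of the selection step with made-up runtimes:

```python
def find_overlong(running_seconds: dict, max_job_duration: float) -> list:
    """Pick batch-system IDs whose runtime exceeds the limit; no-op if the limit is effectively off."""
    if max_job_duration >= 10_000_000:   # ~16 weeks: treated as unlimited
        return []
    return [job_id for job_id, elapsed in running_seconds.items() if elapsed > max_job_duration]

find_overlong({101: 120.0, 102: 90_000.0}, max_job_duration=86_400)  # -> [102]
```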
1096
1341
 
1097
1342
  def reissueMissingJobs(self, killAfterNTimesMissing=3):
1098
1343
  """
@@ -1104,11 +1349,13 @@ class Leader:
1104
1349
  """
1105
1350
  issuedJobs = set(self.batchSystem.getIssuedBatchJobIDs())
1106
1351
  jobBatchSystemIDsSet = set(list(self.issued_jobs_by_batch_system_id.keys()))
1107
- #Clean up the reissueMissingJobs_missingHash hash, getting rid of jobs that have turned up
1352
+ # Clean up the reissueMissingJobs_missingHash hash, getting rid of jobs that have turned up
1108
1353
  missingJobIDsSet = set(list(self.reissueMissingJobs_missingHash.keys()))
1109
1354
  for jobBatchSystemID in missingJobIDsSet.difference(jobBatchSystemIDsSet):
1110
1355
  self.reissueMissingJobs_missingHash.pop(jobBatchSystemID)
1111
- logger.warning("Batch system id: %s is no longer missing", str(jobBatchSystemID))
1356
+ logger.warning(
1357
+ "Batch system id: %s is no longer missing", str(jobBatchSystemID)
1358
+ )
1112
1359
  # checks we have no unexpected jobs running
1113
1360
  if not issuedJobs.issubset(jobBatchSystemIDsSet):
1114
1361
  raise RuntimeError("An unexpected job is still running.")
@@ -1120,24 +1367,33 @@ class Leader:
1120
1367
  else:
1121
1368
  self.reissueMissingJobs_missingHash[jobBatchSystemID] = 1
1122
1369
  timesMissing = self.reissueMissingJobs_missingHash[jobBatchSystemID]
1123
- logger.warning("Job store ID %s with batch system id %s is missing for the %i time",
1124
- jobStoreID, str(jobBatchSystemID), timesMissing)
1370
+ logger.warning(
1371
+ "Job store ID %s with batch system id %s is missing for the %i time",
1372
+ jobStoreID,
1373
+ str(jobBatchSystemID),
1374
+ timesMissing,
1375
+ )
1125
1376
  # Tell everyone it is missing
1126
1377
  self._messages.publish(JobMissingMessage(jobStoreID))
1127
1378
  if timesMissing == killAfterNTimesMissing:
1128
1379
  self.reissueMissingJobs_missingHash.pop(jobBatchSystemID)
1129
1380
  jobsToKill.append(jobBatchSystemID)
1130
- self.killJobs(jobsToKill)
1131
- return len( self.reissueMissingJobs_missingHash ) == 0 #We use this to inform
1132
- #if there are missing jobs
1381
+ self.killJobs(jobsToKill, exit_reason=BatchJobExitReason.MISSING)
1382
+ return len(self.reissueMissingJobs_missingHash) == 0 # We use this to inform
1383
+ # if there are missing jobs
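reissueMissingJobs keeps a per-job "times missing" counter and only kills a job once it has gone unreported for killAfterNTimesMissing consecutive polls, now tagging it with BatchJobExitReason.MISSING. A compact sketch of that escalation pattern; the names below are illustrative, not the Leader's actual attributes:

```python
def find_jobs_to_kill(issued, reported, missing_counts, kill_after=3):
    """Track how many consecutive polls each issued job has gone unreported; return ones to kill."""
    to_kill = []
    for job_id in set(missing_counts) - set(issued):
        del missing_counts[job_id]          # job is no longer issued, stop tracking it
    for job_id in set(issued) - set(reported):
        missing_counts[job_id] = missing_counts.get(job_id, 0) + 1
        if missing_counts[job_id] == kill_after:
            del missing_counts[job_id]
            to_kill.append(job_id)
    return to_kill

counts = {}
find_jobs_to_kill(issued=[1, 2, 3], reported=[1], missing_counts=counts)  # jobs 2 and 3 start counting
```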
1133
1384
 
1134
1385
  def processRemovedJob(self, issuedJob, result_status):
1135
1386
  if result_status != 0:
1136
- logger.warning("Despite the batch system claiming failure the "
1137
- "job %s seems to have finished and been removed", issuedJob)
1387
+ logger.warning(
1388
+ "Despite the batch system claiming failure the "
1389
+ "job %s seems to have finished and been removed",
1390
+ issuedJob,
1391
+ )
1138
1392
  self._updatePredecessorStatus(issuedJob.jobStoreID)
1139
1393
 
1140
- def process_finished_job(self, batch_system_id, result_status, wall_time=None, exit_reason=None) -> bool:
1394
+ def process_finished_job(
1395
+ self, batch_system_id, result_status, wall_time=None, exit_reason=None
1396
+ ) -> bool:
1141
1397
  """
1142
1398
  Process finished jobs.
1143
1399
 
@@ -1157,15 +1413,21 @@ class Leader:
1157
1413
  self.progress_overall.update(incr=-1)
1158
1414
  self.progress_failed.update(incr=1)
1159
1415
 
1160
- # Delegate to the vers
1161
- return self.process_finished_job_description(issued_job, result_status, wall_time, exit_reason, batch_system_id)
1162
-
1163
- def process_finished_job_description(self, finished_job: JobDescription, result_status: int,
1164
- wall_time: Optional[float] = None,
1165
- exit_reason: Optional[BatchJobExitReason] = None,
1166
- batch_system_id: Optional[int] = None) -> bool:
1416
+ # Delegate to the version that uses a JobDescription
1417
+ return self.process_finished_job_description(
1418
+ issued_job, result_status, wall_time, exit_reason, batch_system_id
1419
+ )
1420
+
1421
+ def process_finished_job_description(
1422
+ self,
1423
+ finished_job: JobDescription,
1424
+ result_status: int,
1425
+ wall_time: Optional[float] = None,
1426
+ exit_reason: Optional[BatchJobExitReason] = None,
1427
+ batch_system_id: Optional[int] = None,
1428
+ ) -> bool:
1167
1429
  """
1168
- Process a finished JobDescription based upon its succees or failure.
1430
+ Process a finished JobDescription based upon its success or failure.
1169
1431
 
1170
1432
  If wall-clock time is available, informs the cluster scaler about the
1171
1433
  job finishing.
@@ -1185,22 +1447,67 @@ class Leader:
1185
1447
  # TODO: Use message bus?
1186
1448
  self.clusterScaler.addCompletedJob(finished_job, wall_time)
1187
1449
  if self.toilState.job_exists(job_store_id):
1188
- logger.debug("Job %s continues to exist (i.e. has more to do)", finished_job)
1450
+ logger.debug(
1451
+ "Job %s continues to exist (i.e. has more to do)", finished_job
1452
+ )
1189
1453
  try:
1190
1454
  # Reload the job as modified by the worker
1191
- self.toilState.reset_job(job_store_id)
1192
- replacement_job = self.toilState.get_job(job_store_id)
1455
+ if finished_job.has_body():
1456
+ # The worker was expected to do some work. We expect the
1457
+ # worker to have updated the job description.
1458
+
1459
+ # If the job succeeded, we wait around to see the update
1460
+ # and fail the job if we don't see it.
1461
+ if result_status == 0:
1462
+ timeout = self.config.job_store_timeout
1463
+ complaint = (
1464
+ f"has no new version available after {timeout} "
1465
+ "seconds. Either worker updates to "
1466
+ "the job store are delayed longer than your "
1467
+ "--jobStoreTimeout, or the worker trying to run the "
1468
+ "job was killed (or never started)."
1469
+ )
1470
+ else:
1471
+ timeout = 0
1472
+ complaint = (
1473
+ "has no new version available immediately. The "
1474
+ "batch system may have killed (or never started) "
1475
+ "the Toil worker."
1476
+ )
1477
+ change_detected = self.toilState.reset_job_expecting_change(
1478
+ job_store_id, timeout
1479
+ )
1480
+ replacement_job = self.toilState.get_job(job_store_id)
1481
+
1482
+ if not change_detected:
1483
+ logger.warning("Job %s %s", replacement_job, complaint)
1484
+ if result_status == 0:
1485
+ # Make the job fail because we ran it and it finished
1486
+ # and we never heard back.
1487
+ logger.error(
1488
+ "Marking ostensibly successful job %s that did "
1489
+ "not report in to the job store before "
1490
+ "--jobStoreTimeout as having been partitioned "
1491
+ "from us.",
1492
+ replacement_job,
1493
+ )
1494
+ result_status = EXIT_STATUS_UNAVAILABLE_VALUE
1495
+ exit_reason = BatchJobExitReason.PARTITION
1496
+ else:
1497
+ # If there was no body sent, the worker won't commit any
1498
+ # changes to the job description. So don't wait around for
1499
+ # any and don't complain if we don't see them.
1500
+ self.toilState.reset_job(job_store_id)
1501
+ replacement_job = self.toilState.get_job(job_store_id)
1502
+
1193
1503
  except NoSuchJobException:
1194
1504
  # We have a ghost job - the job has been deleted but a stale
1195
1505
  # read from e.g. a non-POSIX-compliant filesystem gave us a
1196
1506
  # false positive when we checked for its existence. Process the
1197
1507
  # job from here as any other job removed from the job store.
1198
- # This is a hack until we can figure out how to actually always
1199
- # have a strongly-consistent communications channel. See
1200
- # https://github.com/BD2KGenomics/toil/issues/1091
1201
- logger.warning('Got a stale read for job %s; caught its '
1202
- 'completion in time, but other jobs may try to run twice! Fix '
1203
- 'the consistency of your job store storage!', finished_job)
1508
+ logger.debug(
1509
+ "Job %s is actually complete upon closer inspection", finished_job
1510
+ )
1204
1511
  self.processRemovedJob(finished_job, result_status)
1205
1512
  return False
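The largest behavioural change in this hunk: when a job that had a body reports success, the leader now waits up to config.job_store_timeout for the worker's updated JobDescription to appear (reset_job_expecting_change), and if no update shows up it reclassifies the job as failed with EXIT_STATUS_UNAVAILABLE_VALUE and BatchJobExitReason.PARTITION; bodiless jobs skip the wait entirely. A simplified, self-contained sketch of that decision flow, with stand-ins in place of the real ToilState and job store:

```python
import time

EXIT_STATUS_UNAVAILABLE_VALUE = 255   # stand-in value; the real constant lives elsewhere in Toil

def settle_result(has_body: bool, result_status: int, saw_update, timeout: float):
    """Decide a finished job's final status, waiting for a job-store update when one is expected."""
    if not has_body:
        return result_status, None          # no body, so no update to wait for
    wait = timeout if result_status == 0 else 0.0
    deadline = time.monotonic() + wait
    while not saw_update():
        if time.monotonic() >= deadline:
            if result_status == 0:
                # "Successful" job never reported back: treat it as partitioned from us.
                return EXIT_STATUS_UNAVAILABLE_VALUE, "PARTITION"
            return result_status, None
        time.sleep(0.1)
    return result_status, None

# e.g. a worker that never checks in within the timeout:
settle_result(has_body=True, result_status=0, saw_update=lambda: False, timeout=0.2)
```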
1206
1513
  if replacement_job.logJobStoreFileID is not None:
@@ -1208,17 +1515,31 @@ class Leader:
1208
1515
  # more memory efficient than read().striplines() while leaving off the
1209
1516
  # trailing \n left when using readlines()
1210
1517
  # http://stackoverflow.com/a/15233739
1211
- StatsAndLogging.logWithFormatting(job_store_id, log_stream, method=logger.warning,
1212
- message='The job seems to have left a log file, indicating failure: %s' % replacement_job)
1518
+ StatsAndLogging.logWithFormatting(
1519
+ f'Log from job "{job_store_id}"',
1520
+ log_stream,
1521
+ method=logger.warning,
1522
+ message="The job seems to have left a log file, indicating failure: %s"
1523
+ % replacement_job,
1524
+ )
1213
1525
  if self.config.writeLogs or self.config.writeLogsGzip:
1214
1526
  with replacement_job.getLogFileHandle(self.jobStore) as log_stream:
1215
- StatsAndLogging.writeLogFiles(replacement_job.chainedJobs, log_stream, self.config, failed=True)
1527
+ # Send log data from the job store to each per-job log file involved.
1528
+ StatsAndLogging.writeLogFiles(
1529
+ [names.stats_name for names in replacement_job.get_chain()],
1530
+ log_stream,
1531
+ self.config,
1532
+ failed=True,
1533
+ )
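Failed-job logs are now captioned with the job store ID and fanned out to one log file per job in the chain, using get_chain()/stats_name in place of the old chainedJobs list. A rough sketch of the naming step, with a hypothetical chain record standing in for whatever get_chain() actually returns:

```python
from typing import NamedTuple, Optional

class ChainEntry(NamedTuple):        # stand-in for the per-job records returned by get_chain()
    stats_name: str

def log_file_names(chain: list, suffix: Optional[str] = None) -> list:
    """One log name per chained job, optionally tagged with a batch-system file root."""
    names = [entry.stats_name for entry in chain]
    return [f"{name}_{suffix}" for name in names] if suffix else names

log_file_names([ChainEntry("map"), ChainEntry("reduce")], suffix="toil_job_7_std_output")
```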
1216
1534
  if result_status != 0:
1217
1535
  # If the batch system returned a non-zero exit code then the worker
1218
1536
  # is assumed not to have captured the failure of the job, so we
1219
1537
  # reduce the try count here.
1220
1538
  if replacement_job.logJobStoreFileID is None:
1221
- logger.warning("No log file is present, despite job failing: %s", replacement_job)
1539
+ logger.warning(
1540
+ "No log file is present, despite job failing: %s",
1541
+ replacement_job,
1542
+ )
1222
1543
 
1223
1544
  if batch_system_id is not None:
1224
1545
  # Look for any standard output/error files created by the batch system.
@@ -1227,31 +1548,60 @@ class Leader:
1227
1548
  # --workDir / TOIL_WORKDIR is on a shared file system.
1228
1549
  # They live directly in the Toil work directory because that is
1229
1550
  # guaranteed to exist on the leader and workers.
1230
- file_list = glob.glob(self.batchSystem.format_std_out_err_glob(batch_system_id))
1551
+ file_list = glob.glob(
1552
+ self.batchSystem.format_std_out_err_glob(batch_system_id)
1553
+ )
1231
1554
  for log_file in file_list:
1232
1555
  try:
1233
- log_stream = open(log_file, 'rb')
1556
+ log_stream = open(log_file, "rb")
1234
1557
  except:
1235
- logger.warning('The batch system left a file %s, but it could not be opened' % log_file)
1558
+ logger.warning(
1559
+ "The batch system left a file %s, but it could not be opened"
1560
+ % log_file
1561
+ )
1236
1562
  else:
1237
1563
  with log_stream:
1238
1564
  if os.path.getsize(log_file) > 0:
1239
- StatsAndLogging.logWithFormatting(job_store_id, log_stream, method=logger.warning,
1240
- message='The batch system left a non-empty file %s:' % log_file)
1241
- if self.config.writeLogs or self.config.writeLogsGzip:
1242
- file_root, _ = os.path.splitext(os.path.basename(log_file))
1243
- job_names = replacement_job.chainedJobs
1244
- if job_names is None: # For jobs that fail this way, replacement_job.chainedJobs is not guaranteed to be set
1245
- job_names = [str(replacement_job)]
1246
- job_names = [j + '_' + file_root for j in job_names]
1565
+ StatsAndLogging.logWithFormatting(
1566
+ f'Log from job "{job_store_id}"',
1567
+ log_stream,
1568
+ method=logger.warning,
1569
+ message="The batch system left a non-empty file %s:"
1570
+ % log_file,
1571
+ )
1572
+ if (
1573
+ self.config.writeLogs
1574
+ or self.config.writeLogsGzip
1575
+ ):
1576
+ file_root, _ = os.path.splitext(
1577
+ os.path.basename(log_file)
1578
+ )
1579
+ job_names = [
1580
+ names.stats_name
1581
+ for names in replacement_job.get_chain()
1582
+ ]
1583
+ # Tack the batch system log file name onto each job's name
1584
+ job_names = [
1585
+ j + "_" + file_root for j in job_names
1586
+ ]
1247
1587
  log_stream.seek(0)
1248
- StatsAndLogging.writeLogFiles(job_names, log_stream, self.config, failed=True)
1588
+ StatsAndLogging.writeLogFiles(
1589
+ job_names,
1590
+ log_stream,
1591
+ self.config,
1592
+ failed=True,
1593
+ )
1249
1594
  else:
1250
- logger.warning('The batch system left an empty file %s' % log_file)
1595
+ logger.warning(
1596
+ "The batch system left an empty file %s"
1597
+ % log_file
1598
+ )
1251
1599
 
1252
1600
  # Tell the job to reset itself after a failure.
1253
1601
  # It needs to know the failure reason if available; some are handled specially.
1254
- replacement_job.setupJobAfterFailure(exit_status=result_status, exit_reason=exit_reason)
1602
+ replacement_job.setupJobAfterFailure(
1603
+ exit_status=result_status, exit_reason=exit_reason
1604
+ )
1255
1605
  self.toilState.commit_job(job_store_id)
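On failure the leader also sweeps up any per-job standard output/error files the batch system left in the shared work directory: empty files are merely noted, while non-empty ones are logged and, when per-job log writing is enabled, saved under names that combine each chained job's name with the file's root. A small sketch of that sweep, assuming a hypothetical glob pattern rather than the real format_std_out_err_glob():

```python
import glob
import os

def sweep_batch_logs(pattern: str):
    """Yield (file_root, contents) for every non-empty batch-system log matching the pattern."""
    for path in glob.glob(pattern):
        if os.path.getsize(path) == 0:
            print(f"The batch system left an empty file {path}")
            continue
        root, _ = os.path.splitext(os.path.basename(path))
        with open(path, "rb") as stream:
            yield root, stream.read()

# Hypothetical pattern; the real one comes from batchSystem.format_std_out_err_glob(batch_system_id).
for root, data in sweep_batch_logs("/tmp/toil_workdir/toil_job_42_*"):
    print(root, len(data))
```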
1256
1606
 
1257
1607
  elif job_store_id in self.toilState.hasFailedSuccessors:
@@ -1259,18 +1609,20 @@ class Leader:
1259
1609
  self.toilState.hasFailedSuccessors.remove(job_store_id)
1260
1610
 
1261
1611
  # Now that we know the job is done we can add it to the list of updated jobs
1262
- self._messages.publish(JobUpdatedMessage(replacement_job.jobStoreID, result_status))
1612
+ self._messages.publish(
1613
+ JobUpdatedMessage(replacement_job.jobStoreID, result_status)
1614
+ )
1263
1615
  logger.debug("Added job: %s to updated jobs", replacement_job)
1264
1616
 
1265
1617
  # Return True if it will rerun (still has retries) and false if it
1266
1618
  # is completely failed.
1267
1619
  return replacement_job.remainingTryCount > 0
1268
- else: #The job is done
1620
+ else: # The job is done
1269
1621
  self.processRemovedJob(finished_job, result_status)
1270
1622
  # Being done, it won't run again.
1271
1623
  return False
1272
1624
 
1273
- def getSuccessors(self, job_id: str, alreadySeenSuccessors: Set[str]) -> Set[str]:
1625
+ def getSuccessors(self, job_id: str, alreadySeenSuccessors: set[str]) -> set[str]:
1274
1626
  """
1275
1627
  Get successors of the given job by walking the job graph recursively.
1276
1628
 
@@ -1278,6 +1630,7 @@ class Leader:
1278
1630
  :returns: The set of found successors. This set is added to alreadySeenSuccessors.
1279
1631
  """
1280
1632
  successors = set()
1633
+
1281
1634
  def successorRecursion(job_id: str) -> None:
1282
1635
  # TODO: do we need to reload from the job store here, or is the cache OK?
1283
1636
  jobDesc = self.toilState.get_job(job_id)
@@ -1309,12 +1662,15 @@ class Leader:
1309
1662
 
1310
1663
  # Tell everyone it failed
1311
1664
 
1312
- self._messages.publish(JobFailedMessage(job_desc.get_job_kind(), job_id))
1665
+ self._messages.publish(
1666
+ JobFailedMessage(get_job_kind(job_desc.get_names()), job_id)
1667
+ )
1313
1668
 
1314
1669
  if job_id in self.toilState.service_to_client:
1315
1670
  # Is a service job
1316
- logger.debug("Service job is being processed as a totally failed job: %s", job_desc)
1317
-
1671
+ logger.debug(
1672
+ "Service job is being processed as a totally failed job: %s", job_desc
1673
+ )
1318
1674
 
1319
1675
  if not isinstance(job_desc, ServiceJobDescription):
1320
1676
  raise RuntimeError("The service job description type is incorrect.")
@@ -1338,8 +1694,13 @@ class Leader:
1338
1694
  # properly, and to remember that this service failed with an error
1339
1695
  # and possibly never started.
1340
1696
  if client_id in self.toilState.servicesIssued:
1341
- self.serviceManager.kill_services(self.toilState.servicesIssued[client_id], error=True)
1342
- logger.warning("Job: %s is instructing all other services of its parent job to quit", job_desc)
1697
+ self.serviceManager.kill_services(
1698
+ self.toilState.servicesIssued[client_id], error=True
1699
+ )
1700
+ logger.warning(
1701
+ "Job: %s is instructing all other services of its parent job to quit",
1702
+ job_desc,
1703
+ )
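When a service job fails permanently, the leader records the failure and tells every sibling service of the same client job to shut down with error=True, so the client cannot keep waiting on a half-started service group. A toy sketch of that bookkeeping, using plain dicts and a callback instead of the real ServiceManager:

```python
def fail_service(services_by_client: dict, failed_services: set, client_id: str, service_id: str, kill):
    """Record one failed service and ask its siblings (services of the same client) to quit."""
    failed_services.add(service_id)
    siblings = services_by_client.get(client_id, set())
    if siblings:
        kill(siblings, error=True)

state = {"client-1": {"svc-a", "svc-b"}}
fail_service(state, set(), "client-1", "svc-a",
             kill=lambda ids, error: print("killing", sorted(ids), "error =", error))
```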
1343
1704
 
1344
1705
  # This ensures that the job will not attempt to run any of it's
1345
1706
  # successors on the stack
@@ -1363,9 +1724,14 @@ class Leader:
1363
1724
  # Any successor already in toilState.failedSuccessors will not be traversed
1364
1725
  # All successors traversed will be added to toilState.failedSuccessors and returned
1365
1726
  # as a set (unseenSuccessors).
1366
- unseenSuccessors = self.getSuccessors(job_id, self.toilState.failedSuccessors)
1367
- logger.debug("Found new failed successors: %s of job: %s", " ".join(
1368
- unseenSuccessors), job_desc)
1727
+ unseenSuccessors = self.getSuccessors(
1728
+ job_id, self.toilState.failedSuccessors
1729
+ )
1730
+ logger.debug(
1731
+ "Found new failed successors: %s of job: %s",
1732
+ " ".join(unseenSuccessors),
1733
+ job_desc,
1734
+ )
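After a total failure, the leader walks the failed job's successor graph, records every successor not already known to have failed, and then flags each predecessor still waiting on those successors. A self-contained sketch of that traversal over a plain dict-based graph (the real code walks JobDescriptions through ToilState):

```python
def collect_new_failed_successors(graph: dict, job_id: str, already_failed: set) -> set:
    """Return successors of job_id not yet in already_failed, adding them to it as a side effect."""
    found = set()

    def walk(current: str) -> None:
        for successor in graph.get(current, []):
            if successor not in already_failed and successor not in found:
                found.add(successor)
                walk(successor)

    walk(job_id)
    already_failed |= found
    return found

graph = {"a": ["b", "c"], "b": ["d"], "c": ["d"]}
collect_new_failed_successors(graph, "a", already_failed={"c"})  # -> {"b", "d"}
```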
1369
1735
 
1370
1736
  # For each newly found successor
1371
1737
  for successorJobStoreID in unseenSuccessors:
@@ -1376,7 +1742,9 @@ class Leader:
1376
1742
  # For each such predecessor job
1377
1743
  # (we remove the successor from toilState.successor_to_predecessors to avoid doing
1378
1744
  # this multiple times for each failed predecessor)
1379
- for predecessor_id in self.toilState.successor_to_predecessors.pop(successorJobStoreID):
1745
+ for predecessor_id in self.toilState.successor_to_predecessors.pop(
1746
+ successorJobStoreID
1747
+ ):
1380
1748
 
1381
1749
  predecessor = self.toilState.get_job(predecessor_id)
1382
1750
 
@@ -1385,8 +1753,11 @@ class Leader:
1385
1753
 
1386
1754
  # Indicate that it has failed jobs.
1387
1755
  self.toilState.hasFailedSuccessors.add(predecessor_id)
1388
- logger.debug("Marking job: %s as having failed successors (found by "
1389
- "reading successors failed job)", predecessor)
1756
+ logger.debug(
1757
+ "Marking job: %s as having failed successors (found by "
1758
+ "reading successors failed job)",
1759
+ predecessor,
1760
+ )
1390
1761
 
1391
1762
  # If the predecessor has no remaining successors, add to list of updated jobs
1392
1763
  if self.toilState.count_pending_successors(predecessor_id) == 0:
@@ -1400,8 +1771,12 @@ class Leader:
1400
1771
 
1401
1772
  # Mark the predecessor as failed
1402
1773
  self.toilState.hasFailedSuccessors.add(predecessor_id)
1403
- logger.debug("Totally failed job: %s is marking direct predecessor: %s "
1404
- "as having failed jobs", job_desc, self.toilState.get_job(predecessor_id))
1774
+ logger.debug(
1775
+ "Totally failed job: %s is marking direct predecessor: %s "
1776
+ "as having failed jobs",
1777
+ job_desc,
1778
+ self.toilState.get_job(predecessor_id),
1779
+ )
1405
1780
 
1406
1781
  self._updatePredecessorStatus(job_id)
1407
1782
 
@@ -1411,38 +1786,59 @@ class Leader:
1411
1786
  # Is a service host job, so its predecessor is its client
1412
1787
  client_id = self.toilState.service_to_client.pop(jobStoreID)
1413
1788
  self.toilState.servicesIssued[client_id].remove(jobStoreID)
1414
- if len(self.toilState.servicesIssued[client_id]) == 0: # Predecessor job has
1789
+ if (
1790
+ len(self.toilState.servicesIssued[client_id]) == 0
1791
+ ): # Predecessor job has
1415
1792
  # all its services terminated
1416
- self.toilState.servicesIssued.pop(client_id) # The job has no running services
1793
+ self.toilState.servicesIssued.pop(
1794
+ client_id
1795
+ ) # The job has no running services
1417
1796
 
1418
- logger.debug('Job %s is no longer waiting on services; all services have stopped', self.toilState.get_job(client_id))
1797
+ logger.debug(
1798
+ "Job %s is no longer waiting on services; all services have stopped",
1799
+ self.toilState.get_job(client_id),
1800
+ )
1419
1801
 
1420
1802
  # Now we know the job is done we can add it to the list of
1421
1803
  # updated job files
1422
1804
  self._messages.publish(JobUpdatedMessage(client_id, 0))
1423
1805
  else:
1424
- logger.debug('Job %s is still waiting on %d services',
1425
- self.toilState.get_job(client_id),
1426
- len(self.toilState.servicesIssued[client_id]))
1806
+ logger.debug(
1807
+ "Job %s is still waiting on %d services",
1808
+ self.toilState.get_job(client_id),
1809
+ len(self.toilState.servicesIssued[client_id]),
1810
+ )
1427
1811
  elif jobStoreID not in self.toilState.successor_to_predecessors:
1428
- #We have reach the root job
1812
+ # We have reach the root job
1429
1813
  if self._messages.count(JobUpdatedMessage) != 0:
1430
1814
  raise RuntimeError("Root job is done but other jobs are still updated")
1431
1815
  if len(self.toilState.successor_to_predecessors) != 0:
1432
- raise RuntimeError("Job {} is finished and had no predecessor, but we have other outstanding jobs "
1433
- "with predecessors: {}".format(jobStoreID, self.toilState.successor_to_predecessors.keys()))
1816
+ raise RuntimeError(
1817
+ "Job {} is finished and had no predecessor, but we have other outstanding jobs "
1818
+ "with predecessors: {}".format(
1819
+ jobStoreID, self.toilState.successor_to_predecessors.keys()
1820
+ )
1821
+ )
1434
1822
  if len(self.toilState.successorCounts) != 0:
1435
- raise RuntimeError("Root job is done but jobs waiting on successors: {self.toilState.successorCounts}")
1436
- logger.debug("Reached root job %s so no predecessors to clean up" % jobStoreID)
1823
+ raise RuntimeError(
1824
+ "Root job is done but jobs waiting on successors: {self.toilState.successorCounts}"
1825
+ )
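Worth noting for reviewers: both the old and the new form of this RuntimeError message put {self.toilState.successorCounts} inside a plain string literal, so the braces are printed verbatim rather than interpolated; the same pattern appears in the predecessor-type check near the end of this section. An illustrative snippet, outside the diff, showing the difference an f prefix makes:

```python
# Illustrative only, not part of the diff: without the f prefix the braces stay literal text.
counts = {"job-1": 2}
plain = "Root job is done but jobs waiting on successors: {counts}"
formatted = f"Root job is done but jobs waiting on successors: {counts}"
print(plain)      # braces printed verbatim
print(formatted)  # shows {'job-1': 2}
```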
1826
+ logger.debug(
1827
+ "Reached root job %s so no predecessors to clean up" % jobStoreID
1828
+ )
1437
1829
 
1438
1830
  else:
1439
1831
  # Is a non-root, non-service job
1440
1832
  logger.debug("Cleaning the predecessors of %s" % jobStoreID)
1441
1833
 
1442
1834
  # For each predecessor
1443
- for predecessor_id in self.toilState.successor_to_predecessors.pop(jobStoreID):
1835
+ for predecessor_id in self.toilState.successor_to_predecessors.pop(
1836
+ jobStoreID
1837
+ ):
1444
1838
  if not isinstance(predecessor_id, str):
1445
- raise RuntimeError("Predecessor ID should be str but is {type(predecessor_id)}")
1839
+ raise RuntimeError(
1840
+ "Predecessor ID should be str but is {type(predecessor_id)}"
1841
+ )
1446
1842
  predecessor = self.toilState.get_job(predecessor_id)
1447
1843
 
1448
1844
  # Tell the predecessor that this job is done (keep only other successor jobs)