toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registries.
- toil/__init__.py +124 -86
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +39 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +651 -155
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +784 -397
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1137 -534
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +1031 -349
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +772 -412
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +204 -58
- toil/lib/aws/utils.py +290 -213
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/dockstore.py +379 -0
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -105
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/history.py +1271 -0
- toil/lib/history_submission.py +681 -0
- toil/lib/humanize.py +6 -2
- toil/lib/io.py +121 -12
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +83 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +125 -87
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/lib/trs.py +390 -0
- toil/lib/web.py +38 -0
- toil/options/common.py +850 -402
- toil/options/cwl.py +185 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +283 -180
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +84 -55
- toil/server/utils.py +56 -31
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +183 -65
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +265 -49
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +375 -72
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_history.py +212 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/lib/test_trs.py +161 -0
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +6 -6
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3528 -1053
- toil/worker.py +370 -149
- toil-8.1.0b1.dist-info/METADATA +178 -0
- toil-8.1.0b1.dist-info/RECORD +259 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/worker.py
CHANGED
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import atexit
 import base64
 import copy
 import json
@@ -22,26 +23,36 @@ import shutil
 import signal
 import socket
 import stat
+import subprocess
 import sys
+import threading
 import time
 import traceback
+from collections.abc import Iterator
 from contextlib import contextmanager
-from typing import Any, Callable,
+from typing import Any, Callable, Optional

 from configargparse import ArgParser

 from toil import logProcessContext
 from toil.common import Config, Toil, safeUnpickleFromStream
-from toil.cwl.utils import (
-
+from toil.cwl.utils import (
+    CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
+    CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
+)
 from toil.deferred import DeferredFunctionManager
 from toil.fileStores.abstractFileStore import AbstractFileStore
-from toil.job import
+from toil.job import (
+    CheckpointJobDescription,
+    DebugStoppingPointReached,
+    Job,
+    JobDescription,
+)
 from toil.jobStores.abstractJobStore import AbstractJobStore
 from toil.lib.expando import MagicExpando
 from toil.lib.io import make_public_dir
 from toil.lib.resources import ResourceMonitor
-from toil.statsAndLogging import configure_root_logger,
+from toil.statsAndLogging import configure_root_logger, install_log_color, set_log_level

 logger = logging.getLogger(__name__)

@@ -49,10 +60,11 @@ logger = logging.getLogger(__name__)
 class StatsDict(MagicExpando):
     """Subclass of MagicExpando for type-checking purposes."""

-    jobs:
+    jobs: list[MagicExpando]

-
-
+def nextChainable(
+    predecessor: JobDescription, job_store: AbstractJobStore, config: Config
+) -> Optional[JobDescription]:
     """
     Returns the next chainable job's JobDescription after the given predecessor
     JobDescription, if one exists, or None if the chain must terminate.
@@ -61,24 +73,41 @@ def nextChainable(predecessor: JobDescription, job_store: AbstractJobStore, conf
     :param job_store: The JobStore to fetch JobDescriptions from.
     :param config: The configuration for the current run.
     """
-    #If no more jobs to run or services not finished, quit
-    if
-
-
+    # If no more jobs to run or services not finished, quit
+    if (
+        predecessor.nextSuccessors() is None
+        or len(predecessor.services) > 0
+        or (
+            isinstance(predecessor, CheckpointJobDescription)
+            and predecessor.checkpoint is not None
+        )
+    ):
+        logger.debug(
+            "Stopping running chain of jobs: no successors: %s, services: %s, checkpoint: %s",
+            predecessor.nextSuccessors() is None,
+            len(predecessor.services),
+            (
+                isinstance(predecessor, CheckpointJobDescription)
+                and predecessor.checkpoint is not None
+            ),
+        )
         return None

-
-    #Get the next set of jobs to run
+    # Get the next set of jobs to run
     jobs = list(predecessor.nextSuccessors() or set())
     if len(jobs) == 0:
         # If there are no jobs, we might just not have any children.
-        logger.debug(
+        logger.debug(
+            "Stopping running chain of jobs because job has no ready children or follow-ons"
+        )
         return None

-    #If there are 2 or more jobs to run in parallel we quit
+    # If there are 2 or more jobs to run in parallel we quit
     if len(jobs) >= 2:
-        logger.debug(
-
+        logger.debug(
+            "No more jobs can run in series by this worker," " it's got %i successors",
+            len(jobs),
+        )
         logger.debug("Two distinct successors are %s and %s", jobs[0], jobs[1])
         return None

@@ -90,8 +119,8 @@ def nextChainable(predecessor: JobDescription, job_store: AbstractJobStore, conf
     # Load the successor JobDescription
     successor = job_store.load_job(successorID)

-    #We check the requirements of the successor to see if we can run it
-    #within the current worker
+    # We check the requirements of the successor to see if we can run it
+    # within the current worker
     if successor.memory > predecessor.memory:
         logger.debug("We need more memory for the next job, so finishing")
         return None
@@ -102,14 +131,20 @@ def nextChainable(predecessor: JobDescription, job_store: AbstractJobStore, conf
         logger.debug("We need more disk for the next job, so finishing")
         return None
     if successor.preemptible != predecessor.preemptible:
-        logger.debug(
+        logger.debug(
+            "Preemptibility is different for the next job, returning to the leader"
+        )
         return None
     if successor.predecessorNumber > 1:
-        logger.debug(
+        logger.debug(
+            "The next job has multiple predecessors; we must return to the leader."
+        )
         return None

     if len(successor.services) > 0:
-        logger.debug(
+        logger.debug(
+            "The next job requires services that will not yet be started; we must return to the leader."
+        )
         return None

     if isinstance(successor, CheckpointJobDescription):
@@ -117,7 +152,11 @@ def nextChainable(predecessor: JobDescription, job_store: AbstractJobStore, conf
         logger.debug("Next job is checkpoint, so finishing")
         return None

-    if
+    if (
+        not config.run_local_jobs_on_workers
+        and predecessor.local
+        and not successor.local
+    ):
         # This job might be running on the leader, but the next job may not.
         #
         # TODO: Optimize by detecting whether we actually are on the leader,
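
Reviewer note: every check in the hunks above enforces the same invariant — a successor is chained into the current worker only if it demands nothing beyond what the predecessor's slot already provides. Condensed into a single predicate (an illustrative sketch, not a Toil API; the attribute names come from the diff, and the real function performs additional checks not repeated here):

# Sketch of the chaining gate in nextChainable(); can_chain is a
# hypothetical helper name used only for this illustration.
def can_chain(predecessor, successor) -> bool:
    if successor.memory > predecessor.memory:
        return False  # would need a bigger memory reservation
    if successor.disk > predecessor.disk:
        return False  # would need more scratch disk
    if successor.preemptible != predecessor.preemptible:
        return False  # must run on a different class of node
    if successor.predecessorNumber > 1:
        return False  # leader must collect the other predecessors first
    if len(successor.services) > 0:
        return False  # services are only started by the leader
    return True
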
@@ -128,6 +167,86 @@ def nextChainable(predecessor: JobDescription, job_store: AbstractJobStore, conf
     # Made it through! This job is chainable.
     return successor

+def unstick_worker(interval: float = 120, timeout: float = 120) -> None:
+    """
+    Thread function that tries to prevent the process from getting stuck.
+
+    Meant to be used as a daemon thread: does not have a shutdown signal but
+    cleans up on exit.
+
+    :param interval: Try to unstick the process at intervals of this many
+        seconds.
+    :param timeout: Stop child processes that take longer than this many
+        seconds to finish.
+    """
+
+    # We've observed Toil getting stuck reading the job from the job store,
+    # either due to a problem with the FileJobStore or with local temp storage,
+    # but then get unstuck as soon as someone logged in and ran lsof on the
+    # Toil process. So we make sure to do that to ourselves every once in a
+    # while as long as the worker is running.
+
+    # Figure out our process ID
+    pid = os.getpid()
+
+    child: Optional[subprocess.Popen[bytes]] = None
+
+    def clean_up_child() -> None:
+        """
+        Cleanup function to run at daemon thread shutdown when the main thread
+        terminates without shutting us down.
+
+        Also used to kill the child process if it takes too long.
+        """
+        if child is not None:
+            # Kill the child immediately if it is running
+            child.kill()
+            try:
+                # Wait one last time to try and reap the child process
+                child.wait(timeout=5)
+            except subprocess.TimeoutExpired:
+                pass
+
+    atexit.register(clean_up_child)
+
+    # TODO: If we handle daemon thread shutdown just fine, why do we bother
+    # with all the event stuff? Why not cut it?
+
+    # Wait the interval before trying the first unstick
+    time.sleep(interval)
+
+    while True:
+        # Run an lsof on our PID, which has been observed to unstick reads.
+        #
+        # We rely on the thread being able to go away and atexit() hooks
+        # happening in the middle of a wait with a timeout.
+        #
+        # We also want to handle the case where the child process gets so
+        # gummed up that it can't exit when killed.
+
+        # Preserve errors form child process but not output
+        child = subprocess.Popen(
+            ["lsof", "-p", str(pid)],
+            stdin=subprocess.DEVNULL,
+            stdout=subprocess.DEVNULL,
+        )
+        try:
+            child.wait(timeout=timeout)
+        except subprocess.TimeoutExpired:
+            logger.warning("Running lsof took too long!")
+            clean_up_child()
+            if child.returncode is None:
+                # Kill didn't take
+                logger.warning("Could not promptly kill child process: %s", child.pid)
+
+        if child.returncode != 0:
+            # Something went wrong, which is suspicious. Either it failed or it
+            # timed out and could not be killed promptly.
+            logger.warning("Could not list open files on ourselves. Return code: %s", child.returncode)
+
+        # Wait the interval.
+        time.sleep(interval)
+
 def workerScript(
     job_store: AbstractJobStore,
     config: Config,
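
Reviewer note: unstick_worker() is a daemon-watchdog pattern — a side thread that periodically probes the process from the outside (here with lsof, which the authors observed to unwedge stuck job-store reads) and relies on daemon-thread semantics plus an atexit hook for cleanup instead of an explicit shutdown signal. A minimal standalone version of the same pattern (a sketch under those assumptions; the probe command and names are illustrative, not Toil's API):

import atexit
import os
import subprocess
import threading
import time

def watchdog(interval: float = 120.0, timeout: float = 120.0) -> None:
    """Periodically run an external probe (lsof) against our own PID."""
    pid = os.getpid()
    child = None

    def reap() -> None:
        # If the process exits mid-probe, kill and reap the probe child.
        if child is not None and child.poll() is None:
            child.kill()
            try:
                child.wait(timeout=5)
            except subprocess.TimeoutExpired:
                pass

    atexit.register(reap)
    while True:
        time.sleep(interval)
        child = subprocess.Popen(
            ["lsof", "-p", str(pid)],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
        )
        try:
            child.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            reap()  # the probe itself got stuck; kill it and carry on

# Started exactly as in the diff: as a daemon thread, so it can never
# keep the worker process alive on its own.
threading.Thread(target=watchdog, daemon=True).start()
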
@@ -135,7 +254,7 @@ def workerScript(
     job_store_id: str,
     redirect_output_to_log_file: bool = True,
     local_worker_temp_dir: Optional[str] = None,
-    debug_flags: Optional[
+    debug_flags: Optional[set[str]] = None,
 ) -> int:
     """
     Worker process script, runs a job.
@@ -162,7 +281,7 @@ def workerScript(
     logger.debug("Worker started for job %s...", job_name)

     ##########################################
-    #Create the worker killer, if requested
+    # Create the worker killer, if requested
     ##########################################

     logFileByteReportLimit = config.maxLogFileSize
@@ -203,10 +322,17 @@ def workerScript(
     # before it does. Either way, init will have to clean it up for us.

     ##########################################
-    #
+    # Create the worker unsticker
     ##########################################
+    unstick_thread = threading.Thread(target=unstick_worker, args=())
+    unstick_thread.daemon = True
+    unstick_thread.start()

-
+    ##########################################
+    # Load the environment for the job
+    ##########################################
+
+    # First load the environment for the job.
     with job_store.read_shared_file_stream("environment.pickle") as fileHandle:
         environment = safeUnpickleFromStream(fileHandle)
     env_reject = {
@@ -224,15 +350,15 @@ def workerScript(
         "XDG_SESSION_ID",
         "XDG_RUNTIME_DIR",
         "XDG_DATA_DIRS",
-        "DBUS_SESSION_BUS_ADDRESS"
+        "DBUS_SESSION_BUS_ADDRESS",
     }
     for i in environment:
         if i == "PATH":
             # Handle path specially. Sometimes e.g. leader may not include
             # /bin, but the Toil appliance needs it.
-            if i in os.environ and os.environ[i] !=
+            if i in os.environ and os.environ[i] != "":
                 # Use the provided PATH and then the local system's PATH
-                os.environ[i] = environment[i] +
+                os.environ[i] = environment[i] + ":" + os.environ[i]
             else:
                 # Use the provided PATH only
                 os.environ[i] = environment[i]
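
Reviewer note: the PATH handling restored above prepends the leader-supplied PATH to the worker's own, so appliance-local directories such as /bin stay reachable while the leader's entries win on conflicts. The merge in isolation (a sketch; merge_path is an illustrative helper, not part of Toil):

import os

def merge_path(leader_path: str) -> str:
    """Combine the leader's PATH with the local one, leader entries first."""
    local_path = os.environ.get("PATH", "")
    if local_path != "":
        return leader_path + ":" + local_path
    return leader_path

# merge_path("/opt/toil/bin") on a host where PATH=/usr/bin:/bin
# yields "/opt/toil/bin:/usr/bin:/bin".
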
@@ -240,41 +366,45 @@ def workerScript(
             os.environ[i] = environment[i]
     # sys.path is used by __import__ to find modules
     if "PYTHONPATH" in environment:
-        for e in environment["PYTHONPATH"].split(
-            if e !=
+        for e in environment["PYTHONPATH"].split(":"):
+            if e != "":
                 sys.path.append(e)

     ##########################################
-    #Setup the temporary directories.
+    # Setup the temporary directories.
     ##########################################
     # Dir to put all this worker's temp files in.
     if config.workflowID is None:
         raise RuntimeError("The worker workflow ID was never set.")
     toilWorkflowDir = Toil.getLocalWorkflowDir(config.workflowID, config.workDir)
     # Dir to put lock files in, ideally not on NFS.
-    toil_coordination_dir = Toil.get_local_workflow_coordination_dir(
+    toil_coordination_dir = Toil.get_local_workflow_coordination_dir(
+        config.workflowID, config.workDir, config.coordination_dir
+    )
     if local_worker_temp_dir is None:
         # Invent a temp directory to work in
         local_worker_temp_dir = make_public_dir(toilWorkflowDir)
     os.chmod(local_worker_temp_dir, 0o755)

     ##########################################
-    #Setup the logging
+    # Setup the logging
     ##########################################

-    #This is mildly tricky because we don't just want to
-    #redirect stdout and stderr for this Python process; we want to redirect it
-    #for this process and all children. Consequently, we can't just replace
-    #sys.stdout and sys.stderr; we need to mess with the underlying OS-level
-    #file descriptors. See <http://stackoverflow.com/a/11632982/402891>
+    # This is mildly tricky because we don't just want to
+    # redirect stdout and stderr for this Python process; we want to redirect it
+    # for this process and all children. Consequently, we can't just replace
+    # sys.stdout and sys.stderr; we need to mess with the underlying OS-level
+    # file descriptors. See <http://stackoverflow.com/a/11632982/402891>

-    #When we start, standard input is file descriptor 0, standard output is
-    #file descriptor 1, and standard error is file descriptor 2.
+    # When we start, standard input is file descriptor 0, standard output is
+    # file descriptor 1, and standard error is file descriptor 2.

     # Do we even want to redirect output? Let the config make us not do it.
-    redirect_output_to_log_file =
+    redirect_output_to_log_file = (
+        redirect_output_to_log_file and not config.disableWorkerOutputCapture
+    )

-    #What file do we want to point FDs 1 and 2 to?
+    # What file do we want to point FDs 1 and 2 to?
     tempWorkerLogPath = os.path.join(local_worker_temp_dir, "worker_log.txt")

     if redirect_output_to_log_file:
@@ -322,6 +452,7 @@ def workerScript(

     def blockFn() -> bool:
         return True
+
     job = None
     try:

@@ -365,7 +496,10 @@ def workerScript(
         # If a checkpoint exists, restart from the checkpoint
         ##########################################

-        if
+        if (
+            isinstance(jobDesc, CheckpointJobDescription)
+            and jobDesc.checkpoint is not None
+        ):
             # The job is a checkpoint, and is being restarted after previously completing
             logger.debug("Job is a checkpoint")
             # If the checkpoint still has extant successors or services, its
@@ -381,23 +515,33 @@ def workerScript(
             # Otherwise, the job and successors are done, and we can cleanup stuff we couldn't clean
             # because of the job being a checkpoint
             else:
-                logger.debug(
-
-
+                logger.debug(
+                    "The checkpoint jobs seems to have completed okay, removing any checkpoint files to delete."
+                )
+                # Delete any remnant files
+                list(
+                    map(
+                        job_store.delete_file,
+                        list(
+                            filter(
+                                job_store.file_exists, jobDesc.checkpointFilesToDelete
+                            )
+                        ),
+                    )
+                )

         ##########################################
-        #Setup the stats
+        # Setup the stats
         ##########################################

-
-
-
-        startClock = ResourceMonitor.get_total_cpu_time()
+        # Remember the cores from the first job, which is how many we have reserved for us.
+        statsDict.workers.requested_cores = jobDesc.cores
+        startClock = ResourceMonitor.get_total_cpu_time()

         startTime = time.time()
         while True:
             ##########################################
-            #Run the job body, if there is one
+            # Run the job body, if there is one
             ##########################################

             logger.info("Working on job %s", jobDesc)
@@ -417,33 +561,48 @@ def workerScript(
                     job.set_debug_flag(flag)

             # Create a fileStore object for the job
-            fileStore = AbstractFileStore.createFileStore(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            fileStore = AbstractFileStore.createFileStore(
+                job_store,
+                jobDesc,
+                local_worker_temp_dir,
+                blockFn,
+                caching=config.caching,
+            )
+            try:
+                with job._executor(
+                    stats=statsDict, fileStore=fileStore
+                ):
+                    with deferredFunctionManager.open() as defer:
+                        with fileStore.open(job):
+                            # Get the next block function to wait on committing this job
+                            blockFn = fileStore.waitForCommit
+
+                            # Run the job, save new successors, and set up
+                            # locally (but don't commit) successor
+                            # relationships and job completion.
+                            # Pass everything as name=value because Cactus
+                            # likes to override _runner when it shouldn't and
+                            # it needs some hope of finding the arguments it
+                            # wants across multiple Toil versions. We also
+                            # still pass a jobGraph argument to placate old
+                            # versions of Cactus.
+                            job._runner(
+                                jobGraph=None,
+                                jobStore=job_store,
+                                fileStore=fileStore,
+                                defer=defer,
+                            )
+
+                            # When the executor for the job finishes it will
+                            # kick off a commit with the link to the job body
+                            # cut.
+            finally:
+                # Accumulate messages from this job & any subsequent chained jobs.
+                # Keep the messages even if the job fails.
+                statsDict.workers.logs_to_leader += fileStore.logging_messages
+                statsDict.workers.logging_user_streams += (
+                    fileStore.logging_user_streams
+                )

             logger.info("Completed body for %s", jobDesc)

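
Reviewer note: the reformatted execution block nests three context managers — the job's executor (stats), the deferred-function manager, and the file store — so teardown runs innermost-first even when the job body raises, while the surrounding try/finally still collects the file store's logs afterwards. The control flow in skeleton form (illustrative names; only the nesting order comes from the diff):

from contextlib import contextmanager

@contextmanager
def scope(name: str):
    print(f"enter {name}")
    try:
        yield
    finally:
        # Runs even if the body raises, innermost scope first.
        print(f"exit {name}")

try:
    with scope("executor"):          # collects per-job stats
        with scope("deferred"):      # registers cleanup callbacks
            with scope("filestore"): # stages files, gathers logs
                raise RuntimeError("job body failed")
except RuntimeError:
    # Mirrors the diff's finally: block, which accumulates
    # fileStore.logging_messages whether or not the job succeeded.
    print("accumulate logs for the leader")
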
@@ -458,7 +617,7 @@ def workerScript(
                 raise RuntimeError("The termination flag is set")

             ##########################################
-            #Establish if we can run another job within the worker
+            # Establish if we can run another job within the worker
             ##########################################
             successor = nextChainable(jobDesc, job_store, config)
             if successor is None or config.disableChaining:
@@ -481,9 +640,13 @@ def workerScript(

             # Make sure nothing has gone wrong and we can really chain
             if jobDesc.memory < successor.memory:
-                raise RuntimeError(
+                raise RuntimeError(
+                    "Cannot chain jobs. A job's memory cannot be less than it's successor."
+                )
             if jobDesc.cores < successor.cores:
-                raise RuntimeError(
+                raise RuntimeError(
+                    "Cannot chain jobs. A job's cores cannot be less than it's successor."
+                )

             # Save the successor's original ID, so we can clean it (and its
             # body) up after we finish executing it.
@@ -499,8 +662,13 @@ def workerScript(

             # Build a fileStore to update the job and commit the replacement.
             # TODO: can we have a commit operation without an entire FileStore???
-            fileStore = AbstractFileStore.createFileStore(
-
+            fileStore = AbstractFileStore.createFileStore(
+                job_store,
+                jobDesc,
+                local_worker_temp_dir,
+                blockFn,
+                caching=config.caching,
+            )

             # Update blockFn to wait for that commit operation.
             blockFn = fileStore.waitForCommit
@@ -511,40 +679,53 @@ def workerScript(
         logger.debug("Starting the next job")

         ##########################################
-        #Finish up the stats
+        # Finish up the stats
         ##########################################
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        totalCPUTime, totalMemoryUsage = (
+            ResourceMonitor.get_total_cpu_time_and_memory_usage()
+        )
+        statsDict.workers.time = str(time.time() - startTime)
+        statsDict.workers.clock = str(totalCPUTime - startClock)
+        statsDict.workers.memory = str(totalMemoryUsage)
+        # Say the worker used the max disk we saw from any job
+        max_bytes = 0
+        for job_stats in statsDict.jobs:
+            if "disk" in job_stats:
+                max_bytes = max(max_bytes, int(job_stats.disk))
+        statsDict.workers.disk = str(max_bytes)
+        # Count the jobs executed.
+        # TODO: toil stats could compute this but its parser is too general to hook into simply.
+        statsDict.workers.jobs_run = len(statsDict.jobs)

         # log the worker log path here so that if the file is truncated the path can still be found
         if redirect_output_to_log_file:
-            logger.info(
-
-
+            logger.info(
+                "Worker log can be found at %s. Set --cleanWorkDir to retain this log",
+                local_worker_temp_dir,
+            )
+
+        logger.info(
+            "Finished running the chain of jobs on this node, we ran for a total of %f seconds",
+            time.time() - startTime,
+        )

     ##########################################
-    #Trapping where worker goes wrong
+    # Trapping where worker goes wrong
     ##########################################
     except DebugStoppingPointReached:
         # Job wants the worker to stop for debugging
         raise
-    except
+    except (
+        BaseException
+    ) as e:  # Case that something goes wrong in worker, or we are asked to stop
         if not isinstance(e, SystemExit):
-            logger.critical(
-
+            logger.critical(
+                "Worker crashed with traceback:\n%s", traceback.format_exc()
+            )
+        logger.error(
+            "Exiting the worker because of a failed job on host %s",
+            socket.gethostname(),
+        )
         if isinstance(e, CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION):
             # We need to inform the leader that this is a CWL workflow problem
             # and it needs to inform its caller.
@@ -552,6 +733,15 @@ def workerScript(
         elif isinstance(e, SystemExit) and isinstance(e.code, int) and e.code != 0:
             # We're meant to be exiting with a particular code.
             failure_exit_code = e.code
+        else:
+            try:
+                from WDL.runtime.error import CommandFailed
+
+                if isinstance(e, CommandFailed):
+                    failure_exit_code = e.exit_status
+            except ImportError:
+                # WDL dependency not available
+                pass
         AbstractFileStore._terminateEvent.set()
     finally:
         # Get rid of our deferred function manager now so we can't mistake it
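
Reviewer note: the new else branch extends the exit-code plumbing — where SystemExit already propagated its code, a WDL CommandFailed now propagates the failed command's status, and the guarded import keeps workers without the optional WDL dependency working. The mapping as a standalone function (a sketch; exit_code_for and the default of 1 are illustrative assumptions, not Toil's code):

def exit_code_for(e: BaseException) -> int:
    """Map a caught exception to the process exit code to report."""
    if isinstance(e, SystemExit) and isinstance(e.code, int) and e.code != 0:
        return e.code
    try:
        # Optional dependency: present only when the WDL extra is installed.
        from WDL.runtime.error import CommandFailed

        if isinstance(e, CommandFailed):
            return e.exit_status
    except ImportError:
        pass
    return 1  # generic failure
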
@@ -567,16 +757,15 @@ def workerScript(
             logger.debug("cwltool.main._terminate_processess exception: %s", (e))
             raise e

-
     ##########################################
-    #Wait for the asynchronous chain of writes/updates to finish
+    # Wait for the asynchronous chain of writes/updates to finish
     ##########################################

     blockFn()

     ##########################################
-    #All the asynchronous worker/update threads must be finished now,
-    #so safe to test if they completed okay
+    # All the asynchronous worker/update threads must be finished now,
+    # so safe to test if they completed okay
     ##########################################

     if AbstractFileStore._terminateEvent.is_set():
@@ -589,7 +778,7 @@ def workerScript(
         jobAttemptFailed = True

     ##########################################
-    #Cleanup
+    # Cleanup
     ##########################################

     # Close the worker logging
@@ -628,40 +817,55 @@ def workerScript(
             jobDesc.jobStoreID, cleanup=True
         )
         with job_store.update_file_stream(logJobStoreFileID) as w:
-            with open(tempWorkerLogPath,
-                if os.path.getsize(tempWorkerLogPath) > logFileByteReportLimit !=0:
+            with open(tempWorkerLogPath, "rb") as f:
+                if os.path.getsize(tempWorkerLogPath) > logFileByteReportLimit != 0:
                     if logFileByteReportLimit > 0:
-                        f.seek(
+                        f.seek(
+                            -logFileByteReportLimit, 2
+                        )  # seek to last tooBig bytes of file
                     elif logFileByteReportLimit < 0:
-                        f.seek(
+                        f.seek(
+                            logFileByteReportLimit, 0
+                        )  # seek to first tooBig bytes of file
                 # Dump the possibly-invalid-Unicode bytes into the log file
-                w.write(f.read())
+                w.write(f.read())  # TODO load file using a buffer
         # Commit log file reference back to JobStore
         job_store.update_job(jobDesc)

-    elif (
-
-
+    elif (
+        debugging or (config.writeLogsFromAllJobs and not jobDesc.local)
+    ) and redirect_output_to_log_file:  # write log messages
+        with open(tempWorkerLogPath, "rb") as logFile:
             if os.path.getsize(tempWorkerLogPath) > logFileByteReportLimit != 0:
                 if logFileByteReportLimit > 0:
-                    logFile.seek(
+                    logFile.seek(
+                        -logFileByteReportLimit, 2
+                    )  # seek to last tooBig bytes of file
                 elif logFileByteReportLimit < 0:
-                    logFile.seek(
+                    logFile.seek(
+                        logFileByteReportLimit, 0
+                    )  # seek to first tooBig bytes of file
             # Make sure lines are Unicode so they can be JSON serialized as part of the dict.
             # We may have damaged the Unicode text by cutting it at an arbitrary byte so we drop bad characters.
-            logMessages = [
+            logMessages = [
+                line.decode("utf-8", "skip") for line in logFile.read().splitlines()
+            ]
         statsDict.logs.names = [names.stats_name for names in jobDesc.get_chain()]
         statsDict.logs.messages = logMessages

-
-
-
-
-        job_store.write_logs(json.dumps(statsDict, ensure_ascii=True))
+    # We have stats/logging to report back.
+    # We report even if the job attempt failed.
+    # TODO: Will that upset analysis of the stats?
+    job_store.write_logs(json.dumps(statsDict, ensure_ascii=True))

     # Remove the temp dir
     cleanUp = config.cleanWorkDir
-    if
+    if (
+        cleanUp == "always"
+        or (cleanUp == "onSuccess" and not jobAttemptFailed)
+        or (cleanUp == "onError" and jobAttemptFailed)
+    ):
+
         def make_parent_writable(func: Callable[[str], Any], path: str, _: Any) -> None:
             """
             When encountering an error removing a file or directory, make sure
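
Reviewer note: both log-shipping branches above cap the log with a single seek() before reading: a positive limit keeps the tail of the file (seek relative to the end, whence=2), a negative limit keeps the head, and the chained comparison size > limit != 0 makes 0 mean "no limit". The same policy in isolation (a sketch; read_capped is an illustrative helper, and it bounds the head case with a sized read() rather than a seek):

import os

def read_capped(path: str, limit: int) -> bytes:
    """Return the tail (limit > 0), head (limit < 0), or all (limit == 0) of a file."""
    with open(path, "rb") as f:
        size = os.path.getsize(path)
        if limit > 0 and size > limit:
            f.seek(-limit, 2)       # keep only the last `limit` bytes
            return f.read()
        if limit < 0 and size > -limit:
            return f.read(-limit)   # keep only the first |limit| bytes
        return f.read()             # no cap
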
@@ -672,9 +876,17 @@ def workerScript(
             """
             # Just chmod it for rwx for user. This can't work anyway if it isn't ours.
             try:
-                os.chmod(
+                os.chmod(
+                    os.path.dirname(path), stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR
+                )
             except PermissionError as e:
-                logger.error(
+                logger.error(
+                    "Could not set permissions on %s to allow cleanup of %s: %s",
+                    os.path.dirname(path),
+                    path,
+                    e,
+                )
+
         shutil.rmtree(local_worker_temp_dir, onerror=make_parent_writable)

     # This must happen after the log file is done with, else there is no place to put the log
@@ -683,13 +895,13 @@ def workerScript(
     # We can now safely get rid of the JobDescription, and all jobs it chained up
     job_store.delete_job(merged_in.job_store_id)

-
     if jobAttemptFailed:
         return failure_exit_code
     else:
         return 0

-
+
+def parse_args(args: list[str]) -> Any:
     """
     Parse command-line arguments to the worker.
     """
@@ -703,26 +915,33 @@ def parse_args(args: List[str]) -> Any:
     # Now add all the options to it

     # Base required job information
-    parser.add_argument("jobName", type=str,
-
-
-
-
-
+    parser.add_argument("jobName", type=str, help="Text name of the job being run")
+    parser.add_argument(
+        "jobStoreLocator",
+        type=str,
+        help="Information required to connect to the job store",
+    )
+    parser.add_argument(
+        "jobStoreID", type=str, help="ID of the job within the job store"
+    )

     # Additional worker abilities
-    parser.add_argument(
+    parser.add_argument(
+        "--context",
+        default=[],
+        action="append",
         help="""Pickled, base64-encoded context manager(s) to run job inside of.
         Allows the Toil leader to pass setup and cleanup work provided by the
         batch system, in the form of pickled Python context manager objects,
         that the worker can then run before/after the job on the batch
-        system's behalf."""
+        system's behalf.""",
+    )

     return parser.parse_args(args)


 @contextmanager
-def in_contexts(contexts:
+def in_contexts(contexts: list[str]) -> Iterator[None]:
     """
     Unpickle and enter all the pickled, base64-encoded context managers in the
     given list. Then do the body, then leave them all.
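
Reviewer note: --context lets a batch system hand the worker arbitrary setup/teardown as pickled, base64-encoded context managers, and in_contexts() decodes and enters each one around the job. How such a string is produced and consumed end to end (a round-trip sketch; Timer is an illustrative context manager, not something Toil ships):

import base64
import pickle
import time

class Timer:
    """Toy context manager: times whatever runs inside it."""

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, *exc):
        print(f"took {time.time() - self.start:.3f}s")
        return False  # never swallow exceptions

# Leader/batch-system side: pickle, then base64-encode for the command line.
encoded = base64.b64encode(pickle.dumps(Timer())).decode("utf-8")

# Worker side, exactly as in in_contexts(): decode, unpickle, enter.
manager = pickle.loads(base64.b64decode(encoded.encode("utf-8")))
with manager:
    time.sleep(0.1)  # stands in for the job body
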
@@ -736,10 +955,12 @@ def in_contexts(contexts: List[str]) -> Iterator[None]:
         rest = contexts[1:]

         try:
-            manager = pickle.loads(base64.b64decode(first.encode(
+            manager = pickle.loads(base64.b64decode(first.encode("utf-8")))
         except:
             exc_info = sys.exc_info()
-            logger.error(
+            logger.error(
+                "Exception while unpickling context manager: ", exc_info=exc_info
+            )
             raise

         with manager:
@@ -749,14 +970,14 @@ def in_contexts(contexts: List[str]) -> Iterator[None]:
         yield


-def main(argv: Optional[
+def main(argv: Optional[list[str]] = None) -> None:
     if argv is None:
         argv = sys.argv
     # Parse our command line
     options = parse_args(argv)

     ##########################################
-    #Load the jobStore/config file
+    # Load the jobStore/config file
     ##########################################

     job_store = Toil.resumeJobStore(options.jobStoreLocator)