toil 8.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +4 -4
- toil/batchSystems/options.py +1 -0
- toil/batchSystems/slurm.py +227 -83
- toil/common.py +161 -45
- toil/cwl/cwltoil.py +31 -10
- toil/job.py +47 -38
- toil/jobStores/aws/jobStore.py +46 -10
- toil/lib/aws/session.py +14 -3
- toil/lib/aws/utils.py +92 -35
- toil/lib/dockstore.py +379 -0
- toil/lib/ec2nodes.py +3 -2
- toil/lib/history.py +1271 -0
- toil/lib/history_submission.py +681 -0
- toil/lib/io.py +22 -1
- toil/lib/misc.py +18 -0
- toil/lib/retry.py +10 -10
- toil/lib/{integration.py → trs.py} +95 -46
- toil/lib/web.py +38 -0
- toil/options/common.py +17 -2
- toil/options/cwl.py +10 -0
- toil/provisioners/gceProvisioner.py +4 -4
- toil/server/cli/wes_cwl_runner.py +3 -3
- toil/server/utils.py +2 -3
- toil/statsAndLogging.py +35 -1
- toil/test/batchSystems/test_slurm.py +172 -2
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +105 -2
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/lib/test_history.py +212 -0
- toil/test/lib/test_trs.py +161 -0
- toil/test/wdl/wdltoil_test.py +1 -1
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +23 -9
- toil/worker.py +113 -33
- {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/METADATA +9 -4
- {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/RECORD +40 -34
- {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
- toil/test/lib/test_integration.py +0 -104
- {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
- {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
- {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/statsAndLogging.py
CHANGED
@@ -24,6 +24,7 @@ from typing import IO, TYPE_CHECKING, Any, Callable, Optional, Union
 
 from toil.lib.conversions import strtobool
 from toil.lib.expando import Expando
+from toil.lib.history import HistoryManager
 from toil.lib.resources import ResourceMonitor
 
 if TYPE_CHECKING:
@@ -168,6 +169,7 @@ class StatsAndLogging:
         The following function is used for collating stats/reporting log messages from the workers.
         Works inside of a thread, collates as long as the stop flag is not True.
         """
+
         # Overall timing
         startTime = time.time()
         startClock = ResourceMonitor.get_total_cpu_time()
@@ -231,8 +233,40 @@
             )
             cls.writeLogFiles(jobNames, messages, config=config)
 
+            try:
+                jobs = stats.jobs
+            except AttributeError:
+                pass
+            else:
+                for job in jobs:
+                    try:
+                        # Here we're talking to job._executor which fills in these stats.
+
+                        # Convince MyPy we won't be sent any job stats without
+                        # a workflow ID. You can't set up the job store without
+                        # one, but if we're somehow missing one, keep the stats
+                        # and logging thread up.
+                        assert config.workflowID is not None
+
+                        # TODO: Use better job names!
+                        HistoryManager.record_job_attempt(
+                            config.workflowID,
+                            config.workflowAttemptNumber,
+                            job.class_name,
+                            job.succeeded == "True",
+                            float(job.start),
+                            float(job.time),
+                            cores=float(job.requested_cores),
+                            cpu_seconds=float(job.clock),
+                            memory_bytes=int(job.memory) * 1024,
+                            disk_bytes=int(job.disk)
+                        )
+                    except:
+                        logger.exception("Could not record job attempt in history!")
+                        # Keep going. Don't fail the workflow for history-related issues.
+
         while True:
-            # This is
+            # This is an indirect way of getting a message to the thread to exit
             if stop.is_set():
                 jobStore.read_logs(callback)
                 break
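Note: the hunk above wires per-job worker stats into the new toil/lib/history.py HistoryManager. The sketch below restates the guarded recording pattern outside the aggregator, with a hypothetical stand-in for one entry of stats.jobs; only HistoryManager.record_job_attempt and its argument order are taken from the diff, while the stats object, workflow ID, and attempt number are illustrative.

from types import SimpleNamespace

from toil.lib.history import HistoryManager

# Hypothetical stand-in for one reported job; workers report fields as strings.
job = SimpleNamespace(class_name="ExampleJob", succeeded="True", start="100.0",
                      time="2.5", requested_cores="1", clock="2.0",
                      memory="2048", disk="1048576")

try:
    HistoryManager.record_job_attempt(
        "00000000-0000-0000-0000-000000000000",  # config.workflowID in the real code
        1,                                       # config.workflowAttemptNumber
        job.class_name,
        job.succeeded == "True",
        float(job.start),
        float(job.time),
        cores=float(job.requested_cores),
        cpu_seconds=float(job.clock),
        memory_bytes=int(job.memory) * 1024,     # * 1024 mirrors the diff's KiB-to-bytes conversion
        disk_bytes=int(job.disk),
    )
except Exception:
    # As in the aggregator, history problems must never fail the workflow.
    pass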
toil/test/batchSystems/test_slurm.py
CHANGED
@@ -1,17 +1,24 @@
+import errno
 import textwrap
 from queue import Queue
 
+import logging
 import pytest
+import sys
 
 import toil.batchSystems.slurm
 from toil.batchSystems.abstractBatchSystem import (
     EXIT_STATUS_UNAVAILABLE_VALUE,
     BatchJobExitReason,
+    BatchSystemSupport,
 )
 from toil.common import Config
 from toil.lib.misc import CalledProcessErrorStderr
 from toil.test import ToilTest
 
+logger = logging.getLogger(__name__)
+
+
 # TODO: Come up with a better way to mock the commands then monkey-patching the
 # command-calling functions.
 
@@ -29,6 +36,9 @@ def call_sacct(args, **_) -> str:
     1236|FAILED|0:2
     1236.extern|COMPLETED|0:0
     """
+    if sum(len(a) for a in args) > 1000:
+        # Simulate if the argument list is too long
+        raise OSError(errno.E2BIG, "Argument list is too long")
     # Fake output per fake job-id.
     sacct_info = {
         609663: "609663|FAILED|0:2\n609663.extern|COMPLETED|0:0\n",
@@ -173,14 +183,34 @@ def call_sacct_raises(*_):
         1, "sacct: error: Problem talking to the database: " "Connection timed out"
     )
 
+def call_sinfo(*_) -> str:
+    """
+    Simulate asking for partition info from Slurm
+    """
+    stdout = textwrap.dedent(
+        """\
+        PARTITION GRES TIMELIMIT PRIO_TIER CPUS MEMORY
+        short* (null) 1:00:00 500 256+ 1996800+
+        medium (null) 12:00:00 500 256+ 1996800+
+        long (null) 14-00:00:00 500 256+ 1996800+
+        gpu gpu:A100:8 7-00:00:00 5000 256 996800
+        gpu gpu:A5500:8 7-00:00:00 5000 256 1996800
+        high_priority gpu:A5500:8 7-00:00:00 65000 256 1996800
+        high_priority (null) 7-00:00:00 65000 256+ 1996800+
+        simple_nodelist gpu:A100:8 1:00 65000 256 996800
+        simple_nodelist gpu:A5500:8 1:00 65000 256 1996800
+        simple_nodelist (null) 1:00 65000 256+ 1996800+
+        """
+    )
+    return stdout
 
-class FakeBatchSystem:
+class FakeBatchSystem(BatchSystemSupport):
     """
     Class that implements a minimal Batch System, needed to create a Worker (see below).
     """
 
     def __init__(self):
-
+        super().__init__(self.__fake_config(), float("inf"), sys.maxsize, sys.maxsize)
 
     def getWaitDuration(self):
         return 10
@@ -198,8 +228,12 @@ class FakeBatchSystem:
 
         config.workflowID = str(uuid4())
         config.cleanWorkDir = "always"
+        toil.batchSystems.slurm.SlurmBatchSystem.setOptions(lambda o: setattr(config, o, None))
        return config
 
+# Make the mock class not have abstract methods anymore, even though we don't
+# implement them. See <https://stackoverflow.com/a/17345619>.
+FakeBatchSystem.__abstractmethods__ = set()
 
 class SlurmTest(ToilTest):
     """
@@ -262,6 +296,13 @@ class SlurmTest(ToilTest):
         result = self.worker._getJobDetailsFromSacct(list(expected_result))
         assert result == expected_result, f"{result} != {expected_result}"
 
+    def test_getJobDetailsFromSacct_argument_list_too_big(self):
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
+        expected_result = {i: (None, None) for i in range(2000)}
+        result = self.worker._getJobDetailsFromSacct(list(expected_result))
+        assert result == expected_result, f"{result} != {expected_result}"
+
+
     ####
     #### tests for _getJobDetailsFromScontrol()
     ####
@@ -449,3 +490,132 @@ class SlurmTest(ToilTest):
             pass
         else:
             assert False, "Exception CalledProcessErrorStderr not raised"
+
+    ###
+    ### Tests for partition selection
+    ##
+
+    def test_PartitionSet_get_partition(self):
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sinfo)
+        ps = toil.batchSystems.slurm.SlurmBatchSystem.PartitionSet()
+
+        # At zero. short will win because simple_nodelist has higher priority.
+        self.assertEqual(ps.get_partition(0), "short")
+        # Easily within the partition
+        self.assertEqual(ps.get_partition(10 * 60), "short")
+        # Exactly on the boundary
+        self.assertEqual(ps.get_partition(60 * 60), "short")
+        # Well within the next partition
+        self.assertEqual(ps.get_partition(2 * 60 * 60), "medium")
+        # Can only fit in long
+        self.assertEqual(ps.get_partition(8 * 24 * 60 * 60), "long")
+        # Could fit in gpu or long
+        self.assertEqual(ps.get_partition(6 * 24 * 60 * 60), "long")
+        # Can't fit in anything
+        with self.assertRaises(Exception):
+            ps.get_partition(365 * 24 * 60 * 60)
+
+    def test_PartitionSet_default_gpu_partition(self):
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sinfo)
+        ps = toil.batchSystems.slurm.SlurmBatchSystem.PartitionSet()
+
+        # Make sure we picked the useful-length GPU partition and not the super
+        # short one.
+        self.assertEqual(ps.default_gpu_partition.partition_name, "gpu")
+
+    def test_prepareSbatch_partition(self):
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sinfo)
+        ps = toil.batchSystems.slurm.SlurmBatchSystem.PartitionSet()
+        self.worker.boss.partitions = ps
+        # This is in seconds
+        self.worker.boss.config.slurm_time = 30
+
+        # Without a partition override in the environment, we should get the
+        # "short" partition for this job
+        command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
+        assert "--partition=short" in command
+
+        # With a partition override, we should not. But the override will be rewritten.
+        self.worker.boss.config.slurm_args = "--something --partition foo --somethingElse"
+        command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
+        assert "--partition=short" not in command
+        assert "--partition=foo" in command
+
+        # All ways of setting partition should work, including =
+        self.worker.boss.config.slurm_args = "--something --partition=foo --somethingElse"
+        command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
+        assert "--partition=short" not in command
+        assert "--partition=foo" in command
+
+        # And short options
+        self.worker.boss.config.slurm_args = "--something -p foo --somethingElse"
+        command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
+        assert "--partition=short" not in command
+        assert "--partition=foo" in command
+
+        # Partition settings from the config should override automatic selection
+        self.worker.boss.config.slurm_partition = "foobar"
+        self.worker.boss.config.slurm_args = "--something --somethingElse"
+        command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
+        assert "--partition=foobar" in command
+
+        # But they should be overridden by the argument overrides
+        self.worker.boss.config.slurm_args = "--something --partition=baz --somethingElse"
+        command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
+        assert "--partition=baz" in command
+
+    def test_prepareSbatch_time(self):
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sinfo)
+        ps = toil.batchSystems.slurm.SlurmBatchSystem.PartitionSet()
+        self.worker.boss.partitions = ps
+        # This is in seconds
+        self.worker.boss.config.slurm_time = 30
+
+        # Without a time override in the environment, we should use the normal
+        # time and the "short" partition
+        command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
+        logger.debug("Command: %s", command)
+        assert "--time=0:30" in command
+        assert "--partition=short" in command
+
+        # With a time override, we should use it, slightly translated, and it
+        # should change the selected partition.
+        self.worker.boss.config.slurm_args = "--something --time 10:00:00 --somethingElse"
+        command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
+        logger.debug("Command: %s", command)
+        assert "--partition=medium" in command
+        assert "--time=0:36000" in command
+
+        # All ways of setting time should work, including =
+        self.worker.boss.config.slurm_args = "--something --time=10:00:00 --somethingElse"
+        command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
+        logger.debug("Command: %s", command)
+        assert "--partition=medium" in command
+        assert "--time=0:36000" in command
+
+        # And short options
+        self.worker.boss.config.slurm_args = "--something -t 10:00:00 --somethingElse"
+        command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
+        logger.debug("Command: %s", command)
+        assert "--partition=medium" in command
+        assert "--time=0:36000" in command
+
+    def test_prepareSbatch_export(self):
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sinfo)
+        ps = toil.batchSystems.slurm.SlurmBatchSystem.PartitionSet()
+        self.worker.boss.partitions = ps
+
+        # Without any overrides, we need --export=ALL
+        command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
+        assert "--export=ALL" in command
+
+        # With overrides, we don't get --export=ALL
+        self.worker.boss.config.slurm_args = "--export=foo"
+        command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
+        assert "--export=ALL" not in command
+
+        # With --export-file, we don't get --export=ALL as documented.
+        self.worker.boss.config.slurm_args = "--export-file=./thefile.txt"
+        command = self.worker.prepareSbatch(1, 100, 5, "job5", None, None)
+        assert "--export=ALL" not in command
+
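Note: the new partition tests above drive SlurmBatchSystem.PartitionSet, which parses sinfo output and picks a partition whose time limit covers the job. A minimal sketch of the same flow outside the test harness; the trimmed sinfo text and plain attribute assignment (instead of pytest's monkeypatch) are illustrative, while PartitionSet and get_partition come from the diff.

import toil.batchSystems.slurm

def fake_sinfo(*args, **kwargs):
    # Trimmed version of the call_sinfo fixture above.
    return (
        "PARTITION GRES TIMELIMIT PRIO_TIER CPUS MEMORY\n"
        "short* (null) 1:00:00 500 256+ 1996800+\n"
        "medium (null) 12:00:00 500 256+ 1996800+\n"
        "long (null) 14-00:00:00 500 256+ 1996800+\n"
    )

# Redirect the module's sinfo call, then ask for a partition by wall-clock need (seconds).
toil.batchSystems.slurm.call_command = fake_sinfo
ps = toil.batchSystems.slurm.SlurmBatchSystem.PartitionSet()
assert ps.get_partition(10 * 60) == "short"       # fits the 1-hour partition
assert ps.get_partition(2 * 60 * 60) == "medium"  # needs the 12-hour partition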
toil/test/cwl/conftest.py
CHANGED
@@ -14,4 +14,43 @@
 
 # https://pytest.org/latest/example/pythoncollection.html
 
+import json
+import logging
+from io import StringIO
+from typing import Any, Dict, List, Optional, Tuple
+
+from cwltest import utils
+logger = logging.getLogger(__name__)
+
 collect_ignore = ["spec"]
+
+
+# Hook into Pytest for testing CWL conformance with Toil
+# https://pytest.org/en/6.2.x/writing_plugins.html?highlight=conftest#conftest-py-local-per-directory-plugins
+# See cwltool's reference implementation:
+# https://github.com/common-workflow-language/cwltool/blob/05af6c1357c327b3146e9f5da40e7c0aa3e6d976/tests/cwl-conformance/cwltool-conftest.py
+def pytest_cwl_execute_test(
+    config: utils.CWLTestConfig,
+    processfile: str,
+    jobfile: Optional[str]
+) -> Tuple[int, Optional[Dict[str, Any]]]:
+    """Use the CWL reference runner (cwltool) to execute tests."""
+    from toil.cwl.cwltoil import main
+
+    stdout = StringIO()
+    argsl: List[str] = [f"--outdir={config.outdir}"]
+    if config.runner_quiet:
+        argsl.append("--quiet")
+    elif config.verbose:
+        argsl.append("--debug")
+    argsl.extend(config.args)
+    argsl.append(processfile)
+    if jobfile:
+        argsl.append(jobfile)
+    try:
+        result = main(args=argsl, stdout=stdout)
+    except Exception as e:
+        logger.error(e)
+        return 1, {}
+    out = stdout.getvalue()
+    return result, json.loads(out) if out else {}
toil/test/cwl/cwlTest.py
CHANGED
@@ -449,7 +449,7 @@ class CWLWorkflowTest(ToilTest):
         main_args = [
             "--outdir",
             self.outDir,
-            "#workflow/github.com/dockstore-testing/md5sum-checker",
+            "#workflow/github.com/dockstore-testing/md5sum-checker:master",
            "https://raw.githubusercontent.com/dockstore-testing/md5sum-checker/refs/heads/master/md5sum/md5sum-input-cwl.json"
         ]
         cwltoil.main(main_args, stdout=stdout)
@@ -703,6 +703,50 @@ class CWLWorkflowTest(ToilTest):
         except subprocess.CalledProcessError:
             pass
 
+    def test_caching(self) -> None:
+        log.info("Running CWL caching test.")
+        from toil.cwl import cwltoil
+
+        outDir = self._createTempDir()
+        cacheDir = self._createTempDir()
+
+        cwlDir = os.path.join(self._projectRootPath(), "src", "toil", "test", "cwl")
+        log_path = os.path.join(outDir, "log")
+        cmd = [
+            "--outdir",
+            outDir,
+            "--jobStore",
+            os.path.join(outDir, "jobStore"),
+            "--clean=always",
+            "--no-container",
+            "--cachedir",
+            cacheDir,
+            os.path.join(cwlDir, "revsort.cwl"),
+            os.path.join(cwlDir, "revsort-job.json"),
+        ]
+        st = StringIO()
+        ret = cwltoil.main(cmd, stdout=st)
+        assert ret == 0
+        # cwltool hashes certain steps into directories, ensure it exists
+        # since cwltool caches per task and revsort has 2 cwl tasks, there should be 2 directories and 2 status files
+        assert (len(os.listdir(cacheDir)) == 4)
+
+        # Rerun the workflow to ensure there is a cache hit and that we don't rerun the tools
+        st = StringIO()
+        cmd = [
+            "--writeLogsFromAllJobs=True",
+            "--writeLogs",
+            log_path
+        ] + cmd
+        ret = cwltoil.main(cmd, stdout=st)
+        assert ret == 0
+
+        # Ensure all of the worker logs are using their cached outputs
+        for file in os.listdir(log_path):
+            assert "Using cached output" in open(os.path.join(log_path, file), encoding="utf-8").read()
+
+
+
     @needs_aws_s3
     def test_streamable(self, extra_args: Optional[list[str]] = None) -> None:
         """
@@ -926,6 +970,58 @@ class CWLWorkflowTest(ToilTest):
             }
         }
 
+    def test_missing_import(self) -> None:
+        tmp_path = self._createTempDir()
+        out_dir = os.path.join(tmp_path, "cwl-out-dir")
+        toil = "toil-cwl-runner"
+        options = [
+            f"--outdir={out_dir}",
+            "--clean=always",
+        ]
+        cmd = [toil] + options + ["src/toil/test/cwl/revsort.cwl", "src/toil/test/cwl/revsort-job-missing.json"]
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        stdout, stderr = p.communicate()
+        # Make sure that the missing file is mentioned in the log so the user knows
+        assert b"missing.txt" in stderr
+        assert p.returncode == 1
+
+    @needs_aws_s3
+    def test_optional_secondary_files_exists(self) -> None:
+        tmp_path = self._createTempDir()
+        out_dir = os.path.join(tmp_path, "cwl-out-dir")
+
+        cwlfile = "src/toil/test/cwl/optional-file.cwl"
+        jobfile = "src/toil/test/cwl/optional-file-exists.json"
+
+        args = [
+            os.path.join(self.rootDir, cwlfile),
+            os.path.join(self.rootDir, jobfile),
+            f"--outdir={out_dir}"
+        ]
+        from toil.cwl import cwltoil
+
+        ret = cwltoil.main(args)
+        assert ret == 0
+        assert os.path.exists(os.path.join(out_dir, "wdl_templates_old.zip"))
+
+    @needs_aws_s3
+    def test_optional_secondary_files_missing(self) -> None:
+        tmp_path = self._createTempDir()
+        out_dir = os.path.join(tmp_path, "cwl-out-dir")
+
+        cwlfile = "src/toil/test/cwl/optional-file.cwl"
+        jobfile = "src/toil/test/cwl/optional-file-missing.json"
+
+        args = [
+            os.path.join(self.rootDir, cwlfile),
+            os.path.join(self.rootDir, jobfile),
+            f"--outdir={out_dir}"
+        ]
+        from toil.cwl import cwltoil
+
+        ret = cwltoil.main(args)
+        assert ret == 0
+        assert not os.path.exists(os.path.join(out_dir, "hello_old.zip"))
 
     @needs_cwl
     @needs_online
@@ -1194,7 +1290,6 @@ class CWLv12Test(ToilTest):
             must_support_all_features=must_support_all_features,
             junit_file=junit_file,
         )
-
     @slow
     @pytest.mark.timeout(CONFORMANCE_TEST_TIMEOUT)
     def test_run_conformance_with_caching(self) -> None:
@@ -1203,6 +1298,14 @@ class CWLv12Test(ToilTest):
             junit_file=os.path.join(self.rootDir, "caching-conformance-1.2.junit.xml"),
         )
 
+    @slow
+    @pytest.mark.timeout(CONFORMANCE_TEST_TIMEOUT)
+    def test_run_conformance_with_task_caching(self) -> None:
+        self.test_run_conformance(
+            junit_file=os.path.join(self.rootDir, "task-caching-conformance-1.2.junit.xml"),
+            extra_args=["--cachedir", self._createTempDir("task_cache")]
+        )
+
     @slow
     @pytest.mark.timeout(CONFORMANCE_TEST_TIMEOUT)
     def test_run_conformance_with_in_place_update(self) -> None: