PyPI - toil - Versions diffs - 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl - Mend

toil-7.0.0-py3-none-any.whl → toil-8.1.0b1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (197)

toil/__init__.py +124 -86
toil/batchSystems/__init__.py +1 -0
toil/batchSystems/abstractBatchSystem.py +137 -77
toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
toil/batchSystems/awsBatch.py +237 -128
toil/batchSystems/cleanup_support.py +22 -16
toil/batchSystems/contained_executor.py +30 -26
toil/batchSystems/gridengine.py +85 -49
toil/batchSystems/htcondor.py +164 -87
toil/batchSystems/kubernetes.py +622 -386
toil/batchSystems/local_support.py +17 -12
toil/batchSystems/lsf.py +132 -79
toil/batchSystems/lsfHelper.py +13 -11
toil/batchSystems/mesos/__init__.py +41 -29
toil/batchSystems/mesos/batchSystem.py +288 -149
toil/batchSystems/mesos/executor.py +77 -49
toil/batchSystems/mesos/test/__init__.py +31 -23
toil/batchSystems/options.py +39 -29
toil/batchSystems/registry.py +53 -19
toil/batchSystems/singleMachine.py +293 -123
toil/batchSystems/slurm.py +651 -155
toil/batchSystems/torque.py +46 -32
toil/bus.py +141 -73
toil/common.py +784 -397
toil/cwl/__init__.py +1 -1
toil/cwl/cwltoil.py +1137 -534
toil/cwl/utils.py +17 -22
toil/deferred.py +62 -41
toil/exceptions.py +5 -3
toil/fileStores/__init__.py +5 -5
toil/fileStores/abstractFileStore.py +88 -57
toil/fileStores/cachingFileStore.py +711 -247
toil/fileStores/nonCachingFileStore.py +113 -75
toil/job.py +1031 -349
toil/jobStores/abstractJobStore.py +387 -243
toil/jobStores/aws/jobStore.py +772 -412
toil/jobStores/aws/utils.py +161 -109
toil/jobStores/conftest.py +1 -0
toil/jobStores/fileJobStore.py +289 -151
toil/jobStores/googleJobStore.py +137 -70
toil/jobStores/utils.py +36 -15
toil/leader.py +614 -269
toil/lib/accelerators.py +115 -18
toil/lib/aws/__init__.py +55 -28
toil/lib/aws/ami.py +122 -87
toil/lib/aws/iam.py +284 -108
toil/lib/aws/s3.py +31 -0
toil/lib/aws/session.py +204 -58
toil/lib/aws/utils.py +290 -213
toil/lib/bioio.py +13 -5
toil/lib/compatibility.py +11 -6
toil/lib/conversions.py +83 -49
toil/lib/docker.py +131 -103
toil/lib/dockstore.py +379 -0
toil/lib/ec2.py +322 -209
toil/lib/ec2nodes.py +174 -105
toil/lib/encryption/_dummy.py +5 -3
toil/lib/encryption/_nacl.py +10 -6
toil/lib/encryption/conftest.py +1 -0
toil/lib/exceptions.py +26 -7
toil/lib/expando.py +4 -2
toil/lib/ftp_utils.py +217 -0
toil/lib/generatedEC2Lists.py +127 -19
toil/lib/history.py +1271 -0
toil/lib/history_submission.py +681 -0
toil/lib/humanize.py +6 -2
toil/lib/io.py +121 -12
toil/lib/iterables.py +4 -2
toil/lib/memoize.py +12 -8
toil/lib/misc.py +83 -18
toil/lib/objects.py +2 -2
toil/lib/resources.py +19 -7
toil/lib/retry.py +125 -87
toil/lib/threading.py +282 -80
toil/lib/throttle.py +15 -14
toil/lib/trs.py +390 -0
toil/lib/web.py +38 -0
toil/options/common.py +850 -402
toil/options/cwl.py +185 -90
toil/options/runner.py +50 -0
toil/options/wdl.py +70 -19
toil/provisioners/__init__.py +111 -46
toil/provisioners/abstractProvisioner.py +322 -157
toil/provisioners/aws/__init__.py +62 -30
toil/provisioners/aws/awsProvisioner.py +980 -627
toil/provisioners/clusterScaler.py +541 -279
toil/provisioners/gceProvisioner.py +283 -180
toil/provisioners/node.py +147 -79
toil/realtimeLogger.py +34 -22
toil/resource.py +137 -75
toil/server/app.py +127 -61
toil/server/celery_app.py +3 -1
toil/server/cli/wes_cwl_runner.py +84 -55
toil/server/utils.py +56 -31
toil/server/wes/abstract_backend.py +64 -26
toil/server/wes/amazon_wes_utils.py +21 -15
toil/server/wes/tasks.py +121 -63
toil/server/wes/toil_backend.py +142 -107
toil/server/wsgi_app.py +4 -3
toil/serviceManager.py +58 -22
toil/statsAndLogging.py +183 -65
toil/test/__init__.py +263 -179
toil/test/batchSystems/batchSystemTest.py +438 -195
toil/test/batchSystems/batch_system_plugin_test.py +18 -7
toil/test/batchSystems/test_gridengine.py +173 -0
toil/test/batchSystems/test_lsf_helper.py +67 -58
toil/test/batchSystems/test_slurm.py +265 -49
toil/test/cactus/test_cactus_integration.py +20 -22
toil/test/cwl/conftest.py +39 -0
toil/test/cwl/cwlTest.py +375 -72
toil/test/cwl/measure_default_memory.cwl +12 -0
toil/test/cwl/not_run_required_input.cwl +29 -0
toil/test/cwl/optional-file.cwl +18 -0
toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
toil/test/docs/scriptsTest.py +60 -34
toil/test/jobStores/jobStoreTest.py +412 -235
toil/test/lib/aws/test_iam.py +116 -48
toil/test/lib/aws/test_s3.py +16 -9
toil/test/lib/aws/test_utils.py +5 -6
toil/test/lib/dockerTest.py +118 -141
toil/test/lib/test_conversions.py +113 -115
toil/test/lib/test_ec2.py +57 -49
toil/test/lib/test_history.py +212 -0
toil/test/lib/test_misc.py +12 -5
toil/test/lib/test_trs.py +161 -0
toil/test/mesos/MesosDataStructuresTest.py +23 -10
toil/test/mesos/helloWorld.py +7 -6
toil/test/mesos/stress.py +25 -20
toil/test/options/options.py +7 -2
toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
toil/test/provisioners/clusterScalerTest.py +440 -250
toil/test/provisioners/clusterTest.py +81 -42
toil/test/provisioners/gceProvisionerTest.py +174 -100
toil/test/provisioners/provisionerTest.py +25 -13
toil/test/provisioners/restartScript.py +5 -4
toil/test/server/serverTest.py +188 -141
toil/test/sort/restart_sort.py +137 -68
toil/test/sort/sort.py +134 -66
toil/test/sort/sortTest.py +91 -49
toil/test/src/autoDeploymentTest.py +140 -100
toil/test/src/busTest.py +20 -18
toil/test/src/checkpointTest.py +8 -2
toil/test/src/deferredFunctionTest.py +49 -35
toil/test/src/dockerCheckTest.py +33 -26
toil/test/src/environmentTest.py +20 -10
toil/test/src/fileStoreTest.py +538 -271
toil/test/src/helloWorldTest.py +7 -4
toil/test/src/importExportFileTest.py +61 -31
toil/test/src/jobDescriptionTest.py +32 -17
toil/test/src/jobEncapsulationTest.py +2 -0
toil/test/src/jobFileStoreTest.py +74 -50
toil/test/src/jobServiceTest.py +187 -73
toil/test/src/jobTest.py +120 -70
toil/test/src/miscTests.py +19 -18
toil/test/src/promisedRequirementTest.py +82 -36
toil/test/src/promisesTest.py +7 -6
toil/test/src/realtimeLoggerTest.py +6 -6
toil/test/src/regularLogTest.py +71 -37
toil/test/src/resourceTest.py +80 -49
toil/test/src/restartDAGTest.py +36 -22
toil/test/src/resumabilityTest.py +9 -2
toil/test/src/retainTempDirTest.py +45 -14
toil/test/src/systemTest.py +12 -8
toil/test/src/threadingTest.py +44 -25
toil/test/src/toilContextManagerTest.py +10 -7
toil/test/src/userDefinedJobArgTypeTest.py +8 -5
toil/test/src/workerTest.py +33 -16
toil/test/utils/toilDebugTest.py +70 -58
toil/test/utils/toilKillTest.py +4 -5
toil/test/utils/utilsTest.py +239 -102
toil/test/wdl/wdltoil_test.py +789 -148
toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
toil/toilState.py +52 -26
toil/utils/toilConfig.py +13 -4
toil/utils/toilDebugFile.py +44 -27
toil/utils/toilDebugJob.py +85 -25
toil/utils/toilDestroyCluster.py +11 -6
toil/utils/toilKill.py +8 -3
toil/utils/toilLaunchCluster.py +251 -145
toil/utils/toilMain.py +37 -16
toil/utils/toilRsyncCluster.py +27 -14
toil/utils/toilSshCluster.py +45 -22
toil/utils/toilStats.py +75 -36
toil/utils/toilStatus.py +226 -119
toil/utils/toilUpdateEC2Instances.py +3 -1
toil/version.py +6 -6
toil/wdl/utils.py +5 -5
toil/wdl/wdltoil.py +3528 -1053
toil/worker.py +370 -149
toil-8.1.0b1.dist-info/METADATA +178 -0
toil-8.1.0b1.dist-info/RECORD +259 -0
{toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
toil-7.0.0.dist-info/METADATA +0 -158
toil-7.0.0.dist-info/RECORD +0 -244
{toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
{toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
{toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0

toil/test/cwl/cwlTest.py CHANGED Viewed

@@ -23,16 +23,10 @@ import sys
 import unittest
 import uuid
 import zipfile
 from functools import partial
 from io import StringIO
 from pathlib import Path
-from typing import (TYPE_CHECKING,
-                    Callable,
-                    Dict,
-                    List,
-                    Optional,
-                    cast)
+from typing import TYPE_CHECKING, Callable, Optional, cast
 from unittest.mock import Mock, call
 from urllib.request import urlretrieve
@@ -46,32 +40,33 @@ sys.path.insert(0, pkg_root)  # noqa
 from schema_salad.exceptions import ValidationException
-from toil.cwl.utils import (DirectoryStructure,
-                            download_structure,
-                            visit_cwl_class_and_reduce,
-                            visit_top_cwl_class)
+from toil.cwl.utils import (
+    DirectoryStructure,
+    download_structure,
+    visit_cwl_class_and_reduce,
+    visit_top_cwl_class,
+)
 from toil.fileStores import FileID
 from toil.fileStores.abstractFileStore import AbstractFileStore
 from toil.lib.threading import cpu_count
-from toil.provisioners import cluster_factory
-from toil.test import (ToilTest,
-                       needs_aws_ec2,
-                       needs_aws_s3,
-                       needs_cwl,
-                       needs_docker,
-                       needs_docker_cuda,
-                       needs_env_var,
-                       needs_fetchable_appliance,
-                       needs_gridengine,
-                       needs_kubernetes,
-                       needs_local_cuda,
-                       needs_lsf,
-                       needs_mesos,
-                       needs_online,
-                       needs_slurm,
-                       needs_torque,
-                       needs_wes_server,
-                       slow)
+from toil.test import (
+    ToilTest,
+    needs_aws_s3,
+    needs_cwl,
+    needs_docker,
+    needs_docker_cuda,
+    needs_gridengine,
+    needs_kubernetes,
+    needs_local_cuda,
+    needs_lsf,
+    needs_mesos,
+    needs_online,
+    needs_singularity_or_docker,
+    needs_slurm,
+    needs_torque,
+    needs_wes_server,
+    slow,
+)
 log = logging.getLogger(__name__)
 CONFORMANCE_TEST_TIMEOUT = 10000
@@ -86,7 +81,7 @@ def run_conformance_tests(
     selected_tests: Optional[str] = None,
     selected_tags: Optional[str] = None,
     skipped_tests: Optional[str] = None,
-    extra_args: Optional[List[str]] = None,
+    extra_args: Optional[list[str]] = None,
     must_support_all_features: bool = False,
     junit_file: Optional[str] = None,
 ) -> None:
@@ -147,7 +142,7 @@ def run_conformance_tests(
             "--relax-path-checks",
             # Defaults to 20s but we can't start hundreds of nodejs processes that fast on our CI potatoes
             "--eval-timeout=600",
-            f"--caching={caching}"
+            f"--caching={caching}",
         ]
         if extra_args:
@@ -181,34 +176,55 @@ def run_conformance_tests(
         cmd.extend(["--"] + args_passed_directly_to_runner)
         log.info("Running: '%s'", "' '".join(cmd))
+        output_lines: list[str] = []
         try:
-            output = subprocess.check_output(cmd, cwd=workDir, stderr=subprocess.STDOUT)
+            child = subprocess.Popen(
+                cmd, cwd=workDir, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+            )
+            if child.stdout is not None:
+                for line_bytes in child.stdout:
+                    # Pass through all the logs
+                    line_text = line_bytes.decode("utf-8", errors="replace").rstrip()
+                    output_lines.append(line_text)
+                    log.info(line_text)
+            # Once it's done writing, amke sure it succeeded.
+            child.wait()
+            log.info("CWL tests finished with exit code %s", child.returncode)
+            if child.returncode != 0:
+                # Act like check_output and raise an error.
+                raise subprocess.CalledProcessError(child.returncode, " ".join(cmd))
         finally:
             if job_store_override:
                 # Clean up the job store we used for all the tests, if it is still there.
                 subprocess.run(["toil", "clean", job_store_override])
     except subprocess.CalledProcessError as e:
+        log.info("CWL test runner return code was unsuccessful")
         only_unsupported = False
         # check output -- if we failed but only have unsupported features, we're okay
         p = re.compile(
             r"(?P<failures>\d+) failures, (?P<unsupported>\d+) unsupported features"
         )
-        error_log = e.output.decode("utf-8")
-        for line in error_log.split("\n"):
-            m = p.search(line)
+        for line_text in output_lines:
+            m = p.search(line_text)
             if m:
                 if int(m.group("failures")) == 0 and int(m.group("unsupported")) > 0:
                     only_unsupported = True
                     break
         if (not only_unsupported) or must_support_all_features:
-            print(error_log)
+            log.error(
+                "CWL tests gave unacceptable output:\n%s", "\n".join(output_lines)
+            )
             raise e
+        log.info("Unsuccessful return code is OK")
 TesterFuncType = Callable[[str, str, "CWLObjectType"], None]
 @needs_cwl
 class CWLWorkflowTest(ToilTest):
     """
@@ -222,11 +238,14 @@ class CWLWorkflowTest(ToilTest):
         self.outDir = f"/tmp/toil-cwl-test-{str(uuid.uuid4())}"
         os.makedirs(self.outDir)
         self.rootDir = self._projectRootPath()
+        self.jobStoreDir = f"./jobstore-{str(uuid.uuid4())}"
     def tearDown(self) -> None:
         """Clean up outputs."""
         if os.path.exists(self.outDir):
             shutil.rmtree(self.outDir)
+        if os.path.exists(self.jobStoreDir):
+            shutil.rmtree(self.jobStoreDir)
         unittest.TestCase.tearDown(self)
     def test_cwl_cmdline_input(self) -> None:
@@ -234,6 +253,7 @@ class CWLWorkflowTest(ToilTest):
         Test that running a CWL workflow with inputs specified on the command line passes.
         """
         from toil.cwl import cwltoil
         cwlfile = "src/toil/test/cwl/conditional_wf.cwl"
         args = [cwlfile, "--message", "str", "--sleep", "2"]
         st = StringIO()
@@ -245,7 +265,7 @@ class CWLWorkflowTest(ToilTest):
         cwlfile: str,
         jobfile: str,
         expect: "CWLObjectType",
-        main_args: List[str] = [],
+        main_args: list[str] = [],
         out_name: str = "output",
         output_here: bool = False,
     ) -> None:
@@ -255,13 +275,7 @@ class CWLWorkflowTest(ToilTest):
         main_args = main_args[:]
         if not output_here:
             # Don't just dump output in the working directory.
-            main_args.extend(
-                [
-                    "--logDebug",
-                    "--outdir",
-                    self.outDir
-                ]
-            )
+            main_args.extend(["--logDebug", "--outdir", self.outDir])
         main_args.extend(
             [
                 os.path.join(self.rootDir, cwlfile),
@@ -276,7 +290,12 @@ class CWLWorkflowTest(ToilTest):
         self.assertEqual(out, expect)
         for k, v in expect.items():
-            if isinstance(v, dict) and "class" in v and v["class"] == "File" and "path" in v:
+            if (
+                isinstance(v, dict)
+                and "class" in v
+                and v["class"] == "File"
+                and "path" in v
+            ):
                 # This is a top-level output file.
                 # None of our output files should be executable.
                 self.assertTrue(os.path.exists(v["path"]))
@@ -402,6 +421,11 @@ class CWLWorkflowTest(ToilTest):
             "revsort.cwl", partial(self._tester, main_args=["--no-compute-checksum"])
         )
+    def test_run_revsort_no_container(self) -> None:
+        self.revsort(
+            "revsort.cwl", partial(self._tester, main_args=["--no-container"])
+        )
     def test_run_revsort2(self) -> None:
         self.revsort("revsort2.cwl", self._tester)
@@ -415,6 +439,24 @@ class CWLWorkflowTest(ToilTest):
             self._expected_colon_output(self.outDir),
             out_name="result",
         )
+    @pytest.mark.integrative
+    @needs_singularity_or_docker
+    def test_run_dockstore_trs(self) -> None:
+        from toil.cwl import cwltoil
+        stdout = StringIO()
+        main_args = [
+            "--outdir",
+            self.outDir,
+            "#workflow/github.com/dockstore-testing/md5sum-checker:master",
+            "https://raw.githubusercontent.com/dockstore-testing/md5sum-checker/refs/heads/master/md5sum/md5sum-input-cwl.json"
+        ]
+        cwltoil.main(main_args, stdout=stdout)
+        out = json.loads(stdout.getvalue())
+        with open(out.get("output_file", {}).get("location")[len("file://") :]) as f:
+            computed_hash = f.read().strip()
+        self.assertEqual(computed_hash, "00579a00e3e7fa0674428ac7049423e2")
     def test_glob_dir_bypass_file_store(self) -> None:
         self.maxDiff = 1000
@@ -426,7 +468,7 @@ class CWLWorkflowTest(ToilTest):
                 "src/toil/test/cwl/empty.json",
                 self._expected_glob_dir_output(os.getcwd()),
                 main_args=["--bypass-file-store"],
-                output_here=True
+                output_here=True,
             )
         finally:
             # Clean up anything we made in the current directory.
@@ -435,6 +477,69 @@ class CWLWorkflowTest(ToilTest):
             except FileNotFoundError:
                 pass
+    def test_required_input_condition_protection(self) -> None:
+        # This doesn't run containerized
+        self._tester(
+            "src/toil/test/cwl/not_run_required_input.cwl",
+            "src/toil/test/cwl/empty.json",
+            {},
+        )
+    @needs_slurm
+    def test_slurm_node_memory(self) -> None:
+        pass
+        # Run the workflow. This will either finish quickly and tell us the
+        # memory we got, or take a long time because it requested a whole
+        # node's worth of memory and no nodes are free right now. We need to
+        # support both.
+        # And if we run out of time we need to stop the workflow gracefully and
+        # cancel the Slurm jobs.
+        main_args = [
+            f"--jobStore={self.jobStoreDir}",
+            # Avoid racing to toil kill before the jobstore is removed
+            "--clean=never",
+            "--batchSystem=slurm",
+            "--no-cwl-default-ram",
+            "--slurmDefaultAllMem=True",
+            "--outdir",
+            self.outDir,
+            os.path.join(self.rootDir, "src/toil/test/cwl/measure_default_memory.cwl"),
+        ]
+        try:
+            log.debug("Start test workflow")
+            child = subprocess.Popen(
+                ["toil-cwl-runner"] + main_args, stdout=subprocess.PIPE
+            )
+            output, _ = child.communicate(timeout=60)
+        except subprocess.TimeoutExpired:
+            # The job didn't finish quickly; presumably waiting for a full node.
+            # Stop the workflow
+            log.debug("Workflow might be waiting for a full node. Stop it.")
+            subprocess.check_call(["toil", "kill", self.jobStoreDir])
+            # Wait another little bit for it to clean up, making sure to collect output in case it is blocked on writing
+            child.communicate(timeout=20)
+            # Kill it off in case it is still running
+            child.kill()
+            # Reap it
+            child.wait()
+            # The test passes
+        else:
+            out = json.loads(output)
+            log.debug("Workflow output: %s", out)
+            memory_string = out["memory"]
+            log.debug("Observed memory: %s", memory_string)
+            # If there's no memory limit enforced, Slurm will return "unlimited".
+            # Set result to something sensible.
+            if memory_string.strip() == "unlimited":
+                result = 4 * 1024 * 1024
+            else:
+                result = int(memory_string)
+            # We should see more than the CWL default or the Toil default, assuming Slurm nodes of reasonable size (3 GiB).
+            self.assertGreater(result, 3 * 1024 * 1024)
     @needs_aws_s3
     def test_download_s3(self) -> None:
         self.download("download_s3.json", self._tester)
@@ -446,7 +551,10 @@ class CWLWorkflowTest(ToilTest):
         self.download("download_https.json", self._tester)
     def test_download_https_reference(self) -> None:
-        self.download("download_https.json", partial(self._tester, main_args=["--reference-inputs"]))
+        self.download(
+            "download_https.json",
+            partial(self._tester, main_args=["--reference-inputs"]),
+        )
     def test_download_file(self) -> None:
         self.download("download_file.json", self._tester)
@@ -457,7 +565,10 @@ class CWLWorkflowTest(ToilTest):
     @needs_aws_s3
     def test_download_directory_s3_reference(self) -> None:
-        self.download_directory("download_directory_s3.json", partial(self._tester, main_args=["--reference-inputs"]))
+        self.download_directory(
+            "download_directory_s3.json",
+            partial(self._tester, main_args=["--reference-inputs"]),
+        )
     def test_download_directory_file(self) -> None:
         self.download_directory("download_directory_file.json", self._tester)
@@ -502,7 +613,10 @@ class CWLWorkflowTest(ToilTest):
             "src/toil/test/cwl/seqtk_seq.cwl",
             "src/toil/test/cwl/seqtk_seq_job.json",
             self._expected_seqtk_output(self.outDir),
-            main_args=["--default-container", "quay.io/biocontainers/seqtk:1.4--he4a0461_1"],
+            main_args=[
+                "--default-container",
+                "quay.io/biocontainers/seqtk:1.4--he4a0461_1",
+            ],
             out_name="output1",
         )
@@ -566,7 +680,11 @@ class CWLWorkflowTest(ToilTest):
         # Force a failure by trying to use an incorrect version of `rev` from the PATH
         os.environ["PATH"] = path_with_bogus_rev()
         try:
-            subprocess.check_output(["toil-cwl-runner"] + cmd, env=os.environ.copy(), stderr=subprocess.STDOUT)
+            subprocess.check_output(
+                ["toil-cwl-runner"] + cmd,
+                env=os.environ.copy(),
+                stderr=subprocess.STDOUT,
+            )
             self.fail("Expected problem job with incorrect PATH did not fail")
         except subprocess.CalledProcessError:
             pass
@@ -576,13 +694,61 @@ class CWLWorkflowTest(ToilTest):
         cwltoil.main(cmd)
         # Should fail because previous job completed successfully
         try:
-            subprocess.check_output(["toil-cwl-runner"] + cmd, env=os.environ.copy(), stderr=subprocess.STDOUT)
+            subprocess.check_output(
+                ["toil-cwl-runner"] + cmd,
+                env=os.environ.copy(),
+                stderr=subprocess.STDOUT,
+            )
             self.fail("Restart with missing directory did not fail")
         except subprocess.CalledProcessError:
             pass
+    def test_caching(self) -> None:
+        log.info("Running CWL caching test.")
+        from toil.cwl import cwltoil
+        outDir = self._createTempDir()
+        cacheDir = self._createTempDir()
+        cwlDir = os.path.join(self._projectRootPath(), "src", "toil", "test", "cwl")
+        log_path = os.path.join(outDir, "log")
+        cmd = [
+            "--outdir",
+            outDir,
+            "--jobStore",
+            os.path.join(outDir, "jobStore"),
+            "--clean=always",
+            "--no-container",
+            "--cachedir",
+            cacheDir,
+            os.path.join(cwlDir, "revsort.cwl"),
+            os.path.join(cwlDir, "revsort-job.json"),
+        ]
+        st = StringIO()
+        ret = cwltoil.main(cmd, stdout=st)
+        assert ret == 0
+        # cwltool hashes certain steps into directories, ensure it exists
+        # since cwltool caches per task and revsort has 2 cwl tasks, there should be 2 directories and 2 status files
+        assert (len(os.listdir(cacheDir)) == 4)
+        # Rerun the workflow to ensure there is a cache hit and that we don't rerun the tools
+        st = StringIO()
+        cmd = [
+                  "--writeLogsFromAllJobs=True",
+                  "--writeLogs",
+                  log_path
+              ] + cmd
+        ret = cwltoil.main(cmd, stdout=st)
+        assert ret == 0
+        # Ensure all of the worker logs are using their cached outputs
+        for file in os.listdir(log_path):
+            assert "Using cached output" in open(os.path.join(log_path, file), encoding="utf-8").read()
     @needs_aws_s3
-    def test_streamable(self, extra_args: Optional[List[str]] = None) -> None:
+    def test_streamable(self, extra_args: Optional[list[str]] = None) -> None:
         """
         Test that a file with 'streamable'=True is a named pipe.
         This is a CWL1.2 feature.
@@ -666,7 +832,6 @@ class CWLWorkflowTest(ToilTest):
         except ValidationException as e:
             # Make sure we chastise the user appropriately.
             assert "expressions are not allowed" in str(e)
     @staticmethod
     def _expected_seqtk_output(outDir: str) -> "CWLObjectType":
@@ -723,7 +888,7 @@ class CWLWorkflowTest(ToilTest):
                 "size": 0,
                 "class": "File",
                 "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709",
-                "path": path
+                "path": path,
             }
         }
@@ -750,9 +915,9 @@ class CWLWorkflowTest(ToilTest):
                         "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709",
                         "size": 0,
                         "nameroot": "test",
-                        "nameext": ".txt"
+                        "nameext": ".txt",
                     }
-                ]
+                ],
             }
         }
@@ -785,7 +950,7 @@ class CWLWorkflowTest(ToilTest):
                         "size": 1111,
                         "nameroot": "whale",
                         "nameext": ".txt",
-                        "path": f"{path}/whale.txt"
+                        "path": f"{path}/whale.txt",
                     }
                 ],
             }
@@ -805,6 +970,58 @@ class CWLWorkflowTest(ToilTest):
             }
         }
+    def test_missing_import(self) -> None:
+        tmp_path = self._createTempDir()
+        out_dir = os.path.join(tmp_path, "cwl-out-dir")
+        toil = "toil-cwl-runner"
+        options = [
+            f"--outdir={out_dir}",
+            "--clean=always",
+        ]
+        cmd = [toil] + options + ["src/toil/test/cwl/revsort.cwl", "src/toil/test/cwl/revsort-job-missing.json"]
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        stdout, stderr = p.communicate()
+        # Make sure that the missing file is mentioned in the log so the user knows
+        assert b"missing.txt" in stderr
+        assert p.returncode == 1
+    @needs_aws_s3
+    def test_optional_secondary_files_exists(self) -> None:
+        tmp_path = self._createTempDir()
+        out_dir = os.path.join(tmp_path, "cwl-out-dir")
+        cwlfile = "src/toil/test/cwl/optional-file.cwl"
+        jobfile = "src/toil/test/cwl/optional-file-exists.json"
+        args = [
+            os.path.join(self.rootDir, cwlfile),
+            os.path.join(self.rootDir, jobfile),
+            f"--outdir={out_dir}"
+        ]
+        from toil.cwl import cwltoil
+        ret = cwltoil.main(args)
+        assert ret == 0
+        assert os.path.exists(os.path.join(out_dir, "wdl_templates_old.zip"))
+    @needs_aws_s3
+    def test_optional_secondary_files_missing(self) -> None:
+        tmp_path = self._createTempDir()
+        out_dir = os.path.join(tmp_path, "cwl-out-dir")
+        cwlfile = "src/toil/test/cwl/optional-file.cwl"
+        jobfile = "src/toil/test/cwl/optional-file-missing.json"
+        args = [
+            os.path.join(self.rootDir, cwlfile),
+            os.path.join(self.rootDir, jobfile),
+            f"--outdir={out_dir}"
+        ]
+        from toil.cwl import cwltoil
+        ret = cwltoil.main(args)
+        assert ret == 0
+        assert not os.path.exists(os.path.join(out_dir, "hello_old.zip"))
 @needs_cwl
 @needs_online
@@ -853,7 +1070,7 @@ class CWLv10Test(ToilTest):
         caching: bool = False,
         selected_tests: Optional[str] = None,
         skipped_tests: Optional[str] = None,
-        extra_args: Optional[List[str]] = None,
+        extra_args: Optional[list[str]] = None,
     ) -> None:
         run_conformance_tests(
             workDir=self.workDir,
@@ -981,7 +1198,7 @@ class CWLv11Test(ToilTest):
         caching: bool = False,
         batchSystem: Optional[str] = None,
         skipped_tests: Optional[str] = None,
-        extra_args: Optional[List[str]] = None,
+        extra_args: Optional[list[str]] = None,
     ) -> None:
         run_conformance_tests(
             workDir=self.cwlSpec,
@@ -1055,7 +1272,7 @@ class CWLv12Test(ToilTest):
         batchSystem: Optional[str] = None,
         selected_tests: Optional[str] = None,
         skipped_tests: Optional[str] = None,
-        extra_args: Optional[List[str]] = None,
+        extra_args: Optional[list[str]] = None,
         must_support_all_features: bool = False,
         junit_file: Optional[str] = None,
     ) -> None:
@@ -1073,15 +1290,20 @@ class CWLv12Test(ToilTest):
             must_support_all_features=must_support_all_features,
             junit_file=junit_file,
         )
     @slow
     @pytest.mark.timeout(CONFORMANCE_TEST_TIMEOUT)
     def test_run_conformance_with_caching(self) -> None:
         self.test_run_conformance(
             caching=True,
-            junit_file = os.path.join(
-                self.rootDir, "caching-conformance-1.2.junit.xml"
-            )
+            junit_file=os.path.join(self.rootDir, "caching-conformance-1.2.junit.xml"),
+        )
+    @slow
+    @pytest.mark.timeout(CONFORMANCE_TEST_TIMEOUT)
+    def test_run_conformance_with_task_caching(self) -> None:
+        self.test_run_conformance(
+            junit_file=os.path.join(self.rootDir, "task-caching-conformance-1.2.junit.xml"),
+            extra_args=["--cachedir", self._createTempDir("task_cache")]
         )
     @slow
@@ -1093,10 +1315,11 @@ class CWLv12Test(ToilTest):
         features.
         """
         self.test_run_conformance(
-            extra_args=["--bypass-file-store"], must_support_all_features=True,
-            junit_file = os.path.join(
+            extra_args=["--bypass-file-store"],
+            must_support_all_features=True,
+            junit_file=os.path.join(
                 self.rootDir, "in-place-update-conformance-1.2.junit.xml"
-            )
+            ),
         )
     @slow
@@ -1272,6 +1495,9 @@ def test_log_dir_echo_stderr(tmp_path: Path) -> None:
     assert output == "hello\n"
+# TODO: It's not clear how this test tests filename conflict resolution; it
+# seems like it runs a python script to copy some files and makes sure the
+# workflow doesn't fail.
 @needs_cwl
 @pytest.mark.cwl_small_log_dir
 def test_filename_conflict_resolution(tmp_path: Path) -> None:
@@ -1294,6 +1520,27 @@ def test_filename_conflict_resolution(tmp_path: Path) -> None:
     assert b"Finished toil run successfully" in stderr
     assert p.returncode == 0
+@needs_cwl
+@pytest.mark.cwl_small_log_dir
+def test_filename_conflict_resolution_3_or_more(tmp_path: Path) -> None:
+    out_dir = tmp_path / "cwl-out-dir"
+    toil = "toil-cwl-runner"
+    options = [
+        f"--outdir={out_dir}",
+        "--clean=always",
+    ]
+    cwl = os.path.join(os.path.dirname(__file__), "scatter_duplicate_outputs.cwl")
+    cmd = [toil] + options + [cwl]
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdout, stderr = p.communicate()
+    assert b"Finished toil run successfully" in stderr
+    assert p.returncode == 0
+    assert (
+        len(os.listdir(out_dir)) == 9
+    ), "All 9 files made by the scatter should be in the directory"
 @needs_cwl
 @needs_docker
 @pytest.mark.cwl_small_log_dir
@@ -1316,6 +1563,7 @@ def test_filename_conflict_detection(tmp_path: Path) -> None:
     assert b"File staging conflict" in stderr
     assert p.returncode != 0
 @needs_cwl
 @needs_docker
 @pytest.mark.cwl_small_log_dir
@@ -1357,7 +1605,10 @@ def test_pick_value_with_one_null_value(caplog: pytest.LogCaptureFixture) -> Non
     with caplog.at_level(logging.WARNING, logger="toil.cwl.cwltoil"):
         cwltoil.main(args)
         for line in caplog.messages:
-            assert "You had a conditional step that did not run, but you did not use pickValue to handle the skipped input." not in line
+            assert (
+                "You had a conditional step that did not run, but you did not use pickValue to handle the skipped input."
+                not in line
+            )
 @needs_cwl
@@ -1395,6 +1646,8 @@ def test_workflow_echo_string_scatter_capture_stdout() -> None:
     cmd = [toil, jobstore, option_1, option_2, option_3, cwl]
     p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     stdout, stderr = p.communicate()
+    log.debug("Workflow standard output: %s", stdout)
+    assert len(stdout) > 0
     outputs = json.loads(stdout)
     out_list = outputs["list_out"]
     assert len(out_list) == 2, f"outList shoud have two file elements {out_list}"
@@ -1505,7 +1758,7 @@ def test_visit_cwl_class_and_reduce() -> None:
     up_count = 0
     up_child_count = 0
-    def op_up(thing: "CWLObjectType", down_value: int, child_results: List[str]) -> str:
+    def op_up(thing: "CWLObjectType", down_value: int, child_results: list[str]) -> str:
         """
         Check the down return value and the up return values, and count
         what we visit going up and what child relationships we have.
@@ -1559,9 +1812,9 @@ def test_download_structure(tmp_path: Path) -> None:
     # These will be populated.
     # TODO: This cache seems unused. Remove it?
     # This maps filesystem path to CWL URI
-    index: Dict[str, str] = {}
+    index: dict[str, str] = {}
     # This maps CWL URI to filesystem path
-    existing: Dict[str, str] = {}
+    existing: dict[str, str] = {}
     # Do the download
     download_structure(file_store, index, existing, structure, to_dir)
@@ -1614,3 +1867,53 @@ def test_download_structure(tmp_path: Path) -> None:
         ],
         any_order=True,
     )
+@needs_cwl
+@pytest.mark.timeout(300)
+def test_import_on_workers() -> None:
+    """
+    Run a CWL workflow with --runImportsOnWorkers and check that the log
+    message ImportWorkersMessageHandler watches for was emitted.
+    """
+    args = [
+        "src/toil/test/cwl/download.cwl",
+        "src/toil/test/cwl/download_file.json",
+        "--runImportsOnWorkers",
+        "--importWorkersDisk=10MiB",
+        "--realTimeLogging=True",
+        "--logLevel=INFO",
+        "--logColors=False",
+    ]
+    from toil.cwl import cwltoil
+    detector = ImportWorkersMessageHandler()
+    # Set up a log message detector to the root logger
+    root_logger = logging.getLogger()
+    root_logger.addHandler(detector)
+    try:
+        cwltoil.main(args)
+    finally:
+        # Always detach the detector, even if the workflow raises, so it does
+        # not keep receiving records from whatever runs later in this process.
+        root_logger.removeHandler(detector)
+    assert detector.detected is True
+# StreamHandler is generic, but _typeshed doesn't exist at runtime, so do a
+# bit of typing trickery to pick the base class for the handler below.
+# See https://github.com/python/typeshed/issues/5680
+if TYPE_CHECKING:
+    # Only type checkers take this branch: they see the parameterized handler.
+    from _typeshed import SupportsWrite
+    _stream_handler = logging.StreamHandler[SupportsWrite[str]]
+else:
+    # At runtime, use the plain, unsubscripted class.
+    _stream_handler = logging.StreamHandler
+class ImportWorkersMessageHandler(_stream_handler):
+    """
+    Detect the import workers log message and set a flag.
+
+    Records are only inspected; this handler's emit() does not write them
+    to the stream.
+    """
+    def __init__(self) -> None:
+        self.detected = False  # Have we seen the message we want?
+        super().__init__(sys.stderr)
+    def emit(self, record: logging.LogRecord) -> None:
+        """
+        Set ``detected`` if the record's formatted message starts with the
+        "Issued job" line for a CWLImportJob.
+
+        :param record: The log record to inspect.
+        """
+        # Use getMessage() rather than ``record.msg % record.args``: it
+        # stringifies non-string messages and only applies %-formatting when
+        # the record actually has arguments, so unrelated records that contain
+        # a literal "%" do not raise here.
+        if record.getMessage().startswith("Issued job 'CWLImportJob' CWLImportJob"):
+            self.detected = True

toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl

toil 7.0.0py3-none-any.whl → 8.1.0b1py3-none-any.whl