toil 9.0.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. toil/batchSystems/abstractBatchSystem.py +13 -5
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
  3. toil/batchSystems/kubernetes.py +13 -2
  4. toil/batchSystems/mesos/batchSystem.py +33 -2
  5. toil/batchSystems/slurm.py +191 -16
  6. toil/cwl/cwltoil.py +17 -82
  7. toil/fileStores/__init__.py +1 -1
  8. toil/fileStores/abstractFileStore.py +5 -2
  9. toil/fileStores/cachingFileStore.py +1 -1
  10. toil/job.py +30 -14
  11. toil/jobStores/abstractJobStore.py +24 -19
  12. toil/jobStores/aws/jobStore.py +862 -1963
  13. toil/jobStores/aws/utils.py +24 -270
  14. toil/jobStores/googleJobStore.py +25 -9
  15. toil/jobStores/utils.py +0 -327
  16. toil/leader.py +27 -22
  17. toil/lib/aws/config.py +22 -0
  18. toil/lib/aws/s3.py +477 -9
  19. toil/lib/aws/utils.py +22 -33
  20. toil/lib/checksum.py +88 -0
  21. toil/lib/conversions.py +33 -31
  22. toil/lib/directory.py +217 -0
  23. toil/lib/ec2.py +97 -29
  24. toil/lib/exceptions.py +2 -1
  25. toil/lib/expando.py +2 -2
  26. toil/lib/generatedEC2Lists.py +73 -16
  27. toil/lib/io.py +33 -2
  28. toil/lib/memoize.py +21 -7
  29. toil/lib/pipes.py +385 -0
  30. toil/lib/retry.py +1 -1
  31. toil/lib/threading.py +1 -1
  32. toil/lib/web.py +4 -5
  33. toil/provisioners/__init__.py +5 -2
  34. toil/provisioners/aws/__init__.py +43 -36
  35. toil/provisioners/aws/awsProvisioner.py +22 -13
  36. toil/provisioners/node.py +60 -12
  37. toil/resource.py +3 -13
  38. toil/test/__init__.py +14 -16
  39. toil/test/batchSystems/test_slurm.py +103 -14
  40. toil/test/cwl/staging_cat.cwl +27 -0
  41. toil/test/cwl/staging_make_file.cwl +25 -0
  42. toil/test/cwl/staging_workflow.cwl +43 -0
  43. toil/test/cwl/zero_default.cwl +61 -0
  44. toil/test/docs/scripts/tutorial_staging.py +17 -8
  45. toil/test/jobStores/jobStoreTest.py +23 -133
  46. toil/test/lib/aws/test_iam.py +7 -7
  47. toil/test/lib/aws/test_s3.py +30 -33
  48. toil/test/lib/aws/test_utils.py +9 -9
  49. toil/test/provisioners/aws/awsProvisionerTest.py +59 -6
  50. toil/test/src/autoDeploymentTest.py +2 -3
  51. toil/test/src/fileStoreTest.py +89 -87
  52. toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
  53. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
  54. toil/test/utils/toilKillTest.py +35 -28
  55. toil/test/wdl/md5sum/md5sum.json +1 -1
  56. toil/test/wdl/wdltoil_test.py +98 -38
  57. toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
  58. toil/utils/toilDebugFile.py +6 -3
  59. toil/utils/toilStats.py +17 -2
  60. toil/version.py +6 -6
  61. toil/wdl/wdltoil.py +1032 -546
  62. toil/worker.py +5 -2
  63. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/METADATA +12 -12
  64. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/RECORD +68 -61
  65. toil/lib/iterables.py +0 -112
  66. toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
  67. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/WHEEL +0 -0
  68. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
  69. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
  70. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
@@ -41,7 +41,7 @@ logger = logging.getLogger(__name__)
41
41
 
42
42
 
43
43
  WDL_CONFORMANCE_TEST_REPO = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
44
- WDL_CONFORMANCE_TEST_COMMIT = "baf44bcc7e6f6927540adf77d91b26a5558ae4b7"
44
+ WDL_CONFORMANCE_TEST_COMMIT = "46b5f85ee38ec60d0b8b9c35928b5104a2af83d5"
45
45
  # These tests are known to require things not implemented by
46
46
  # Toil and will not be run in CI.
47
47
  WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL = [
@@ -110,6 +110,15 @@ class TestWDLConformance:
110
110
  "Failed process standard error: %s",
111
111
  p.stderr.decode("utf-8", errors="replace"),
112
112
  )
113
+ else:
114
+ logger.debug(
115
+ "Successful process standard output: %s",
116
+ p.stdout.decode("utf-8", errors="replace"),
117
+ )
118
+ logger.debug(
119
+ "Successful process standard error: %s",
120
+ p.stderr.decode("utf-8", errors="replace"),
121
+ )
113
122
 
114
123
  p.check_returncode()
115
124
 
@@ -195,6 +204,30 @@ class TestWDLConformance:
195
204
 
196
205
  self.check(p)
197
206
 
207
+ # estimated running time: 10 minutes (once all the appropriate tests get
208
+ # marked as "development")
209
+ @slow
210
+ def test_conformance_tests_development(self, wdl_conformance_test_repo: Path) -> None:
211
+ os.chdir(wdl_conformance_test_repo)
212
+ commands = [
213
+ exactPython,
214
+ "run.py",
215
+ "--runner",
216
+ "toil-wdl-runner",
217
+ "--conformance-file",
218
+ "conformance.yaml",
219
+ "-v",
220
+ "development",
221
+ ]
222
+ if WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL:
223
+ commands.append("--exclude-numbers")
224
+ commands.append(
225
+ ",".join([str(t) for t in WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL])
226
+ )
227
+ p = subprocess.run(commands, capture_output=True)
228
+
229
+ self.check(p)
230
+
198
231
  @slow
199
232
  def test_conformance_tests_integration(
200
233
  self, wdl_conformance_test_repo: Path
@@ -856,25 +889,29 @@ class TestWDL:
856
889
  env["TOIL_DOCKSTORE_TOKEN"] = "99cf5578ebe94b194d7864630a86258fa3d6cedcc17d757b5dd49e64ee3b68c3"
857
890
  # Enable history for when <https://github.com/DataBiosphere/toil/pull/5258> merges
858
891
  env["TOIL_HISTORY"] = "True"
892
+
893
+ try:
894
+ output_log = subprocess.check_output(
895
+ self.base_command
896
+ + [
897
+ wdl_file,
898
+ json_input,
899
+ "--logDebug",
900
+ "-o",
901
+ str(tmp_path),
902
+ "--outputDialect",
903
+ "miniwdl",
904
+ "--publishWorkflowMetrics=current",
905
+ ]
906
+ + (extra_args or []),
907
+ stderr=subprocess.STDOUT,
908
+ env=env,
909
+ ).decode("utf-8", errors="replace")
910
+ except subprocess.CalledProcessError as e:
911
+ logger.error("Test run of Toil failed: %s", e.stdout.decode("utf-8", errors="replace"))
912
+ raise
859
913
 
860
- output_log = subprocess.check_output(
861
- self.base_command
862
- + [
863
- wdl_file,
864
- json_input,
865
- "--logDebug",
866
- "-o",
867
- str(tmp_path),
868
- "--outputDialect",
869
- "miniwdl",
870
- "--publishWorkflowMetrics=current",
871
- ]
872
- + (extra_args or []),
873
- stderr=subprocess.STDOUT,
874
- env=env,
875
- )
876
-
877
- assert b'Workflow metrics were accepted by Dockstore.' in output_log
914
+ assert "Workflow metrics were accepted by Dockstore." in output_log, f"No acceptance message in log: {output_log}"
878
915
 
879
916
  @slow
880
917
  @needs_docker_cuda
@@ -1138,34 +1175,57 @@ class TestWDLToilBench(unittest.TestCase):
1138
1175
  """
1139
1176
 
1140
1177
  from toil.wdl.wdltoil import (
1141
- DirectoryNamingStateDict,
1142
1178
  choose_human_readable_directory,
1143
1179
  )
1144
1180
 
1145
- state: DirectoryNamingStateDict = {}
1146
-
1147
- # The first time we should get a path with the task name and without the ID
1181
+ # The first time we should get a path with the task name
1148
1182
  first_chosen = choose_human_readable_directory(
1149
- "root", "taskname", "111-222-333", state
1183
+ "root", "taskname", "https://example.com/some/directory"
1150
1184
  )
1151
1185
  assert first_chosen.startswith("root")
1152
- assert "taskname" in first_chosen
1153
- assert "111-222-333" not in first_chosen
1154
1186
 
1155
- # If we use the same ID we should get the same result
1156
- same_id = choose_human_readable_directory(
1157
- "root", "taskname", "111-222-333", state
1187
+ # If we use the same parent we should get the same result
1188
+ same_parent = choose_human_readable_directory(
1189
+ "root", "taskname", "https://example.com/some/directory"
1190
+ )
1191
+ assert same_parent == first_chosen
1192
+
1193
+ # If we use a lower parent with a URL, we do not necessarily need to be
1194
+ # inside the higher parent.
1195
+
1196
+ # If we use a URL with a creative number of slashes, it should be distinct.
1197
+ slash_parent = choose_human_readable_directory(
1198
+ "root", "taskname", "https://example.com/some/directory//////"
1199
+ )
1200
+ assert slash_parent != first_chosen
1201
+
1202
+ # If we use the same parent URL but a different task we should get the same result
1203
+ other_task = choose_human_readable_directory(
1204
+ "root", "taskname2", "https://example.com/some/directory"
1205
+ )
1206
+ assert other_task == first_chosen
1207
+
1208
+ # If we use a different parent we should get a different result still obeying the constraints
1209
+ diff_parent = choose_human_readable_directory(
1210
+ "root", "taskname", "/data/tmp/files/somewhere"
1211
+ )
1212
+ assert diff_parent != first_chosen
1213
+ assert diff_parent.startswith("root")
1214
+ assert "taskname" in diff_parent
1215
+
1216
+ # If we use a subpath parent with a filename we should get a path inside it.
1217
+ diff_parent_subpath = choose_human_readable_directory(
1218
+ "root", "taskname", "/data/tmp/files/somewhere/else"
1158
1219
  )
1159
- assert same_id == first_chosen
1220
+ assert os.path.dirname(diff_parent_subpath) == diff_parent
1160
1221
 
1161
- # If we use a different ID we should get a different result still obeying the constraints
1162
- diff_id = choose_human_readable_directory(
1163
- "root", "taskname", "222-333-444", state
1222
+ # If we use the same parent path but a different task we should get a different result.
1223
+ other_task_directory = choose_human_readable_directory(
1224
+ "root", "taskname2", "/data/tmp/files/somewhere"
1164
1225
  )
1165
- assert diff_id != first_chosen
1166
- assert diff_id.startswith("root")
1167
- assert "taskname" in diff_id
1168
- assert "222-333-444" not in diff_id
1226
+ assert other_task_directory != diff_parent
1227
+ assert other_task_directory.startswith("root")
1228
+ assert "taskname2" in other_task_directory
1169
1229
 
1170
1230
  def test_uri_packing(self) -> None:
1171
1231
  """
@@ -1181,7 +1241,7 @@ class TestWDLToilBench(unittest.TestCase):
1181
1241
  file_basename = "thefile.txt"
1182
1242
 
1183
1243
  # Pack and unpack it
1184
- uri = pack_toil_uri(file_id, task_path, dir_id, file_basename)
1244
+ uri = pack_toil_uri(file_id, task_path, str(dir_id), file_basename)
1185
1245
  unpacked = unpack_toil_uri(uri)
1186
1246
 
1187
1247
  # Make sure we got what we put in
@@ -1,6 +1,7 @@
1
1
  import unittest
2
2
  from uuid import uuid4
3
3
 
4
+ import logging
4
5
  import pytest
5
6
 
6
7
  from toil.provisioners import cluster_factory
@@ -12,6 +13,8 @@ from toil.test.wdl.wdltoil_test import (
12
13
  )
13
14
 
14
15
 
16
+ logger = logging.getLogger(__name__)
17
+
15
18
  @integrative
16
19
  @slow
17
20
  @pytest.mark.timeout(1800)
@@ -52,6 +55,7 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
52
55
  workflow that performs an image pull on the worker.
53
56
  :return:
54
57
  """
58
+
55
59
  self.numWorkers = "1"
56
60
  self.requestedLeaderStorage = 30
57
61
  # create the cluster
@@ -64,6 +68,8 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
64
68
 
65
69
  wdl_dir = "wdl_conformance_tests"
66
70
 
71
+ logger.info("Cloning WDL tests onto cluster...")
72
+
67
73
  # get the wdl-conformance-tests repo to get WDL tasks to run
68
74
  self.sshUtil(
69
75
  [
@@ -78,6 +84,9 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
78
84
 
79
85
  # run WDL workflow that will run singularity
80
86
  test_options = [f"tests/md5sum/md5sum.wdl", f"tests/md5sum/md5sum.json"]
87
+
88
+ logger.info("Running workflow...")
89
+
81
90
  self.sshUtil(
82
91
  [
83
92
  "bash",
@@ -47,12 +47,12 @@ def fetchJobStoreFiles(jobStore: FileJobStore, options: argparse.Namespace) -> N
47
47
  jobStoreHits = glob(directoryname=options.jobStore, glob_pattern=jobStoreFile)
48
48
  for jobStoreFileID in jobStoreHits:
49
49
  logger.debug(
50
- f"Copying job store file: {jobStoreFileID} to {options.localFilePath[0]}"
50
+ f"Copying job store file: {jobStoreFileID} to {options.localFilePath}"
51
51
  )
52
52
  jobStore.read_file(
53
53
  jobStoreFileID,
54
54
  os.path.join(
55
- options.localFilePath[0], os.path.basename(jobStoreFileID)
55
+ options.localFilePath, os.path.basename(jobStoreFileID)
56
56
  ),
57
57
  symlink=options.useSymlinks,
58
58
  )
@@ -97,7 +97,10 @@ def printContentsOfJobStore(
97
97
  def main() -> None:
98
98
  parser = parser_with_common_options(jobstore_option=True, prog="toil debug-file")
99
99
  parser.add_argument(
100
- "--localFilePath", nargs=1, help="Location to which to copy job store files."
100
+ "--localFilePath",
101
+ type=str,
102
+ default=".",
103
+ help="Location to which to copy job store files."
101
104
  )
102
105
  parser.add_argument(
103
106
  "--fetch",
toil/utils/toilStats.py CHANGED
@@ -326,6 +326,8 @@ def sprint_tag(
326
326
  out_str += header + "\n"
327
327
  out_str += sub_header + "\n"
328
328
  out_str += tag_str + "\n"
329
+ if tag.excess_cpu > 0:
330
+ out_str += f" ({tag.excess_cpu} used more CPU than requested!)\n"
329
331
  return out_str
330
332
 
331
333
 
@@ -507,13 +509,25 @@ def build_element(
507
509
  float(item.get(category_key, defaults[category])), category
508
510
  )
509
511
  values.append(category_value)
510
-
512
+
513
+ excess_cpu_items = 0
511
514
  for index in range(0, len(item_values[CATEGORIES[0]])):
512
515
  # For each item, compute the computed categories
513
- item_values["wait"].append(
516
+
517
+ # Compute wait time (allocated CPU time wasted).
518
+ # Note that if any item uses *more* CPU cores than requested, at any
519
+ # time, that decreases the amount of wait we're able to see from that
520
+ # item. If it happens a lot, our computed wait could go negative, so we
521
+ # bound it below at 0.
522
+ wait_value = (
514
523
  item_values["time"][index] * item_values["cores"][index]
515
524
  - item_values["clock"][index]
516
525
  )
526
+ if wait_value < 0:
527
+ # Remember an item used more CPU than allocated.
528
+ excess_cpu_items += 1
529
+ wait_value = 0
530
+ item_values["wait"].append(wait_value)
517
531
 
518
532
  for category, values in item_values.items():
519
533
  values.sort()
@@ -531,6 +545,7 @@ def build_element(
531
545
  item_element["average_" + category] = float(sum(values) / len(values))
532
546
  item_element["min_" + category] = float(min(values))
533
547
  item_element["max_" + category] = float(max(values))
548
+ item_element["excess_cpu"] = excess_cpu_items
534
549
 
535
550
  element[item_name] = item_element
536
551
 
toil/version.py CHANGED
@@ -1,14 +1,14 @@
1
- baseVersion = '9.0.0'
1
+ baseVersion = '9.1.0'
2
2
  cgcloudVersion = '1.6.0a1.dev393'
3
- version = '9.0.0-fa1e35a17670e1927036914ca624bfd591f8349c'
3
+ version = '9.1.0-e341bb669efe78f93308e5ff1f02f7e375973511'
4
4
  cacheTag = 'cache-local-py3.9'
5
5
  mainCacheTag = 'cache-master-py3.9'
6
- distVersion = '9.0.0'
6
+ distVersion = '9.1.0'
7
7
  exactPython = 'python3.9'
8
8
  python = 'python3.9'
9
- dockerTag = '9.0.0-fa1e35a17670e1927036914ca624bfd591f8349c-py3.9'
10
- currentCommit = 'fa1e35a17670e1927036914ca624bfd591f8349c'
9
+ dockerTag = '9.1.0-e341bb669efe78f93308e5ff1f02f7e375973511-py3.9'
10
+ currentCommit = 'e341bb669efe78f93308e5ff1f02f7e375973511'
11
11
  dockerRegistry = 'quay.io/ucsc_cgl'
12
12
  dockerName = 'toil'
13
13
  dirty = False
14
- cwltool_version = '3.1.20250110105449'
14
+ cwltool_version = '3.1.20250715140722'