PyPI - toil - Versions diffs - 8.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl - Mend

toil 8.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

toil/__init__.py +4 -4
toil/batchSystems/options.py +1 -0
toil/batchSystems/slurm.py +227 -83
toil/common.py +161 -45
toil/cwl/cwltoil.py +31 -10
toil/job.py +47 -38
toil/jobStores/aws/jobStore.py +46 -10
toil/lib/aws/session.py +14 -3
toil/lib/aws/utils.py +92 -35
toil/lib/dockstore.py +379 -0
toil/lib/ec2nodes.py +3 -2
toil/lib/history.py +1271 -0
toil/lib/history_submission.py +681 -0
toil/lib/io.py +22 -1
toil/lib/misc.py +18 -0
toil/lib/retry.py +10 -10
toil/lib/{integration.py → trs.py} +95 -46
toil/lib/web.py +38 -0
toil/options/common.py +17 -2
toil/options/cwl.py +10 -0
toil/provisioners/gceProvisioner.py +4 -4
toil/server/cli/wes_cwl_runner.py +3 -3
toil/server/utils.py +2 -3
toil/statsAndLogging.py +35 -1
toil/test/batchSystems/test_slurm.py +172 -2
toil/test/cwl/conftest.py +39 -0
toil/test/cwl/cwlTest.py +105 -2
toil/test/cwl/optional-file.cwl +18 -0
toil/test/lib/test_history.py +212 -0
toil/test/lib/test_trs.py +161 -0
toil/test/wdl/wdltoil_test.py +1 -1
toil/version.py +10 -10
toil/wdl/wdltoil.py +23 -9
toil/worker.py +113 -33
{toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/METADATA +9 -4
{toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/RECORD +40 -34
{toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
toil/test/lib/test_integration.py +0 -104
{toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
{toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
{toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0

toil/test/lib/test_history.py ADDED Viewed

@@ -0,0 +1,212 @@
+# Copyright (C) 2015-2025 Regents of the University of California
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import logging
+import pytest
+import time
+from toil.test import ToilTest
+from toil.lib.history import HistoryManager
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.DEBUG)
+class HistoryTest(ToilTest):
+    """
+    Tests for Toil history tracking.
+    Each test gets its own history database.
+    """
+    def setUp(self) -> None:
+        super().setUp()
+        # Apply a temp dir override to history tracking
+        temp_dir = self._createTempDir()
+        HistoryManager.database_path_override = os.path.join(temp_dir, "test-db.sqlite")
+        # Flag on job history tracking
+        self.original_flag = HistoryManager.JOB_HISTORY_ENABLED
+        HistoryManager.JOB_HISTORY_ENABLED = True
+    def tearDown(self) -> None:
+        # Remove the temp dir override from history tracking
+        HistoryManager.database_path_override = None
+        # Restore job history tracking flag
+        HistoryManager.JOB_HISTORY_ENABLED = self.original_flag
+        super().tearDown()
+    def make_fake_workflow(self, workflow_id: str) -> None:
+        # Make a fake workflow
+        workflow_jobstore_spec = "file:/tmp/tree"
+        HistoryManager.record_workflow_creation(workflow_id, workflow_jobstore_spec)
+        workflow_name = "SuperCoolWF"
+        workflow_trs_spec = "#wf:v1"
+        HistoryManager.record_workflow_metadata(workflow_id, workflow_name, workflow_trs_spec)
+        # Give it a job
+        workflow_attempt_number = 1
+        job_name = "DoThing"
+        succeeded = True
+        start_time = time.time()
+        runtime = 0.1
+        HistoryManager.record_job_attempt(
+            workflow_id,
+            workflow_attempt_number,
+            job_name,
+            succeeded,
+            start_time,
+            runtime,
+        )
+        # Give it a workflow attempt with the same details.
+        HistoryManager.record_workflow_attempt(
+            workflow_id,
+            workflow_attempt_number,
+            succeeded,
+            start_time,
+            runtime,
+        )
+    def test_history_submittable_detection(self) -> None:
+        """
+        Make sure that a submittable workflow shows up as such before
+        submission and doesn't afterward.
+        """
+        workflow_id = "123"
+        self.make_fake_workflow(workflow_id)
+        workflow_attempt_number = 1
+        # Make sure we have data
+        self.assertEqual(HistoryManager.count_workflows(), 1)
+        self.assertEqual(HistoryManager.count_workflow_attempts(), 1)
+        self.assertEqual(HistoryManager.count_job_attempts(), 1)
+        # Make sure we see it as submittable
+        submittable_workflow_attempts = HistoryManager.get_submittable_workflow_attempts()
+        self.assertEqual(len(submittable_workflow_attempts), 1)
+        # Make sure we see its jobs as submittable
+        with_submittable_job_attempts = HistoryManager.get_workflow_attempts_with_submittable_job_attempts()
+        self.assertEqual(len(with_submittable_job_attempts), 1)
+        # Make sure we actually see the job
+        submittable_job_attempts = HistoryManager.get_unsubmitted_job_attempts(workflow_id, workflow_attempt_number)
+        self.assertEqual(len(submittable_job_attempts), 1)
+        # Pretend we submitted them.
+        HistoryManager.mark_job_attempts_submitted([j.id for j in submittable_job_attempts])
+        HistoryManager.mark_workflow_attempt_submitted(workflow_id, workflow_attempt_number)
+        # Make sure they are no longer matching
+        self.assertEqual(len(HistoryManager.get_submittable_workflow_attempts()), 0)
+        self.assertEqual(len(HistoryManager.get_workflow_attempts_with_submittable_job_attempts()), 0)
+        self.assertEqual(len(HistoryManager.get_unsubmitted_job_attempts(workflow_id, workflow_attempt_number)), 0)
+        # Make sure we still have data
+        self.assertEqual(HistoryManager.count_workflows(), 1)
+        self.assertEqual(HistoryManager.count_workflow_attempts(), 1)
+        self.assertEqual(HistoryManager.count_job_attempts(), 1)
+    def test_history_deletion(self) -> None:
+        workflow_id = "123"
+        self.make_fake_workflow(workflow_id)
+        workflow_attempt_number = 1
+        # Make sure we can see the workflow for deletion by age but not by done-ness
+        self.assertEqual(len(HistoryManager.get_oldest_workflow_ids()), 1)
+        self.assertEqual(len(HistoryManager.get_fully_submitted_workflow_ids()), 0)
+        # Pretend we submitted the workflow.
+        HistoryManager.mark_job_attempts_submitted([j.id for j in HistoryManager.get_unsubmitted_job_attempts(workflow_id, workflow_attempt_number)])
+        HistoryManager.mark_workflow_attempt_submitted(workflow_id, workflow_attempt_number)
+        # Make sure we can see the workflow for deletion by done-ness
+        self.assertEqual(len(HistoryManager.get_fully_submitted_workflow_ids()), 1)
+        # Add a new workflow
+        other_workflow_id = "456"
+        self.make_fake_workflow(other_workflow_id)
+        # Make sure we can see the both for deletion by age but only one by done-ness
+        self.assertEqual(len(HistoryManager.get_oldest_workflow_ids()), 2)
+        self.assertEqual(len(HistoryManager.get_fully_submitted_workflow_ids()), 1)
+        # Make sure the older workflow is first.
+        self.assertEqual(HistoryManager.get_oldest_workflow_ids(), [workflow_id, other_workflow_id])
+        # Delete the new workflow
+        HistoryManager.delete_workflow(other_workflow_id)
+        # Make sure we can see the old one
+        self.assertEqual(HistoryManager.get_oldest_workflow_ids(), [workflow_id])
+        self.assertEqual(HistoryManager.get_fully_submitted_workflow_ids(), [workflow_id])
+        # Delete the old workflow
+        HistoryManager.delete_workflow(workflow_id)
+        # Make sure we have no data
+        self.assertEqual(HistoryManager.count_workflows(), 0)
+        self.assertEqual(HistoryManager.count_workflow_attempts(), 0)
+        self.assertEqual(HistoryManager.count_job_attempts(), 0)
+    def test_history_size_limit(self) -> None:
+        """
+        Make sure the database size can be controlled.
+        """
+        for workflow_id in ("WorkflowThatTakesUpSomeSpace,ActuallyMoreThanTheLaterOnesTake" + str(i) for i in range(10)):
+            self.make_fake_workflow(workflow_id)
+        # We should see the workflows.
+        self.assertEqual(HistoryManager.count_workflows(), 10)
+        # And they take up space.
+        small_size = HistoryManager.get_database_byte_size()
+        self.assertGreater(small_size, 0)
+        # Add a bunch more
+        for workflow_id in ("WorkflowThatTakesUpSpace" + str(i) for i in range(50)):
+            self.make_fake_workflow(workflow_id)
+        # We should see that this is now a much larger database
+        large_size = HistoryManager.get_database_byte_size()
+        logger.info("Increased database size from %s to %s", small_size, large_size)
+        self.assertGreater(large_size, small_size)
+        # We should be able to shrink it back down
+        HistoryManager.enforce_byte_size_limit(small_size)
+        reduced_size = HistoryManager.get_database_byte_size()
+        logger.info("Decreased database size from %s to %s", large_size, reduced_size)
+        # The database should be small enough
+        self.assertLessEqual(reduced_size, small_size)
+        # There should still be some workflow attempts left in the smaller database (though probably not the first ones)
+        remaining_workflows = HistoryManager.count_workflows()
+        logger.info("Still have %s workflows", remaining_workflows)
+        self.assertGreater(remaining_workflows, 0)

toil/test/lib/test_trs.py ADDED Viewed

@@ -0,0 +1,161 @@
+# Copyright (C) 2015-2024 Regents of the University of California
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import io
+import logging
+import pytest
+from typing import IO
+import urllib.request
+from urllib.error import URLError
+from toil.lib.retry import retry
+from toil.lib.trs import find_workflow, fetch_workflow
+from toil.test import ToilTest, needs_online
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.DEBUG)
+@pytest.mark.integrative
+@needs_online
+class DockstoreLookupTest(ToilTest):
+    """
+    Make sure we can look up workflows on Dockstore.
+    """
+    @retry(errors=[URLError, RuntimeError])
+    def read_result(self, url_or_path: str) -> IO[bytes]:
+        """
+        Read a file or URL.
+        Binary mode to allow testing for binary file support.
+        This lets us test that we have the right workflow contents and not care
+        how we are being shown them.
+        """
+        if url_or_path.startswith("http://") or url_or_path.startswith("https://"):
+            response = urllib.request.urlopen(url_or_path)
+            if response.status != 200:
+                raise RuntimeError(f"HTTP error response: {response}")
+            return response
+        else:
+            return open(url_or_path, "rb")
+    # TODO: Tests that definitely test a clear cache
+    def test_lookup_from_page_url(self) -> None:
+        PAGE_URL = "https://dockstore.org/workflows/github.com/dockstore/bcc2020-training/HelloWorld:master?tab=info"
+        trs_id, trs_version, language = find_workflow(PAGE_URL)
+        self.assertEqual(trs_id, "#workflow/github.com/dockstore/bcc2020-training/HelloWorld")
+        self.assertEqual(trs_version, "master")
+        self.assertEqual(language, "WDL")
+    def test_lookup_from_trs_with_version(self) -> None:
+        TRS_ID = "#workflow/github.com/dockstore-testing/md5sum-checker"
+        TRS_VERSION = "master"
+        trs_id, trs_version, language = find_workflow(f"{TRS_ID}:{TRS_VERSION}")
+        self.assertEqual(trs_id, TRS_ID)
+        self.assertEqual(trs_version, TRS_VERSION)
+        self.assertEqual(language, "CWL")
+    def test_lookup_from_trs_no_version(self) -> None:
+        TRS_ID = "#workflow/github.com/dockstore-testing/md5sum-checker"
+        with pytest.raises(ValueError):
+            # We don't yet have a way to read Dockstore's default version info,
+            # so it's not safe to apply any default version when multiple
+            # versions exist.
+            trs_id, trs_version, language = find_workflow(TRS_ID)
+    # TODO: Add a test with a workflow that we know has and will only ever
+    # have one version, to test version auto-detection in that case.
+    def test_get(self) -> None:
+        TRS_ID = "#workflow/github.com/dockstore-testing/md5sum-checker"
+        TRS_VERSION = "master"
+        LANGUAGE = "CWL"
+        # Despite "-checker" in the ID, this actually refers to the base md5sum
+        # workflow that just happens to have a checker *available*, not to the
+        # checker workflow itself.
+        WORKFLOW_URL = "https://raw.githubusercontent.com/dockstore-testing/md5sum-checker/master/md5sum/md5sum-workflow.cwl"
+        looked_up = fetch_workflow(TRS_ID, TRS_VERSION, LANGUAGE)
+        data_from_lookup = self.read_result(looked_up).read()
+        data_from_source = self.read_result(WORKFLOW_URL).read()
+        self.assertEqual(data_from_lookup, data_from_source)
+    def test_get_from_trs_cached(self) -> None:
+        TRS_ID = "#workflow/github.com/dockstore-testing/md5sum-checker"
+        TRS_VERSION = "master"
+        LANGUAGE = "CWL"
+        WORKFLOW_URL = "https://raw.githubusercontent.com/dockstore-testing/md5sum-checker/master/md5sum/md5sum-workflow.cwl"
+        # This lookup may or may not be cached
+        fetch_workflow(TRS_ID, TRS_VERSION, LANGUAGE)
+        # This lookup is definitely cached
+        looked_up = fetch_workflow(TRS_ID, TRS_VERSION, LANGUAGE)
+        data_from_lookup = self.read_result(looked_up).read()
+        data_from_source = self.read_result(WORKFLOW_URL).read()
+        self.assertEqual(data_from_lookup, data_from_source)
+    def test_lookup_from_trs_with_version(self) -> None:
+        TRS_VERSIONED_ID = "#workflow/github.com/dockstore-testing/md5sum-checker:workflowWithHTTPImport"
+        trs_id, trs_version, language = find_workflow(TRS_VERSIONED_ID)
+        parts = TRS_VERSIONED_ID.split(":")
+        self.assertEqual(trs_id, parts[0])
+        self.assertEqual(trs_version, parts[1])
+        self.assertEqual(language, "CWL")
+    def test_lookup_from_trs_nonexistent_workflow(self) -> None:
+        TRS_VERSIONED_ID = "#workflow/github.com/adamnovak/veryfakerepo:notARealVersion"
+        with self.assertRaises(FileNotFoundError):
+            looked_up = find_workflow(TRS_VERSIONED_ID)
+    def test_lookup_from_trs_nonexistent_workflow_bad_format(self) -> None:
+        TRS_VERSIONED_ID = "#workflow/AbsoluteGarbage:notARealVersion"
+        with self.assertRaises(FileNotFoundError):
+            looked_up = find_workflow(TRS_VERSIONED_ID)
+    def test_lookup_from_trs_nonexistent_version(self) -> None:
+        TRS_VERSIONED_ID = "#workflow/github.com/dockstore-testing/md5sum-checker:notARealVersion"
+        with self.assertRaises(FileNotFoundError):
+            looked_up = find_workflow(TRS_VERSIONED_ID)
+    def test_get_nonexistent_workflow(self) -> None:
+        TRS_ID = "#workflow/github.com/adamnovak/veryfakerepo"
+        TRS_VERSION = "notARealVersion"
+        LANGUAGE = "CWL"
+        with self.assertRaises(FileNotFoundError):
+            looked_up = fetch_workflow(TRS_ID, TRS_VERSION, LANGUAGE)
+    def test_get_nonexistent_version(self) -> None:
+        TRS_ID = "#workflow/github.com/dockstore-testing/md5sum-checker"
+        TRS_VERSION = "notARealVersion"
+        LANGUAGE = "CWL"
+        with self.assertRaises(FileNotFoundError):
+            looked_up = fetch_workflow(TRS_ID, TRS_VERSION, LANGUAGE)
+    def test_get_nonexistent_workflow_bad_format(self) -> None:
+        # Dockstore enforces an ID pattern and blames your request if you ask
+        # about something that doesn't follow it. So don't follow it.
+        TRS_ID = "#workflow/AbsoluteGarbage"
+        TRS_VERSION = "notARealVersion"
+        LANGUAGE = "CWL"
+        with self.assertRaises(FileNotFoundError):
+            looked_up = fetch_workflow(TRS_ID, TRS_VERSION, LANGUAGE)

toil/test/wdl/wdltoil_test.py CHANGED Viewed

@@ -933,7 +933,7 @@ class WDLToilBenchTests(ToilTest):
         )
         self.assertEqual(same_id, first_chosen)
-        # If we use a different ID we shoudl get a different result still obeying the constraints
+        # If we use a different ID we should get a different result still obeying the constraints
         diff_id = choose_human_readable_directory(
             "root", "taskname", "222-333-444", state
         )

toil/version.py CHANGED Viewed

@@ -1,14 +1,14 @@
-baseVersion = '8.0.0'
+baseVersion = '8.1.0b1'
 cgcloudVersion = '1.6.0a1.dev393'
-version = '8.0.0-d2ae0ea9ab49f238670dbf6aafd20de7afdd8514'
-cacheTag = 'cache-local-py3.13'
-mainCacheTag = 'cache-master-py3.13'
-distVersion = '8.0.0'
-exactPython = 'python3.13'
-python = 'python3.13'
-dockerTag = '8.0.0-d2ae0ea9ab49f238670dbf6aafd20de7afdd8514-py3.13'
-currentCommit = 'd2ae0ea9ab49f238670dbf6aafd20de7afdd8514'
-dockerRegistry = 'quay.io/stxue'
+version = '8.1.0b1-4bb05349c027096ab4785259e39b2648118b5dd7'
+cacheTag = 'cache-local-py3.9'
+mainCacheTag = 'cache-master-py3.9'
+distVersion = '8.1.0b1'
+exactPython = 'python3.9'
+python = 'python3.9'
+dockerTag = '8.1.0b1-4bb05349c027096ab4785259e39b2648118b5dd7-py3.9'
+currentCommit = '4bb05349c027096ab4785259e39b2648118b5dd7'
+dockerRegistry = 'quay.io/ucsc_cgl'
 dockerName = 'toil'
 dirty = False
 cwltool_version = '3.1.20250110105449'

toil/wdl/wdltoil.py CHANGED Viewed

@@ -103,8 +103,8 @@ from toil.jobStores.abstractJobStore import (
 from toil.lib.exceptions import UnimplementedURLException
 from toil.lib.accelerators import get_individual_local_accelerators
 from toil.lib.conversions import VALID_PREFIXES, convert_units, human2bytes
+from toil.lib.trs import resolve_workflow
 from toil.lib.io import mkdtemp, is_any_url, is_file_url, TOIL_URI_SCHEME, is_standard_url, is_toil_url, is_remote_url
-from toil.lib.integration import resolve_workflow
 from toil.lib.memoize import memoize
 from toil.lib.misc import get_user_name
 from toil.lib.resources import ResourceMonitor
@@ -515,10 +515,14 @@ async def toil_read_source(
             # TODO: this is probably sync work that would be better as async work here
             AbstractJobStore.read_from_url(candidate_uri, destination_buffer)
         except Exception as e:
-            # TODO: we need to assume any error is just a not-found,
-            # because the exceptions thrown by read_from_url()
+            if isinstance(e, SyntaxError) or isinstance(e, NameError):
+                # These are probably actual problems with the code and not
+                # failures in reading the URL.
+                raise
+            # TODO: we need to assume in general that an error is just a
+            # not-found, because the exceptions thrown by read_from_url()
             # implementations are not specified.
-            logger.debug("Tried to fetch %s from %s but got %s", uri, candidate_uri, e)
+            logger.debug("Tried to fetch %s from %s but got %s: %s", uri, candidate_uri, type(e), e)
             continue
         # If we get here, we got it probably.
         try:
@@ -5438,17 +5442,25 @@ def main() -> None:
     )
     try:
-        with Toil(options) as toil:
+        wdl_uri, trs_spec = resolve_workflow(options.wdl_uri, supported_languages={"WDL"})
+        with Toil(options, workflow_name=trs_spec or wdl_uri, trs_spec=trs_spec) as toil:
             if options.restart:
                 output_bindings = toil.restart()
             else:
                 # TODO: Move all the input parsing outside the Toil context
                 # manager to avoid leaving a job store behind if the workflow
                 # can't start.
-                # Load the WDL document
+                # MiniWDL load code internally uses asyncio.get_event_loop()
+                # which might not get an event loop if somebody has ever called
+                # set_event_loop. So we need to make sure an event loop is
+                # available.
+                asyncio.set_event_loop(asyncio.new_event_loop())
+                # Load the WDL document.
                 document: WDL.Tree.Document = WDL.load(
-                    resolve_workflow(options.wdl_uri, supported_languages={"WDL"}),
+                    wdl_uri,
                     read_source=toil_read_source,
                 )
@@ -5564,12 +5576,14 @@ def main() -> None:
                     inputs_search_path.append(input_source_uri)
                     match = re.match(
-                        r"https://raw\.githubusercontent\.com/[^/]*/[^/]*/[^/]*/",
+                        r"https://raw\.githubusercontent\.com/[^/]*/[^/]*/(refs/heads/)?[^/]*/",
                         input_source_uri,
                     )
                     if match:
                         # Special magic for Github repos to make e.g.
                         # https://raw.githubusercontent.com/vgteam/vg_wdl/44a03d9664db3f6d041a2f4a69bbc4f65c79533f/params/giraffe.json
+                        # or
+                        # https://raw.githubusercontent.com/vgteam/vg_wdl/refs/heads/giraffedv/params/giraffe.json
                         # work when it references things relative to repo root.
                         logger.info(
                             "Inputs appear to come from a Github repository; adding repository root to file search path"

toil 8.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl

toil 8.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl