toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. toil/batchSystems/abstractBatchSystem.py +13 -5
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
  3. toil/batchSystems/kubernetes.py +13 -2
  4. toil/batchSystems/mesos/batchSystem.py +33 -2
  5. toil/batchSystems/registry.py +15 -118
  6. toil/batchSystems/slurm.py +191 -16
  7. toil/common.py +20 -1
  8. toil/cwl/cwltoil.py +97 -119
  9. toil/cwl/utils.py +103 -3
  10. toil/fileStores/__init__.py +1 -1
  11. toil/fileStores/abstractFileStore.py +5 -2
  12. toil/fileStores/cachingFileStore.py +1 -1
  13. toil/job.py +30 -14
  14. toil/jobStores/abstractJobStore.py +35 -255
  15. toil/jobStores/aws/jobStore.py +864 -1964
  16. toil/jobStores/aws/utils.py +24 -270
  17. toil/jobStores/fileJobStore.py +2 -1
  18. toil/jobStores/googleJobStore.py +32 -13
  19. toil/jobStores/utils.py +0 -327
  20. toil/leader.py +27 -22
  21. toil/lib/accelerators.py +1 -1
  22. toil/lib/aws/config.py +22 -0
  23. toil/lib/aws/s3.py +477 -9
  24. toil/lib/aws/utils.py +22 -33
  25. toil/lib/checksum.py +88 -0
  26. toil/lib/conversions.py +33 -31
  27. toil/lib/directory.py +217 -0
  28. toil/lib/ec2.py +97 -29
  29. toil/lib/exceptions.py +2 -1
  30. toil/lib/expando.py +2 -2
  31. toil/lib/generatedEC2Lists.py +138 -19
  32. toil/lib/io.py +33 -2
  33. toil/lib/memoize.py +21 -7
  34. toil/lib/misc.py +1 -1
  35. toil/lib/pipes.py +385 -0
  36. toil/lib/plugins.py +106 -0
  37. toil/lib/retry.py +1 -1
  38. toil/lib/threading.py +1 -1
  39. toil/lib/url.py +320 -0
  40. toil/lib/web.py +4 -5
  41. toil/options/cwl.py +13 -1
  42. toil/options/runner.py +17 -10
  43. toil/options/wdl.py +12 -1
  44. toil/provisioners/__init__.py +5 -2
  45. toil/provisioners/aws/__init__.py +43 -36
  46. toil/provisioners/aws/awsProvisioner.py +47 -15
  47. toil/provisioners/node.py +60 -12
  48. toil/resource.py +3 -13
  49. toil/server/app.py +12 -6
  50. toil/server/cli/wes_cwl_runner.py +2 -2
  51. toil/server/wes/abstract_backend.py +21 -43
  52. toil/server/wes/toil_backend.py +2 -2
  53. toil/test/__init__.py +16 -18
  54. toil/test/batchSystems/batchSystemTest.py +2 -9
  55. toil/test/batchSystems/batch_system_plugin_test.py +7 -0
  56. toil/test/batchSystems/test_slurm.py +103 -14
  57. toil/test/cwl/cwlTest.py +181 -8
  58. toil/test/cwl/staging_cat.cwl +27 -0
  59. toil/test/cwl/staging_make_file.cwl +25 -0
  60. toil/test/cwl/staging_workflow.cwl +43 -0
  61. toil/test/cwl/zero_default.cwl +61 -0
  62. toil/test/docs/scripts/tutorial_staging.py +17 -8
  63. toil/test/docs/scriptsTest.py +2 -1
  64. toil/test/jobStores/jobStoreTest.py +23 -133
  65. toil/test/lib/aws/test_iam.py +7 -7
  66. toil/test/lib/aws/test_s3.py +30 -33
  67. toil/test/lib/aws/test_utils.py +9 -9
  68. toil/test/lib/test_url.py +69 -0
  69. toil/test/lib/url_plugin_test.py +105 -0
  70. toil/test/provisioners/aws/awsProvisionerTest.py +60 -7
  71. toil/test/provisioners/clusterTest.py +15 -2
  72. toil/test/provisioners/gceProvisionerTest.py +1 -1
  73. toil/test/server/serverTest.py +78 -36
  74. toil/test/src/autoDeploymentTest.py +2 -3
  75. toil/test/src/fileStoreTest.py +89 -87
  76. toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
  77. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
  78. toil/test/utils/toilKillTest.py +35 -28
  79. toil/test/wdl/md5sum/md5sum-gs.json +1 -1
  80. toil/test/wdl/md5sum/md5sum.json +1 -1
  81. toil/test/wdl/testfiles/read_file.wdl +18 -0
  82. toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
  83. toil/test/wdl/wdltoil_test.py +171 -162
  84. toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
  85. toil/utils/toilDebugFile.py +6 -3
  86. toil/utils/toilSshCluster.py +23 -0
  87. toil/utils/toilStats.py +17 -2
  88. toil/utils/toilUpdateEC2Instances.py +1 -0
  89. toil/version.py +10 -10
  90. toil/wdl/wdltoil.py +1179 -825
  91. toil/worker.py +16 -8
  92. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/METADATA +32 -32
  93. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/RECORD +97 -85
  94. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/WHEEL +1 -1
  95. toil/lib/iterables.py +0 -112
  96. toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
  97. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
  98. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
  99. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
toil/cwl/utils.py CHANGED
@@ -20,11 +20,26 @@ import posixpath
20
20
  import stat
21
21
  from collections.abc import Iterable, MutableMapping, MutableSequence
22
22
  from pathlib import PurePosixPath
23
- from typing import Any, Callable, TypeVar, Union
24
-
23
+ from typing import (
24
+ Any,
25
+ Callable,
26
+ TypeVar,
27
+ Union,
28
+ Optional,
29
+ cast,
30
+ MutableSequence,
31
+ MutableMapping,
32
+ TYPE_CHECKING,
33
+ )
34
+ from urllib.parse import unquote, urlparse
35
+
36
+ if TYPE_CHECKING:
37
+ # This module needs to be importable even if cwltool is not installed.
38
+ from cwltool.utils import CWLObjectType, CWLOutputType
25
39
  from toil.fileStores import FileID
26
40
  from toil.fileStores.abstractFileStore import AbstractFileStore
27
41
  from toil.jobStores.abstractJobStore import AbstractJobStore
42
+ from toil.lib.url import URLAccess
28
43
 
29
44
  logger = logging.getLogger(__name__)
30
45
 
@@ -208,7 +223,7 @@ def download_structure(
208
223
  )
209
224
  else:
210
225
  # We need to download from some other kind of URL.
211
- size, executable = AbstractJobStore.read_from_url(
226
+ size, executable = URLAccess.read_from_url(
212
227
  value, open(dest_path, "wb")
213
228
  )
214
229
  if executable:
@@ -219,3 +234,88 @@ def download_structure(
219
234
  # TODO: why?
220
235
  index[dest_path] = value
221
236
  existing[value] = dest_path
237
+
238
+
239
+ def trim_mounts_op_down(file_or_directory: "CWLObjectType") -> None:
240
+ """
241
+ No-op function for mount-point trimming.
242
+ """
243
+ return
244
+
245
+
246
+ def sniff_location(file_or_directory: "CWLObjectType") -> Optional[str]:
247
+ """
248
+ Get the local bare path for a CWL file or directory, or None.
249
+
250
+ :return: None if we don't have a local path or file URI
251
+ """
252
+ if file_or_directory.get('location') is None and file_or_directory.get('path') is None:
253
+ # file or directory is defined by contents or listing respectively, this is not redundant
254
+ return None
255
+ # Since we only consider mountable paths, if path is not file URI or bare path, don't consider it
256
+ path_or_url = cast(str, file_or_directory.get('location') or file_or_directory.get('path'))
257
+ parsed = urlparse(path_or_url)
258
+ if parsed.scheme == 'file':
259
+ return unquote(parsed.path)
260
+ elif parsed.scheme == '':
261
+ return path_or_url
262
+ else:
263
+ return None
264
+
265
+
266
+ def trim_mounts_op_up(file_or_directory: "CWLObjectType", op_down_ret: None, child_results: list[bool]) -> bool:
267
+ """
268
+ Remove subtrees of the CWL file or directory object tree that only have redundant stuff in them.
269
+
270
+ Nonredundant for something in a directory means its path or location is not within the parent directory or doesn't match its basename
271
+ Nonredundant for something in a secondary file means its path or location is not adjacent to the primary file or doesn't match its basename
272
+
273
+ If on a File:
274
+ Returns True if anything in secondary files is nonredundant or has nonredundant children to this file, false otherwise
275
+ If on a Directory:
276
+ Returns True if anything in top level listing is nonredundant or has nonredundant children, otherwise false.
277
+ If something in the listing is redundant and all children are redundant, then delete it
278
+ :param file_or_directory: CWL file or CWL directory type
279
+ :return: boolean
280
+ """
281
+ own_path = sniff_location(file_or_directory)
282
+ if own_path is None:
283
+ return True
284
+ # basename should be set as we are the implementation
285
+ own_basename = cast(str, file_or_directory['basename'])
286
+
287
+ # If the basename does not match the path, then this is nonredundant
288
+ if not own_path.endswith("/" + own_basename):
289
+ return True
290
+
291
+ if file_or_directory['class'] == 'File':
292
+ if any(child_results):
293
+ # one of the children was detected as not redundant
294
+ return True
295
+ for secondary in cast(MutableSequence[MutableMapping[str, "CWLOutputType"]], file_or_directory.get('secondaryFiles', [])):
296
+ # secondary files should already be flagged nonredundant if they don't have either a path or location
297
+ secondary_path = sniff_location(secondary)
298
+ secondary_basename = cast(str, secondary['basename'])
299
+ # If we swap the secondary basename for the primary basename in the primary path, and they don't match, then they are nonredundant
300
+ if os.path.join(own_path[:-len(own_basename)], secondary_basename) != secondary_path:
301
+ return True
302
+ else:
303
+ listings = cast(MutableSequence[MutableMapping[str, "CWLOutputType"]], file_or_directory.get('listing', []))
304
+ if len(listings) == 0:
305
+ return False
306
+ # We assume child_results is in the same order as the directory listing
307
+ # iterate backwards to avoid iteration issues
308
+ for i in range(len(listings) - 1, -1, -1):
309
+ if child_results[i] is False:
310
+ if os.path.join(own_path, cast(str, listings[i]['basename'])) == sniff_location(listings[i]):
311
+ del listings[i]
312
+ # If one of the listings was nonredundant, then this directory is also nonredundant
313
+ if any(child_results):
314
+ return True
315
+ return False
316
+
317
+ def remove_redundant_mounts(cwljob: "CWLObjectType") -> None:
318
+ """
319
+ Remove any redundant mount points from the listing. Modifies the CWL object in place.
320
+ """
321
+ visit_cwl_class_and_reduce(cwljob, ["Directory", "File"], trim_mounts_op_down, trim_mounts_op_up)
@@ -28,7 +28,7 @@ class FileID(str):
28
28
  the job store if unavailable in the ID.
29
29
  """
30
30
 
31
- def __new__(cls, fileStoreID: str, *args: Any) -> "FileID":
31
+ def __new__(cls, fileStoreID: str, *args: Any, **kwargs: dict[str, Any]) -> "FileID":
32
32
  return super().__new__(cls, fileStoreID)
33
33
 
34
34
  def __init__(self, fileStoreID: str, size: int, executable: bool = False) -> None:
@@ -671,13 +671,16 @@ class AbstractFileStore(ABC):
671
671
  Send a logging message to the leader. The message will also be \
672
672
  logged by the worker at the same level.
673
673
 
674
+ Does not depend on the commit system, so this is safe to use during an
675
+ asynchronous commit, or without a commit afterward.
676
+
674
677
  :param text: The string to log.
675
678
  :param level: The logging level.
676
679
  """
677
- logger.log(level=level, msg=("LOG-TO-MASTER: " + text))
680
+ logger.log(level=level, msg=("LOG-TO-LEADER: " + text))
678
681
  self.logging_messages.append(dict(text=text, level=level))
679
682
 
680
- @deprecated(new_function_name="export_file")
683
+ @deprecated(new_function_name="log_to_leader")
681
684
  def logToMaster(self, text: str, level: int = logging.INFO) -> None:
682
685
  self.log_to_leader(text, level)
683
686
 
@@ -1207,7 +1207,7 @@ class CachingFileStore(AbstractFileStore):
1207
1207
  # its temp dir and database entry.
1208
1208
  self._deallocateSpaceForJob()
1209
1209
 
1210
- def writeGlobalFile(self, localFileName, cleanup=False, executable=False):
1210
+ def writeGlobalFile(self, localFileName, cleanup=False):
1211
1211
  """
1212
1212
  Creates a file in the jobstore and returns a FileID reference.
1213
1213
  """
toil/job.py CHANGED
@@ -236,16 +236,16 @@ def parse_accelerator(
236
236
  {'count': 1, 'kind': 'gpu'}
237
237
 
238
238
  >>> parse_accelerator("nvidia-tesla-k80")
239
- {'count': 1, 'kind': 'gpu', 'brand': 'nvidia', 'model': 'nvidia-tesla-k80'}
239
+ {'count': 1, 'kind': 'gpu', 'model': 'nvidia-tesla-k80', 'brand': 'nvidia'}
240
240
 
241
241
  >>> parse_accelerator("nvidia-tesla-k80:2")
242
- {'count': 2, 'kind': 'gpu', 'brand': 'nvidia', 'model': 'nvidia-tesla-k80'}
242
+ {'count': 2, 'kind': 'gpu', 'model': 'nvidia-tesla-k80', 'brand': 'nvidia'}
243
243
 
244
244
  >>> parse_accelerator("gpu")
245
245
  {'count': 1, 'kind': 'gpu'}
246
246
 
247
247
  >>> parse_accelerator("cuda:1")
248
- {'count': 1, 'kind': 'gpu', 'brand': 'nvidia', 'api': 'cuda'}
248
+ {'count': 1, 'kind': 'gpu', 'api': 'cuda', 'brand': 'nvidia'}
249
249
 
250
250
  >>> parse_accelerator({"kind": "gpu"})
251
251
  {'count': 1, 'kind': 'gpu'}
@@ -581,8 +581,8 @@ class Requirer:
581
581
  >>> Requirer._parseResource('cores', 1), Requirer._parseResource('disk', 1), \
582
582
  Requirer._parseResource('memory', 1)
583
583
  (1, 1, 1)
584
- >>> Requirer._parseResource('cores', '1G'), Requirer._parseResource('disk', '1G'), \
585
- Requirer._parseResource('memory', '1G')
584
+ >>> Requirer._parseResource('cores', '1Gi'), Requirer._parseResource('disk', '1Gi'), \
585
+ Requirer._parseResource('memory', '1Gi')
586
586
  (1073741824, 1073741824, 1073741824)
587
587
  >>> Requirer._parseResource('cores', 1.1)
588
588
  1.1
@@ -813,7 +813,6 @@ class JobDescription(Requirer):
813
813
  Subclassed into variants for checkpoint jobs and service jobs that have
814
814
  their specific parameters.
815
815
  """
816
-
817
816
  def __init__(
818
817
  self,
819
818
  requirements: Mapping[str, Union[int, str, float, bool, list]],
@@ -3146,9 +3145,8 @@ class Job:
3146
3145
 
3147
3146
  Will modify the job's description with changes that need to be committed back to the JobStore.
3148
3147
  """
3149
- if stats is not None:
3150
- startTime = time.time()
3151
- startClock = ResourceMonitor.get_total_cpu_time()
3148
+ startTime = time.time()
3149
+ startClock = ResourceMonitor.get_total_cpu_time()
3152
3150
  baseDir = os.getcwd()
3153
3151
 
3154
3152
  succeeded = False
@@ -3180,18 +3178,36 @@ class Job:
3180
3178
  # Change dir back to cwd dir, if changed by job (this is a safety issue)
3181
3179
  if os.getcwd() != baseDir:
3182
3180
  os.chdir(baseDir)
3181
+
3182
+ totalCpuTime, total_memory_kib = (
3183
+ ResourceMonitor.get_total_cpu_time_and_memory_usage()
3184
+ )
3185
+ job_time = time.time() - startTime
3186
+ job_cpu_time = totalCpuTime - startClock
3187
+ allocated_cpu_time = job_time * self.cores
3188
+
3189
+ if job_cpu_time > allocated_cpu_time and allocated_cpu_time > 0:
3190
+ # Too much CPU was used by this job! Maybe we're using a batch
3191
+ # system that doesn't/can't sandbox us and we started too many
3192
+ # threads. Complain to the user!
3193
+ excess_factor = job_cpu_time / allocated_cpu_time
3194
+ fileStore.log_to_leader(
3195
+ f"Job {self.description} used {excess_factor:.2f}x more "
3196
+ f"CPU than the requested {self.cores} cores. Consider "
3197
+ f"increasing the job's required CPU cores or limiting the "
3198
+ f"number of processes/threads launched.",
3199
+ level=logging.WARNING
3200
+ )
3201
+
3183
3202
  # Finish up the stats
3184
3203
  if stats is not None:
3185
- totalCpuTime, total_memory_kib = (
3186
- ResourceMonitor.get_total_cpu_time_and_memory_usage()
3187
- )
3188
3204
  stats.jobs.append(
3189
3205
  # TODO: We represent everything as strings in the stats
3190
3206
  # even though the JSON transport can take bools and floats.
3191
3207
  Expando(
3192
3208
  start=str(startTime),
3193
- time=str(time.time() - startTime),
3194
- clock=str(totalCpuTime - startClock),
3209
+ time=str(job_time),
3210
+ clock=str(job_cpu_time),
3195
3211
  class_name=self._jobName(),
3196
3212
  memory=str(total_memory_kib),
3197
3213
  requested_cores=str(self.cores), # TODO: Isn't this really consumed cores?