toil 6.1.0a1__py3-none-any.whl → 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. toil/__init__.py +1 -232
  2. toil/batchSystems/abstractBatchSystem.py +41 -17
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +79 -65
  4. toil/batchSystems/awsBatch.py +8 -8
  5. toil/batchSystems/cleanup_support.py +7 -3
  6. toil/batchSystems/contained_executor.py +4 -5
  7. toil/batchSystems/gridengine.py +1 -1
  8. toil/batchSystems/htcondor.py +5 -5
  9. toil/batchSystems/kubernetes.py +25 -11
  10. toil/batchSystems/local_support.py +3 -3
  11. toil/batchSystems/lsf.py +9 -9
  12. toil/batchSystems/mesos/batchSystem.py +4 -4
  13. toil/batchSystems/mesos/executor.py +3 -2
  14. toil/batchSystems/options.py +9 -0
  15. toil/batchSystems/singleMachine.py +11 -10
  16. toil/batchSystems/slurm.py +129 -16
  17. toil/batchSystems/torque.py +1 -1
  18. toil/bus.py +45 -3
  19. toil/common.py +56 -31
  20. toil/cwl/cwltoil.py +442 -371
  21. toil/deferred.py +1 -1
  22. toil/exceptions.py +1 -1
  23. toil/fileStores/abstractFileStore.py +69 -20
  24. toil/fileStores/cachingFileStore.py +6 -22
  25. toil/fileStores/nonCachingFileStore.py +6 -15
  26. toil/job.py +270 -86
  27. toil/jobStores/abstractJobStore.py +37 -31
  28. toil/jobStores/aws/jobStore.py +280 -218
  29. toil/jobStores/aws/utils.py +60 -31
  30. toil/jobStores/conftest.py +2 -2
  31. toil/jobStores/fileJobStore.py +3 -3
  32. toil/jobStores/googleJobStore.py +3 -4
  33. toil/leader.py +89 -38
  34. toil/lib/aws/__init__.py +26 -10
  35. toil/lib/aws/iam.py +2 -2
  36. toil/lib/aws/session.py +62 -22
  37. toil/lib/aws/utils.py +73 -37
  38. toil/lib/conversions.py +24 -1
  39. toil/lib/ec2.py +118 -69
  40. toil/lib/expando.py +1 -1
  41. toil/lib/generatedEC2Lists.py +8 -8
  42. toil/lib/io.py +42 -4
  43. toil/lib/misc.py +1 -3
  44. toil/lib/resources.py +57 -16
  45. toil/lib/retry.py +12 -5
  46. toil/lib/threading.py +29 -14
  47. toil/lib/throttle.py +1 -1
  48. toil/options/common.py +31 -30
  49. toil/options/wdl.py +5 -0
  50. toil/provisioners/__init__.py +9 -3
  51. toil/provisioners/abstractProvisioner.py +12 -2
  52. toil/provisioners/aws/__init__.py +20 -15
  53. toil/provisioners/aws/awsProvisioner.py +406 -329
  54. toil/provisioners/gceProvisioner.py +2 -2
  55. toil/provisioners/node.py +13 -5
  56. toil/server/app.py +1 -1
  57. toil/statsAndLogging.py +93 -23
  58. toil/test/__init__.py +27 -12
  59. toil/test/batchSystems/batchSystemTest.py +40 -33
  60. toil/test/batchSystems/batch_system_plugin_test.py +79 -0
  61. toil/test/batchSystems/test_slurm.py +22 -7
  62. toil/test/cactus/__init__.py +0 -0
  63. toil/test/cactus/test_cactus_integration.py +58 -0
  64. toil/test/cwl/cwlTest.py +245 -236
  65. toil/test/cwl/seqtk_seq.cwl +1 -1
  66. toil/test/docs/scriptsTest.py +11 -14
  67. toil/test/jobStores/jobStoreTest.py +40 -54
  68. toil/test/lib/aws/test_iam.py +2 -2
  69. toil/test/lib/test_ec2.py +1 -1
  70. toil/test/options/__init__.py +13 -0
  71. toil/test/options/options.py +37 -0
  72. toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
  73. toil/test/provisioners/clusterTest.py +99 -16
  74. toil/test/server/serverTest.py +2 -2
  75. toil/test/src/autoDeploymentTest.py +1 -1
  76. toil/test/src/dockerCheckTest.py +2 -1
  77. toil/test/src/environmentTest.py +125 -0
  78. toil/test/src/fileStoreTest.py +1 -1
  79. toil/test/src/jobDescriptionTest.py +18 -8
  80. toil/test/src/jobTest.py +1 -1
  81. toil/test/src/realtimeLoggerTest.py +4 -0
  82. toil/test/src/workerTest.py +52 -19
  83. toil/test/utils/toilDebugTest.py +62 -4
  84. toil/test/utils/utilsTest.py +23 -21
  85. toil/test/wdl/wdltoil_test.py +49 -21
  86. toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
  87. toil/toilState.py +68 -9
  88. toil/utils/toilDebugFile.py +1 -1
  89. toil/utils/toilDebugJob.py +153 -26
  90. toil/utils/toilLaunchCluster.py +12 -2
  91. toil/utils/toilRsyncCluster.py +7 -2
  92. toil/utils/toilSshCluster.py +7 -3
  93. toil/utils/toilStats.py +310 -266
  94. toil/utils/toilStatus.py +98 -52
  95. toil/version.py +11 -11
  96. toil/wdl/wdltoil.py +644 -225
  97. toil/worker.py +125 -83
  98. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
  99. toil-7.0.0.dist-info/METADATA +158 -0
  100. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/RECORD +103 -96
  101. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
  102. toil-6.1.0a1.dist-info/METADATA +0 -125
  103. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
  104. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
@@ -12,66 +12,193 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  """Debug tool for running a toil job locally."""
15
+ import gc
15
16
  import logging
16
-
17
+ import os
17
18
  import pprint
18
19
  import sys
19
20
 
21
+ from pathlib import Path
22
+ from typing import Optional, List, Tuple
23
+
20
24
  from toil.common import Config, Toil, parser_with_common_options
25
+ from toil.job import FilesDownloadedStoppingPointReached
21
26
  from toil.jobStores.fileJobStore import FileJobStore
22
27
  from toil.statsAndLogging import set_logging_from_options
23
28
  from toil.utils.toilDebugFile import printContentsOfJobStore
29
+ from toil.utils.toilStatus import ToilStatus
24
30
  from toil.worker import workerScript
25
31
 
26
32
  logger = logging.getLogger(__name__)
27
33
 
28
34
 
29
35
  def main() -> None:
30
- parser = parser_with_common_options(jobstore_option=True, prog="toil debug-job")
31
- parser.add_argument("jobID", type=str, nargs='?', default=None,
32
- help="The job store id of a job within the provided jobstore to run by itself.")
33
- parser.add_argument("--printJobInfo", type=str,
34
- help="Dump debugging info about this job ID")
36
+ parser = parser_with_common_options(jobstore_option=True, prog="toil debug-job", default_log_level=logging.DEBUG)
37
+ parser.add_argument("job", type=str,
38
+ help="The job store id or job name of a job within the provided jobstore")
39
+ parser.add_argument("--printJobInfo", action="store_true",
40
+ help="Dump debugging info about the job instead of running it")
41
+ parser.add_argument("--retrieveTaskDirectory", dest="retrieve_task_directory", type=str, default=None,
42
+ help="Download CWL or WDL task inputs to the given directory and stop.")
35
43
 
36
44
  options = parser.parse_args()
37
45
  set_logging_from_options(options)
38
46
 
47
+ if options.retrieve_task_directory is not None and os.path.exists(options.retrieve_task_directory):
48
+ # The logic to duplicate container mounts depends on stuff not already existing.
49
+ logger.error(
50
+ "The directory %s given for --retrieveTaskDirectory already exists. "
51
+ "Stopping to avoid clobbering existing files.",
52
+ options.retrieve_task_directory
53
+ )
54
+ sys.exit(1)
55
+
39
56
  jobStore = Toil.resumeJobStore(options.jobStore)
40
57
  # Get the config with the workflow ID from the job store
41
58
  config = jobStore.config
42
59
  # But override its options
43
60
  config.setOptions(options)
44
61
 
45
- did_something = False
62
+ # Find the job
63
+
64
+ if jobStore.job_exists(options.job):
65
+ # The user asked for a particular job and it exists
66
+ job_id = options.job
67
+ else:
68
+ # Go search by name and fill in job_id
69
+
70
+ # TODO: break out job store scan logic so it doesn't need to re-connect
71
+ # to the job store.
72
+ status = ToilStatus(options.jobStore)
73
+ hits = []
74
+ suggestion = None
75
+ for job in status.jobsToReport:
76
+ if options.job in (job.jobName, job.unitName, job.displayName):
77
+ # Find all the jobs that sort of match
78
+ hits.append(job)
79
+ if suggestion is None and job.remainingTryCount == 0:
80
+ # How about this nice failing job instead?
81
+ suggestion = job
82
+ if len(hits) == 0:
83
+ # No hits
84
+ if suggestion is None:
85
+ logger.critical("No job found with ID or name \"%s\". No jobs are completely failed.", options.job)
86
+ else:
87
+ logger.critical("No job found with ID or name \"%s\". How about the failed job %s instead?", options.job, suggestion)
88
+ sys.exit(1)
89
+ elif len(hits) > 1:
90
+ # Several hits, maybe only one has failed
91
+ completely_failed_hits = [job for job in hits if job.remainingTryCount == 0]
92
+ if len(completely_failed_hits) == 0:
93
+ logger.critical("Multiple jobs match \"%s\" but none are completely failed: %s", options.job, hits)
94
+ sys.exit(1)
95
+ elif len(completely_failed_hits) > 0:
96
+ logger.critical("Multiple jobs matching \"%s\" are completely failed: %s", options.job, completely_failed_hits)
97
+ sys.exit(1)
98
+ else:
99
+ # We found one completely failed job, they probably mean that one.
100
+ logger.info("There are %s jobs matching \"%s\"; assuming you mean the failed one: %s", options.job, completely_failed_hits[0])
101
+ job_id = completely_failed_hits[0].jobStoreID
102
+ else:
103
+ # We found one job with this name, so they must mean that one
104
+ logger.info("Looked up job named \"%s\": %s", options.job, hits[0])
105
+ job_id = hits[0].jobStoreID
46
106
 
47
107
  if options.printJobInfo:
108
+ # Report on the job
109
+
48
110
  if isinstance(jobStore, FileJobStore):
49
111
  # List all its files if we can
50
- printContentsOfJobStore(job_store=jobStore, job_id=options.printJobInfo)
112
+ printContentsOfJobStore(job_store=jobStore, job_id=job_id)
51
113
  # Print the job description itself
52
- job_desc = jobStore.load_job(options.printJobInfo)
114
+ job_desc = jobStore.load_job(job_id)
53
115
  print(f"Job: {job_desc}")
54
116
  pprint.pprint(job_desc.__dict__)
117
+ else:
118
+ # Run the job
55
119
 
56
- did_something = True
120
+ debug_flags = set()
121
+ local_worker_temp_dir = None
122
+ if options.retrieve_task_directory is not None:
123
+ # Pick a directory in it (which may be removed by the worker) as the worker's temp dir.
124
+ local_worker_temp_dir = os.path.join(options.retrieve_task_directory, "worker")
125
+ # Make sure it exists
126
+ os.makedirs(local_worker_temp_dir, exist_ok=True)
127
+ # And tell the job to just download files
128
+ debug_flags.add("download_only")
129
+ # We might need to reconstruct a container environment.
130
+ host_and_job_paths: Optional[List[Tuple[str, str]]] = None
131
+ # Track if the run succeeded without error
132
+ run_succeeded = False
57
133
 
58
- # TODO: Option to print list of successor jobs
59
- # TODO: Option to run job within python debugger, allowing step through of arguments
60
- # idea would be to have option to import pdb and set breakpoint at the start of the user's code
134
+ logger.info(f"Running the following job locally: {job_id}")
135
+ try:
136
+ workerScript(jobStore, config, job_id, job_id, redirect_output_to_log_file=False, local_worker_temp_dir=local_worker_temp_dir, debug_flags=debug_flags)
137
+ except FilesDownloadedStoppingPointReached as e:
138
+ # We asked for the files to be downloaded and now they are.
139
+ assert options.retrieve_task_directory is not None
140
+ if e.host_and_job_paths is not None:
141
+ # Capture the container mapping so we can reconstruct the container environment after we unwind the worker stack.
142
+ host_and_job_paths = e.host_and_job_paths
143
+ else:
144
+ # No error!
145
+ run_succeeded = True
61
146
 
62
- if options.jobID is not None:
63
- # We actually want to run a job.
147
+ # Make sure the deferred function manager cleans up and logs its
148
+ # shutdown before we start writing any reports.
149
+ gc.collect()
64
150
 
65
- jobID = options.jobID
66
- logger.debug(f"Running the following job locally: {jobID}")
67
- workerScript(jobStore, config, jobID, jobID, redirectOutputToLogFile=False)
68
- logger.debug(f"Finished running: {jobID}")
69
- # Even if the job fails, the worker script succeeds unless something goes wrong with it internally.
151
+ if run_succeeded:
152
+ logger.info(f"Successfully ran: {job_id}")
70
153
 
71
- did_something = True
154
+ if host_and_job_paths is not None:
155
+ # We need to make a place that looks like the job paths half of these.
72
156
 
73
- if not did_something:
74
- # Somebody forgot to tell us to do anything.
75
- # Show the usage instructions.
76
- parser.print_help()
77
- sys.exit(1)
157
+ # Sort by job-side path so we do children before parents, to
158
+ # stop us from accidentally making children inside mounted
159
+ # parents.
160
+ sorted_mounts = sorted(host_and_job_paths, key=lambda t: t[1], reverse=True)
161
+
162
+ fake_job_root = os.path.join(options.retrieve_task_directory, "inside")
163
+ os.makedirs(fake_job_root, exist_ok=True)
164
+
165
+ for host_path, job_path in sorted_mounts:
166
+ if not os.path.exists(host_path):
167
+ logger.error("Job intended to mount %s as %s but it does not exist!", host_path, job_path)
168
+ continue
169
+ if not job_path.startswith("/"):
170
+ logger.error("Job intended to mount %s as %s but destination is a relative path!", host_path, job_path)
171
+ continue
172
+ # Drop the slash because we are building a chroot-ish mini filesystem.
173
+ job_relative_path = job_path[1:]
174
+ if job_relative_path.startswith("/"):
175
+ # We are having trouble understanding what the job
176
+ # intended to do. Stop working on this mount.
177
+ logger.error("Job intended to mount %s as %s but destination starts with multiple slashes for some reason!", host_path, job_path)
178
+ continue
179
+ fake_job_path = os.path.join(fake_job_root, job_relative_path)
180
+ if os.path.exists(fake_job_path):
181
+ logger.error("Job intended to mount %s as %s but that location is already mounted!", host_path, job_path)
182
+ continue
183
+
184
+ logger.info("Job mounted %s as %s", host_path, job_path)
185
+
186
+ # Make sure the directory to contain the mount exists.
187
+ fake_job_containing_path = os.path.dirname(fake_job_path)
188
+ os.makedirs(fake_job_containing_path, exist_ok=True)
189
+
190
+ top_pathobj = Path(os.path.abspath(options.retrieve_task_directory))
191
+ source_pathobj = Path(host_path)
192
+ if top_pathobj in source_pathobj.parents:
193
+ # We're linking to a file we already downloaded (probably).
194
+ # Make a relative symlink so the whole assemblage can move.
195
+ host_path = os.path.relpath(host_path, fake_job_containing_path)
196
+
197
+ # Make a symlink to simulate the mount
198
+ os.symlink(host_path, fake_job_path)
199
+
200
+ logger.info("Reconstructed job container filesystem at %s", fake_job_root)
201
+
202
+ # TODO: Option to print list of successor jobs
203
+ # TODO: Option to run job within python debugger, allowing step through of arguments
204
+ # idea would be to have option to import pdb and set breakpoint at the start of the user's code
@@ -20,6 +20,7 @@ from typing import Dict, List, Tuple, Union
20
20
  from toil import applianceSelf
21
21
  from toil.common import parser_with_common_options
22
22
  from toil.lib.aws import build_tag_dict_from_env
23
+ from toil.lib.conversions import opt_strtobool
23
24
  from toil.provisioners import (check_valid_node_types,
24
25
  cluster_factory,
25
26
  parse_node_types)
@@ -31,7 +32,11 @@ logger = logging.getLogger(__name__)
31
32
  def create_tags_dict(tags: List[str]) -> Dict[str, str]:
32
33
  tags_dict = dict()
33
34
  for tag in tags:
34
- key, value = tag.split('=')
35
+ try:
36
+ key, value = tag.split('=')
37
+ except ValueError:
38
+ logger.error("Tag specification '%s' must contain '='", tag)
39
+ raise
35
40
  tags_dict[key] = value
36
41
  return tags_dict
37
42
 
@@ -114,6 +119,10 @@ def main() -> None:
114
119
  help="Any additional security groups to attach to EC2 instances. Note that a security group "
115
120
  "with its name equal to the cluster name will always be created, thus ensure that "
116
121
  "the extra security groups do not have the same name as the cluster name.")
122
+ parser.add_argument("--allowFuse", type=opt_strtobool, default=True,
123
+ help="Enable both the leader and worker nodes to be able to run Singularity with FUSE. For "
124
+ "Kubernetes, this will make the leader privileged and ask workers to run as privileged. "
125
+ "(default: %(default)s)")
117
126
  #TODO Set Aws Profile in CLI options
118
127
  options = parser.parse_args()
119
128
  set_logging_from_options(options)
@@ -178,7 +187,8 @@ def main() -> None:
178
187
  clusterName=options.clusterName,
179
188
  clusterType=options.clusterType,
180
189
  zone=options.zone,
181
- nodeStorage=options.nodeStorage)
190
+ nodeStorage=options.nodeStorage,
191
+ enable_fuse=options.allowFuse)
182
192
 
183
193
  cluster.launchCluster(leaderNodeType=options.leaderNodeType,
184
194
  leaderStorage=options.leaderStorage,
@@ -14,9 +14,10 @@
14
14
  """Rsyncs into the toil appliance container running on the leader of the cluster."""
15
15
  import argparse
16
16
  import logging
17
+ import sys
17
18
 
18
19
  from toil.common import parser_with_common_options
19
- from toil.provisioners import cluster_factory
20
+ from toil.provisioners import cluster_factory, NoSuchClusterException
20
21
  from toil.statsAndLogging import set_logging_from_options
21
22
 
22
23
  logger = logging.getLogger(__name__)
@@ -37,4 +38,8 @@ def main() -> None:
37
38
  cluster = cluster_factory(provisioner=options.provisioner,
38
39
  clusterName=options.clusterName,
39
40
  zone=options.zone)
40
- cluster.getLeader().coreRsync(args=options.args, strict=not options.insecure)
41
+ try:
42
+ cluster.getLeader().coreRsync(args=options.args, strict=not options.insecure)
43
+ except NoSuchClusterException as e:
44
+ logger.error(e)
45
+ sys.exit(1)
@@ -18,7 +18,7 @@ import sys
18
18
  from typing import List
19
19
 
20
20
  from toil.common import parser_with_common_options
21
- from toil.provisioners import cluster_factory
21
+ from toil.provisioners import cluster_factory, NoSuchClusterException
22
22
  from toil.statsAndLogging import set_logging_from_options
23
23
 
24
24
  logger = logging.getLogger(__name__)
@@ -54,5 +54,9 @@ def main() -> None:
54
54
  sshOptions.extend(['-L', f'{options.grafana_port}:localhost:3000',
55
55
  '-L', '9090:localhost:9090'])
56
56
 
57
- cluster.getLeader().sshAppliance(*command, strict=not options.insecure, tty=sys.stdin.isatty(),
58
- sshOptions=sshOptions)
57
+ try:
58
+ cluster.getLeader().sshAppliance(*command, strict=not options.insecure, tty=sys.stdin.isatty(),
59
+ sshOptions=sshOptions)
60
+ except NoSuchClusterException as e:
61
+ logger.error(e)
62
+ sys.exit(1)