toil 6.1.0a1__py3-none-any.whl → 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. toil/__init__.py +1 -232
  2. toil/batchSystems/abstractBatchSystem.py +41 -17
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +79 -65
  4. toil/batchSystems/awsBatch.py +8 -8
  5. toil/batchSystems/cleanup_support.py +7 -3
  6. toil/batchSystems/contained_executor.py +4 -5
  7. toil/batchSystems/gridengine.py +1 -1
  8. toil/batchSystems/htcondor.py +5 -5
  9. toil/batchSystems/kubernetes.py +25 -11
  10. toil/batchSystems/local_support.py +3 -3
  11. toil/batchSystems/lsf.py +9 -9
  12. toil/batchSystems/mesos/batchSystem.py +4 -4
  13. toil/batchSystems/mesos/executor.py +3 -2
  14. toil/batchSystems/options.py +9 -0
  15. toil/batchSystems/singleMachine.py +11 -10
  16. toil/batchSystems/slurm.py +129 -16
  17. toil/batchSystems/torque.py +1 -1
  18. toil/bus.py +45 -3
  19. toil/common.py +56 -31
  20. toil/cwl/cwltoil.py +442 -371
  21. toil/deferred.py +1 -1
  22. toil/exceptions.py +1 -1
  23. toil/fileStores/abstractFileStore.py +69 -20
  24. toil/fileStores/cachingFileStore.py +6 -22
  25. toil/fileStores/nonCachingFileStore.py +6 -15
  26. toil/job.py +270 -86
  27. toil/jobStores/abstractJobStore.py +37 -31
  28. toil/jobStores/aws/jobStore.py +280 -218
  29. toil/jobStores/aws/utils.py +60 -31
  30. toil/jobStores/conftest.py +2 -2
  31. toil/jobStores/fileJobStore.py +3 -3
  32. toil/jobStores/googleJobStore.py +3 -4
  33. toil/leader.py +89 -38
  34. toil/lib/aws/__init__.py +26 -10
  35. toil/lib/aws/iam.py +2 -2
  36. toil/lib/aws/session.py +62 -22
  37. toil/lib/aws/utils.py +73 -37
  38. toil/lib/conversions.py +24 -1
  39. toil/lib/ec2.py +118 -69
  40. toil/lib/expando.py +1 -1
  41. toil/lib/generatedEC2Lists.py +8 -8
  42. toil/lib/io.py +42 -4
  43. toil/lib/misc.py +1 -3
  44. toil/lib/resources.py +57 -16
  45. toil/lib/retry.py +12 -5
  46. toil/lib/threading.py +29 -14
  47. toil/lib/throttle.py +1 -1
  48. toil/options/common.py +31 -30
  49. toil/options/wdl.py +5 -0
  50. toil/provisioners/__init__.py +9 -3
  51. toil/provisioners/abstractProvisioner.py +12 -2
  52. toil/provisioners/aws/__init__.py +20 -15
  53. toil/provisioners/aws/awsProvisioner.py +406 -329
  54. toil/provisioners/gceProvisioner.py +2 -2
  55. toil/provisioners/node.py +13 -5
  56. toil/server/app.py +1 -1
  57. toil/statsAndLogging.py +93 -23
  58. toil/test/__init__.py +27 -12
  59. toil/test/batchSystems/batchSystemTest.py +40 -33
  60. toil/test/batchSystems/batch_system_plugin_test.py +79 -0
  61. toil/test/batchSystems/test_slurm.py +22 -7
  62. toil/test/cactus/__init__.py +0 -0
  63. toil/test/cactus/test_cactus_integration.py +58 -0
  64. toil/test/cwl/cwlTest.py +245 -236
  65. toil/test/cwl/seqtk_seq.cwl +1 -1
  66. toil/test/docs/scriptsTest.py +11 -14
  67. toil/test/jobStores/jobStoreTest.py +40 -54
  68. toil/test/lib/aws/test_iam.py +2 -2
  69. toil/test/lib/test_ec2.py +1 -1
  70. toil/test/options/__init__.py +13 -0
  71. toil/test/options/options.py +37 -0
  72. toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
  73. toil/test/provisioners/clusterTest.py +99 -16
  74. toil/test/server/serverTest.py +2 -2
  75. toil/test/src/autoDeploymentTest.py +1 -1
  76. toil/test/src/dockerCheckTest.py +2 -1
  77. toil/test/src/environmentTest.py +125 -0
  78. toil/test/src/fileStoreTest.py +1 -1
  79. toil/test/src/jobDescriptionTest.py +18 -8
  80. toil/test/src/jobTest.py +1 -1
  81. toil/test/src/realtimeLoggerTest.py +4 -0
  82. toil/test/src/workerTest.py +52 -19
  83. toil/test/utils/toilDebugTest.py +62 -4
  84. toil/test/utils/utilsTest.py +23 -21
  85. toil/test/wdl/wdltoil_test.py +49 -21
  86. toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
  87. toil/toilState.py +68 -9
  88. toil/utils/toilDebugFile.py +1 -1
  89. toil/utils/toilDebugJob.py +153 -26
  90. toil/utils/toilLaunchCluster.py +12 -2
  91. toil/utils/toilRsyncCluster.py +7 -2
  92. toil/utils/toilSshCluster.py +7 -3
  93. toil/utils/toilStats.py +310 -266
  94. toil/utils/toilStatus.py +98 -52
  95. toil/version.py +11 -11
  96. toil/wdl/wdltoil.py +644 -225
  97. toil/worker.py +125 -83
  98. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
  99. toil-7.0.0.dist-info/METADATA +158 -0
  100. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/RECORD +103 -96
  101. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
  102. toil-6.1.0a1.dist-info/METADATA +0 -125
  103. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
  104. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/deferred.py CHANGED
@@ -178,7 +178,7 @@ class DeferredFunctionManager:
 
         try:
             def defer(deferredFunction):
-                # Just serialize defered functions one after the other.
+                # Just serialize deferred functions one after the other.
                 # If serializing later ones fails, eariler ones will still be intact.
                 # We trust dill to protect sufficiently against partial reads later.
                 logger.debug("Deferring function %s" % repr(deferredFunction))
toil/exceptions.py CHANGED
@@ -36,7 +36,7 @@ class FailedJobsException(Exception):
             for job_desc in failed_jobs:
                 if job_desc.logJobStoreFileID:
                     with job_desc.getLogFileHandle(job_store) as f:
-                        self.msg += "\n" + StatsAndLogging.formatLogStream(f, job_desc)
+                        self.msg += "\n" + StatsAndLogging.formatLogStream(f, f'Log from job "{job_desc}"')
         # catch failures to prepare more complex details and only return the basics
         except Exception:
             logger.exception("Exception when compiling information about failed jobs")
toil/fileStores/abstractFileStore.py CHANGED
@@ -37,11 +37,12 @@ from typing import (IO,
 
 import dill
 
-from toil.common import Toil, cacheDirName
+from toil.common import Toil, cacheDirName, getDirSizeRecursively
 from toil.fileStores import FileID
-from toil.job import Job, JobDescription
+from toil.job import Job, JobDescription, DebugStoppingPointReached
 from toil.jobStores.abstractJobStore import AbstractJobStore
 from toil.lib.compatibility import deprecated
+from toil.lib.conversions import bytes2human
 from toil.lib.io import WriteWatchingStream, mkdtemp
 
 logger = logging.getLogger(__name__)
@@ -112,11 +113,10 @@ class AbstractFileStore(ABC):
         assert self.jobStore.config.workflowID is not None
         self.workflow_dir: str = Toil.getLocalWorkflowDir(self.jobStore.config.workflowID, self.jobStore.config.workDir)
         self.coordination_dir: str = Toil.get_local_workflow_coordination_dir(self.jobStore.config.workflowID, self.jobStore.config.workDir, self.jobStore.config.coordination_dir)
-        self.jobName: str = (
-            self.jobDesc.command.split()[1] if self.jobDesc.command else ""
-        )
+        self.jobName: str = str(self.jobDesc)
         self.waitForPreviousCommit = waitForPreviousCommit
-        self.loggingMessages: List[Dict[str, Union[int, str]]] = []
+        self.logging_messages: List[Dict[str, Union[int, str]]] = []
+        self.logging_user_streams: List[dict[str, str]] = []
         # Records file IDs of files deleted during the current job. Doesn't get
         # committed back until the job is completely successful, because if the
         # job is re-run it will need to be able to re-delete these files.
@@ -125,6 +125,8 @@ class AbstractFileStore(ABC):
         # Holds records of file ID, or file ID and local path, for reporting
         # the accessed files of failed jobs.
         self._accessLog: List[Tuple[str, ...]] = []
+        # Holds total bytes of observed disk usage for the last job run under open()
+        self._job_disk_used: Optional[int] = None
 
     @staticmethod
     def createFileStore(
@@ -187,15 +189,43 @@ class AbstractFileStore(ABC):
 
         :param job: The job instance of the toil job to run.
         """
-        failed = True
+        job_requested_disk = job.disk
         try:
             yield
             failed = False
-        finally:
-            # Do a finally instead of an except/raise because we don't want
-            # to appear as "another exception occurred" in the stack trace.
-            if failed:
+        except BaseException as e:
+            if isinstance(e, DebugStoppingPointReached):
+                self._dumpAccessLogs(job_type="Debugged", log_level=logging.INFO)
+            else:
                 self._dumpAccessLogs()
+            raise
+        finally:
+            # See how much disk space is used at the end of the job.
+            # Not a real peak disk usage, but close enough to be useful for warning the user.
+            self._job_disk_used = getDirSizeRecursively(self.localTempDir)
+
+            # Report disk usage
+            percent: float = 0.0
+            if job_requested_disk and job_requested_disk > 0:
+                percent = float(self._job_disk_used) / job_requested_disk * 100
+            disk_usage: str = (f"Job {self.jobName} used {percent:.2f}% disk ({bytes2human(self._job_disk_used)}B [{self._job_disk_used}B] used, "
+                               f"{bytes2human(job_requested_disk)}B [{job_requested_disk}B] requested).")
+            if self._job_disk_used > job_requested_disk:
+                self.log_to_leader("Job used more disk than requested. For CWL, consider increasing the outdirMin "
+                                   f"requirement, otherwise, consider increasing the disk requirement. {disk_usage}",
+                                   level=logging.WARNING)
+            else:
+                self.log_to_leader(disk_usage, level=logging.DEBUG)
+
+    def get_disk_usage(self) -> Optional[int]:
+        """
+        Get the number of bytes of disk used by the last job run under open().
+
+        Disk usage is measured at the end of the job.
+        TODO: Sample periodically and record peak usage.
+        """
+        return self._job_disk_used
+
 
     # Functions related to temp files and directories
     def getLocalTempDir(self) -> str:
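The new finally block reports disk use whether or not the job failed. The reporting rule it implements can be sketched standalone; report_disk_usage below is an illustrative helper, not part of Toil, and the byte formatting is simplified from bytes2human:

    import logging

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("disk-report")

    def report_disk_usage(job_name: str, used: int, requested: int) -> None:
        # Same rule as the hunk above: warn when observed usage exceeds
        # the request, otherwise log the figures at debug level.
        percent = float(used) / requested * 100 if requested > 0 else 0.0
        message = f"Job {job_name} used {percent:.2f}% disk ({used}B used, {requested}B requested)."
        if used > requested:
            logger.warning("Job used more disk than requested. %s", message)
        else:
            logger.debug(message)

    report_disk_usage("example", used=3 * 2**30, requested=2 * 2**30)  # 150.00%: warns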
@@ -331,14 +361,16 @@ class AbstractFileStore(ABC):
 
         yield wrappedStream, fileID
 
-    def _dumpAccessLogs(self) -> None:
+    def _dumpAccessLogs(self, job_type: str = "Failed", log_level: int = logging.WARNING) -> None:
         """
-        When something goes wrong, log a report.
+        Log a report of the files accessed.
 
         Includes the files that were accessed while the file store was open.
+
+        :param job_type: Adjective to describe the job in the report.
         """
         if len(self._accessLog) > 0:
-            logger.warning('Failed job accessed files:')
+            logger.log(log_level, '%s job accessed files:', job_type)
 
             for item in self._accessLog:
                 # For each access record
@@ -347,14 +379,14 @@ class AbstractFileStore(ABC):
                 file_id, dest_path = item
                 if os.path.exists(dest_path):
                     if os.path.islink(dest_path):
-                        logger.warning('Symlinked file \'%s\' to path \'%s\'', file_id, dest_path)
+                        logger.log(log_level, 'Symlinked file \'%s\' to path \'%s\'', file_id, dest_path)
                     else:
-                        logger.warning('Downloaded file \'%s\' to path \'%s\'', file_id, dest_path)
+                        logger.log(log_level, 'Downloaded file \'%s\' to path \'%s\'', file_id, dest_path)
                 else:
-                    logger.warning('Downloaded file \'%s\' to path \'%s\' (gone!)', file_id, dest_path)
+                    logger.log(log_level, 'Downloaded file \'%s\' to path \'%s\' (gone!)', file_id, dest_path)
             else:
                 # Otherwise dump without the name
-                logger.warning('Streamed file \'%s\'', *item)
+                logger.log(log_level, 'Streamed file \'%s\'', *item)
 
     def logAccess(
         self, fileStoreID: Union[FileID, str], destination: Union[str, None] = None
@@ -611,13 +643,30 @@ class AbstractFileStore(ABC):
         :param level: The logging level.
         """
         logger.log(level=level, msg=("LOG-TO-MASTER: " + text))
-        self.loggingMessages.append(dict(text=text, level=level))
+        self.logging_messages.append(dict(text=text, level=level))
 
 
     @deprecated(new_function_name='export_file')
     def logToMaster(self, text: str, level: int = logging.INFO) -> None:
         self.log_to_leader(text, level)
-
+
+    def log_user_stream(self, name: str, stream: IO[bytes]) -> None:
+        """
+        Send a stream of UTF-8 text to the leader as a named log stream.
+
+        Useful for things like the error logs of Docker containers. The leader
+        will show it to the user or organize it appropriately for user-level
+        log information.
+
+        :param name: A hierarchical, .-delimited string.
+        :param stream: A stream of encoded text. Encoding errors will be
+                       tolerated.
+        """
+
+        # Read the whole stream into memory
+        steam_data = stream.read().decode('utf-8', errors='replace')
+        # And remember it for the worker to fish out
+        self.logging_user_streams.append(dict(name=name, text=steam_data))
 
     # Functions run after the completion of the job.
     @abstractmethod
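The new log_user_stream API takes any readable bytes stream. One plausible use, with invented names and contents, is forwarding a tool's captured stderr from inside a job's run method:

    import io
    from toil.job import Job

    class AlignJob(Job):
        # Hypothetical job: the dotted stream name and captured bytes are
        # invented for illustration.
        def run(self, file_store):
            captured_stderr = io.BytesIO(b"tool: warning: low memory\n")
            # Ship the bytes to the leader as a named, user-facing log stream.
            file_store.log_user_stream("workflow.align_step.docker.stderr", captured_stderr)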
toil/fileStores/cachingFileStore.py CHANGED
@@ -32,13 +32,12 @@ from typing import (Any,
                     Sequence,
                     Tuple)
 
-from toil.common import cacheDirName, getDirSizeRecursively, getFileSystemSize
+from toil.common import cacheDirName, getFileSystemSize
 from toil.fileStores import FileID
 from toil.fileStores.abstractFileStore import AbstractFileStore
 from toil.job import Job, JobDescription
 from toil.jobStores.abstractJobStore import AbstractJobStore
 from toil.lib.compatibility import deprecated
-from toil.lib.conversions import bytes2human
 from toil.lib.io import (atomic_copy,
                          atomic_copyobj,
                          make_public_dir,
@@ -1037,11 +1036,12 @@ class CachingFileStore(AbstractFileStore):
         # Create a working directory for the job
         startingDir = os.getcwd()
         # Move self.localTempDir from the worker directory set up in __init__ to a per-job directory.
-        self.localTempDir = make_public_dir(in_directory=self.localTempDir)
+        self.localTempDir = make_public_dir(self.localTempDir, suggested_name="job")
         # Check the status of all jobs on this node. If there are jobs that started and died before
         # cleaning up their presence from the database, clean them up ourselves.
         self._removeDeadJobs(self.coordination_dir, self.con)
-        # Get the requirements for the job.
+        # Get the disk requirement for the job, which we will use to know if we
+        # have filled the cache or not.
         self.jobDiskBytes = job.disk
 
         logger.debug('Actually running job (%s) with ID (%s) which wants %d of our %d bytes.',
@@ -1055,22 +1055,6 @@ class CachingFileStore(AbstractFileStore):
             with super().open(job):
                 yield
         finally:
-            # See how much disk space is used at the end of the job.
-            # Not a real peak disk usage, but close enough to be useful for warning the user.
-            # TODO: Push this logic into the abstract file store
-            disk: int = getDirSizeRecursively(self.localTempDir)
-            percent: float = 0.0
-            if self.jobDiskBytes and self.jobDiskBytes > 0:
-                percent = float(disk) / self.jobDiskBytes * 100
-            disk_usage: str = (f"Job {self.jobName} used {percent:.2f}% disk ({bytes2human(disk)}B [{disk}B] used, "
-                               f"{bytes2human(self.jobDiskBytes)}B [{self.jobDiskBytes}B] requested).")
-            if disk > self.jobDiskBytes:
-                self.log_to_leader("Job used more disk than requested. For CWL, consider increasing the outdirMin "
-                                   f"requirement, otherwise, consider increasing the disk requirement. {disk_usage}",
-                                   level=logging.WARNING)
-            else:
-                self.log_to_leader(disk_usage, level=logging.DEBUG)
-
             # Go back up to the per-worker local temp directory.
             os.chdir(startingDir)
             self.cleanupInProgress = True
@@ -1095,7 +1079,7 @@ class CachingFileStore(AbstractFileStore):
         # Create an empty file to get an ID.
         # Make sure to pass along the file basename.
         # TODO: this empty file could leak if we die now...
-        fileID = self.jobStore.getEmptyFileStoreID(creatorID, cleanup, os.path.basename(localFileName))
+        fileID = self.jobStore.get_empty_file_store_id(creatorID, cleanup, os.path.basename(localFileName))
         # Work out who we are
         with self.as_process() as me:
 
@@ -1875,7 +1859,7 @@ class CachingFileStore(AbstractFileStore):
         logger.debug('Starting commit of %s forked from %s', state_to_commit, self.jobDesc)
         # Make sure the deep copy isn't summoning ghosts of old job
         # versions. It must be as new or newer at this point.
-        self.jobDesc.check_new_version(state_to_commit)
+        self.jobDesc.assert_is_not_newer_than(state_to_commit)
 
         # Bump the original's version since saving will do that too and we
         # don't want duplicate versions.
toil/fileStores/nonCachingFileStore.py CHANGED
@@ -35,13 +35,12 @@ from typing import (IO,
 
 import dill
 
-from toil.common import getDirSizeRecursively, getFileSystemSize
+from toil.common import getFileSystemSize
 from toil.fileStores import FileID
 from toil.fileStores.abstractFileStore import AbstractFileStore
 from toil.job import Job, JobDescription
 from toil.jobStores.abstractJobStore import AbstractJobStore
 from toil.lib.compatibility import deprecated
-from toil.lib.conversions import bytes2human
 from toil.lib.io import make_public_dir, robust_rmtree
 from toil.lib.retry import ErrorCondition, retry
 from toil.lib.threading import get_process_name, process_name_exists
@@ -102,9 +101,8 @@ class NonCachingFileStore(AbstractFileStore):
 
     @contextmanager
    def open(self, job: Job) -> Generator[None, None, None]:
-        jobReqs = job.disk
         startingDir = os.getcwd()
-        self.localTempDir: str = make_public_dir(in_directory=self.localTempDir)
+        self.localTempDir: str = make_public_dir(self.localTempDir, suggested_name="job")
         self._removeDeadJobs(self.coordination_dir)
         self.jobStateFile = self._createJobStateFile()
         self.check_for_state_corruption()
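Both file stores now call make_public_dir with the parent directory passed positionally plus a suggested_name hint, so the per-job temp directory gets a predictable "job" name instead of a purely random one. A sketch of the new call shape; the parent path is invented, and the fallback behavior when the suggested name is already taken is not shown in this diff:

    import os
    from toil.lib.io import make_public_dir

    parent = "/tmp/toil-demo"
    os.makedirs(parent, exist_ok=True)
    # New call shape per the hunks above: positional parent plus a name hint.
    job_dir = make_public_dir(parent, suggested_name="job")
    print(job_dir)  # e.g. /tmp/toil-demo/job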
@@ -116,16 +114,6 @@ class NonCachingFileStore(AbstractFileStore):
             with super().open(job):
                 yield
         finally:
-            disk = getDirSizeRecursively(self.localTempDir)
-            percent = float(disk) / jobReqs * 100 if jobReqs > 0 else 0.0
-            disk_usage = (f"Job {self.jobName} used {percent:.2f}% disk ({bytes2human(disk)}B [{disk}B] used, "
-                          f"{bytes2human(jobReqs)}B [{jobReqs}B] requested).")
-            if disk > jobReqs:
-                self.log_to_leader("Job used more disk than requested. For CWL, consider increasing the outdirMin "
-                                   f"requirement, otherwise, consider increasing the disk requirement. {disk_usage}",
-                                   level=logging.WARNING)
-            else:
-                self.log_to_leader(disk_usage, level=logging.DEBUG)
             os.chdir(startingDir)
             # Finally delete the job from the worker
             self.check_for_state_corruption()
@@ -362,7 +350,10 @@ class NonCachingFileStore(AbstractFileStore):
         jobState = {'jobProcessName': get_process_name(self.coordination_dir),
                     'jobName': self.jobName,
                     'jobDir': self.localTempDir}
-        (fd, jobStateFile) = tempfile.mkstemp(suffix='.jobState.tmp', dir=self.coordination_dir)
+        try:
+            (fd, jobStateFile) = tempfile.mkstemp(suffix='.jobState.tmp', dir=self.coordination_dir)
+        except Exception as e:
+            raise RuntimeError("Could not make state file in " + self.coordination_dir) from e
         with open(fd, 'wb') as fH:
             # Write data
             dill.dump(jobState, fH)
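The wrapped mkstemp call uses exception chaining so the error names the coordination directory that could not be written. The same pattern in isolation; make_state_file is an illustrative name, not Toil's:

    import tempfile

    def make_state_file(coordination_dir: str) -> tuple[int, str]:
        # Chain the original OSError onto a RuntimeError that names the
        # directory, so the traceback carries both pieces of context.
        try:
            return tempfile.mkstemp(suffix='.jobState.tmp', dir=coordination_dir)
        except Exception as e:
            raise RuntimeError("Could not make state file in " + coordination_dir) from e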