toil 6.0.0__py3-none-any.whl → 6.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/batchSystems/abstractBatchSystem.py +19 -4
- toil/batchSystems/abstractGridEngineBatchSystem.py +22 -22
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/lsf.py +7 -7
- toil/batchSystems/slurm.py +85 -14
- toil/bus.py +38 -0
- toil/common.py +20 -18
- toil/cwl/cwltoil.py +81 -63
- toil/exceptions.py +1 -1
- toil/fileStores/abstractFileStore.py +53 -4
- toil/fileStores/cachingFileStore.py +4 -20
- toil/fileStores/nonCachingFileStore.py +5 -14
- toil/job.py +46 -30
- toil/jobStores/abstractJobStore.py +21 -23
- toil/jobStores/aws/utils.py +5 -4
- toil/jobStores/fileJobStore.py +1 -1
- toil/leader.py +17 -14
- toil/lib/conversions.py +19 -0
- toil/lib/generatedEC2Lists.py +8 -8
- toil/lib/io.py +28 -2
- toil/lib/resources.py +8 -1
- toil/lib/threading.py +27 -12
- toil/options/common.py +5 -7
- toil/options/wdl.py +5 -0
- toil/provisioners/abstractProvisioner.py +8 -0
- toil/statsAndLogging.py +36 -8
- toil/test/batchSystems/test_slurm.py +21 -6
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +243 -151
- toil/test/docs/scriptsTest.py +2 -2
- toil/test/jobStores/jobStoreTest.py +7 -5
- toil/test/lib/test_ec2.py +1 -1
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +37 -0
- toil/test/provisioners/clusterTest.py +9 -8
- toil/test/utils/toilDebugTest.py +1 -1
- toil/test/utils/utilsTest.py +3 -3
- toil/test/wdl/wdltoil_test.py +91 -16
- toil/utils/toilDebugFile.py +1 -1
- toil/utils/toilStats.py +309 -266
- toil/utils/toilStatus.py +1 -1
- toil/version.py +9 -9
- toil/wdl/wdltoil.py +341 -189
- toil/worker.py +31 -16
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/METADATA +6 -7
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/RECORD +51 -47
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/WHEEL +0 -0
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -0
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py
CHANGED
@@ -34,27 +34,25 @@ import stat
 import sys
 import textwrap
 import uuid
-from tempfile import NamedTemporaryFile, gettempdir
+from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
 from threading import Thread
-from typing import (
-    Sequence,
-)
+from typing import (IO,
+                    Any,
+                    Callable,
+                    Dict,
+                    Iterator,
+                    List,
+                    Mapping,
+                    MutableMapping,
+                    MutableSequence,
+                    Optional,
+                    Sequence,
+                    TextIO,
+                    Tuple,
+                    Type,
+                    TypeVar,
+                    Union,
+                    cast)
 from urllib.parse import quote, unquote, urlparse, urlsplit
 
 import cwl_utils.errors
@@ -68,36 +66,30 @@ import cwltool.load_tool
 import cwltool.main
 import cwltool.resolver
 import schema_salad.ref_resolver
-from configargparse import
+from configargparse import SUPPRESS, ArgParser, Namespace
 from cwltool.loghandler import _logger as cwllogger
 from cwltool.loghandler import defaultStreamHandler
 from cwltool.mpi import MpiConfig
 from cwltool.mutation import MutationManager
 from cwltool.pathmapper import MapperEnt, PathMapper
-from cwltool.process import (
-    shortname,
-)
+from cwltool.process import (Process,
+                             add_sizes,
+                             compute_checksums,
+                             fill_in_defaults,
+                             shortname)
 from cwltool.secrets import SecretStore
-from cwltool.software_requirements import (
-    get_container_from_software_requirements,
-)
+from cwltool.software_requirements import (DependenciesConfiguration,
+                                           get_container_from_software_requirements)
 from cwltool.stdfsaccess import StdFsAccess, abspath
-from cwltool.utils import (
-    visit_class,
-)
+from cwltool.utils import (CWLObjectType,
+                           CWLOutputType,
+                           DirectoryType,
+                           adjustDirObjs,
+                           aslist,
+                           downloadHttpFile,
+                           get_listing,
+                           normalizeFilesDirs,
+                           visit_class)
 from ruamel.yaml.comments import CommentedMap, CommentedSeq
 from schema_salad.avro.schema import Names
 from schema_salad.exceptions import ValidationException
@@ -110,18 +102,17 @@ from toil.common import Toil, addOptions
 from toil.cwl import check_cwltool_version
 
 check_cwltool_version()
-from toil.cwl.utils import (
-    visit_cwl_class_and_reduce,
-)
+from toil.cwl.utils import (CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
+                            CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
+                            download_structure,
+                            get_from_structure,
+                            visit_cwl_class_and_reduce)
 from toil.exceptions import FailedJobsException
 from toil.fileStores import FileID
 from toil.fileStores.abstractFileStore import AbstractFileStore
 from toil.job import AcceleratorRequirement, Job, Promise, Promised, unwrap
-from toil.jobStores.abstractJobStore import AbstractJobStore,
+from toil.jobStores.abstractJobStore import (AbstractJobStore,
+                                             NoSuchFileException)
 from toil.jobStores.fileJobStore import FileJobStore
 from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
 from toil.lib.io import mkdtemp
@@ -1987,7 +1978,7 @@ def upload_file(
 
     Uploads local files to the Toil file store, and sets their location to a
     reference to the toil file store.
-
+
     Unless skip_remote is set, downloads remote files into the file store and
     sets their locations to references into the file store as well.
     """
@@ -2614,6 +2605,13 @@ class CWLJob(CWLNamedJob):
             streaming_allowed=runtime_context.streaming_allowed,
         )
 
+        # Collect standard output and standard error somewhere if they don't go to files.
+        # We need to keep two FDs to these because cwltool will close what we give it.
+        default_stdout = TemporaryFile()
+        runtime_context.default_stdout = os.fdopen(os.dup(default_stdout.fileno()), 'wb')
+        default_stderr = TemporaryFile()
+        runtime_context.default_stderr = os.fdopen(os.dup(default_stderr.fileno()), 'wb')
+
         process_uuid = uuid.uuid4()  # noqa F841
         started_at = datetime.datetime.now()  # noqa F841
 
@@ -2622,13 +2620,34 @@
         logger.debug("Running tool %s with order: %s", self.cwltool, self.cwljob)
 
         runtime_context.name = self.description.unitName
+
+        status = "did_not_run"
+        try:
+            output, status = ToilSingleJobExecutor().execute(
+                process=self.cwltool,
+                job_order_object=cwljob,
+                runtime_context=runtime_context,
+                logger=cwllogger,
+            )
+        finally:
+            ended_at = datetime.datetime.now()  # noqa F841
+
+            # Log any output/error data
+            default_stdout.seek(0, os.SEEK_END)
+            if default_stdout.tell() > 0:
+                default_stdout.seek(0)
+                file_store.log_user_stream(self.description.unitName + '.stdout', default_stdout)
+                if status != "success":
+                    default_stdout.seek(0)
+                    logger.error("Failed command standard output:\n%s", default_stdout.read().decode("utf-8", errors="replace"))
+            default_stderr.seek(0, os.SEEK_END)
+            if default_stderr.tell():
+                default_stderr.seek(0)
+                file_store.log_user_stream(self.description.unitName + '.stderr', default_stderr)
+                if status != "success":
+                    default_stderr.seek(0)
+                    logger.error("Failed command standard error:\n%s", default_stderr.read().decode("utf-8", errors="replace"))
+
 
         if status != "success":
             raise cwl_utils.errors.WorkflowException(status)
 
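The os.dup() calls in the two hunks above are load-bearing: cwltool closes the file objects it is handed, so Toil hands it duplicated descriptors and keeps the original TemporaryFile objects to read back afterwards. A minimal standalone sketch of that pattern (the names here are illustrative, not from the diff):

import os
from tempfile import TemporaryFile

capture = TemporaryFile()  # stays open so the captured output can be read back
# Hand off a second descriptor to the same file; the callee may close it freely.
handed_off = os.fdopen(os.dup(capture.fileno()), 'wb')

handed_off.write(b"tool output\n")
handed_off.close()  # simulates cwltool closing the stream it was given

capture.seek(0)
print(capture.read())  # b'tool output\n' -- still readable via the original handle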
@@ -3352,12 +3371,12 @@ def determine_load_listing(
 
     1. no_listing: DIRECTORY_NAME.listing will be undefined.
         e.g.
-
+
             inputs.DIRECTORY_NAME.listing == unspecified
 
     2. shallow_listing: DIRECTORY_NAME.listing will return a list one level
        deep of DIRECTORY_NAME's contents.
-        e.g.
+        e.g.
 
            inputs.DIRECTORY_NAME.listing == [items in directory]
            inputs.DIRECTORY_NAME.listing[0].listing == undefined
@@ -3576,7 +3595,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
         dependencies_configuration = DependenciesConfiguration(options)
         job_script_provider = dependencies_configuration
 
-    options.default_container = None
     runtime_context = cwltool.context.RuntimeContext(vars(options))
     runtime_context.toplevel = True  # enable discovery of secondaryFiles
     runtime_context.find_default_container = functools.partial(
@@ -3789,7 +3807,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
             Callable[[str], FileID],
             functools.partial(toil.import_file, symlink=True),
         )
-
+
         # Import all the input files, some of which may be missing optional
         # files.
         logger.info("Importing input files...")
toil/exceptions.py
CHANGED
@@ -36,7 +36,7 @@ class FailedJobsException(Exception):
                 for job_desc in failed_jobs:
                     if job_desc.logJobStoreFileID:
                         with job_desc.getLogFileHandle(job_store) as f:
-                            self.msg += "\n" + StatsAndLogging.formatLogStream(f, job_desc)
+                            self.msg += "\n" + StatsAndLogging.formatLogStream(f, f'Log from job "{job_desc}"')
             # catch failures to prepare more complex details and only return the basics
             except Exception:
                 logger.exception("Exception when compiling information about failed jobs")
toil/fileStores/abstractFileStore.py
CHANGED

@@ -37,11 +37,12 @@ from typing import (IO,
 
 import dill
 
-from toil.common import Toil, cacheDirName
+from toil.common import Toil, cacheDirName, getDirSizeRecursively
 from toil.fileStores import FileID
 from toil.job import Job, JobDescription
 from toil.jobStores.abstractJobStore import AbstractJobStore
 from toil.lib.compatibility import deprecated
+from toil.lib.conversions import bytes2human
 from toil.lib.io import WriteWatchingStream, mkdtemp
 
 logger = logging.getLogger(__name__)
@@ -116,7 +117,8 @@ class AbstractFileStore(ABC):
             self.jobDesc.command.split()[1] if self.jobDesc.command else ""
         )
         self.waitForPreviousCommit = waitForPreviousCommit
-        self.
+        self.logging_messages: List[Dict[str, Union[int, str]]] = []
+        self.logging_user_streams: List[dict[str, str]] = []
         # Records file IDs of files deleted during the current job. Doesn't get
         # committed back until the job is completely successful, because if the
         # job is re-run it will need to be able to re-delete these files.
@@ -125,6 +127,8 @@ class AbstractFileStore(ABC):
         # Holds records of file ID, or file ID and local path, for reporting
         # the accessed files of failed jobs.
         self._accessLog: List[Tuple[str, ...]] = []
+        # Holds total bytes of observed disk usage for the last job run under open()
+        self._job_disk_used: Optional[int] = None
 
     @staticmethod
     def createFileStore(
@@ -188,6 +192,7 @@ class AbstractFileStore(ABC):
         :param job: The job instance of the toil job to run.
         """
         failed = True
+        job_requested_disk = job.disk
         try:
             yield
             failed = False
@@ -197,6 +202,33 @@ class AbstractFileStore(ABC):
             if failed:
                 self._dumpAccessLogs()
 
+            # See how much disk space is used at the end of the job.
+            # Not a real peak disk usage, but close enough to be useful for warning the user.
+            self._job_disk_used = getDirSizeRecursively(self.localTempDir)
+
+            # Report disk usage
+            percent: float = 0.0
+            if job_requested_disk and job_requested_disk > 0:
+                percent = float(self._job_disk_used) / job_requested_disk * 100
+            disk_usage: str = (f"Job {self.jobName} used {percent:.2f}% disk ({bytes2human(self._job_disk_used)}B [{self._job_disk_used}B] used, "
+                               f"{bytes2human(job_requested_disk)}B [{job_requested_disk}B] requested).")
+            if self._job_disk_used > job_requested_disk:
+                self.log_to_leader("Job used more disk than requested. For CWL, consider increasing the outdirMin "
+                                   f"requirement, otherwise, consider increasing the disk requirement. {disk_usage}",
+                                   level=logging.WARNING)
+            else:
+                self.log_to_leader(disk_usage, level=logging.DEBUG)
+
+    def get_disk_usage(self) -> Optional[int]:
+        """
+        Get the number of bytes of disk used by the last job run under open().
+
+        Disk usage is measured at the end of the job.
+        TODO: Sample periodically and record peak usage.
+        """
+        return self._job_disk_used
+
+
     # Functions related to temp files and directories
     def getLocalTempDir(self) -> str:
         """
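To make the reporting above concrete, here is the arithmetic with made-up numbers (only the percent formula and the comparison come from the diff; the byte values are invented for illustration):

used = 3221225472       # bytes found under localTempDir when the job ends
requested = 2147483648  # job.disk, the job's disk requirement

percent = float(used) / requested * 100  # 150.00

# used > requested, so open() takes the log_to_leader(..., level=logging.WARNING)
# branch; otherwise the same message is logged at DEBUG.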
@@ -611,13 +643,30 @@ class AbstractFileStore(ABC):
         :param level: The logging level.
         """
         logger.log(level=level, msg=("LOG-TO-MASTER: " + text))
-        self.
+        self.logging_messages.append(dict(text=text, level=level))
 
 
     @deprecated(new_function_name='export_file')
     def logToMaster(self, text: str, level: int = logging.INFO) -> None:
         self.log_to_leader(text, level)
-
+
+    def log_user_stream(self, name: str, stream: IO[bytes]) -> None:
+        """
+        Send a stream of UTF-8 text to the leader as a named log stream.
+
+        Useful for things like the error logs of Docker containers. The leader
+        will show it to the user or organize it appropriately for user-level
+        log information.
+
+        :param name: A hierarchical, .-delimited string.
+        :param stream: A stream of encoded text. Encoding errors will be
+               tolerated.
+        """
+
+        # Read the whole stream into memory
+        steam_data = stream.read().decode('utf-8', errors='replace')
+        # And remember it for the worker to fish out
+        self.logging_user_streams.append(dict(name=name, text=steam_data))
 
     # Functions run after the completion of the job.
     @abstractmethod
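A sketch of how job code might feed the new log_user_stream() API; the function and stream contents here are invented for illustration, and file_store stands for whichever AbstractFileStore subclass the job runs under:

import io

def forward_container_log(file_store):
    # Any binary stream of roughly-UTF-8 text can be handed over; decoding
    # errors are replaced rather than raised, and the whole stream is read
    # into memory for the worker to ship to the leader.
    container_log = io.BytesIO(b"starting tool...\ndone.\n")
    file_store.log_user_stream("my_workflow.my_step.docker", container_log)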
toil/fileStores/cachingFileStore.py
CHANGED

@@ -32,13 +32,12 @@ from typing import (Any,
                     Sequence,
                     Tuple)
 
-from toil.common import cacheDirName,
+from toil.common import cacheDirName, getFileSystemSize
 from toil.fileStores import FileID
 from toil.fileStores.abstractFileStore import AbstractFileStore
 from toil.job import Job, JobDescription
 from toil.jobStores.abstractJobStore import AbstractJobStore
 from toil.lib.compatibility import deprecated
-from toil.lib.conversions import bytes2human
 from toil.lib.io import (atomic_copy,
                          atomic_copyobj,
                          make_public_dir,
@@ -1041,7 +1040,8 @@ class CachingFileStore(AbstractFileStore):
         # Check the status of all jobs on this node. If there are jobs that started and died before
         # cleaning up their presence from the database, clean them up ourselves.
         self._removeDeadJobs(self.coordination_dir, self.con)
-        # Get the
+        # Get the disk requirement for the job, which we will use to know if we
+        # have filled the cache or not.
         self.jobDiskBytes = job.disk
 
         logger.debug('Actually running job (%s) with ID (%s) which wants %d of our %d bytes.',
@@ -1055,22 +1055,6 @@ class CachingFileStore(AbstractFileStore):
             with super().open(job):
                 yield
         finally:
-            # See how much disk space is used at the end of the job.
-            # Not a real peak disk usage, but close enough to be useful for warning the user.
-            # TODO: Push this logic into the abstract file store
-            disk: int = getDirSizeRecursively(self.localTempDir)
-            percent: float = 0.0
-            if self.jobDiskBytes and self.jobDiskBytes > 0:
-                percent = float(disk) / self.jobDiskBytes * 100
-            disk_usage: str = (f"Job {self.jobName} used {percent:.2f}% disk ({bytes2human(disk)}B [{disk}B] used, "
-                               f"{bytes2human(self.jobDiskBytes)}B [{self.jobDiskBytes}B] requested).")
-            if disk > self.jobDiskBytes:
-                self.log_to_leader("Job used more disk than requested. For CWL, consider increasing the outdirMin "
-                                   f"requirement, otherwise, consider increasing the disk requirement. {disk_usage}",
-                                   level=logging.WARNING)
-            else:
-                self.log_to_leader(disk_usage, level=logging.DEBUG)
-
             # Go back up to the per-worker local temp directory.
             os.chdir(startingDir)
             self.cleanupInProgress = True
@@ -1095,7 +1079,7 @@ class CachingFileStore(AbstractFileStore):
         # Create an empty file to get an ID.
         # Make sure to pass along the file basename.
         # TODO: this empty file could leak if we die now...
-        fileID = self.jobStore.
+        fileID = self.jobStore.get_empty_file_store_id(creatorID, cleanup, os.path.basename(localFileName))
         # Work out who we are
         with self.as_process() as me:
 
toil/fileStores/nonCachingFileStore.py
CHANGED

@@ -35,13 +35,12 @@ from typing import (IO,
 
 import dill
 
-from toil.common import
+from toil.common import getFileSystemSize
 from toil.fileStores import FileID
 from toil.fileStores.abstractFileStore import AbstractFileStore
 from toil.job import Job, JobDescription
 from toil.jobStores.abstractJobStore import AbstractJobStore
 from toil.lib.compatibility import deprecated
-from toil.lib.conversions import bytes2human
 from toil.lib.io import make_public_dir, robust_rmtree
 from toil.lib.retry import ErrorCondition, retry
 from toil.lib.threading import get_process_name, process_name_exists
@@ -102,7 +101,6 @@ class NonCachingFileStore(AbstractFileStore):
 
     @contextmanager
     def open(self, job: Job) -> Generator[None, None, None]:
-        jobReqs = job.disk
         startingDir = os.getcwd()
         self.localTempDir: str = make_public_dir(in_directory=self.localTempDir)
         self._removeDeadJobs(self.coordination_dir)
@@ -116,16 +114,6 @@ class NonCachingFileStore(AbstractFileStore):
             with super().open(job):
                 yield
         finally:
-            disk = getDirSizeRecursively(self.localTempDir)
-            percent = float(disk) / jobReqs * 100 if jobReqs > 0 else 0.0
-            disk_usage = (f"Job {self.jobName} used {percent:.2f}% disk ({bytes2human(disk)}B [{disk}B] used, "
-                          f"{bytes2human(jobReqs)}B [{jobReqs}B] requested).")
-            if disk > jobReqs:
-                self.log_to_leader("Job used more disk than requested. For CWL, consider increasing the outdirMin "
-                                   f"requirement, otherwise, consider increasing the disk requirement. {disk_usage}",
-                                   level=logging.WARNING)
-            else:
-                self.log_to_leader(disk_usage, level=logging.DEBUG)
             os.chdir(startingDir)
             # Finally delete the job from the worker
             self.check_for_state_corruption()
@@ -362,7 +350,10 @@ class NonCachingFileStore(AbstractFileStore):
         jobState = {'jobProcessName': get_process_name(self.coordination_dir),
                     'jobName': self.jobName,
                     'jobDir': self.localTempDir}
+        try:
+            (fd, jobStateFile) = tempfile.mkstemp(suffix='.jobState.tmp', dir=self.coordination_dir)
+        except Exception as e:
+            raise RuntimeError("Could not make state file in " + self.coordination_dir) from e
         with open(fd, 'wb') as fH:
             # Write data
             dill.dump(jobState, fH)
toil/job.py
CHANGED
@@ -45,6 +45,7 @@ from typing import (TYPE_CHECKING,
 
 from configargparse import ArgParser
 
+from toil.bus import Names
 from toil.lib.compatibility import deprecated
 
 if sys.version_info >= (3, 8):
@@ -710,7 +711,6 @@ class Requirer:
             parts = ['no requirements']
         return ', '.join(parts)
 
-
 class JobDescription(Requirer):
     """
     Stores all the information that the Toil Leader ever needs to know about a Job.
@@ -814,11 +814,14 @@ class JobDescription(Requirer):
         # in the process of being committed.
         self.filesToDelete = []
 
-        # Holds
+        # Holds job names and IDs of the jobs that have been chained into this
         # job, and which should be deleted when this job finally is deleted
         # (but not before). The successor relationships with them will have
-        # been cut, so we need to hold onto them somehow.
+        # been cut, so we need to hold onto them somehow. Includes each
+        # chained-in job with its original ID, and also this job's ID with its
+        # original names, or is empty if no chaining has happened.
+        # The first job in the chain comes first in the list.
+        self._merged_job_names: List[Names] = []
 
         # The number of direct predecessors of the job. Needs to be stored at
         # the JobDescription to support dynamically-created jobs with multiple
@@ -867,9 +870,26 @@ class JobDescription(Requirer):
         # And we log who made the version (by PID)
         self._job_version_writer = 0
 
+    def get_names(self) -> Names:
+        """
+        Get the names and ID of this job as a named tuple.
+        """
+        return Names(self.jobName, self.unitName, self.displayName, self.displayName, str(self.jobStoreID))
+
+    def get_chain(self) -> List[Names]:
+        """
+        Get all the jobs that executed in this job's chain, in order.
+
+        For each job, produces a named tuple with its various names and its
+        original job store ID. The jobs in the chain are in execution order.
+
+        If the job hasn't run yet or it didn't chain, produces a one-item list.
+        """
+        if len(self._merged_job_names) == 0:
+            # We haven't merged so we're just ourselves.
+            return [self.get_names()]
+        else:
+            return list(self._merged_job_names)
 
     def serviceHostIDsInBatches(self) -> Iterator[List[str]]:
         """
@@ -1045,8 +1065,23 @@ class JobDescription(Requirer):
         self.successor_phases = old_phases + self.successor_phases
 
         # When deleting, we need to delete the files for our old ID, and also
-        # anything that needed to be deleted for the job we are replacing.
+        # anything that needed to be deleted for the job we are replacing. And
+        # we need to keep track of all the names of jobs involved for logging.
+
+        # We need first the job we are merging into if nothing has merged into
+        # it yet, then anything that already merged into it (including it),
+        # then us if nothing has yet merged into us, then anything that merged
+        # into us (including us)
+        _merged_job_names = []
+        if len(other._merged_job_names) == 0:
+            _merged_job_names.append(other.get_names())
+        _merged_job_names += other._merged_job_names
+        if len(self._merged_job_names) == 0:
+            _merged_job_names.append(self.get_names())
+        _merged_job_names += self._merged_job_names
+        self._merged_job_names = _merged_job_names
+
+        # Now steal its ID.
         self.jobStoreID = other.jobStoreID
 
         if len(other.filesToDelete) > 0:
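The ordering rule in replace() above is easier to see with a toy model; this stand-in uses plain strings where the real code stores toil.bus.Names tuples:

def merge_chain_names(self_chain, self_names, other_chain, other_names):
    # Predecessor (other) first, then us; a job whose chain is still empty
    # contributes just its own names.
    merged = []
    if len(other_chain) == 0:
        merged.append(other_names)
    merged += other_chain
    if len(self_chain) == 0:
        merged.append(self_names)
    merged += self_chain
    return merged

# "B" chains after (replaces) "A":
chain = merge_chain_names([], "B", [], "A")           # -> ['A', 'B']
# ...and then "C" chains after the merged job. Its chain is now non-empty,
# so the fourth argument is ignored:
chain = merge_chain_names([], "C", chain, "ignored")  # -> ['A', 'B', 'C']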
@@ -1263,26 +1298,6 @@ class JobDescription(Requirer):
         self._job_version_writer = os.getpid()
         logger.debug("New job version: %s", self)
 
-    def get_job_kind(self) -> str:
-        """
-        Return an identifying string for the job.
-
-        The result may contain spaces.
-
-        Returns: Either the unit name, job name, or display name, which identifies
-                 the kind of job it is to toil.
-                 Otherwise "Unknown Job" in case no identifier is available
-        """
-        if self.unitName:
-            return self.unitName
-        elif self.jobName:
-            return self.jobName
-        elif self.displayName:
-            return self.displayName
-        else:
-            return "Unknown Job"
-
-
 class ServiceJobDescription(JobDescription):
     """A description of a job that hosts a service."""
 
@@ -2787,7 +2802,8 @@ class Job:
                     clock=str(totalCpuTime - startClock),
                     class_name=self._jobName(),
                     memory=str(totalMemoryUsage),
-                    requested_cores=str(self.cores)
+                    requested_cores=str(self.cores),
+                    disk=str(fileStore.get_disk_usage())
                 )
             )
 
toil/jobStores/abstractJobStore.py
CHANGED

@@ -835,16 +835,17 @@ class AbstractJobStore(ABC):
         root_job_description = self.load_root_job()
         reachable_from_root: Set[str] = set()
 
-        for service_jobstore_id in root_job_description.services:
-            if haveJob(service_jobstore_id):
-                reachable_from_root.add(service_jobstore_id)
-        for merged_jobstore_id in root_job_description.merged_jobs:
+
+        for merged_in in root_job_description.get_chain():
+            # Add the job itself and any other jobs that chained with it.
             # Keep merged-in jobs around themselves, but don't bother
             # exploring them, since we took their successors.
-            reachable_from_root.add(
+            reachable_from_root.add(merged_in.job_store_id)
+        # add all of root's linked service jobs as well
+        for service_job_store_id in root_job_description.services:
+            if haveJob(service_job_store_id):
+                reachable_from_root.add(service_job_store_id)
+
 
         # Unprocessed means it might have successor jobs we need to add.
         unprocessed_job_descriptions = [root_job_description]
@@ -852,24 +853,21 @@ class AbstractJobStore(ABC):
         while unprocessed_job_descriptions:
             new_job_descriptions_to_process = []  # Reset.
             for job_description in unprocessed_job_descriptions:
-                for
-                )
+                for merged_in in job_description.get_chain():
+                    # Add the job and anything chained with it.
+                    # Keep merged-in jobs around themselves, but don't bother
+                    # exploring them, since we took their successors.
+                    reachable_from_root.add(merged_in.job_store_id)
+                for successor_job_store_id in job_description.allSuccessors():
+                    if successor_job_store_id not in reachable_from_root and haveJob(successor_job_store_id):
+                        successor_job_description = getJobDescription(successor_job_store_id)
 
                         # Add all of the successor's linked service jobs as well.
-                        for
-                        if haveJob(
-                        reachable_from_root.add(
+                        for service_job_store_id in successor_job_description.services:
+                            if haveJob(service_job_store_id):
+                                reachable_from_root.add(service_job_store_id)
 
                         new_job_descriptions_to_process.append(successor_job_description)
-                for merged_jobstore_id in job_description.merged_jobs:
-                    # Keep merged-in jobs around themselves, but don't bother
-                    # exploring them, since we took their successors.
-                    reachable_from_root.add(merged_jobstore_id)
             unprocessed_job_descriptions = new_job_descriptions_to_process
 
         logger.debug(f"{len(reachable_from_root)} jobs reachable from root.")