toil 9.0.0__py3-none-any.whl → 9.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. toil/batchSystems/abstractBatchSystem.py +13 -5
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
  3. toil/batchSystems/kubernetes.py +13 -2
  4. toil/batchSystems/mesos/batchSystem.py +33 -2
  5. toil/batchSystems/slurm.py +191 -16
  6. toil/cwl/cwltoil.py +17 -82
  7. toil/fileStores/__init__.py +1 -1
  8. toil/fileStores/abstractFileStore.py +5 -2
  9. toil/fileStores/cachingFileStore.py +1 -1
  10. toil/job.py +30 -14
  11. toil/jobStores/abstractJobStore.py +24 -19
  12. toil/jobStores/aws/jobStore.py +862 -1963
  13. toil/jobStores/aws/utils.py +24 -270
  14. toil/jobStores/googleJobStore.py +25 -9
  15. toil/jobStores/utils.py +0 -327
  16. toil/leader.py +27 -22
  17. toil/lib/aws/config.py +22 -0
  18. toil/lib/aws/s3.py +477 -9
  19. toil/lib/aws/utils.py +22 -33
  20. toil/lib/checksum.py +88 -0
  21. toil/lib/conversions.py +33 -31
  22. toil/lib/directory.py +217 -0
  23. toil/lib/ec2.py +97 -29
  24. toil/lib/exceptions.py +2 -1
  25. toil/lib/expando.py +2 -2
  26. toil/lib/generatedEC2Lists.py +73 -16
  27. toil/lib/io.py +33 -2
  28. toil/lib/memoize.py +21 -7
  29. toil/lib/pipes.py +385 -0
  30. toil/lib/retry.py +1 -1
  31. toil/lib/threading.py +1 -1
  32. toil/lib/web.py +4 -5
  33. toil/provisioners/__init__.py +5 -2
  34. toil/provisioners/aws/__init__.py +43 -36
  35. toil/provisioners/aws/awsProvisioner.py +22 -13
  36. toil/provisioners/node.py +60 -12
  37. toil/resource.py +3 -13
  38. toil/test/__init__.py +14 -16
  39. toil/test/batchSystems/test_slurm.py +103 -14
  40. toil/test/cwl/staging_cat.cwl +27 -0
  41. toil/test/cwl/staging_make_file.cwl +25 -0
  42. toil/test/cwl/staging_workflow.cwl +43 -0
  43. toil/test/cwl/zero_default.cwl +61 -0
  44. toil/test/docs/scripts/tutorial_staging.py +17 -8
  45. toil/test/jobStores/jobStoreTest.py +23 -133
  46. toil/test/lib/aws/test_iam.py +7 -7
  47. toil/test/lib/aws/test_s3.py +30 -33
  48. toil/test/lib/aws/test_utils.py +9 -9
  49. toil/test/provisioners/aws/awsProvisionerTest.py +59 -6
  50. toil/test/src/autoDeploymentTest.py +2 -3
  51. toil/test/src/fileStoreTest.py +89 -87
  52. toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
  53. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
  54. toil/test/utils/toilKillTest.py +35 -28
  55. toil/test/wdl/md5sum/md5sum.json +1 -1
  56. toil/test/wdl/testfiles/gather.wdl +52 -0
  57. toil/test/wdl/wdltoil_test.py +120 -38
  58. toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
  59. toil/utils/toilDebugFile.py +6 -3
  60. toil/utils/toilStats.py +17 -2
  61. toil/version.py +6 -6
  62. toil/wdl/wdltoil.py +1038 -549
  63. toil/worker.py +5 -2
  64. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/METADATA +12 -12
  65. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/RECORD +69 -61
  66. toil/lib/iterables.py +0 -112
  67. toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
  68. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/WHEEL +0 -0
  69. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/entry_points.txt +0 -0
  70. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/licenses/LICENSE +0 -0
  71. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/top_level.txt +0 -0
toil/batchSystems/abstractBatchSystem.py CHANGED
@@ -11,6 +11,7 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+ from dataclasses import dataclass
  import enum
  import logging
  import os
@@ -72,10 +73,13 @@ class BatchJobExitReason(enum.IntEnum):
  except ValueError:
  return str(value)

-
- class UpdatedBatchJobInfo(NamedTuple):
+ @dataclass
+ class UpdatedBatchJobInfo:
  jobID: int
- exitStatus: int
+ """
+ The Toil batch system ID of the job.
+ """
+ exitStatus: int = EXIT_STATUS_UNAVAILABLE_VALUE
  """
  The exit status (integer value) of the job. 0 implies successful.

@@ -83,8 +87,12 @@ class UpdatedBatchJobInfo(NamedTuple):
  (e.g. job is lost, or otherwise died but actual exit code was not reported).
  """

- exitReason: Optional[BatchJobExitReason]
- wallTime: Union[float, int, None]
+ exitReason: Optional[BatchJobExitReason] = None
+ wallTime: Union[float, int, None] = None
+ backing_id: Optional[str] = None
+ """
+ The identifier for the job in the backing scheduler, if available.
+ """


  # Information required for worker cleanup on shutdown of the batch system.
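Note: UpdatedBatchJobInfo changes from a NamedTuple to a dataclass with defaults and gains a backing_id field. A minimal illustrative sketch of how callers can now construct it, assuming it remains importable from toil.batchSystems.abstractBatchSystem as the hunk above implies:

    # Illustrative only: with the dataclass defaults, callers pass only what they know.
    from toil.batchSystems.abstractBatchSystem import UpdatedBatchJobInfo

    # Only the Toil batch system job ID is required; exitStatus defaults to
    # EXIT_STATUS_UNAVAILABLE_VALUE and exitReason/wallTime/backing_id to None.
    info = UpdatedBatchJobInfo(jobID=42)

    # The new backing_id field can carry the backing scheduler's own job identifier.
    info = UpdatedBatchJobInfo(jobID=42, exitStatus=0, backing_id="12345")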
toil/batchSystems/abstractGridEngineBatchSystem.py CHANGED
@@ -159,14 +159,21 @@ class AbstractGridEngineBatchSystem(BatchSystemCleanupSupport):
  logger.debug("Running %r", subLine)
  batchJobID = self.boss.with_retries(self.submitJob, subLine)
  if self.boss._outbox is not None:
- # JobID corresponds to the toil version of the jobID, dif from jobstore idea of the id, batchjobid is what we get from slurm
+ # JobID corresponds to the toil version of the jobID,
+ # different from the jobstore's idea of the id. batchjobid
+ # is what we get from e.g. slurm
  self.boss._outbox.publish(
  ExternalBatchIdMessage(
  jobID, batchJobID, self.boss.__class__.__name__
  )
  )

- logger.debug("Submitted job %s", str(batchJobID))
+ logger.info(
+ "Job %s with batch system ID %s queued as job %s",
+ jobName,
+ jobID,
+ str(batchJobID)
+ )

  # Store dict for mapping Toil job ID to batch job ID
  # TODO: Note that this currently stores a tuple of (batch system
@@ -251,8 +258,8 @@ class AbstractGridEngineBatchSystem(BatchSystemCleanupSupport):
  self.coalesce_job_exit_codes, batch_job_id_list
  )
  # We got the statuses as a batch
- for running_job_id, status in zip(running_job_list, statuses):
- activity = self._handle_job_status(running_job_id, status, activity)
+ for running_job_id, status, backing_id in zip(running_job_list, statuses, batch_job_id_list):
+ activity = self._handle_job_status(running_job_id, status, activity, backing_id)

  self._checkOnJobsCache = activity
  self._checkOnJobsTimestamp = datetime.now()
@@ -263,6 +270,7 @@ class AbstractGridEngineBatchSystem(BatchSystemCleanupSupport):
  job_id: int,
  status: Union[int, tuple[int, Optional[BatchJobExitReason]], None],
  activity: bool,
+ backing_id: str,
  ) -> bool:
  """
  Helper method for checkOnJobs to handle job statuses
@@ -275,7 +283,11 @@ class AbstractGridEngineBatchSystem(BatchSystemCleanupSupport):
  code, reason = status
  self.updatedJobsQueue.put(
  UpdatedBatchJobInfo(
- jobID=job_id, exitStatus=code, exitReason=reason, wallTime=None
+ jobID=job_id,
+ exitStatus=code,
+ exitReason=reason,
+ wallTime=None,
+ backing_id=backing_id,
  )
  )
  self.forgetJob(job_id)
toil/batchSystems/kubernetes.py CHANGED
@@ -37,6 +37,7 @@ from threading import Condition, Event, RLock, Thread
  from typing import Any, Callable, Literal, Optional, TypeVar, Union, cast, overload

  from toil.lib.conversions import opt_strtobool
+ from toil.lib.throttle import LocalThrottle

  if sys.version_info < (3, 10):
  from typing_extensions import ParamSpec
@@ -281,6 +282,10 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
  # in the queue or any resource becomes available.
  self._work_available: Condition = Condition(lock=self._mutex)

+ # To make sure we don't spam the log when the metrics server is down,
+ # we use a throttle
+ self._metrics_throttle: LocalThrottle = LocalThrottle(600)
+
  self.schedulingThread: Thread = Thread(target=self._scheduler, daemon=True)
  self.schedulingThread.start()

@@ -1363,7 +1368,8 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
  # This is the sort of error we would expect from an overloaded
  # Kubernetes or a dead metrics service.
  # We can't tell that the pod is stuck, so say that it isn't.
- logger.warning("Could not query metrics service: %s", e)
+ if self._metrics_throttle.throttle(False):
+ logger.warning("Kubernetes metrics service is not available: %s", e)
  return False
  else:
  raise
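Note: the LocalThrottle(600) guard above limits the "metrics service is not available" warning to roughly once per 600-second window instead of once per poll. A standalone sketch of the same rate-limiting idea, not LocalThrottle itself (its exact semantics, including the False argument meaning "do not block", live in toil.lib.throttle and are assumed here):

    import time

    class SimpleLogThrottle:
        """Allow an action at most once per `interval` seconds (illustrative only)."""

        def __init__(self, interval: float) -> None:
            self.interval = interval
            self.last_allowed = float("-inf")

        def throttle(self) -> bool:
            """Return True if the caller may act now, False if still inside the window."""
            now = time.monotonic()
            if now - self.last_allowed >= self.interval:
                self.last_allowed = now
                return True
            return False

    throttle = SimpleLogThrottle(600)
    if throttle.throttle():
        print("metrics service unavailable")  # would be logger.warning(...) in Toil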
@@ -1602,6 +1608,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
  exitStatus=exitCode,
  wallTime=runtime,
  exitReason=exitReason,
+ backing_id=jobObject.metadata.name,
  )

  if (exitReason == BatchJobExitReason.FAILED) or (
@@ -1855,7 +1862,11 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):

  # Return the one finished job we found
  return UpdatedBatchJobInfo(
- jobID=jobID, exitStatus=exitCode, wallTime=runtime, exitReason=None
+ jobID=jobID,
+ exitStatus=exitCode,
+ wallTime=runtime,
+ exitReason=None,
+ backing_id=jobObject.metadata.name,
  )

  def _waitForJobDeath(self, jobName: str) -> None:
toil/batchSystems/mesos/batchSystem.py CHANGED
@@ -103,6 +103,9 @@ class MesosBatchSystem(BatchSystemLocalSupport, AbstractScalableBatchSystem, Sch
  if config.mesos_framework_id is not None:
  self.mesos_framework_id = config.mesos_framework_id

+ # How long in seconds to wait to register before declaring Mesos unreachable.
+ self.mesos_timeout = 60
+
  # Written to when Mesos kills tasks, as directed by Toil.
  # Jobs must not enter this set until they are removed from runningJobMap.
  self.killedJobIds = set()
@@ -345,17 +348,38 @@ class MesosBatchSystem(BatchSystemLocalSupport, AbstractScalableBatchSystem, Sch
  framework.roles = config.mesos_role
  framework.capabilities = [dict(type="MULTI_ROLE")]

+ endpoint = self._resolveAddress(self.mesos_endpoint)
+ log.info("Connecting to Mesos at %s...", self.mesos_endpoint)
+
  # Make the driver which implements most of the scheduler logic and calls back to us for the user-defined parts.
  # Make sure it will call us with nice namespace-y addicts
  self.driver = MesosSchedulerDriver(
  self,
  framework,
- self._resolveAddress(self.mesos_endpoint),
+ endpoint,
  use_addict=True,
  implicit_acknowledgements=True,
  )
  self.driver.start()

+ wait_count = 0
+ while self.frameworkId is None:
+ # Wait to register with Mesos, and eventually fail if it just isn't
+ # responding.
+
+ # TODO: Use a condition instead of a spin wait.
+
+ if wait_count >= self.mesos_timeout:
+ error_message = f"Could not connect to Mesos endpoint at {self.mesos_endpoint}"
+ log.error(error_message)
+ self.shutdown()
+ raise RuntimeError(error_message)
+ elif wait_count > 1 and wait_count % 10 == 0:
+ log.warning("Waiting for Mesos registration (try %s/%s)", wait_count, self.mesos_timeout)
+ time.sleep(1)
+ wait_count += 1
+
+
  @staticmethod
  def _resolveAddress(address):
  """
@@ -394,10 +418,17 @@ class MesosBatchSystem(BatchSystemLocalSupport, AbstractScalableBatchSystem, Sch
  """
  Invoked when the scheduler successfully registers with a Mesos master
  """
- log.debug("Registered with framework ID %s", frameworkId.value)
+ log.info("Registered with Mesos as framework ID %s", frameworkId.value)
  # Save the framework ID
  self.frameworkId = frameworkId.value

+ def error(self, driver, message):
+ """
+ Invoked when Mesos reports an unrecoverable error.
+ """
+ log.error("Mesos error: %s", message)
+ super().error(driver, message)
+
  def _declineAllOffers(self, driver, offers):
  for offer in offers:
  driver.declineOffer(offer.id)
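Note: the new registration wait above polls frameworkId once per second for up to mesos_timeout seconds. The TODO in that hunk suggests replacing the spin wait with a condition; a hypothetical sketch of that alternative (the callback name and timer-based simulation below are assumptions for illustration, not Toil code):

    import threading

    # Hypothetical sketch of an Event-based wait instead of polling frameworkId.
    registered = threading.Event()

    def registered_callback(framework_id: str) -> None:
        # Would be called from the driver thread when Mesos accepts the framework.
        registered.set()

    # Simulate the driver registering after a moment so the sketch completes.
    threading.Timer(0.1, registered_callback, args=["fake-framework-id"]).start()

    # Main thread: block until registration or timeout, with no spin wait.
    if not registered.wait(timeout=60):
        raise RuntimeError("Could not connect to Mesos endpoint")
    print("registered")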
toil/batchSystems/slurm.py CHANGED
@@ -18,9 +18,11 @@ import logging
  import math
  import os
  import sys
- from argparse import SUPPRESS, ArgumentParser, _ArgumentGroup
  import shlex
- from typing import Callable, NamedTuple, TypeVar
+
+ from argparse import SUPPRESS, ArgumentParser, _ArgumentGroup
+ from datetime import datetime, timedelta, timezone
+ from typing import Callable, NamedTuple, Optional, TypeVar

  from toil.batchSystems.abstractBatchSystem import (
  EXIT_STATUS_UNAVAILABLE_VALUE,
@@ -350,9 +352,18 @@ class SlurmBatchSystem(AbstractGridEngineBatchSystem):
  ) -> list[int | tuple[int, BatchJobExitReason | None] | None]:
  """
  Collect all job exit codes in a single call.
- :param batch_job_id_list: list of Job ID strings, where each string has the form
- "<job>[.<task>]".
- :return: list of job exit codes or exit code, exit reason pairs associated with the list of job IDs.
+
+ :param batch_job_id_list: list of Job ID strings, where each string
+ has the form ``<job>[.<task>]``.
+
+ :return: list of job exit codes or exit code, exit reason pairs
+ associated with the list of job IDs.
+
+ :raises CalledProcessErrorStderr: if communicating with Slurm went
+ wrong.
+
+ :raises OSError: if job details are not available because a Slurm
+ command could not start.
  """
  logger.log(
  TRACE, "Getting exit codes for slurm jobs: %s", batch_job_id_list
@@ -387,15 +398,54 @@ class SlurmBatchSystem(AbstractGridEngineBatchSystem):
  Helper function for `getJobExitCode` and `coalesce_job_exit_codes`.
  Fetch job details from Slurm's accounting system or job control system.
  :param job_id_list: list of integer Job IDs.
- :return: dict of job statuses, where key is the integer job ID, and value is a tuple
- containing the job's state and exit code.
+ :return: dict of job statuses, where key is the integer job ID, and
+ value is a tuple containing the job's state and exit code.
+ :raises CalledProcessErrorStderr: if communicating with Slurm went
+ wrong.
+ :raises OSError: if job details are not available because a Slurm
+ command could not start.
  """
+
+ status_dict = {}
+ scontrol_problem: Optional[Exception] = None
+
+ try:
+ # Get all the job details we can from scontrol, which we think
+ # might be faster/less dangerous than sacct searching, even
+ # though it can't be aimed at more than one job.
+ status_dict.update(self._getJobDetailsFromScontrol(job_id_list))
+ except (CalledProcessErrorStderr, OSError) as e:
+ if isinstance(e, OSError):
+ logger.warning("Could not run scontrol: %s", e)
+ else:
+ logger.warning("Error from scontrol: %s", e)
+ scontrol_problem = e
+
+ logger.debug("After scontrol, got statuses: %s", status_dict)
+
+ # See what's not handy in scontrol (or everything if we couldn't
+ # call it).
+ sacct_job_id_list = self._remaining_jobs(job_id_list, status_dict)
+
+ logger.debug("Remaining jobs to find out about: %s", sacct_job_id_list)
+
  try:
- status_dict = self._getJobDetailsFromSacct(job_id_list)
+ # Ask sacct about those jobs
+ status_dict.update(self._getJobDetailsFromSacct(sacct_job_id_list))
  except (CalledProcessErrorStderr, OSError) as e:
  if isinstance(e, OSError):
  logger.warning("Could not run sacct: %s", e)
- status_dict = self._getJobDetailsFromScontrol(job_id_list)
+ else:
+ logger.warning("Error from sacct: %s", e)
+ if scontrol_problem is not None:
+ # Neither approach worked at all
+ raise
+
+ # One of the methods worked, so we have at least (None, None)
+ # values filled in for all jobs.
+ assert len(status_dict) == len(job_id_list)
+
+
  return status_dict

  def _get_job_return_code(
@@ -466,15 +516,123 @@ class SlurmBatchSystem(AbstractGridEngineBatchSystem):

  return state_token

+ def _remaining_jobs(self, job_id_list: list[int], job_details: dict[int, tuple[str | None, int | None]]) -> list[int]:
+ """
+ Given a list of job IDs and a list of job details (state and exit
+ code), get the list of job IDs where the details are (None, None)
+ (or are missing).
+ """
+ return [
+ j
+ for j in job_id_list
+ if job_details.get(j, (None, None)) == (None, None)
+ ]
+
  def _getJobDetailsFromSacct(
- self, job_id_list: list[int]
+ self,
+ job_id_list: list[int],
+ ) -> dict[int, tuple[str | None, int | None]]:
+ """
+ Get SLURM job exit codes for the jobs in `job_id_list` by running `sacct`.
+
+ Handles querying manageable time periods until all jobs have information.
+
+ There is no guarantee of inter-job consistency: one job may really
+ finish after another, but we might see the earlier-finishing job
+ still running and the later-finishing job finished.
+
+ :param job_id_list: list of integer batch job IDs.
+ :return: dict of job statuses, where key is the job-id, and value
+ is a tuple containing the job's state and exit code. Jobs with
+ no information reported from Slurm will have (None, None).
+ """
+
+ # Pick a now
+ now = datetime.now().astimezone(None)
+ # Decide when to start the search (first copy of past midnight)
+ begin_time = now.replace(
+ hour=0,
+ minute=0,
+ second=0,
+ microsecond=0,
+ fold=0
+ )
+ # And when to end (a day after that)
+ end_time = begin_time + timedelta(days=1)
+ while end_time < now:
+ # If something goes really weird, advance up to our chosen now
+ end_time += timedelta(days=1)
+ # If we don't go around the loop at least once, we might end up
+ # with an empty dict being returned, which shouldn't happen. We
+ # need the (None, None) entries for jobs we can't find.
+ assert end_time >= self.boss.start_time
+
+ results: dict[int, tuple[str | None, int | None]] = {}
+
+ while len(job_id_list) > 0 and end_time >= self.boss.start_time:
+ # There are still jobs to look for and our search isn't
+ # exclusively for stuff that only existed before our workflow
+ # started.
+ results.update(
+ self._get_job_details_from_sacct_for_range(
+ job_id_list,
+ begin_time,
+ end_time
+ )
+ )
+ job_id_list = self._remaining_jobs(job_id_list, results)
+ # If we have to search again, search the previous day. But
+ # overlap a tiny bit so the endpoints don't exactly match, in
+ # case Slurm is not working with inclusive intervals.
+ # TODO: is Slurm working with inclusive intervals?
+ end_time = begin_time + timedelta(seconds=1)
+ begin_time = end_time - timedelta(days=1, seconds=1)
+
+
+ if end_time < self.boss.start_time and len(job_id_list) > 0:
+ # This is suspicious.
+ logger.warning(
+ "Could not find any information from sacct after "
+ "workflow start at %s about jobs: %s",
+ self.boss.start_time.isoformat(),
+ job_id_list
+ )
+
+ return results
+
+ def _get_job_details_from_sacct_for_range(
+ self,
+ job_id_list: list[int],
+ begin_time: datetime,
+ end_time: datetime,
  ) -> dict[int, tuple[str | None, int | None]]:
  """
  Get SLURM job exit codes for the jobs in `job_id_list` by running `sacct`.
+
+ Internally, Slurm's accounting thinks in wall clock time, so for
+ efficiency you need to only search relevant real-time periods.
+
  :param job_id_list: list of integer batch job IDs.
- :return: dict of job statuses, where key is the job-id, and value is a tuple
- containing the job's state and exit code.
+ :param begin_time: An aware datetime of the earliest time to search
+ :param end_time: An aware datetime of the latest time to search
+ :return: dict of job statuses, where key is the job-id, and value
+ is a tuple containing the job's state and exit code. Jobs with
+ no information reported from Slurm will have (None, None).
  """
+
+ assert begin_time.tzinfo is not None, "begin_time must be aware"
+ assert end_time.tzinfo is not None, "end_time must be aware"
+ def stringify(t: datetime) -> str:
+ """
+ Convert an aware time to local time, and format it *without* a
+ trailing time zone indicator.
+ """
+ # TODO: What happens when we get an aware time that's ambiguous
+ # in local time? Or when the local timezone changes while we're
+ # sending things to Slurm or doing a progressive search back?
+ naive_t = t.astimezone(None).replace(tzinfo=None)
+ return naive_t.isoformat(timespec="seconds")
+
  job_ids = ",".join(str(id) for id in job_id_list)
  args = [
  "sacct",
@@ -485,8 +643,10 @@ class SlurmBatchSystem(AbstractGridEngineBatchSystem):
  "JobIDRaw,State,ExitCode", # specify output columns
  "-P", # separate columns with pipes
  "-S",
- "1970-01-01",
- ] # override start time limit
+ stringify(begin_time),
+ "-E",
+ stringify(end_time),
+ ]

  # Collect the job statuses in a dict; key is the job-id, value is a tuple containing
  # job state and exit status. Initialize dict before processing output of `sacct`.
@@ -500,8 +660,20 @@ class SlurmBatchSystem(AbstractGridEngineBatchSystem):
  if len(job_id_list) == 1:
  # 1 is too big, we can't recurse further, bail out
  raise
- job_statuses.update(self._getJobDetailsFromSacct(job_id_list[:len(job_id_list)//2]))
- job_statuses.update(self._getJobDetailsFromSacct(job_id_list[len(job_id_list)//2:]))
+ job_statuses.update(
+ self._get_job_details_from_sacct_for_range(
+ job_id_list[:len(job_id_list)//2],
+ begin_time,
+ end_time,
+ )
+ )
+ job_statuses.update(
+ self._get_job_details_from_sacct_for_range(
+ job_id_list[len(job_id_list)//2:],
+ begin_time,
+ end_time,
+ )
+ )
  return job_statuses
  else:
  raise
@@ -847,6 +1019,9 @@ class SlurmBatchSystem(AbstractGridEngineBatchSystem):
  ) -> None:
  super().__init__(config, maxCores, maxMemory, maxDisk)
  self.partitions = SlurmBatchSystem.PartitionSet()
+ # Record when the workflow started, so we know when to stop looking for
+ # jobs we ran.
+ self.start_time = datetime.now().astimezone(None)

  # Override issuing jobs so we can check if we need to use Slurm's magic
  # whole-node-memory feature.
toil/cwl/cwltoil.py CHANGED
@@ -110,6 +110,11 @@ from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
  from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
  from toil.common import Config, Toil, addOptions
  from toil.cwl import check_cwltool_version
+ from toil.lib.directory import (
+ DirectoryContents,
+ decode_directory,
+ encode_directory,
+ )
  from toil.lib.trs import resolve_workflow
  from toil.lib.misc import call_command
  from toil.provisioners.clusterScaler import JobTooBigError
@@ -1156,7 +1161,7 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
  """Subclass the cwltool command line tool to provide the custom ToilPathMapper."""

  def _initialworkdir(
- self, j: cwltool.job.JobBase, builder: cwltool.builder.Builder
+ self, j: Optional[cwltool.job.JobBase], builder: cwltool.builder.Builder
  ) -> None:
  """
  Hook the InitialWorkDirRequirement setup to make sure that there are no
@@ -1166,6 +1171,9 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
  # Set up the initial work dir with all its files
  super()._initialworkdir(j, builder)

+ if j is None:
+ return # Only testing
+
  # The initial work dir listing is now in j.generatefiles["listing"]
  # Also j.generatefiles is a CWL Directory.
  # So check the initial working directory.
@@ -1219,79 +1227,6 @@ def toil_make_tool(
  # URI instead of raising an error right away, in case it is optional.
  MISSING_FILE = "missing://"

- DirectoryContents = dict[str, Union[str, "DirectoryContents"]]
-
-
- def check_directory_dict_invariants(contents: DirectoryContents) -> None:
- """
- Make sure a directory structure dict makes sense. Throws an error
- otherwise.
-
- Currently just checks to make sure no empty-string keys exist.
- """
-
- for name, item in contents.items():
- if name == "":
- raise RuntimeError(
- "Found nameless entry in directory: " + json.dumps(contents, indent=2)
- )
- if isinstance(item, dict):
- check_directory_dict_invariants(item)
-
-
- def decode_directory(
- dir_path: str,
- ) -> tuple[DirectoryContents, Optional[str], str]:
- """
- Decode a directory from a "toildir:" path to a directory (or a file in it).
-
- Returns the decoded directory dict, the remaining part of the path (which may be
- None), and the deduplication key string that uniquely identifies the
- directory.
- """
- if not dir_path.startswith("toildir:"):
- raise RuntimeError(f"Cannot decode non-directory path: {dir_path}")
-
- # We will decode the directory and then look inside it
-
- # Since this was encoded by upload_directory we know the
- # next piece is encoded JSON describing the directory structure,
- # and it can't contain any slashes.
- parts = dir_path[len("toildir:") :].split("/", 1)
-
- # Before the first slash is the encoded data describing the directory contents
- dir_data = parts[0]
-
- # Decode what to download
- contents = json.loads(
- base64.urlsafe_b64decode(dir_data.encode("utf-8")).decode("utf-8")
- )
-
- check_directory_dict_invariants(contents)
-
- if len(parts) == 1 or parts[1] == "/":
- # We didn't have any subdirectory
- return contents, None, dir_data
- else:
- # We have a path below this
- return contents, parts[1], dir_data
-
-
- def encode_directory(contents: DirectoryContents) -> str:
- """
- Encode a directory from a "toildir:" path to a directory (or a file in it).
-
- Takes the directory dict, which is a dict from name to URI for a file or
- dict for a subdirectory.
- """
-
- check_directory_dict_invariants(contents)
-
- return "toildir:" + base64.urlsafe_b64encode(
- json.dumps(contents).encode("utf-8")
- ).decode("utf-8")
-
-
  class ToilFsAccess(StdFsAccess):
  """
  Custom filesystem access class which handles toil filestore references.
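Note: the helpers deleted above now live in the new toil/lib/directory.py (see the import hunk earlier in this file and entry 22 in the file list), and the call sites below unpack five values from decode_directory instead of three. The "toildir:" encoding itself is unchanged in spirit; a minimal self-contained sketch of the round trip as shown in the deleted code:

    import base64
    import json

    # Minimal sketch of the "toildir:" encoding from the deleted helpers above
    # (the real implementations are now in toil.lib.directory).
    contents = {"out": {"result.txt": "toilfile:abc123"}}

    encoded = "toildir:" + base64.urlsafe_b64encode(
        json.dumps(contents).encode("utf-8")
    ).decode("utf-8")

    # Decoding splits off the base64 payload before any "/" subpath and parses it.
    payload = encoded[len("toildir:"):].split("/", 1)[0]
    decoded = json.loads(base64.urlsafe_b64decode(payload.encode("utf-8")).decode("utf-8"))
    assert decoded == contents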
@@ -1360,7 +1295,7 @@ class ToilFsAccess(StdFsAccess):

  # Decode its contents, the path inside it to the file (if any), and
  # the key to use for caching the directory.
- contents, subpath, cache_key = decode_directory(path)
+ contents, subpath, cache_key, _, _ = decode_directory(path)
  logger.debug("Decoded directory contents: %s", contents)

  if cache_key not in self.dir_to_download:
@@ -1462,7 +1397,7 @@ class ToilFsAccess(StdFsAccess):
  # Handle local files
  return open(self._abs(fn), mode)
  elif parse.scheme == "toildir":
- contents, subpath, cache_key = decode_directory(fn)
+ contents, subpath, cache_key, _, _ = decode_directory(fn)
  if cache_key in self.dir_to_download:
  # This is already available locally, so fall back on the local copy
  return open(self._abs(fn), mode)
@@ -1503,7 +1438,7 @@ class ToilFsAccess(StdFsAccess):
  except NoSuchFileException:
  return False
  elif parse.scheme == "toildir":
- contents, subpath, cache_key = decode_directory(path)
+ contents, subpath, cache_key, _, _ = decode_directory(path)
  if subpath is None:
  # The toildir directory itself exists
  return True
@@ -1530,7 +1465,7 @@ class ToilFsAccess(StdFsAccess):
  elif parse.scheme == "toildir":
  # Decode its contents, the path inside it to the file (if any), and
  # the key to use for caching the directory.
- contents, subpath, cache_key = decode_directory(path)
+ contents, subpath, cache_key, _, _ = decode_directory(path)

  # We can't get the size of just a directory.
  if subpath is None:
@@ -1564,7 +1499,7 @@ class ToilFsAccess(StdFsAccess):
  # TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
  return True
  elif parse.scheme == "toildir":
- contents, subpath, cache_key = decode_directory(fn)
+ contents, subpath, cache_key, _, _ = decode_directory(fn)
  if subpath is None:
  # This is the toildir directory itself
  return False
@@ -1583,7 +1518,7 @@ class ToilFsAccess(StdFsAccess):
  elif parse.scheme == "toilfile":
  return False
  elif parse.scheme == "toildir":
- contents, subpath, cache_key = decode_directory(fn)
+ contents, subpath, cache_key, _, _ = decode_directory(fn)
  if subpath is None:
  # This is the toildir directory itself.
  # TODO: We assume directories can't be deleted.
@@ -1611,7 +1546,7 @@ class ToilFsAccess(StdFsAccess):
  elif parse.scheme == "toilfile":
  raise RuntimeError(f"Cannot list a file: {fn}")
  elif parse.scheme == "toildir":
- contents, subpath, cache_key = decode_directory(fn)
+ contents, subpath, cache_key, _, _ = decode_directory(fn)
  here = contents
  if subpath is not None:
  got = get_from_structure(contents, subpath)
@@ -2402,7 +2337,7 @@ def toilStageFiles(

  if file_id_or_contents.startswith("toildir:"):
  # Get the directory contents and the path into them, if any
- here, subpath, _ = decode_directory(file_id_or_contents)
+ here, subpath, _, _, _ = decode_directory(file_id_or_contents)
  if subpath is not None:
  for part in subpath.split("/"):
  here = cast(DirectoryContents, here[part])
toil/fileStores/__init__.py CHANGED
@@ -28,7 +28,7 @@ class FileID(str):
  the job store if unavailable in the ID.
  """

- def __new__(cls, fileStoreID: str, *args: Any) -> "FileID":
+ def __new__(cls, fileStoreID: str, *args: Any, **kwargs: dict[str, Any]) -> "FileID":
  return super().__new__(cls, fileStoreID)

  def __init__(self, fileStoreID: str, size: int, executable: bool = False) -> None:
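Note: the **kwargs pass-through matters because str.__new__ only wants the string value while FileID.__init__ takes extra parameters; without it, constructing a FileID with keyword arguments fails before __init__ ever runs. A hedged sketch of the pattern using a stand-in class rather than the real FileID:

    from typing import Any

    # Sketch of why __new__ needs to swallow keyword arguments for a str subclass
    # whose __init__ takes extra parameters (mirrors the FileID change above).
    class FileIDSketch(str):
        def __new__(cls, fileStoreID: str, *args: Any, **kwargs: Any) -> "FileIDSketch":
            # str.__new__ only needs the string value; extra args are for __init__.
            return super().__new__(cls, fileStoreID)

        def __init__(self, fileStoreID: str, size: int, executable: bool = False) -> None:
            self.size = size
            self.executable = executable

    # Works with positional or keyword arguments; without **kwargs in __new__,
    # the keyword form would raise TypeError.
    fid = FileIDSketch("file-abc", 1024, executable=True)
    assert fid == "file-abc" and fid.size == 1024 and fid.executable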