PyPI - toil - Versions diffs - 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl - Mend

toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (99)

toil/batchSystems/abstractBatchSystem.py +13 -5
toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
toil/batchSystems/kubernetes.py +13 -2
toil/batchSystems/mesos/batchSystem.py +33 -2
toil/batchSystems/registry.py +15 -118
toil/batchSystems/slurm.py +191 -16
toil/common.py +20 -1
toil/cwl/cwltoil.py +97 -119
toil/cwl/utils.py +103 -3
toil/fileStores/__init__.py +1 -1
toil/fileStores/abstractFileStore.py +5 -2
toil/fileStores/cachingFileStore.py +1 -1
toil/job.py +30 -14
toil/jobStores/abstractJobStore.py +35 -255
toil/jobStores/aws/jobStore.py +864 -1964
toil/jobStores/aws/utils.py +24 -270
toil/jobStores/fileJobStore.py +2 -1
toil/jobStores/googleJobStore.py +32 -13
toil/jobStores/utils.py +0 -327
toil/leader.py +27 -22
toil/lib/accelerators.py +1 -1
toil/lib/aws/config.py +22 -0
toil/lib/aws/s3.py +477 -9
toil/lib/aws/utils.py +22 -33
toil/lib/checksum.py +88 -0
toil/lib/conversions.py +33 -31
toil/lib/directory.py +217 -0
toil/lib/ec2.py +97 -29
toil/lib/exceptions.py +2 -1
toil/lib/expando.py +2 -2
toil/lib/generatedEC2Lists.py +138 -19
toil/lib/io.py +33 -2
toil/lib/memoize.py +21 -7
toil/lib/misc.py +1 -1
toil/lib/pipes.py +385 -0
toil/lib/plugins.py +106 -0
toil/lib/retry.py +1 -1
toil/lib/threading.py +1 -1
toil/lib/url.py +320 -0
toil/lib/web.py +4 -5
toil/options/cwl.py +13 -1
toil/options/runner.py +17 -10
toil/options/wdl.py +12 -1
toil/provisioners/__init__.py +5 -2
toil/provisioners/aws/__init__.py +43 -36
toil/provisioners/aws/awsProvisioner.py +47 -15
toil/provisioners/node.py +60 -12
toil/resource.py +3 -13
toil/server/app.py +12 -6
toil/server/cli/wes_cwl_runner.py +2 -2
toil/server/wes/abstract_backend.py +21 -43
toil/server/wes/toil_backend.py +2 -2
toil/test/__init__.py +16 -18
toil/test/batchSystems/batchSystemTest.py +2 -9
toil/test/batchSystems/batch_system_plugin_test.py +7 -0
toil/test/batchSystems/test_slurm.py +103 -14
toil/test/cwl/cwlTest.py +181 -8
toil/test/cwl/staging_cat.cwl +27 -0
toil/test/cwl/staging_make_file.cwl +25 -0
toil/test/cwl/staging_workflow.cwl +43 -0
toil/test/cwl/zero_default.cwl +61 -0
toil/test/docs/scripts/tutorial_staging.py +17 -8
toil/test/docs/scriptsTest.py +2 -1
toil/test/jobStores/jobStoreTest.py +23 -133
toil/test/lib/aws/test_iam.py +7 -7
toil/test/lib/aws/test_s3.py +30 -33
toil/test/lib/aws/test_utils.py +9 -9
toil/test/lib/test_url.py +69 -0
toil/test/lib/url_plugin_test.py +105 -0
toil/test/provisioners/aws/awsProvisionerTest.py +60 -7
toil/test/provisioners/clusterTest.py +15 -2
toil/test/provisioners/gceProvisionerTest.py +1 -1
toil/test/server/serverTest.py +78 -36
toil/test/src/autoDeploymentTest.py +2 -3
toil/test/src/fileStoreTest.py +89 -87
toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
toil/test/utils/toilKillTest.py +35 -28
toil/test/wdl/md5sum/md5sum-gs.json +1 -1
toil/test/wdl/md5sum/md5sum.json +1 -1
toil/test/wdl/testfiles/read_file.wdl +18 -0
toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
toil/test/wdl/wdltoil_test.py +171 -162
toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
toil/utils/toilDebugFile.py +6 -3
toil/utils/toilSshCluster.py +23 -0
toil/utils/toilStats.py +17 -2
toil/utils/toilUpdateEC2Instances.py +1 -0
toil/version.py +10 -10
toil/wdl/wdltoil.py +1179 -825
toil/worker.py +16 -8
{toil-8.2.0.dist-info → toil-9.1.0.dist-info}/METADATA +32 -32
{toil-8.2.0.dist-info → toil-9.1.0.dist-info}/RECORD +97 -85
{toil-8.2.0.dist-info → toil-9.1.0.dist-info}/WHEEL +1 -1
toil/lib/iterables.py +0 -112
toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
{toil-8.2.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
{toil-8.2.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
{toil-8.2.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0

toil/common.py CHANGED Viewed

@@ -86,6 +86,7 @@ from toil.provisioners import add_provisioner_options, cluster_factory
 from toil.realtimeLogger import RealtimeLogger
 from toil.statsAndLogging import add_logging_options, set_logging_from_options
 from toil.version import dockerRegistry, dockerTag, version, baseVersion
+from toil.lib.url import URLAccess
 if TYPE_CHECKING:
     from toil.batchSystems.abstractBatchSystem import AbstractBatchSystem
@@ -449,6 +450,11 @@ class Config:
         self.check_configuration_consistency()
+        # Check for deprecated Toil built-in autoscaling
+        # --provisioner is guaranteed to be set
+        if self.provisioner is not None and self.batchSystem == "mesos":
+            logger.warning("Toil built-in autoscaling with Mesos is deprecated as Mesos is no longer active. Please use Kubernetes-based autoscaling instead.")
     def check_configuration_consistency(self) -> None:
         """Old checks that cannot be fit into an action class for argparse"""
         if self.writeLogs and self.writeLogsGzip:
@@ -545,6 +551,19 @@ def generate_config(filepath: str) -> None:
         "enableCaching",
         "disableCaching",
         "version",
+        # Toil built-in autoscaling with mesos is deprecated as mesos has not been updated since Python 3.10
+        "provisioner",
+        "nodeTypes"
+        "minNodes",
+        "maxNodes",
+        "targetTime",
+        "betaInertia",
+        "scaleInterval",
+        "preemtibleCompensation",
+        "nodeStorage",
+        "nodeStorageOverrides",
+        "metrics",
+        "assumeZeroOverhead"
     )
     def create_config_dict_from_parser(parser: ArgumentParser) -> CommentedMap:
@@ -1397,7 +1416,7 @@ class Toil(ContextManager["Toil"]):
             self._batchSystem.setUserScript(userScriptResource)
     def url_exists(self, src_uri: str) -> bool:
-        return self._jobStore.url_exists(self.normalize_uri(src_uri))
+        return URLAccess.url_exists(self.normalize_uri(src_uri))
     # Importing a file with a shared file name returns None, but without one it
     # returns a file ID. Explain this to MyPy.

toil/cwl/cwltoil.py CHANGED Viewed

@@ -34,7 +34,6 @@ import stat
 import sys
 import textwrap
 import uuid
-from collections.abc import Iterator, Mapping, MutableMapping, MutableSequence
 from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
 from threading import Thread
 from typing import (
@@ -111,6 +110,11 @@ from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
 from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
 from toil.common import Config, Toil, addOptions
 from toil.cwl import check_cwltool_version
+from toil.lib.directory import (
+    DirectoryContents,
+    decode_directory,
+    encode_directory,
+)
 from toil.lib.trs import resolve_workflow
 from toil.lib.misc import call_command
 from toil.provisioners.clusterScaler import JobTooBigError
@@ -122,6 +126,7 @@ from toil.cwl.utils import (
     download_structure,
     get_from_structure,
     visit_cwl_class_and_reduce,
+    remove_redundant_mounts
 )
 from toil.exceptions import FailedJobsException
 from toil.fileStores import FileID
@@ -149,6 +154,7 @@ from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
 from toil.lib.io import mkdtemp
 from toil.lib.threading import ExceptionalThread, global_mutex
 from toil.statsAndLogging import DEFAULT_LOGLEVEL
+from toil.lib.url import URLAccess
 logger = logging.getLogger(__name__)
@@ -1155,7 +1161,7 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
     """Subclass the cwltool command line tool to provide the custom ToilPathMapper."""
     def _initialworkdir(
-        self, j: cwltool.job.JobBase, builder: cwltool.builder.Builder
+        self, j: Optional[cwltool.job.JobBase], builder: cwltool.builder.Builder
     ) -> None:
         """
         Hook the InitialWorkDirRequirement setup to make sure that there are no
@@ -1165,6 +1171,9 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
         # Set up the initial work dir with all its files
         super()._initialworkdir(j, builder)
+        if j is None:
+            return  # Only testing
         # The initial work dir listing is now in j.generatefiles["listing"]
         # Also j.generatefiles is a CWL Directory.
         # So check the initial working directory.
@@ -1218,79 +1227,6 @@ def toil_make_tool(
 # URI instead of raising an error right away, in case it is optional.
 MISSING_FILE = "missing://"
-DirectoryContents = dict[str, Union[str, "DirectoryContents"]]
-def check_directory_dict_invariants(contents: DirectoryContents) -> None:
-    """
-    Make sure a directory structure dict makes sense. Throws an error
-    otherwise.
-    Currently just checks to make sure no empty-string keys exist.
-    """
-    for name, item in contents.items():
-        if name == "":
-            raise RuntimeError(
-                "Found nameless entry in directory: " + json.dumps(contents, indent=2)
-            )
-        if isinstance(item, dict):
-            check_directory_dict_invariants(item)
-def decode_directory(
-    dir_path: str,
-) -> tuple[DirectoryContents, Optional[str], str]:
-    """
-    Decode a directory from a "toildir:" path to a directory (or a file in it).
-    Returns the decoded directory dict, the remaining part of the path (which may be
-    None), and the deduplication key string that uniquely identifies the
-    directory.
-    """
-    if not dir_path.startswith("toildir:"):
-        raise RuntimeError(f"Cannot decode non-directory path: {dir_path}")
-    # We will decode the directory and then look inside it
-    # Since this was encoded by upload_directory we know the
-    # next piece is encoded JSON describing the directory structure,
-    # and it can't contain any slashes.
-    parts = dir_path[len("toildir:") :].split("/", 1)
-    # Before the first slash is the encoded data describing the directory contents
-    dir_data = parts[0]
-    # Decode what to download
-    contents = json.loads(
-        base64.urlsafe_b64decode(dir_data.encode("utf-8")).decode("utf-8")
-    )
-    check_directory_dict_invariants(contents)
-    if len(parts) == 1 or parts[1] == "/":
-        # We didn't have any subdirectory
-        return contents, None, dir_data
-    else:
-        # We have a path below this
-        return contents, parts[1], dir_data
-def encode_directory(contents: DirectoryContents) -> str:
-    """
-    Encode a directory from a "toildir:" path to a directory (or a file in it).
-    Takes the directory dict, which is a dict from name to URI for a file or
-    dict for a subdirectory.
-    """
-    check_directory_dict_invariants(contents)
-    return "toildir:" + base64.urlsafe_b64encode(
-        json.dumps(contents).encode("utf-8")
-    ).decode("utf-8")
 class ToilFsAccess(StdFsAccess):
     """
     Custom filesystem access class which handles toil filestore references.
@@ -1359,7 +1295,7 @@ class ToilFsAccess(StdFsAccess):
             # Decode its contents, the path inside it to the file (if any), and
             # the key to use for caching the directory.
-            contents, subpath, cache_key = decode_directory(path)
+            contents, subpath, cache_key, _, _ = decode_directory(path)
             logger.debug("Decoded directory contents: %s", contents)
             if cache_key not in self.dir_to_download:
@@ -1395,7 +1331,7 @@ class ToilFsAccess(StdFsAccess):
             destination = path
         else:
             # The destination is something else.
-            if AbstractJobStore.get_is_directory(path):
+            if URLAccess.get_is_directory(path):
                 # Treat this as a directory
                 if path not in self.dir_to_download:
                     logger.debug(
@@ -1405,14 +1341,14 @@ class ToilFsAccess(StdFsAccess):
                     # Recursively fetch all the files in the directory.
                     def download_to(url: str, dest: str) -> None:
-                        if AbstractJobStore.get_is_directory(url):
+                        if URLAccess.get_is_directory(url):
                             os.mkdir(dest)
-                            for part in AbstractJobStore.list_url(url):
+                            for part in URLAccess.list_url(url):
                                 download_to(
                                     os.path.join(url, part), os.path.join(dest, part)
                                 )
                         else:
-                            AbstractJobStore.read_from_url(url, open(dest, "wb"))
+                            URLAccess.read_from_url(url, open(dest, "wb"))
                     download_to(path, dest_dir)
                     self.dir_to_download[path] = dest_dir
@@ -1425,7 +1361,7 @@ class ToilFsAccess(StdFsAccess):
                     # Try to grab it with a jobstore implementation, and save it
                     # somewhere arbitrary.
                     dest_file = NamedTemporaryFile(delete=False)
-                    AbstractJobStore.read_from_url(path, dest_file)
+                    URLAccess.read_from_url(path, dest_file)
                     dest_file.close()
                     self.dir_to_download[path] = dest_file.name
                 destination = self.dir_to_download[path]
@@ -1461,7 +1397,7 @@ class ToilFsAccess(StdFsAccess):
             # Handle local files
             return open(self._abs(fn), mode)
         elif parse.scheme == "toildir":
-            contents, subpath, cache_key = decode_directory(fn)
+            contents, subpath, cache_key, _, _ = decode_directory(fn)
             if cache_key in self.dir_to_download:
                 # This is already available locally, so fall back on the local copy
                 return open(self._abs(fn), mode)
@@ -1483,7 +1419,7 @@ class ToilFsAccess(StdFsAccess):
             return open(self._abs(fn), mode)
         else:
             # This should be supported by a job store.
-            byte_stream = AbstractJobStore.open_url(fn)
+            byte_stream = URLAccess.open_url(fn)
             if "b" in mode:
                 # Pass stream along in binary
                 return byte_stream
@@ -1502,7 +1438,7 @@ class ToilFsAccess(StdFsAccess):
             except NoSuchFileException:
                 return False
         elif parse.scheme == "toildir":
-            contents, subpath, cache_key = decode_directory(path)
+            contents, subpath, cache_key, _, _ = decode_directory(path)
             if subpath is None:
                 # The toildir directory itself exists
                 return True
@@ -1520,7 +1456,7 @@ class ToilFsAccess(StdFsAccess):
             return True
         else:
             # This should be supported by a job store.
-            return AbstractJobStore.url_exists(path)
+            return URLAccess.url_exists(path)
     def size(self, path: str) -> int:
         parse = urlparse(path)
@@ -1529,7 +1465,7 @@ class ToilFsAccess(StdFsAccess):
         elif parse.scheme == "toildir":
             # Decode its contents, the path inside it to the file (if any), and
             # the key to use for caching the directory.
-            contents, subpath, cache_key = decode_directory(path)
+            contents, subpath, cache_key, _, _ = decode_directory(path)
             # We can't get the size of just a directory.
             if subpath is None:
@@ -1549,7 +1485,7 @@ class ToilFsAccess(StdFsAccess):
             )
         else:
             # This should be supported by a job store.
-            size = AbstractJobStore.get_size(path)
+            size = URLAccess.get_size(path)
             if size is None:
                 # get_size can be unimplemented or unavailable
                 raise RuntimeError(f"Could not get size of {path}")
@@ -1563,7 +1499,7 @@ class ToilFsAccess(StdFsAccess):
             # TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
             return True
         elif parse.scheme == "toildir":
-            contents, subpath, cache_key = decode_directory(fn)
+            contents, subpath, cache_key, _, _ = decode_directory(fn)
             if subpath is None:
                 # This is the toildir directory itself
                 return False
@@ -1572,7 +1508,7 @@ class ToilFsAccess(StdFsAccess):
             # TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
             return isinstance(found, str)
         else:
-            return self.exists(fn) and not AbstractJobStore.get_is_directory(fn)
+            return self.exists(fn) and not URLAccess.get_is_directory(fn)
     def isdir(self, fn: str) -> bool:
         logger.debug("ToilFsAccess checking type of %s", fn)
@@ -1582,7 +1518,7 @@ class ToilFsAccess(StdFsAccess):
         elif parse.scheme == "toilfile":
             return False
         elif parse.scheme == "toildir":
-            contents, subpath, cache_key = decode_directory(fn)
+            contents, subpath, cache_key, _, _ = decode_directory(fn)
             if subpath is None:
                 # This is the toildir directory itself.
                 # TODO: We assume directories can't be deleted.
@@ -1592,7 +1528,7 @@ class ToilFsAccess(StdFsAccess):
             # TODO: We assume directories can't be deleted.
             return isinstance(found, dict)
         else:
-            status = AbstractJobStore.get_is_directory(fn)
+            status = URLAccess.get_is_directory(fn)
             logger.debug("AbstractJobStore said: %s", status)
             return status
@@ -1610,7 +1546,7 @@ class ToilFsAccess(StdFsAccess):
         elif parse.scheme == "toilfile":
             raise RuntimeError(f"Cannot list a file: {fn}")
         elif parse.scheme == "toildir":
-            contents, subpath, cache_key = decode_directory(fn)
+            contents, subpath, cache_key, _, _ = decode_directory(fn)
             here = contents
             if subpath is not None:
                 got = get_from_structure(contents, subpath)
@@ -1626,7 +1562,7 @@ class ToilFsAccess(StdFsAccess):
         else:
             return [
                 os.path.join(fn, entry.rstrip("/"))
-                for entry in AbstractJobStore.list_url(fn)
+                for entry in URLAccess.list_url(fn)
             ]
     def join(self, path: str, *paths: str) -> str:
@@ -1736,7 +1672,7 @@ def toil_get_file(
                                 pipe.write(data)
                     else:
                         # Stream from some other URI
-                        AbstractJobStore.read_from_url(uri, pipe)
+                        URLAccess.read_from_url(uri, pipe)
             except OSError as e:
                 # The other side of the pipe may have been closed by the
                 # reading thread, which is OK.
@@ -1779,7 +1715,7 @@ def toil_get_file(
                     # Open that path exclusively to make sure we created it
                     with open(src_path, "xb") as fh:
                         # Download into the file
-                        size, executable = AbstractJobStore.read_from_url(uri, fh)
+                        size, executable = URLAccess.read_from_url(uri, fh)
                         if executable:
                             # Set the execute bit in the file's permissions
                             os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
@@ -2401,7 +2337,7 @@ def toilStageFiles(
                     if file_id_or_contents.startswith("toildir:"):
                         # Get the directory contents and the path into them, if any
-                        here, subpath, _ = decode_directory(file_id_or_contents)
+                        here, subpath, _, _, _ = decode_directory(file_id_or_contents)
                         if subpath is not None:
                             for part in subpath.split("/"):
                                 here = cast(DirectoryContents, here[part])
@@ -2583,7 +2519,7 @@ class CWLJob(CWLNamedJob):
                 resources={},
                 mutation_manager=runtime_context.mutation_manager,
                 formatgraph=tool.formatgraph,
-                make_fs_access=cast(type[StdFsAccess], runtime_context.make_fs_access),
+                make_fs_access=runtime_context.make_fs_access,
                 fs_access=runtime_context.make_fs_access(""),
                 job_script_provider=runtime_context.job_script_provider,
                 timeout=runtime_context.eval_timeout,
@@ -2613,6 +2549,12 @@ class CWLJob(CWLNamedJob):
             else:
                 # We use a None requirement and the Toil default applies.
                 memory = None
+        # Imposing a minimum memory limit
+        min_ram = getattr(runtime_context, "cwl_min_ram")
+        if min_ram is not None and memory is not None:
+            # Note: if the job is using the toil default memory, it won't be increased
+            memory = max(memory, min_ram)
         accelerators: Optional[list[AcceleratorRequirement]] = None
         if req.get("cudaDeviceCount", 0) > 0:
@@ -2751,6 +2693,9 @@ class CWLJob(CWLNamedJob):
         cwljob = resolve_dict_w_promises(self.cwljob, file_store)
+        # Deletes duplicate listings
+        remove_redundant_mounts(cwljob)
         if self.conditional.is_false(cwljob):
             return self.conditional.skipped_outputs()
@@ -2984,24 +2929,23 @@ def makeRootJob(
             else:
                 worker_metadata[filename] = file_data
+        if worker_metadata:
+            logger.info(
+                "Planning to import %s files on workers",
+                len(worker_metadata),
+            )
         # import the files for the leader first
         path_to_fileid = WorkerImportJob.import_files(
             list(leader_metadata.keys()), toil._jobStore
         )
-        # then install the imported files before importing the other files
-        # this way the control flow can fall from the leader to workers
-        tool, initialized_job_order = CWLInstallImportsJob.fill_in_files(
-            initialized_job_order,
-            tool,
-            path_to_fileid,
-            options.basedir,
-            options.reference_inputs,
-            options.bypass_file_store,
-        )
+        # Because installing the imported files expects all files to have been
+        # imported, we don't do that here; we combine the leader imports and
+        # the worker imports and install them all at once.
         import_job = CWLImportWrapper(
-            initialized_job_order, tool, runtime_context, worker_metadata, options
+            initialized_job_order, tool, runtime_context, worker_metadata, path_to_fileid, options
         )
         return import_job
     else:
@@ -3573,7 +3517,7 @@ class CWLInstallImportsJob(Job):
         basedir: str,
         skip_remote: bool,
         bypass_file_store: bool,
-        import_data: Promised[dict[str, FileID]],
+        import_data: list[Promised[dict[str, FileID]]],
         **kwargs: Any,
     ) -> None:
         """
@@ -3581,6 +3525,8 @@ class CWLInstallImportsJob(Job):
         to convert all file locations to URIs.
         This class is only used when runImportsOnWorkers is enabled.
+        :param import_data: List of mappings from file URI to imported file ID.
         """
         super().__init__(local=True, **kwargs)
         self.initialized_job_order = initialized_job_order
@@ -3590,6 +3536,8 @@ class CWLInstallImportsJob(Job):
         self.bypass_file_store = bypass_file_store
         self.import_data = import_data
+    # TODO: Since we only call this from the class itself now it doesn't really
+    # need to be static anymore.
     @staticmethod
     def fill_in_files(
         initialized_job_order: CWLObjectType,
@@ -3607,7 +3555,12 @@ class CWLInstallImportsJob(Job):
             """
             Return the file name's associated Toil file ID
             """
-            return candidate_to_fileid[filename]
+            try:
+                return candidate_to_fileid[filename]
+            except KeyError:
+                # Give something more useful than a KeyError if something went
+                # wrong with the importing.
+                raise RuntimeError(f"File at \"{filename}\" was never imported.")
         file_convert_function = functools.partial(
             extract_and_convert_file_to_toil_uri, fill_in_file
@@ -3654,11 +3607,19 @@ class CWLInstallImportsJob(Job):
         Convert the filenames in the workflow inputs into the URIs
         :return: Promise of transformed workflow inputs. A tuple of the job order and process
         """
-        candidate_to_fileid: dict[str, FileID] = unwrap(self.import_data)
+        # Merge all the input dicts down to one to check.
+        candidate_to_fileid: dict[str, FileID] = {
+            k: v for mapping in unwrap(
+                self.import_data
+            ) for k, v in unwrap(mapping).items()
+        }
         initialized_job_order = unwrap(self.initialized_job_order)
         tool = unwrap(self.tool)
-        return CWLInstallImportsJob.fill_in_files(
+        # Install the imported files in the tool and job order
+        return self.fill_in_files(
             initialized_job_order,
             tool,
             candidate_to_fileid,
@@ -3682,33 +3643,46 @@ class CWLImportWrapper(CWLNamedJob):
         tool: Process,
         runtime_context: cwltool.context.RuntimeContext,
         file_to_data: dict[str, FileMetadata],
+        imported_files: dict[str, FileID],
         options: Namespace,
     ):
-        super().__init__(local=False, disk=options.import_workers_threshold)
+        """
+        Make a job to do file imports on workers and then run the workflow.
+        :param file_to_data: Metadata for files that need to be imported on the
+            worker.
+        :param imported_files: Files already imported on the leader.
+        """
+        super().__init__(local=False, disk=options.import_workers_batchsize)
         self.initialized_job_order = initialized_job_order
         self.tool = tool
-        self.options = options
         self.runtime_context = runtime_context
         self.file_to_data = file_to_data
+        self.imported_files = imported_files
+        self.options = options
     def run(self, file_store: AbstractFileStore) -> Any:
+        # Do the worker-based imports
         imports_job = ImportsJob(
             self.file_to_data,
-            self.options.import_workers_threshold,
+            self.options.import_workers_batchsize,
             self.options.import_workers_disk,
         )
         self.addChild(imports_job)
+        # Install the worker imports and any leader imports
         install_imports_job = CWLInstallImportsJob(
             initialized_job_order=self.initialized_job_order,
             tool=self.tool,
             basedir=self.options.basedir,
             skip_remote=self.options.reference_inputs,
             bypass_file_store=self.options.bypass_file_store,
-            import_data=imports_job.rv(0),
+            import_data=[self.imported_files, imports_job.rv(0)],
         )
         self.addChild(install_imports_job)
         imports_job.addFollowOn(install_imports_job)
+        # Run the workflow
         start_job = CWLStartJob(
             install_imports_job.rv(0),
             install_imports_job.rv(1),
@@ -4212,6 +4186,8 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
         options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
     )
     tmp_outdir_prefix = options.tmp_outdir_prefix or tmpdir_prefix
+    # tmpdir_prefix and tmp_outdir_prefix must not be checked for existence as they may exist on a worker only path
+    # See https://github.com/DataBiosphere/toil/issues/5310
     workdir = options.workDir or tmp_outdir_prefix
     if options.jobStore is None:
@@ -4262,6 +4238,7 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
     runtime_context.workdir = workdir  # type: ignore[attr-defined]
     runtime_context.outdir = outdir
     setattr(runtime_context, "cwl_default_ram", options.cwl_default_ram)
+    setattr(runtime_context, "cwl_min_ram", options.cwl_min_ram)
     runtime_context.move_outputs = "leave"
     runtime_context.rm_tmpdir = False
     runtime_context.streaming_allowed = not options.disable_streaming
@@ -4272,11 +4249,12 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
         # of filestore files and caches those.
         logger.debug("CWL task caching is turned on. Bypassing file store.")
         options.bypass_file_store = True
+        # Ensure the cache directory exists
+        # Only ensure the caching directory exists as that must be local.
+        os.makedirs(os.path.abspath(options.cachedir), exist_ok=True)
     if options.mpi_config_file is not None:
         runtime_context.mpi_config = MpiConfig.load(options.mpi_config_file)
-    if cwltool.main.check_working_directories(runtime_context) is not None:
-        logger.error("Failed to create directory. If using tmpdir_prefix, tmpdir_outdir_prefix, or cachedir, consider changing directory locations.")
-        return 1
     setattr(runtime_context, "bypass_file_store", options.bypass_file_store)
     if options.bypass_file_store and options.destBucket:
         # We use the file store to write to buckets, so we can't do this (yet?)

toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl