PyPI - toil - Versions diffs - 8.2.0__py3-none-any.whl → 9.0.0__py3-none-any.whl - Mend

toil 8.2.0__py3-none-any.whl → 9.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

toil/batchSystems/registry.py +15 -118
toil/common.py +20 -1
toil/cwl/cwltoil.py +80 -37
toil/cwl/utils.py +103 -3
toil/jobStores/abstractJobStore.py +11 -236
toil/jobStores/aws/jobStore.py +2 -1
toil/jobStores/fileJobStore.py +2 -1
toil/jobStores/googleJobStore.py +7 -4
toil/lib/accelerators.py +1 -1
toil/lib/generatedEC2Lists.py +81 -19
toil/lib/misc.py +1 -1
toil/lib/plugins.py +106 -0
toil/lib/url.py +320 -0
toil/options/cwl.py +13 -1
toil/options/runner.py +17 -10
toil/options/wdl.py +12 -1
toil/provisioners/aws/awsProvisioner.py +25 -2
toil/server/app.py +12 -6
toil/server/cli/wes_cwl_runner.py +2 -2
toil/server/wes/abstract_backend.py +21 -43
toil/server/wes/toil_backend.py +2 -2
toil/test/__init__.py +2 -2
toil/test/batchSystems/batchSystemTest.py +2 -9
toil/test/batchSystems/batch_system_plugin_test.py +7 -0
toil/test/cwl/cwlTest.py +181 -8
toil/test/docs/scriptsTest.py +2 -1
toil/test/lib/test_url.py +69 -0
toil/test/lib/url_plugin_test.py +105 -0
toil/test/provisioners/aws/awsProvisionerTest.py +1 -1
toil/test/provisioners/clusterTest.py +15 -2
toil/test/provisioners/gceProvisionerTest.py +1 -1
toil/test/server/serverTest.py +78 -36
toil/test/wdl/md5sum/md5sum-gs.json +1 -1
toil/test/wdl/testfiles/read_file.wdl +18 -0
toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
toil/test/wdl/wdltoil_test.py +74 -125
toil/utils/toilSshCluster.py +23 -0
toil/utils/toilUpdateEC2Instances.py +1 -0
toil/version.py +9 -9
toil/wdl/wdltoil.py +182 -314
toil/worker.py +11 -6
{toil-8.2.0.dist-info → toil-9.0.0.dist-info}/METADATA +23 -23
{toil-8.2.0.dist-info → toil-9.0.0.dist-info}/RECORD +47 -42
{toil-8.2.0.dist-info → toil-9.0.0.dist-info}/WHEEL +1 -1
{toil-8.2.0.dist-info → toil-9.0.0.dist-info}/entry_points.txt +0 -0
{toil-8.2.0.dist-info → toil-9.0.0.dist-info}/licenses/LICENSE +0 -0
{toil-8.2.0.dist-info → toil-9.0.0.dist-info}/top_level.txt +0 -0

toil/batchSystems/registry.py CHANGED Viewed

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import importlib
 import logging
 import pkgutil
 import warnings
@@ -21,6 +20,7 @@ from typing import TYPE_CHECKING, Callable
 from toil.lib.compatibility import deprecated
 from toil.lib.memoize import memoize
+import toil.lib.plugins
 if TYPE_CHECKING:
     from toil.batchSystems.abstractBatchSystem import AbstractBatchSystem
@@ -40,17 +40,14 @@ def add_batch_system_factory(
     :param class_factory: A function that returns a batch system class (NOT an instance), which implements :class:`toil.batchSystems.abstractBatchSystem.AbstractBatchSystem`.
     """
-    _registry_keys.append(key)
-    _registry[key] = class_factory
+    toil.lib.plugins.register_plugin("batch_system", key, class_factory)
 def get_batch_systems() -> Sequence[str]:
     """
-    Get the names of all the availsble batch systems.
+    Get the names of all the available batch systems.
     """
-    _load_all_plugins()
-    return _registry_keys
+    return toil.lib.plugins.get_plugin_names("batch_system")
 def get_batch_system(key: str) -> type["AbstractBatchSystem"]:
@@ -60,8 +57,7 @@ def get_batch_system(key: str) -> type["AbstractBatchSystem"]:
     :raises: KeyError if the key is not the name of a batch system, and
              ImportError if the batch system's class cannot be loaded.
     """
-    return _registry[key]()
+    return toil.lib.plugins.get_plugin("batch_system", key)()
 DEFAULT_BATCH_SYSTEM = "single_machine"
@@ -126,114 +122,15 @@ def kubernetes_batch_system_factory():
 #####
-# Registry implementation
-#####
-_registry: dict[str, Callable[[], type["AbstractBatchSystem"]]] = {
-    "aws_batch": aws_batch_batch_system_factory,
-    "single_machine": single_machine_batch_system_factory,
-    "grid_engine": gridengine_batch_system_factory,
-    "lsf": lsf_batch_system_factory,
-    "mesos": mesos_batch_system_factory,
-    "slurm": slurm_batch_system_factory,
-    "torque": torque_batch_system_factory,
-    "htcondor": htcondor_batch_system_factory,
-    "kubernetes": kubernetes_batch_system_factory,
-}
-_registry_keys = list(_registry.keys())
-# We will load any packages starting with this prefix and let them call
-# add_batch_system_factory()
-_PLUGIN_NAME_PREFIX = "toil_batch_system_"
-@memoize
-def _load_all_plugins() -> None:
-    """
-    Load all the batch system plugins that are installed.
-    """
-    for finder, name, is_pkg in pkgutil.iter_modules():
-        # For all installed packages
-        if name.startswith(_PLUGIN_NAME_PREFIX):
-            # If it is a Toil batch system plugin, import it
-            importlib.import_module(name)
-#####
-# Deprecated API
+# Register all built-in batch systems
 #####
-# We used to directly access these constants, but now the Right Way to use this
-# module is add_batch_system_factory() to register and get_batch_systems() to
-# get the list/get_batch_system() to get a class by name.
-def __getattr__(name):
-    """
-    Implement a fallback attribute getter to handle deprecated constants.
-    See <https://stackoverflow.com/a/48242860>.
-    """
-    if name == "BATCH_SYSTEM_FACTORY_REGISTRY":
-        warnings.warn(
-            "BATCH_SYSTEM_FACTORY_REGISTRY is deprecated; use get_batch_system() or add_batch_system_factory()",
-            DeprecationWarning,
-        )
-        return _registry
-    elif name == "BATCH_SYSTEMS":
-        warnings.warn(
-            "BATCH_SYSTEMS is deprecated; use get_batch_systems()", DeprecationWarning
-        )
-        return _registry_keys
-    else:
-        raise AttributeError(f"Module {__name__} ahs no attribute {name}")
-@deprecated(new_function_name="add_batch_system_factory")
-def addBatchSystemFactory(
-    key: str, batchSystemFactory: Callable[[], type["AbstractBatchSystem"]]
-):
-    """
-    Deprecated method to add a batch system.
-    """
-    return add_batch_system_factory(key, batchSystemFactory)
-#####
-# Testing utilities
-#####
-# We need a snapshot save/restore system for testing. We can't just tamper with
-# the globals because module-level globals are their own references, so we
-# can't touch this module's global name bindings from a client module.
-def save_batch_system_plugin_state() -> (
-    tuple[list[str], dict[str, Callable[[], type["AbstractBatchSystem"]]]]
-):
-    """
-    Return a snapshot of the plugin registry that can be restored to remove
-    added plugins. Useful for testing the plugin system in-process with other
-    tests.
-    """
-    snapshot = (list(_registry_keys), dict(_registry))
-    return snapshot
-def restore_batch_system_plugin_state(
-    snapshot: tuple[list[str], dict[str, Callable[[], type["AbstractBatchSystem"]]]]
-):
-    """
-    Restore the batch system registry state to a snapshot from
-    save_batch_system_plugin_state().
-    """
-    # We need to apply the snapshot without rebinding the names, because that
-    # won't affect modules that imported the names.
-    wanted_batch_systems, wanted_registry = snapshot
-    _registry_keys.clear()
-    _registry_keys.extend(wanted_batch_systems)
-    _registry.clear()
-    _registry.update(wanted_registry)
+add_batch_system_factory("aws_batch", aws_batch_batch_system_factory)
+add_batch_system_factory("single_machine", single_machine_batch_system_factory)
+add_batch_system_factory("grid_engine", gridengine_batch_system_factory)
+add_batch_system_factory("lsf", lsf_batch_system_factory)
+add_batch_system_factory("mesos", mesos_batch_system_factory)
+add_batch_system_factory("slurm", slurm_batch_system_factory)
+add_batch_system_factory("torque", torque_batch_system_factory)
+add_batch_system_factory("htcondor", htcondor_batch_system_factory)
+add_batch_system_factory("kubernetes", kubernetes_batch_system_factory)

toil/common.py CHANGED Viewed

@@ -86,6 +86,7 @@ from toil.provisioners import add_provisioner_options, cluster_factory
 from toil.realtimeLogger import RealtimeLogger
 from toil.statsAndLogging import add_logging_options, set_logging_from_options
 from toil.version import dockerRegistry, dockerTag, version, baseVersion
+from toil.lib.url import URLAccess
 if TYPE_CHECKING:
     from toil.batchSystems.abstractBatchSystem import AbstractBatchSystem
@@ -449,6 +450,11 @@ class Config:
         self.check_configuration_consistency()
+        # Check for deprecated Toil built-in autoscaling
+        # --provisioner is guaranteed to be set
+        if self.provisioner is not None and self.batchSystem == "mesos":
+            logger.warning("Toil built-in autoscaling with Mesos is deprecated as Mesos is no longer active. Please use Kubernetes-based autoscaling instead.")
     def check_configuration_consistency(self) -> None:
         """Old checks that cannot be fit into an action class for argparse"""
         if self.writeLogs and self.writeLogsGzip:
@@ -545,6 +551,19 @@ def generate_config(filepath: str) -> None:
         "enableCaching",
         "disableCaching",
         "version",
+        # Toil built-in autoscaling with mesos is deprecated as mesos has not been updated since Python 3.10
+        "provisioner",
+        "nodeTypes"
+        "minNodes",
+        "maxNodes",
+        "targetTime",
+        "betaInertia",
+        "scaleInterval",
+        "preemtibleCompensation",
+        "nodeStorage",
+        "nodeStorageOverrides",
+        "metrics",
+        "assumeZeroOverhead"
     )
     def create_config_dict_from_parser(parser: ArgumentParser) -> CommentedMap:
@@ -1397,7 +1416,7 @@ class Toil(ContextManager["Toil"]):
             self._batchSystem.setUserScript(userScriptResource)
     def url_exists(self, src_uri: str) -> bool:
-        return self._jobStore.url_exists(self.normalize_uri(src_uri))
+        return URLAccess.url_exists(self.normalize_uri(src_uri))
     # Importing a file with a shared file name returns None, but without one it
     # returns a file ID. Explain this to MyPy.

toil/cwl/cwltoil.py CHANGED Viewed

@@ -34,7 +34,6 @@ import stat
 import sys
 import textwrap
 import uuid
-from collections.abc import Iterator, Mapping, MutableMapping, MutableSequence
 from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
 from threading import Thread
 from typing import (
@@ -122,6 +121,7 @@ from toil.cwl.utils import (
     download_structure,
     get_from_structure,
     visit_cwl_class_and_reduce,
+    remove_redundant_mounts
 )
 from toil.exceptions import FailedJobsException
 from toil.fileStores import FileID
@@ -149,6 +149,7 @@ from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
 from toil.lib.io import mkdtemp
 from toil.lib.threading import ExceptionalThread, global_mutex
 from toil.statsAndLogging import DEFAULT_LOGLEVEL
+from toil.lib.url import URLAccess
 logger = logging.getLogger(__name__)
@@ -1395,7 +1396,7 @@ class ToilFsAccess(StdFsAccess):
             destination = path
         else:
             # The destination is something else.
-            if AbstractJobStore.get_is_directory(path):
+            if URLAccess.get_is_directory(path):
                 # Treat this as a directory
                 if path not in self.dir_to_download:
                     logger.debug(
@@ -1405,14 +1406,14 @@ class ToilFsAccess(StdFsAccess):
                     # Recursively fetch all the files in the directory.
                     def download_to(url: str, dest: str) -> None:
-                        if AbstractJobStore.get_is_directory(url):
+                        if URLAccess.get_is_directory(url):
                             os.mkdir(dest)
-                            for part in AbstractJobStore.list_url(url):
+                            for part in URLAccess.list_url(url):
                                 download_to(
                                     os.path.join(url, part), os.path.join(dest, part)
                                 )
                         else:
-                            AbstractJobStore.read_from_url(url, open(dest, "wb"))
+                            URLAccess.read_from_url(url, open(dest, "wb"))
                     download_to(path, dest_dir)
                     self.dir_to_download[path] = dest_dir
@@ -1425,7 +1426,7 @@ class ToilFsAccess(StdFsAccess):
                     # Try to grab it with a jobstore implementation, and save it
                     # somewhere arbitrary.
                     dest_file = NamedTemporaryFile(delete=False)
-                    AbstractJobStore.read_from_url(path, dest_file)
+                    URLAccess.read_from_url(path, dest_file)
                     dest_file.close()
                     self.dir_to_download[path] = dest_file.name
                 destination = self.dir_to_download[path]
@@ -1483,7 +1484,7 @@ class ToilFsAccess(StdFsAccess):
             return open(self._abs(fn), mode)
         else:
             # This should be supported by a job store.
-            byte_stream = AbstractJobStore.open_url(fn)
+            byte_stream = URLAccess.open_url(fn)
             if "b" in mode:
                 # Pass stream along in binary
                 return byte_stream
@@ -1520,7 +1521,7 @@ class ToilFsAccess(StdFsAccess):
             return True
         else:
             # This should be supported by a job store.
-            return AbstractJobStore.url_exists(path)
+            return URLAccess.url_exists(path)
     def size(self, path: str) -> int:
         parse = urlparse(path)
@@ -1549,7 +1550,7 @@ class ToilFsAccess(StdFsAccess):
             )
         else:
             # This should be supported by a job store.
-            size = AbstractJobStore.get_size(path)
+            size = URLAccess.get_size(path)
             if size is None:
                 # get_size can be unimplemented or unavailable
                 raise RuntimeError(f"Could not get size of {path}")
@@ -1572,7 +1573,7 @@ class ToilFsAccess(StdFsAccess):
             # TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
             return isinstance(found, str)
         else:
-            return self.exists(fn) and not AbstractJobStore.get_is_directory(fn)
+            return self.exists(fn) and not URLAccess.get_is_directory(fn)
     def isdir(self, fn: str) -> bool:
         logger.debug("ToilFsAccess checking type of %s", fn)
@@ -1592,7 +1593,7 @@ class ToilFsAccess(StdFsAccess):
             # TODO: We assume directories can't be deleted.
             return isinstance(found, dict)
         else:
-            status = AbstractJobStore.get_is_directory(fn)
+            status = URLAccess.get_is_directory(fn)
             logger.debug("AbstractJobStore said: %s", status)
             return status
@@ -1626,7 +1627,7 @@ class ToilFsAccess(StdFsAccess):
         else:
             return [
                 os.path.join(fn, entry.rstrip("/"))
-                for entry in AbstractJobStore.list_url(fn)
+                for entry in URLAccess.list_url(fn)
             ]
     def join(self, path: str, *paths: str) -> str:
@@ -1736,7 +1737,7 @@ def toil_get_file(
                                 pipe.write(data)
                     else:
                         # Stream from some other URI
-                        AbstractJobStore.read_from_url(uri, pipe)
+                        URLAccess.read_from_url(uri, pipe)
             except OSError as e:
                 # The other side of the pipe may have been closed by the
                 # reading thread, which is OK.
@@ -1779,7 +1780,7 @@ def toil_get_file(
                     # Open that path exclusively to make sure we created it
                     with open(src_path, "xb") as fh:
                         # Download into the file
-                        size, executable = AbstractJobStore.read_from_url(uri, fh)
+                        size, executable = URLAccess.read_from_url(uri, fh)
                         if executable:
                             # Set the execute bit in the file's permissions
                             os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
@@ -2583,7 +2584,7 @@ class CWLJob(CWLNamedJob):
                 resources={},
                 mutation_manager=runtime_context.mutation_manager,
                 formatgraph=tool.formatgraph,
-                make_fs_access=cast(type[StdFsAccess], runtime_context.make_fs_access),
+                make_fs_access=runtime_context.make_fs_access,
                 fs_access=runtime_context.make_fs_access(""),
                 job_script_provider=runtime_context.job_script_provider,
                 timeout=runtime_context.eval_timeout,
@@ -2613,6 +2614,12 @@ class CWLJob(CWLNamedJob):
             else:
                 # We use a None requirement and the Toil default applies.
                 memory = None
+        # Imposing a minimum memory limit
+        min_ram = getattr(runtime_context, "cwl_min_ram")
+        if min_ram is not None and memory is not None:
+            # Note: if the job is using the toil default memory, it won't be increased
+            memory = max(memory, min_ram)
         accelerators: Optional[list[AcceleratorRequirement]] = None
         if req.get("cudaDeviceCount", 0) > 0:
@@ -2751,6 +2758,9 @@ class CWLJob(CWLNamedJob):
         cwljob = resolve_dict_w_promises(self.cwljob, file_store)
+        # Deletes duplicate listings
+        remove_redundant_mounts(cwljob)
         if self.conditional.is_false(cwljob):
             return self.conditional.skipped_outputs()
@@ -2984,24 +2994,23 @@ def makeRootJob(
             else:
                 worker_metadata[filename] = file_data
+        if worker_metadata:
+            logger.info(
+                "Planning to import %s files on workers",
+                len(worker_metadata),
+            )
         # import the files for the leader first
         path_to_fileid = WorkerImportJob.import_files(
             list(leader_metadata.keys()), toil._jobStore
         )
-        # then install the imported files before importing the other files
-        # this way the control flow can fall from the leader to workers
-        tool, initialized_job_order = CWLInstallImportsJob.fill_in_files(
-            initialized_job_order,
-            tool,
-            path_to_fileid,
-            options.basedir,
-            options.reference_inputs,
-            options.bypass_file_store,
-        )
+        # Because installing the imported files expects all files to have been
+        # imported, we don't do that here; we combine the leader imports and
+        # the worker imports and install them all at once.
         import_job = CWLImportWrapper(
-            initialized_job_order, tool, runtime_context, worker_metadata, options
+            initialized_job_order, tool, runtime_context, worker_metadata, path_to_fileid, options
         )
         return import_job
     else:
@@ -3573,7 +3582,7 @@ class CWLInstallImportsJob(Job):
         basedir: str,
         skip_remote: bool,
         bypass_file_store: bool,
-        import_data: Promised[dict[str, FileID]],
+        import_data: list[Promised[dict[str, FileID]]],
         **kwargs: Any,
     ) -> None:
         """
@@ -3581,6 +3590,8 @@ class CWLInstallImportsJob(Job):
         to convert all file locations to URIs.
         This class is only used when runImportsOnWorkers is enabled.
+        :param import_data: List of mappings from file URI to imported file ID.
         """
         super().__init__(local=True, **kwargs)
         self.initialized_job_order = initialized_job_order
@@ -3590,6 +3601,8 @@ class CWLInstallImportsJob(Job):
         self.bypass_file_store = bypass_file_store
         self.import_data = import_data
+    # TODO: Since we only call this from the class itself now it doesn't really
+    # need to be static anymore.
     @staticmethod
     def fill_in_files(
         initialized_job_order: CWLObjectType,
@@ -3607,7 +3620,12 @@ class CWLInstallImportsJob(Job):
             """
             Return the file name's associated Toil file ID
             """
-            return candidate_to_fileid[filename]
+            try:
+                return candidate_to_fileid[filename]
+            except KeyError:
+                # Give something more useful than a KeyError if something went
+                # wrong with the importing.
+                raise RuntimeError(f"File at \"{filename}\" was never imported.")
         file_convert_function = functools.partial(
             extract_and_convert_file_to_toil_uri, fill_in_file
@@ -3654,11 +3672,19 @@ class CWLInstallImportsJob(Job):
         Convert the filenames in the workflow inputs into the URIs
         :return: Promise of transformed workflow inputs. A tuple of the job order and process
         """
-        candidate_to_fileid: dict[str, FileID] = unwrap(self.import_data)
+        # Merge all the input dicts down to one to check.
+        candidate_to_fileid: dict[str, FileID] = {
+            k: v for mapping in unwrap(
+                self.import_data
+            ) for k, v in unwrap(mapping).items()
+        }
         initialized_job_order = unwrap(self.initialized_job_order)
         tool = unwrap(self.tool)
-        return CWLInstallImportsJob.fill_in_files(
+        # Install the imported files in the tool and job order
+        return self.fill_in_files(
             initialized_job_order,
             tool,
             candidate_to_fileid,
@@ -3682,33 +3708,46 @@ class CWLImportWrapper(CWLNamedJob):
         tool: Process,
         runtime_context: cwltool.context.RuntimeContext,
         file_to_data: dict[str, FileMetadata],
+        imported_files: dict[str, FileID],
         options: Namespace,
     ):
-        super().__init__(local=False, disk=options.import_workers_threshold)
+        """
+        Make a job to do file imports on workers and then run the workflow.
+        :param file_to_data: Metadata for files that need to be imported on the
+            worker.
+        :param imported_files: Files already imported on the leader.
+        """
+        super().__init__(local=False, disk=options.import_workers_batchsize)
         self.initialized_job_order = initialized_job_order
         self.tool = tool
-        self.options = options
         self.runtime_context = runtime_context
         self.file_to_data = file_to_data
+        self.imported_files = imported_files
+        self.options = options
     def run(self, file_store: AbstractFileStore) -> Any:
+        # Do the worker-based imports
         imports_job = ImportsJob(
             self.file_to_data,
-            self.options.import_workers_threshold,
+            self.options.import_workers_batchsize,
             self.options.import_workers_disk,
         )
         self.addChild(imports_job)
+        # Install the worker imports and any leader imports
         install_imports_job = CWLInstallImportsJob(
             initialized_job_order=self.initialized_job_order,
             tool=self.tool,
             basedir=self.options.basedir,
             skip_remote=self.options.reference_inputs,
             bypass_file_store=self.options.bypass_file_store,
-            import_data=imports_job.rv(0),
+            import_data=[self.imported_files, imports_job.rv(0)],
         )
         self.addChild(install_imports_job)
         imports_job.addFollowOn(install_imports_job)
+        # Run the workflow
         start_job = CWLStartJob(
             install_imports_job.rv(0),
             install_imports_job.rv(1),
@@ -4212,6 +4251,8 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
         options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
     )
     tmp_outdir_prefix = options.tmp_outdir_prefix or tmpdir_prefix
+    # tmpdir_prefix and tmp_outdir_prefix must not be checked for existence here, as they may refer to paths that exist only on the workers.
+    # See https://github.com/DataBiosphere/toil/issues/5310
     workdir = options.workDir or tmp_outdir_prefix
     if options.jobStore is None:
@@ -4262,6 +4303,7 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
     runtime_context.workdir = workdir  # type: ignore[attr-defined]
     runtime_context.outdir = outdir
     setattr(runtime_context, "cwl_default_ram", options.cwl_default_ram)
+    setattr(runtime_context, "cwl_min_ram", options.cwl_min_ram)
     runtime_context.move_outputs = "leave"
     runtime_context.rm_tmpdir = False
     runtime_context.streaming_allowed = not options.disable_streaming
@@ -4272,11 +4314,12 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
         # of filestore files and caches those.
         logger.debug("CWL task caching is turned on. Bypassing file store.")
         options.bypass_file_store = True
+        # Ensure the cache directory exists.
+        # Only the cache directory is created here, because it must be local.
+        os.makedirs(os.path.abspath(options.cachedir), exist_ok=True)
     if options.mpi_config_file is not None:
         runtime_context.mpi_config = MpiConfig.load(options.mpi_config_file)
-    if cwltool.main.check_working_directories(runtime_context) is not None:
-        logger.error("Failed to create directory. If using tmpdir_prefix, tmpdir_outdir_prefix, or cachedir, consider changing directory locations.")
-        return 1
     setattr(runtime_context, "bypass_file_store", options.bypass_file_store)
     if options.bypass_file_store and options.destBucket:
         # We use the file store to write to buckets, so we can't do this (yet?)

toil/cwl/utils.py CHANGED Viewed

@@ -20,11 +20,26 @@ import posixpath
 import stat
 from collections.abc import Iterable, MutableMapping, MutableSequence
 from pathlib import PurePosixPath
-from typing import Any, Callable, TypeVar, Union
+from typing import (
+    Any,
+    Callable,
+    TypeVar,
+    Union,
+    Optional,
+    cast,
+    MutableSequence,
+    MutableMapping,
+    TYPE_CHECKING,
+)
+from urllib.parse import unquote, urlparse
+if TYPE_CHECKING:
+    # This module needs to be importable even if cwltool is not installed.
+    from cwltool.utils import CWLObjectType, CWLOutputType
 from toil.fileStores import FileID
 from toil.fileStores.abstractFileStore import AbstractFileStore
 from toil.jobStores.abstractJobStore import AbstractJobStore
+from toil.lib.url import URLAccess
 logger = logging.getLogger(__name__)
@@ -208,7 +223,7 @@ def download_structure(
                 )
             else:
                 # We need to download from some other kind of URL.
-                size, executable = AbstractJobStore.read_from_url(
+                size, executable = URLAccess.read_from_url(
                     value, open(dest_path, "wb")
                 )
                 if executable:
@@ -219,3 +234,88 @@ def download_structure(
             # TODO: why?
             index[dest_path] = value
             existing[value] = dest_path
+def trim_mounts_op_down(file_or_directory: "CWLObjectType") -> None:
+    """
+    No-op function for mount-point trimming.
+    """
+    return
+def sniff_location(file_or_directory: "CWLObjectType") -> Optional[str]:
+    """
+    Get the local bare path for a CWL file or directory, or None.
+    :return: None if we don't have a local path or file URI
+    """
+    if file_or_directory.get('location') is None and file_or_directory.get('path') is None:
+        # The file or directory is defined by its contents or listing respectively, so it is not redundant.
+        return None
+    # We only consider mountable paths, so ignore anything that is not a file URI or a bare path.
+    path_or_url = cast(str, file_or_directory.get('location') or file_or_directory.get('path'))
+    parsed = urlparse(path_or_url)
+    if parsed.scheme == 'file':
+        return unquote(parsed.path)
+    elif parsed.scheme == '':
+        return path_or_url
+    else:
+        return None
+def trim_mounts_op_up(file_or_directory: "CWLObjectType", op_down_ret: None, child_results: list[bool]) -> bool:
+    """
+    Remove subtrees of the CWL file or directory object tree that contain only redundant entries.
+    Nonredundant for something in a directory means its path or location is not within the parent directory, or doesn't match its basename.
+    Nonredundant for a secondary file means its path or location is not adjacent to the primary file, or doesn't match its basename.
+    If on a File:
+    Returns True if anything in the secondary files is nonredundant or has nonredundant children, False otherwise.
+    If on a Directory:
+    Returns True if anything in the top-level listing is nonredundant or has nonredundant children, False otherwise.
+    If something in the listing is redundant and all of its children are redundant, it is deleted from the listing.
+    :param file_or_directory: CWL File or CWL Directory object
+    :return: True if anything nonredundant was found at or below this object, False otherwise
+    """
+    own_path = sniff_location(file_or_directory)
+    if own_path is None:
+        return True
+    # basename should be set as we are the implementation
+    own_basename = cast(str, file_or_directory['basename'])
+    # If the basename does not match the path, then this is nonredundant
+    if not own_path.endswith("/" + own_basename):
+        return True
+    if file_or_directory['class'] == 'File':
+        if any(child_results):
+            # one of the children was detected as not redundant
+            return True
+        for secondary in cast(MutableSequence[MutableMapping[str, "CWLOutputType"]], file_or_directory.get('secondaryFiles', [])):
+            # secondary files should already be flagged nonredundant if they don't have either a path or location
+            secondary_path = sniff_location(secondary)
+            secondary_basename = cast(str, secondary['basename'])
+            # If we swap the secondary basename for the primary basename in the primary path, and they don't match, then they are nonredundant
+            if os.path.join(own_path[:-len(own_basename)], secondary_basename) != secondary_path:
+                return True
+    else:
+        listings = cast(MutableSequence[MutableMapping[str, "CWLOutputType"]], file_or_directory.get('listing', []))
+        if len(listings) == 0:
+            return False
+        # We assume child_results is in the same order as the directory listing
+        # Iterate backwards so that deleting an entry does not shift the indices still to be visited.
+        for i in range(len(listings) - 1, -1, -1):
+            if child_results[i] is False:
+                if os.path.join(own_path, cast(str, listings[i]['basename'])) == sniff_location(listings[i]):
+                    del listings[i]
+        # If one of the listings was nonredundant, then this directory is also nonredundant
+        if any(child_results):
+            return True
+    return False
+def remove_redundant_mounts(cwljob: "CWLObjectType") -> None:
+    """
+    Remove any redundant mount points from the listing. Modifies the CWL object in place.
+    """
+    visit_cwl_class_and_reduce(cwljob, ["Directory", "File"], trim_mounts_op_down, trim_mounts_op_up)

toil 8.2.0__py3-none-any.whl → 9.0.0__py3-none-any.whl

toil 8.2.0__py3-none-any.whl → 9.0.0__py3-none-any.whl