toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/job.py
CHANGED
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations
+
 import collections
 import copy
 import importlib
@@ -27,55 +29,59 @@ from abc import ABCMeta, abstractmethod
 from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser, Namespace
 from contextlib import contextmanager
 from io import BytesIO
-from typing import (
-
-
-
-
-
-
-
-
-
-
-
-
-
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Iterator,
+    List,
+    Mapping,
+    NamedTuple,
+    Optional,
+    Sequence,
+    Tuple,
+    TypeVar,
+    Union,
+    cast,
+    overload,
+    TypedDict,
+    Literal,
+)
+from urllib.error import HTTPError
+from urllib.parse import urlsplit, unquote, urljoin
+
+from toil import memoize
 
+import dill
 from configargparse import ArgParser
 
-from toil.lib.
-
-if sys.version_info >= (3, 8):
-    from typing import TypedDict
-else:
-    from typing_extensions import TypedDict
-
-import dill
-# TODO: When this gets into the standard library, get it from there and drop
-# typing-extensions dependency on Pythons that are new enough.
-from typing_extensions import NotRequired
+from toil.lib.io import is_remote_url
 
-if sys.version_info
-    from
+if sys.version_info < (3, 11):
+    from typing_extensions import NotRequired
 else:
-    from
+    from typing import NotRequired
 
+from toil.bus import Names
 from toil.common import Config, Toil, addOptions, safeUnpickleFromStream
 from toil.deferred import DeferredFunction
 from toil.fileStores import FileID
+from toil.lib.compatibility import deprecated
 from toil.lib.conversions import bytes2human, human2bytes
 from toil.lib.expando import Expando
-from toil.lib.resources import
-    get_total_cpu_time_and_memory_usage)
+from toil.lib.resources import ResourceMonitor
 from toil.resource import ModuleDescriptor
 from toil.statsAndLogging import set_logging_from_options
 
+from toil.lib.exceptions import UnimplementedURLException
+
 if TYPE_CHECKING:
     from optparse import OptionParser
 
-    from toil.batchSystems.abstractBatchSystem import
+    from toil.batchSystems.abstractBatchSystem import (
+        BatchJobExitReason
+    )
     from toil.fileStores.abstractFileStore import AbstractFileStore
     from toil.jobStores.abstractJobStore import AbstractJobStore
@@ -122,6 +128,27 @@ class ConflictingPredecessorError(Exception):
         )
 
 
+class DebugStoppingPointReached(BaseException):
+    """
+    Raised when a job reaches a point at which it has been instructed to stop for debugging.
+    """
+
+
+class FilesDownloadedStoppingPointReached(DebugStoppingPointReached):
+    """
+    Raised when a job stops because it was asked to download its files, and the files are downloaded.
+    """
+
+    def __init__(
+        self, message, host_and_job_paths: Optional[list[tuple[str, str]]] = None
+    ):
+        super().__init__(message)
+
+        # Save the host and user-code-visible paths of files, in case we're
+        # using a container and they are different.
+        self.host_and_job_paths = host_and_job_paths
+
+
 class TemporaryID:
     """
     Placeholder for a unregistered job ID used by a JobDescription.
@@ -143,7 +170,7 @@ class TemporaryID:
         return self.__repr__()
 
     def __repr__(self) -> str:
-        return f
+        return f"TemporaryID({self._value})"
 
     def __hash__(self) -> int:
         return hash(self._value)
@@ -154,6 +181,7 @@ class TemporaryID:
     def __ne__(self, other: Any) -> bool:
         return not isinstance(other, TemporaryID) or self._value != other._value
 
+
 class AcceleratorRequirement(TypedDict):
     """Requirement for one or more computational accelerators, like a GPU or FPGA."""
 
@@ -192,7 +220,10 @@ class AcceleratorRequirement(TypedDict):
 
 # TODO: support requesting any GPU with X amount of vram
 
-
+
+def parse_accelerator(
+    spec: Union[int, str, dict[str, Union[str, int]]]
+) -> AcceleratorRequirement:
     """
     Parse an AcceleratorRequirement specified by user code.
 
@@ -226,19 +257,19 @@ def parse_accelerator(spec: Union[int, str, Dict[str, Union[str, int]]]) -> Acce
     of them. Knows that "gpu" is a kind, and "cuda" is an API, and "nvidia"
     is a brand.
 
-    :raises ValueError: if it gets
+    :raises ValueError: if it gets something it can't parse
    :raises TypeError: if it gets something it can't parse because it's the wrong type.
     """
-    KINDS = {
-    BRANDS = {
-    APIS = {
+    KINDS = {"gpu"}
+    BRANDS = {"nvidia", "amd"}
+    APIS = {"cuda", "rocm", "opencl"}
 
-    parsed: AcceleratorRequirement = {
+    parsed: AcceleratorRequirement = {"count": 1, "kind": "gpu"}
 
     if isinstance(spec, int):
-        parsed[
+        parsed["count"] = spec
     elif isinstance(spec, str):
-        parts = spec.split(
+        parts = spec.split(":")
 
         if len(parts) > 2:
             raise ValueError("Could not parse AcceleratorRequirement: " + spec)
@@ -247,7 +278,7 @@ def parse_accelerator(spec: Union[int, str, Dict[str, Union[str, int]]]) -> Acce
 
         try:
             # If they have : and then a count, or just a count, handle that.
-            parsed[
+            parsed["count"] = int(possible_count)
             if len(parts) > 1:
                 # Then we take whatever was before the colon as text
                 possible_description = parts[0]
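For orientation (not part of the diff), a minimal sketch of how a debugging harness could consume the new stopping-point exceptions introduced above; run_job_body is a hypothetical stand-in for invoking the job under debug:

from toil.job import FilesDownloadedStoppingPointReached

try:
    run_job_body()  # hypothetical: execute the job with file-download debugging enabled
except FilesDownloadedStoppingPointReached as stop:
    # host_and_job_paths maps container-visible paths back to host paths, when set
    for host_path, job_path in stop.host_and_job_paths or []:
        print(f"{job_path} (in job) -> {host_path} (on host)")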
@@ -257,73 +288,97 @@ def parse_accelerator(spec: Union[int, str, Dict[str, Union[str, int]]]) -> Acce
             # It doesn't end with a number
             if len(parts) == 2:
                 # We should have a number though.
-                raise ValueError(
+                raise ValueError(
+                    "Could not parse AcceleratorRequirement count in: " + spec
+                )
             else:
                 # Must be just the description
                 possible_description = possible_count
 
         # Determine if we have a kind, brand, API, or (by default) model
         if possible_description in KINDS:
-            parsed[
+            parsed["kind"] = possible_description
         elif possible_description in BRANDS:
-            parsed[
+            parsed["brand"] = possible_description
         elif possible_description in APIS:
-            parsed[
+            parsed["api"] = possible_description
         else:
             if possible_description is not None:
-                parsed[
+                parsed["model"] = possible_description
     elif isinstance(spec, dict):
         # It's a dict, so merge with the defaults.
         parsed.update(spec)
         # TODO: make sure they didn't misspell keys or something
     else:
-        raise TypeError(
+        raise TypeError(
+            f"Cannot parse value of type {type(spec)} as an AcceleratorRequirement"
+        )
 
-    if parsed[
+    if parsed["kind"] == "gpu":
         # Use some smarts about what current GPUs are like to elaborate the
         # description.
 
-        if
+        if "brand" not in parsed and "model" in parsed:
             # Try to guess the brand from the model
             for brand in BRANDS:
-                if parsed[
+                if parsed["model"].startswith(brand):
                     # The model often starts with the brand
-                    parsed[
+                    parsed["brand"] = brand
                     break
 
-        if
+        if "brand" not in parsed and "api" in parsed:
             # Try to guess the brand from the API
-            if parsed[
+            if parsed["api"] == "cuda":
                 # Only nvidia makes cuda cards
-                parsed[
-            elif parsed[
+                parsed["brand"] = "nvidia"
+            elif parsed["api"] == "rocm":
                 # Only amd makes rocm cards
-                parsed[
+                parsed["brand"] = "amd"
 
     return parsed
 
-
+
+def accelerator_satisfies(
+    candidate: AcceleratorRequirement,
+    requirement: AcceleratorRequirement,
+    ignore: list[str] = [],
+) -> bool:
     """
     Test if candidate partially satisfies the given requirement.
 
     :returns: True if the given candidate at least partially satisfies the
         given requirement (i.e. check all fields other than count).
     """
-    for key in [
+    for key in ["kind", "brand", "api", "model"]:
         if key in ignore:
             # Skip this aspect.
             continue
         if key in requirement:
             if key not in candidate:
-                logger.debug(
+                logger.debug(
+                    "Candidate %s does not satisfy requirement %s because it does not have a %s",
+                    candidate,
+                    requirement,
+                    key,
+                )
                 return False
             if candidate[key] != requirement[key]:
-                logger.debug(
+                logger.debug(
+                    "Candidate %s does not satisfy requirement %s because it does not have the correct %s",
+                    candidate,
+                    requirement,
+                    key,
+                )
                 return False
     # If all these match or are more specific than required, we match!
     return True
 
-
+
+def accelerators_fully_satisfy(
+    candidates: Optional[list[AcceleratorRequirement]],
+    requirement: AcceleratorRequirement,
+    ignore: list[str] = [],
+) -> bool:
     """
     Determine if a set of accelerators satisfy a requirement.
 
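Illustration only, not part of the diff: how a few accelerator specifications parse under the parse_accelerator logic shown above (result values in comments follow that code):

from toil.job import parse_accelerator

parse_accelerator(2)         # {'count': 2, 'kind': 'gpu'}
parse_accelerator("gpu:2")   # {'count': 2, 'kind': 'gpu'}
parse_accelerator("cuda:1")  # {'count': 1, 'kind': 'gpu', 'api': 'cuda', 'brand': 'nvidia'}
parse_accelerator("nvidia-tesla-t4")
# {'count': 1, 'kind': 'gpu', 'model': 'nvidia-tesla-t4', 'brand': 'nvidia'}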
@@ -334,21 +389,22 @@ def accelerators_fully_satisfy(candidates: Optional[List[AcceleratorRequirement]
     together (i.e. check all fields including count).
     """
 
-    count_remaining = requirement[
+    count_remaining = requirement["count"]
 
     if candidates:
         for candidate in candidates:
             if accelerator_satisfies(candidate, requirement, ignore=ignore):
-                if candidate[
+                if candidate["count"] > count_remaining:
                     # We found all the matching accelerators we need
                     count_remaining = 0
                     break
                 else:
-                    count_remaining -= candidate[
+                    count_remaining -= candidate["count"]
 
     # If we have no count left we are fully satisfied
     return count_remaining == 0
 
+
 class RequirementsDict(TypedDict):
     """
     Typed storage for requirements for a job.
@@ -359,22 +415,35 @@ class RequirementsDict(TypedDict):
     cores: NotRequired[Union[int, float]]
     memory: NotRequired[int]
     disk: NotRequired[int]
-    accelerators: NotRequired[
+    accelerators: NotRequired[list[AcceleratorRequirement]]
     preemptible: NotRequired[bool]
 
+
 # These must be all the key names in RequirementsDict
 REQUIREMENT_NAMES = ["disk", "memory", "cores", "accelerators", "preemptible"]
 
 # This is the supertype of all value types in RequirementsDict
-ParsedRequirement = Union[int, float, bool,
+ParsedRequirement = Union[int, float, bool, list[AcceleratorRequirement]]
 
 # We define some types for things we can parse into different kind of requirements
 ParseableIndivisibleResource = Union[str, int]
 ParseableDivisibleResource = Union[str, int, float]
 ParseableFlag = Union[str, int, bool]
-ParseableAcceleratorRequirement = Union[
+ParseableAcceleratorRequirement = Union[
+    str,
+    int,
+    Mapping[str, Any],
+    AcceleratorRequirement,
+    Sequence[Union[str, int, Mapping[str, Any], AcceleratorRequirement]],
+]
+
+ParseableRequirement = Union[
+    ParseableIndivisibleResource,
+    ParseableDivisibleResource,
+    ParseableFlag,
+    ParseableAcceleratorRequirement,
+]
 
-ParseableRequirement = Union[ParseableIndivisibleResource, ParseableDivisibleResource, ParseableFlag, ParseableAcceleratorRequirement]
 
 class Requirer:
     """
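As an illustrative example (not from the diff), a fully populated RequirementsDict under the new annotations could look like:

reqs: RequirementsDict = {
    "cores": 0.5,
    "memory": 2 * 1024**3,
    "disk": 4 * 1024**3,
    "accelerators": [{"count": 1, "kind": "gpu", "api": "cuda", "brand": "nvidia"}],
    "preemptible": True,
}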
@@ -385,9 +454,7 @@ class Requirer:
 
     _requirementOverrides: RequirementsDict
 
-    def __init__(
-        self, requirements: Mapping[str, ParseableRequirement]
-    ) -> None:
+    def __init__(self, requirements: Mapping[str, ParseableRequirement]) -> None:
         """
         Parse and save the given requirements.
 
@@ -428,12 +495,11 @@ class Requirer:
             raise RuntimeError(f"Config assigned multiple times to {self}")
         self._config = config
 
-
-    def __getstate__(self) -> Dict[str, Any]:
+    def __getstate__(self) -> dict[str, Any]:
         """Return the dict to use as the instance's __dict__ when pickling."""
         # We want to exclude the config from pickling.
         state = self.__dict__.copy()
-        state[
+        state["_config"] = None
         return state
 
     def __copy__(self) -> "Requirer":
@@ -474,37 +540,29 @@ class Requirer:
     @overload
     @staticmethod
     def _parseResource(
-        name: Union[Literal["memory"], Literal["disks"]],
-
-
+        name: Union[Literal["memory"], Literal["disks"]],
+        value: ParseableIndivisibleResource,
+    ) -> int: ...
 
     @overload
     @staticmethod
     def _parseResource(
         name: Literal["cores"], value: ParseableDivisibleResource
-    ) -> Union[int, float]:
-        ...
+    ) -> Union[int, float]: ...
 
     @overload
     @staticmethod
     def _parseResource(
         name: Literal["accelerators"], value: ParseableAcceleratorRequirement
-    ) ->
-        ...
+    ) -> list[AcceleratorRequirement]: ...
 
     @overload
     @staticmethod
-    def _parseResource(
-        name: str, value: ParseableRequirement
-    ) -> ParsedRequirement:
-        ...
+    def _parseResource(name: str, value: ParseableRequirement) -> ParsedRequirement: ...
 
     @overload
     @staticmethod
-    def _parseResource(
-        name: str, value: None
-    ) -> None:
-        ...
+    def _parseResource(name: str, value: None) -> None: ...
 
     @staticmethod
     def _parseResource(
@@ -541,43 +599,53 @@ class Requirer:
             # Anything can be None.
             return value
 
-        if name in (
+        if name in ("memory", "disk", "cores"):
             # These should be numbers that accept things like "5G".
             if isinstance(value, (str, bytes)):
                 value = human2bytes(value)
             if isinstance(value, int):
                 return value
-            elif isinstance(value, float) and name ==
+            elif isinstance(value, float) and name == "cores":
                 # But only cores can be fractional.
                 return value
             else:
-                raise TypeError(
-
+                raise TypeError(
+                    f"The '{name}' requirement does not accept values that are of type {type(value)}"
+                )
+        elif name == "preemptible":
             if isinstance(value, str):
                 if value.lower() == "true":
                     return True
                 elif value.lower() == "false":
                     return False
                 else:
-                    raise ValueError(
+                    raise ValueError(
+                        f"The '{name}' requirement, as a string, must be 'true' or 'false' but is {value}"
+                    )
             elif isinstance(value, int):
                 if value == 1:
                     return True
                 if value == 0:
                     return False
                 else:
-                    raise ValueError(
+                    raise ValueError(
+                        f"The '{name}' requirement, as an int, must be 1 or 0 but is {value}"
+                    )
             elif isinstance(value, bool):
                 return value
             else:
-                raise TypeError(
-
+                raise TypeError(
+                    f"The '{name}' requirement does not accept values that are of type {type(value)}"
+                )
+        elif name == "accelerators":
             # The type checking for this is delegated to the
             # AcceleratorRequirement class.
             if isinstance(value, list):
-                return [
+                return [
+                    parse_accelerator(v) for v in value
+                ]  # accelerators={'kind': 'gpu', 'brand': 'nvidia', 'count': 2}
             else:
-                return [parse_accelerator(value)]
+                return [parse_accelerator(value)]  # accelerators=1
         else:
             # Anything else we just pass along without opinons
             return cast(ParsedRequirement, value)
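A rough sketch of what the private Requirer._parseResource helper accepts after this change, based on the branches above (exact byte values depend on human2bytes; shown for illustration only):

Requirer._parseResource("memory", "5G")           # an int number of bytes, via human2bytes
Requirer._parseResource("cores", 0.5)             # 0.5; only cores may be fractional
Requirer._parseResource("preemptible", "TRUE")    # True
Requirer._parseResource("accelerators", "gpu:2")  # [{'count': 2, 'kind': 'gpu'}]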
@@ -600,7 +668,10 @@ class Requirer:
                 )
             return value
         elif self._config is not None:
-            values = [
+            values = [
+                getattr(self._config, "default_" + requirement, None),
+                getattr(self._config, "default" + requirement.capitalize(), None),
+            ]
             value = values[0] if values[0] is not None else values[1]
             if value is None:
                 raise AttributeError(
@@ -661,10 +732,13 @@ class Requirer:
         self._requirementOverrides["preemptible"] = Requirer._parseResource(
             "preemptible", val
         )
+
     @property
-    def accelerators(self) ->
+    def accelerators(self) -> list[AcceleratorRequirement]:
         """Any accelerators, such as GPUs, that are needed."""
-        return cast(
+        return cast(
+            list[AcceleratorRequirement], self._fetchRequirement("accelerators")
+        )
 
     @accelerators.setter
     def accelerators(self, val: ParseableAcceleratorRequirement) -> None:
@@ -687,7 +761,7 @@ class Requirer:
             if isinstance(original_value, (int, float)):
                 # This is something we actually can scale up and down
                 new_value = original_value * factor
-                if requirement in (
+                if requirement in ("memory", "disk"):
                     # Must round to an int
                     new_value = math.ceil(new_value)
                 setattr(scaled, requirement, new_value)
@@ -705,18 +779,31 @@ class Requirer:
             if isinstance(v, (int, float)) and v > 1000:
                 # Make large numbers readable
                 v = bytes2human(v)
-            parts.append(f
+            parts.append(f"{k}: {v}")
         if len(parts) == 0:
-            parts = [
-        return
+            parts = ["no requirements"]
+        return ", ".join(parts)
+
+
+class JobBodyReference(NamedTuple):
+    """
+    Reference from a job description to its body.
+    """
+
+    file_store_id: str
+    """File ID (or special shared file name for the root job) of the job's body."""
+    module_string: str
+    """Stringified description of the module needed to load the body."""
 
 
 class JobDescription(Requirer):
     """
     Stores all the information that the Toil Leader ever needs to know about a Job.
 
-
-
+    This includes:
+        * Resource requirements.
+        * Which jobs are children or follow-ons or predecessors of this job.
+        * A reference to the Job object in the job store.
 
     Can be obtained from an actual (i.e. executable) Job object, and can be
     used to obtain the Job object from the JobStore.
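For example (illustrative placeholder values, not from the diff), a JobBodyReference is just a two-field NamedTuple:

ref = JobBodyReference("file-abc123", "<stringified ModuleDescriptor>")
ref.file_store_id   # where the pickled body lives, or "firstJob" for the root job
ref.module_string   # produced by ModuleDescriptor.toCommand()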
@@ -733,8 +820,8 @@ class JobDescription(Requirer):
         jobName: str,
         unitName: Optional[str] = "",
         displayName: Optional[str] = "",
-
-
+        local: Optional[bool] = None,
+        files: Optional[set[FileID]] = None,
     ) -> None:
         """
         Create a new JobDescription.
@@ -757,6 +844,7 @@
         :param local: If True, the job is meant to use minimal resources but is
             sensitive to execution latency, and so should be executed by the
             leader.
+        :param files: Set of FileID objects that the job plans to use.
         """
         # Set requirements
         super().__init__(requirements)
@@ -767,10 +855,11 @@
         # Save names, making sure they are strings and not e.g. bytes or None.
         def makeString(x: Union[str, bytes, None]) -> str:
             if isinstance(x, bytes):
-                return x.decode(
+                return x.decode("utf-8", errors="replace")
             if x is None:
                 return ""
             return x
+
         self.jobName = makeString(jobName)
         self.unitName = makeString(unitName)
         self.displayName = makeString(displayName)
@@ -780,14 +869,10 @@
         # ID of this job description in the JobStore.
         self.jobStoreID: Union[str, TemporaryID] = TemporaryID()
 
-        #
-        #
-        #
-
-        # Gets replaced with/rewritten into the real, executable command when
-        # the leader passes the description off to the batch system to be
-        # executed.
-        self.command: Optional[str] = command
+        # Information that encodes how to find the Job body data that this
+        # JobDescription describes, and the module(s) needed to unpickle it.
+        # None if no body needs to run.
+        self._body: Optional[JobBodyReference] = None
 
         # Set scheduling properties that the leader read to think about scheduling.
 
@@ -814,11 +899,14 @@
         # in the process of being committed.
         self.filesToDelete = []
 
-        # Holds
+        # Holds job names and IDs of the jobs that have been chained into this
         # job, and which should be deleted when this job finally is deleted
         # (but not before). The successor relationships with them will have
-        # been cut, so we need to hold onto them somehow.
-
+        # been cut, so we need to hold onto them somehow. Includes each
+        # chained-in job with its original ID, and also this job's ID with its
+        # original names, or is empty if no chaining has happened.
+        # The first job in the chain comes first in the list.
+        self._merged_job_names: list[Names] = []
 
         # The number of direct predecessors of the job. Needs to be stored at
         # the JobDescription to support dynamically-created jobs with multiple
@@ -841,17 +929,17 @@
 
         # The IDs of all child jobs of the described job.
         # Children which are done must be removed with filterSuccessors.
-        self.childIDs:
+        self.childIDs: set[str] = set()
 
         # The IDs of all follow-on jobs of the described job.
         # Follow-ons which are done must be removed with filterSuccessors.
-        self.followOnIDs:
+        self.followOnIDs: set[str] = set()
 
         # We keep our own children and follow-ons in a list of successor
         # phases, along with any successors adopted from jobs we have chained
         # from. When we finish our own children and follow-ons, we may have to
         # go back and finish successors for those jobs.
-        self.successor_phases:
+        self.successor_phases: list[set[str]] = [self.followOnIDs, self.childIDs]
 
         # Dict from ServiceHostJob ID to list of child ServiceHostJobs that start after it.
         # All services must have an entry, if only to an empty list.
@@ -867,11 +955,39 @@
         # And we log who made the version (by PID)
         self._job_version_writer = 0
 
-        #
-        #
-
+        # Store FileIDs that the Job will want to use
+        # This currently does not serve much of a purpose except for debugging
+        # In the future, this can be used to improve job scheduling, see https://github.com/DataBiosphere/toil/issues/3071
+        self.files_to_use = files or set()
 
-    def
+    def get_names(self) -> Names:
+        """
+        Get the names and ID of this job as a named tuple.
+        """
+        return Names(
+            self.jobName,
+            self.unitName,
+            self.displayName,
+            self.displayName,
+            str(self.jobStoreID),
+        )
+
+    def get_chain(self) -> list[Names]:
+        """
+        Get all the jobs that executed in this job's chain, in order.
+
+        For each job, produces a named tuple with its various names and its
+        original job store ID. The jobs in the chain are in execution order.
+
+        If the job hasn't run yet or it didn't chain, produces a one-item list.
+        """
+        if len(self._merged_job_names) == 0:
+            # We haven't merged so we're just ourselves.
+            return [self.get_names()]
+        else:
+            return list(self._merged_job_names)
+
+    def serviceHostIDsInBatches(self) -> Iterator[list[str]]:
         """
         Find all batches of service host job IDs that can be started at the same time.
 
@@ -912,14 +1028,13 @@
         """
 
         for phase in self.successor_phases:
-
-            yield successor
+            yield from phase
 
-    def successors_by_phase(self) -> Iterator[
+    def successors_by_phase(self) -> Iterator[tuple[int, str]]:
         """
-        Get an iterator over all child/follow-on/chained inherited successor job IDs, along with their phase
+        Get an iterator over all child/follow-on/chained inherited successor job IDs, along with their phase number on the stack.
 
-        Phases
+        Phases execute higher numbers to lower numbers.
         """
 
         for i, phase in enumerate(self.successor_phases):
@@ -935,7 +1050,49 @@
         """
         return list(self.serviceTree.keys())
 
-    def
+    def has_body(self) -> bool:
+        """
+        Returns True if we have a job body associated, and False otherwise.
+        """
+        return self._body is not None
+
+    def attach_body(self, file_store_id: str, user_script: ModuleDescriptor) -> None:
+        """
+        Attach a job body to this JobDescription.
+
+        Takes the file store ID that the body is stored at, and the required
+        user script module.
+
+        The file store ID can also be "firstJob" for the root job, stored as a
+        shared file instead.
+        """
+
+        self._body = JobBodyReference(file_store_id, user_script.toCommand())
+
+    def detach_body(self) -> None:
+        """
+        Drop the body reference from a JobDescription.
+        """
+        self._body = None
+
+    def get_body(self) -> tuple[str, ModuleDescriptor]:
+        """
+        Get the information needed to load the job body.
+
+        :returns: a file store ID (or magic shared file name "firstJob") and a
+            user script module.
+
+        Fails if no body is attached; check has_body() first.
+        """
+
+        if not self.has_body():
+            raise RuntimeError(f"Cannot load the body of a job {self} without one")
+
+        return self._body.file_store_id, ModuleDescriptor.fromCommand(
+            self._body.module_string
+        )
+
+    def nextSuccessors(self) -> Optional[set[str]]:
         """
         Return the collection of job IDs for the successors of this job that are ready to run.
 
@@ -946,7 +1103,7 @@
         empty collection if there are more phases but they can't be entered yet
         (e.g. because we are waiting for the job itself to run).
         """
-        if self.
+        if self.has_body():
             # We ourselves need to run. So there's not nothing to do
             # but no successors are ready.
             return set()
@@ -1018,7 +1175,9 @@
         :returns: True if the job appears to be done, and all related child,
             follow-on, and service jobs appear to be finished and removed.
         """
-        return
+        return (
+            not self.has_body() and next(self.successorsAndServiceHosts(), None) is None
+        )
 
     def replace(self, other: "JobDescription") -> None:
         """
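A minimal sketch (hypothetical file store ID) of the new body-reference API that replaces the old self.command string:

desc = JobDescription(requirements={}, jobName="example")
desc.attach_body("file-abc123", ModuleDescriptor.forModule(__name__).globalize())
if desc.has_body():
    file_store_id, user_module = desc.get_body()
desc.detach_body()  # afterwards has_body() is False again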
@@ -1037,32 +1196,90 @@
         # TODO: We can't join the job graphs with Job._jobGraphsJoined, is that a problem?
 
         # Take all the successors other than this one
-        old_phases = [
+        old_phases = [
+            {i for i in p if i != self.jobStoreID} for p in other.successor_phases
+        ]
         # And drop empty phases
         old_phases = [p for p in old_phases if len(p) > 0]
         # And put in front of our existing phases
-        logger.debug(
+        logger.debug(
+            "%s is adopting successor phases from %s of: %s", self, other, old_phases
+        )
         self.successor_phases = old_phases + self.successor_phases
 
         # When deleting, we need to delete the files for our old ID, and also
-        # anything that needed to be deleted for the job we are replacing.
-
+        # anything that needed to be deleted for the job we are replacing. And
+        # we need to keep track of all the names of jobs involved for logging.
+
+        # We need first the job we are merging into if nothing has merged into
+        # it yet, then anything that already merged into it (including it),
+        # then us if nothing has yet merged into us, then anything that merged
+        # into us (inclusing us)
+        _merged_job_names = []
+        if len(other._merged_job_names) == 0:
+            _merged_job_names.append(other.get_names())
+        _merged_job_names += other._merged_job_names
+        if len(self._merged_job_names) == 0:
+            _merged_job_names.append(self.get_names())
+        _merged_job_names += self._merged_job_names
+        self._merged_job_names = _merged_job_names
+
+        # Now steal its ID.
         self.jobStoreID = other.jobStoreID
 
         if len(other.filesToDelete) > 0:
-            raise RuntimeError(
+            raise RuntimeError(
+                "Trying to take on the ID of a job that is in the process of being committed!"
+            )
         if len(self.filesToDelete) > 0:
-            raise RuntimeError(
+            raise RuntimeError(
+                "Trying to take on the ID of anothe job while in the process of being committed!"
+            )
 
         self._job_version = other._job_version
         self._job_version_writer = os.getpid()
 
-    def
+    def assert_is_not_newer_than(self, other: "JobDescription") -> None:
         """
-        Make sure a prospective new version of the JobDescription
+        Make sure this JobDescription is not newer than a prospective new version of the JobDescription.
         """
         if other._job_version < self._job_version:
-            raise RuntimeError(
+            raise RuntimeError(
+                f"Cannot replace {self} from PID {self._job_version_writer} with older version {other} from PID {other._job_version_writer}"
+            )
+
+    def is_updated_by(self, other: "JobDescription") -> bool:
+        """
+        Return True if the passed JobDescription is a distinct, newer version of this one.
+        """
+
+        if self.jobStoreID != other.jobStoreID:
+            # Not the same job
+            logger.warning(
+                "Found ID %s in job %s from PID %s but expected ID %s to "
+                "update job %s from PID %s",
+                other.jobStoreID,
+                other,
+                other._job_version_writer,
+                self.jobStoreID,
+                self,
+                self._job_version_writer,
+            )
+            return False
+
+        if self._job_version >= other._job_version:
+            # Version isn't strictly newer
+            logger.debug(
+                "Expected newer version in job %s from PID %s but it is no "
+                "newer than job %s from PID %s",
+                other,
+                other._job_version_writer,
+                self,
+                self._job_version_writer,
+            )
+            return False
+
+        return True
 
     def addChild(self, childID: str) -> None:
         """Make the job with the given ID a child of the described job."""
@@ -1098,7 +1315,7 @@
         """Test if the ServiceHostJob is a service of the described job."""
         return serviceID in self.serviceTree
 
-    def renameReferences(self, renames:
+    def renameReferences(self, renames: dict[TemporaryID, str]) -> None:
         """
         Apply the given dict of ID renames to all references to jobs.
 
@@ -1114,8 +1331,12 @@
                 # Replace each renamed item one at a time to preserve set identity
                 phase.remove(item)
                 phase.add(renames[item])
-        self.serviceTree = {
-
+        self.serviceTree = {
+            renames.get(parent, parent): [
+                renames.get(child, child) for child in children
+            ]
+            for parent, children in self.serviceTree.items()
+        }
 
     def addPredecessor(self) -> None:
         """Notify the JobDescription that a predecessor has been added to its Job."""
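Sketch of how the new version helpers are meant to be read (current_desc and new_desc are hypothetical variables, behaviour follows the methods above):

if current_desc.is_updated_by(new_desc):
    current_desc = new_desc  # same jobStoreID and a strictly newer _job_version
current_desc.assert_is_not_newer_than(new_desc)  # raises RuntimeError if new_desc is older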
@@ -1133,7 +1354,11 @@
         :param jobStore: The job store we are being placed into
         """
 
-    def setupJobAfterFailure(
+    def setupJobAfterFailure(
+        self,
+        exit_status: Optional[int] = None,
+        exit_reason: Optional["BatchJobExitReason"] = None,
+    ) -> None:
         """
         Configure job after a failure.
 
@@ -1156,30 +1381,49 @@
         if self._config is None:
             raise RuntimeError("The job's config is not assigned.")
 
-        if
-
-
+        if (
+            self._config.enableUnlimitedPreemptibleRetries
+            and exit_reason == BatchJobExitReason.LOST
+        ):
+            logger.info(
+                "*Not* reducing try count (%s) of job %s with ID %s",
+                self.remainingTryCount,
+                self,
+                self.jobStoreID,
+            )
         else:
             self.remainingTryCount = max(0, self.remainingTryCount - 1)
-            logger.warning(
-
+            logger.warning(
+                "Due to failure we are reducing the remaining try count of job %s with ID %s to %s",
+                self,
+                self.jobStoreID,
+                self.remainingTryCount,
+            )
         # Set the default memory to be at least as large as the default, in
         # case this was a malloc failure (we do this because of the combined
         # batch system)
         if exit_reason == BatchJobExitReason.MEMLIMIT and self._config.doubleMem:
             self.memory = self.memory * 2
-            logger.warning(
-
+            logger.warning(
+                "We have doubled the memory of the failed job %s to %s bytes due to doubleMem flag",
+                self,
+                self.memory,
+            )
         if self.memory < self._config.defaultMemory:
             self.memory = self._config.defaultMemory
-            logger.warning(
-
+            logger.warning(
+                "We have increased the default memory of the failed job %s to %s bytes",
+                self,
+                self.memory,
+            )
 
         if self.disk < self._config.defaultDisk:
             self.disk = self._config.defaultDisk
-            logger.warning(
-
-
+            logger.warning(
+                "We have increased the disk of the failed job %s to the default of %s bytes",
+                self,
+                self.disk,
+            )
 
     def getLogFileHandle(self, jobStore):
         """
@@ -1229,12 +1473,12 @@
         """Produce a useful logging string identifying this job."""
         printedName = "'" + self.jobName + "'"
         if self.unitName:
-            printedName +=
+            printedName += " " + self.unitName
 
         if self.jobStoreID is not None:
-            printedName +=
+            printedName += " " + str(self.jobStoreID)
 
-        printedName +=
+        printedName += " v" + str(self._job_version)
 
         return printedName
 
@@ -1243,7 +1487,7 @@
     # a time, keyed by jobStoreID.
 
     def __repr__(self):
-        return f
+        return f"{self.__class__.__name__}( **{self.__dict__!r} )"
 
     def reserve_versions(self, count: int) -> None:
         """
@@ -1263,25 +1507,6 @@
         self._job_version_writer = os.getpid()
         logger.debug("New job version: %s", self)
 
-    def get_job_kind(self) -> str:
-        """
-        Return an identifying string for the job.
-
-        The result may contain spaces.
-
-        Returns: Either the unit name, job name, or display name, which identifies
-            the kind of job it is to toil.
-            Otherwise "Unknown Job" in case no identifier is available
-        """
-        if self.unitName:
-            return self.unitName
-        elif self.jobName:
-            return self.jobName
-        elif self.displayName:
-            return self.displayName
-        else:
-            return "Unknown Job"
-
 
 class ServiceJobDescription(JobDescription):
     """A description of a job that hosts a service."""
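Sketch of the retry bookkeeping above (config attribute names as they appear in the hunk; desc is a hypothetical JobDescription):

desc.setupJobAfterFailure(exit_status=1, exit_reason=BatchJobExitReason.MEMLIMIT)
# - normally decrements remainingTryCount, but not for LOST jobs when
#   enableUnlimitedPreemptibleRetries is set
# - doubles desc.memory when doubleMem is set and the job hit MEMLIMIT,
#   then raises memory/disk up to the configured defaults if they are lower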
@@ -1330,13 +1555,30 @@ class CheckpointJobDescription(JobDescription):
 
         # Set checkpoint-specific properties
 
-        # None, or a copy of the original
-        self.checkpoint = None
+        # None, or a copy of the original self._body used to reestablish the job after failure.
+        self.checkpoint: Optional[JobBodyReference] = None
 
         # Files that can not be deleted until the job and its successors have completed
         self.checkpointFilesToDelete = []
 
-    def
+    def set_checkpoint(self) -> str:
+        """
+        Save a body checkpoint into self.checkpoint
+        """
+
+        if not self.has_body():
+            raise RuntimeError(f"Cannot snapshot the body of a job {self} without one")
+        self.checkpoint = self._body
+
+    def restore_checkpoint(self) -> None:
+        """
+        Restore the body checkpoint from self.checkpoint
+        """
+        if self.checkpoint is None:
+            raise RuntimeError(f"Cannot restore an empty checkpoint for a job {self}")
+        self._body = self.checkpoint
+
+    def restartCheckpoint(self, jobStore: "AbstractJobStore") -> list[str]:
         """
         Restart a checkpoint after the total failure of jobs in its subtree.
 
@@ -1347,24 +1589,30 @@ class CheckpointJobDescription(JobDescription):
         Returns a list with the IDs of any successors deleted.
         """
         if self.checkpoint is None:
-            raise RuntimeError(
+            raise RuntimeError(
+                "Cannot restart a checkpoint job. The checkpoint was never set."
+            )
         successorsDeleted = []
         all_successors = list(self.allSuccessors())
-        if len(all_successors) > 0 or self.serviceTree or self.
-            if self.
-            if self.
-                raise RuntimeError(
-
+        if len(all_successors) > 0 or self.serviceTree or self.has_body():
+            if self.has_body():
+                if self._body != self.checkpoint:
+                    raise RuntimeError(
+                        "The stored body reference and checkpoint are not the same."
+                    )
+                logger.debug("Checkpoint job already has body set to run")
             else:
-                self.
+                self.restore_checkpoint()
 
-            jobStore.update_job(self)
+            jobStore.update_job(self)  # Update immediately to ensure that checkpoint
             # is made before deleting any remaining successors
 
             if len(all_successors) > 0 or self.serviceTree:
                 # If the subtree of successors is not complete restart everything
-                logger.debug(
-
+                logger.debug(
+                    "Checkpoint job has unfinished successor jobs, deleting successors: %s, services: %s "
+                    % (all_successors, self.serviceTree.keys())
+                )
 
                 # Delete everything on the stack, as these represent successors to clean
                 # up as we restart the queue
@@ -1377,9 +1625,13 @@ class CheckpointJobDescription(JobDescription):
                         logger.debug("Job %s has already been deleted", otherJobID)
                     if jobDesc.jobStoreID != self.jobStoreID:
                         # Delete everything under us except us.
-                        logger.debug(
+                        logger.debug(
+                            "Checkpoint is deleting old successor job: %s",
+                            jobDesc.jobStoreID,
+                        )
                         jobStore.delete_job(jobDesc.jobStoreID)
                         successorsDeleted.append(jobDesc.jobStoreID)
+
                 recursiveDelete(self)
 
                 # Cut links to the jobs we deleted.
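Sketch of the checkpoint flow introduced above: the checkpoint now snapshots the JobBodyReference rather than a command string (desc is a hypothetical CheckpointJobDescription):

desc.set_checkpoint()      # copies the current body reference into desc.checkpoint
desc.detach_body()         # the body runs and is dropped...
desc.restore_checkpoint()  # ...and can be reattached from the checkpoint after a failure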
@@ -1408,6 +1660,7 @@ class Job:
|
|
|
1408
1660
|
displayName: Optional[str] = "",
|
|
1409
1661
|
descriptionClass: Optional[type] = None,
|
|
1410
1662
|
local: Optional[bool] = None,
|
|
1663
|
+
files: Optional[set[FileID]] = None,
|
|
1411
1664
|
) -> None:
|
|
1412
1665
|
"""
|
|
1413
1666
|
Job initializer.
|
|
@@ -1428,6 +1681,7 @@ class Job:
|
|
|
1428
1681
|
:param displayName: Human-readable job type display name.
|
|
1429
1682
|
:param descriptionClass: Override for the JobDescription class used to describe the job.
|
|
1430
1683
|
:param local: if the job can be run on the leader.
|
|
1684
|
+
:param files: Set of Files that the job will want to use.
|
|
1431
1685
|
|
|
1432
1686
|
:type memory: int or string convertible by toil.lib.conversions.human2bytes to an int
|
|
1433
1687
|
:type cores: float, int, or string convertible by toil.lib.conversions.human2bytes to an int
|
|
@@ -1443,14 +1697,20 @@ class Job:
|
|
|
1443
1697
|
jobName = self.__class__.__name__
|
|
1444
1698
|
displayName = displayName if displayName else jobName
|
|
1445
1699
|
|
|
1446
|
-
#Some workflows use preemptable instead of preemptible
|
|
1700
|
+
# Some workflows use preemptable instead of preemptible
|
|
1447
1701
|
if preemptable and not preemptible:
|
|
1448
|
-
logger.warning(
|
|
1702
|
+
logger.warning(
|
|
1703
|
+
"Preemptable as a keyword has been deprecated, please use preemptible."
|
|
1704
|
+
)
|
|
1449
1705
|
preemptible = preemptable
|
|
1450
1706
|
# Build a requirements dict for the description
|
|
1451
|
-
requirements = {
|
|
1452
|
-
|
|
1453
|
-
|
|
1707
|
+
requirements = {
|
|
1708
|
+
"memory": memory,
|
|
1709
|
+
"cores": cores,
|
|
1710
|
+
"disk": disk,
|
|
1711
|
+
"accelerators": accelerators,
|
|
1712
|
+
"preemptible": preemptible,
|
|
1713
|
+
}
|
|
1454
1714
|
if descriptionClass is None:
|
|
1455
1715
|
if checkpoint:
|
|
1456
1716
|
# Actually describe as a checkpoint job
|
|
@@ -1466,7 +1726,8 @@ class Job:
|
|
|
1466
1726
|
jobName,
|
|
1467
1727
|
unitName=unitName,
|
|
1468
1728
|
displayName=displayName,
|
|
1469
|
-
local=local
|
|
1729
|
+
local=local,
|
|
1730
|
+
files=files,
|
|
1470
1731
|
)
|
|
1471
1732
|
|
|
1472
1733
|
# Private class variables needed to actually execute a job, in the worker.
|
|
@@ -1489,7 +1750,9 @@ class Job:
|
|
|
1489
1750
|
# Note that self.__module__ is not necessarily this module, i.e. job.py. It is the module
|
|
1490
1751
|
# defining the class self is an instance of, which may be a subclass of Job that may be
|
|
1491
1752
|
# defined in a different module.
|
|
1492
|
-
self.userModule: ModuleDescriptor = ModuleDescriptor.forModule(
|
|
1753
|
+
self.userModule: ModuleDescriptor = ModuleDescriptor.forModule(
|
|
1754
|
+
self.__module__
|
|
1755
|
+
).globalize()
|
|
1493
1756
|
# Maps index paths into composite return values to lists of IDs of files containing
|
|
1494
1757
|
# promised values for those return value items. An index path is a tuple of indices that
|
|
1495
1758
|
# traverses a nested data structure of lists, dicts, tuples or any other type supporting
|
|
@@ -1501,6 +1764,9 @@ class Job:
|
|
|
1501
1764
|
self._defer = None
|
|
1502
1765
|
self._tempDir = None
|
|
1503
1766
|
|
|
1767
|
+
# Holds flags set by set_debug_flag()
|
|
1768
|
+
self._debug_flags: set[str] = set()
|
|
1769
|
+
|
|
1504
1770
|
def __str__(self):
|
|
1505
1771
|
"""
|
|
1506
1772
|
Produce a useful logging string to identify this Job and distinguish it
|
|
@@ -1509,7 +1775,22 @@ class Job:
|
|
|
1509
1775
|
if self.description is None:
|
|
1510
1776
|
return repr(self)
|
|
1511
1777
|
else:
|
|
1512
|
-
return
|
|
1778
|
+
return "Job(" + str(self.description) + ")"
|
|
1779
|
+
|
|
1780
|
+
def check_initialized(self) -> None:
|
|
1781
|
+
"""
|
|
1782
|
+
Ensure that Job.__init__() has been called by any subclass __init__().
|
|
1783
|
+
|
|
1784
|
+
This uses the fact that the self._description instance variable should always
|
|
1785
|
+
be set after __init__().
|
|
1786
|
+
|
|
1787
|
+
If __init__() has not been called, raise an error.
|
|
1788
|
+
"""
|
|
1789
|
+
if not hasattr(self, "_description"):
|
|
1790
|
+
raise ValueError(
|
|
1791
|
+
f"Job instance of type {type(self)} has not been initialized. super().__init__() may not "
|
|
1792
|
+
f"have been called."
|
|
1793
|
+
)
|
|
1513
1794
|
|
|
1514
1795
|
@property
|
|
1515
1796
|
def jobStoreID(self) -> Union[str, TemporaryID]:
|
|
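The new Job.check_initialized() guard turns a forgotten super().__init__() call into an immediate, explicit error when the job is wired into a graph. A minimal sketch of the failure mode it catches, using a hypothetical MyJob subclass (not from the package):

```python
from toil.job import Job

class MyJob(Job):
    def __init__(self):
        # Bug: super().__init__() is never called, so self._description is missing.
        self.payload = 42

    def run(self, fileStore):
        return self.payload

root = Job()
# With the change above, addChild() calls check_initialized() on both jobs and
# raises ValueError pointing at the missing super().__init__() call, instead of
# failing later with a confusing AttributeError.
root.addChild(MyJob())
```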
@@ -1529,33 +1810,37 @@ class Job:
  def disk(self) -> int:
  """The maximum number of bytes of disk the job will require to run."""
  return self.description.disk
+
  @disk.setter
  def disk(self, val):
-
+ self.description.disk = val

  @property
  def memory(self):
  """The maximum number of bytes of memory the job will require to run."""
  return self.description.memory
+
  @memory.setter
  def memory(self, val):
-
+ self.description.memory = val

  @property
  def cores(self) -> Union[int, float]:
  """The number of CPU cores required."""
  return self.description.cores
+
  @cores.setter
  def cores(self, val):
-
+ self.description.cores = val

  @property
- def accelerators(self) ->
+ def accelerators(self) -> list[AcceleratorRequirement]:
  """Any accelerators, such as GPUs, that are needed."""
  return self.description.accelerators
+
  @accelerators.setter
- def accelerators(self, val:
-
+ def accelerators(self, val: list[ParseableAcceleratorRequirement]) -> None:
+ self.description.accelerators = val

  @property
  def preemptible(self) -> bool:
@@ -1565,15 +1850,30 @@ class Job:
  @deprecated(new_function_name="preemptible")
  def preemptable(self):
  return self.description.preemptible
+
  @preemptible.setter
  def preemptible(self, val):
-
+ self.description.preemptible = val

  @property
  def checkpoint(self) -> bool:
  """Determine if the job is a checkpoint job or not."""
  return isinstance(self._description, CheckpointJobDescription)

+ @property
+ def files_to_use(self) -> set[FileID]:
+ return self.description.files_to_use
+
+ @files_to_use.setter
+ def files_to_use(self, val: set[FileID]):
+ self.description.files_to_use = val
+
+ def add_to_files_to_use(self, val: FileID):
+ self.description.files_to_use.add(val)
+
+ def remove_from_files_to_use(self, val: FileID):
+ self.description.files_to_use.remove(val)
+
  def assignConfig(self, config: Config) -> None:
  """
  Assign the given config object.
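A rough sketch of how the new files/files_to_use plumbing could be exercised; the FileID values here are placeholders (in practice they would come from an earlier import or the file store), so this is illustrative rather than the package's own usage:

```python
from toil.job import Job
from toil.fileStores import FileID

# Hypothetical FileIDs, e.g. previously returned by a job store import.
input_ids: set[FileID] = set()

job = Job(memory="1G", cores=1, disk="2G", files=input_ids)

# The new property and helpers delegate to the JobDescription:
job.files_to_use                      # set of FileID the job intends to use
# job.add_to_files_to_use(some_file_id)
# job.remove_from_files_to_use(some_file_id)
```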
@@ -1641,6 +1941,11 @@ class Job:
  """
  if not isinstance(childJob, Job):
  raise RuntimeError("The type of the child job is not a job.")
+
+ # Check that both jobs have been initialized
+ self.check_initialized()
+ childJob.check_initialized()
+
  # Join the job graphs
  self._jobGraphsJoined(childJob)
  # Remember the child relationship
@@ -1668,6 +1973,11 @@ class Job:
  """
  if not isinstance(followOnJob, Job):
  raise RuntimeError("The type of the follow-on job is not a job.")
+
+ # Check that both jobs have been initialized
+ self.check_initialized()
+ followOnJob.check_initialized()
+
  # Join the job graphs
  self._jobGraphsJoined(followOnJob)
  # Remember the follow-on relationship
@@ -1677,7 +1987,7 @@ class Job:

  return followOnJob

- def hasPredecessor(self, job:
+ def hasPredecessor(self, job: "Job") -> bool:
  """Check if a given job is already a predecessor of this job."""
  return job in self._directPredecessors

@@ -1739,7 +2049,9 @@ class Job:

  def hasService(self, service: "Job.Service") -> bool:
  """Return True if the given Service is a service of this job, and False otherwise."""
- return service.hostID is None or self._description.hasServiceHostJob(
+ return service.hostID is None or self._description.hasServiceHostJob(
+ service.hostID
+ )

  # Convenience functions for creating jobs

@@ -1787,7 +2099,9 @@ class Job:
  :return: The new child job that wraps fn.
  """
  if PromisedRequirement.convertPromises(kwargs):
- return self.addChild(
+ return self.addChild(
+ PromisedRequirementJobFunctionWrappingJob.create(fn, *args, **kwargs)
+ )
  else:
  return self.addChild(JobFunctionWrappingJob(fn, *args, **kwargs))

@@ -1803,7 +2117,9 @@ class Job:
  :return: The new follow-on job that wraps fn.
  """
  if PromisedRequirement.convertPromises(kwargs):
- return self.addFollowOn(
+ return self.addFollowOn(
+ PromisedRequirementJobFunctionWrappingJob.create(fn, *args, **kwargs)
+ )
  else:
  return self.addFollowOn(JobFunctionWrappingJob(fn, *args, **kwargs))

@@ -1905,8 +2221,12 @@ class Job:
  raise JobPromiseConstraintError(self)
  # TODO: can we guarantee self.jobStoreID is populated and so pass that here?
  with self._promiseJobStore.write_file_stream() as (fileHandle, jobStoreFileID):
- promise = UnfulfilledPromiseSentinel(
-
+ promise = UnfulfilledPromiseSentinel(
+ str(self.description), jobStoreFileID, False
+ )
+ logger.debug(
+ "Issuing promise %s for result of %s", jobStoreFileID, self.description
+ )
  pickle.dump(promise, fileHandle, pickle.HIGHEST_PROTOCOL)
  self._rvs[path].append(jobStoreFileID)
  return self._promiseJobStore.config.jobStore, jobStoreFileID
@@ -1956,7 +2276,7 @@ class Job:
  self.checkJobGraphAcylic()
  self.checkNewCheckpointsAreLeafVertices()

- def getRootJobs(self) ->
+ def getRootJobs(self) -> set["Job"]:
  """
  Return the set of root job objects that contain this job.

@@ -1988,8 +2308,9 @@ class Job:
  """
  rootJobs = self.getRootJobs()
  if len(rootJobs) != 1:
- raise JobGraphDeadlockException(
-
+ raise JobGraphDeadlockException(
+ "Graph does not contain exactly one" " root job: %s" % rootJobs
+ )

  def checkJobGraphAcylic(self):
  """
@@ -2009,15 +2330,15 @@ class Job:

  Only deals with jobs created here, rather than loaded from the job store.
  """
- #Get the root jobs
+ # Get the root jobs
  roots = self.getRootJobs()
  if len(roots) == 0:
  raise JobGraphDeadlockException("Graph contains no root jobs due to cycles")

- #Get implied edges
+ # Get implied edges
  extraEdges = self._getImpliedEdges(roots)

- #Check for directed cycles in the augmented graph
+ # Check for directed cycles in the augmented graph
  visited = set()
  for root in roots:
  root._checkJobGraphAcylicDFS([], visited, extraEdges)
@@ -2027,17 +2348,23 @@ class Job:
  if self not in visited:
  visited.add(self)
  stack.append(self)
- for successor in [
+ for successor in [
+ self._registry[jID]
+ for jID in self.description.allSuccessors()
+ if jID in self._registry
+ ] + extraEdges[self]:
  # Grab all the successors in the current registry (i.e. added form this node) and look at them.
  successor._checkJobGraphAcylicDFS(stack, visited, extraEdges)
  if stack.pop() != self:
  raise RuntimeError("The stack ordering/elements was changed.")
  if self in stack:
  stack.append(self)
- raise JobGraphDeadlockException(
+ raise JobGraphDeadlockException(
+ "A cycle of job dependencies has been detected '%s'" % stack
+ )

  @staticmethod
- def _getImpliedEdges(roots) ->
+ def _getImpliedEdges(roots) -> dict["Job", list["Job"]]:
  """
  Gets the set of implied edges (between children and follow-ons of a common job).

@@ -2047,17 +2374,17 @@ class Job:

  :returns: dict from Job object to list of Job objects that must be done before it can start.
  """
- #Get nodes (Job objects) in job graph
+ # Get nodes (Job objects) in job graph
  nodes = set()
  for root in roots:
  root._collectAllSuccessors(nodes)

  ##For each follow-on edge calculate the extra implied edges
- #Adjacency list of implied edges, i.e. map of jobs to lists of jobs
- #connected by an implied edge
+ # Adjacency list of implied edges, i.e. map of jobs to lists of jobs
+ # connected by an implied edge
  extraEdges = {n: [] for n in nodes}
  for job in nodes:
-
+ # Get all the nonempty successor phases
  phases = [p for p in job.description.successor_phases if len(p) > 0]
  for depth in range(1, len(phases)):
  # Add edges from all jobs in the earlier/upper subtrees to all
@@ -2077,7 +2404,11 @@ class Job:
  for inUpper in reacheable:
  # Add extra edges to the roots of all the lower subtrees
  # But skip anything in the lower subtree not in the current _registry (i.e. not created hear)
- extraEdges[inUpper] += [
+ extraEdges[inUpper] += [
+ job._registry[lowerID]
+ for lowerID in lower
+ if lowerID in job._registry
+ ]

  return extraEdges

@@ -2097,17 +2428,21 @@ class Job:
  :raises toil.job.JobGraphDeadlockException: if there exists a job being added to the graph for which \
  checkpoint=True and which is not a leaf.
  """
- roots =
+ roots = (
+ self.getRootJobs()
+ )  # Roots jobs of component, these are preexisting jobs in the graph

  # All jobs in the component of the job graph containing self
  jobs = set()
- list(map(lambda x
+ list(map(lambda x: x._collectAllSuccessors(jobs), roots))

  # Check for each job for which checkpoint is true that it is a cut vertex or leaf
  for y in [x for x in jobs if x.checkpoint]:
- if y not in roots:
+ if y not in roots:  # The roots are the prexisting jobs
  if not Job._isLeafVertex(y):
- raise JobGraphDeadlockException(
+ raise JobGraphDeadlockException(
+ "New checkpoint job %s is not a leaf in the job graph" % y
+ )

  ####################################################
  # Deferred function system
@@ -2136,7 +2471,9 @@ class Job:
  :param dict kwargs: The keyword arguments to the function
  """
  if self._defer is None:
- raise Exception(
+ raise Exception(
+ "A deferred function may only be registered with a job while that job is running."
+ )
  self._defer(DeferredFunction.create(function, *args, **kwargs))

  ####################################################
@@ -2145,7 +2482,7 @@ class Job:
  # and defining a service (Job.Service)
  ####################################################

- class Runner
+ class Runner:
  """Used to setup and run Toil workflow."""

  @staticmethod
@@ -2161,7 +2498,9 @@ class Job:
  return parser

  @staticmethod
- def getDefaultOptions(
+ def getDefaultOptions(
+ jobStore: Optional[str] = None, jobstore_as_flag: bool = False
+ ) -> Namespace:
  """
  Get default options for a toil workflow.

@@ -2172,9 +2511,13 @@ class Job:
  """
  # setting jobstore_as_flag to True allows the user to declare the jobstore in the config file instead
  if not jobstore_as_flag and jobStore is None:
- raise RuntimeError(
-
-
+ raise RuntimeError(
+ "The jobstore argument cannot be missing if the jobstore_as_flag argument is set "
+ "to False!"
+ )
+ parser = Job.Runner.getDefaultArgumentParser(
+ jobstore_as_flag=jobstore_as_flag
+ )
  arguments = []
  if jobstore_as_flag and jobStore is not None:
  arguments = ["--jobstore", jobStore]
@@ -2183,7 +2526,10 @@ class Job:
  return parser.parse_args(args=arguments)

  @staticmethod
- def addToilOptions(
+ def addToilOptions(
+ parser: Union["OptionParser", ArgumentParser],
+ jobstore_as_flag: bool = False,
+ ) -> None:
  """
  Adds the default toil options to an :mod:`optparse` or :mod:`argparse`
  parser object.
@@ -2223,19 +2569,29 @@ class Job:
  Is not executed as a job; runs within a ServiceHostJob.
  """

- def __init__(
+ def __init__(
+ self,
+ memory=None,
+ cores=None,
+ disk=None,
+ accelerators=None,
+ preemptible=None,
+ unitName=None,
+ ):
  """
  Memory, core and disk requirements are specified identically to as in \
  :func:`toil.job.Job.__init__`.
  """
  # Save the requirements in ourselves so they are visible on `self` to user code.
- super().__init__(
-
-
-
-
-
-
+ super().__init__(
+ {
+ "memory": memory,
+ "cores": cores,
+ "disk": disk,
+ "accelerators": accelerators,
+ "preemptible": preemptible,
+ }
+ )

  # And the unit name
  self.unitName = unitName
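For context on the Service requirements being forwarded above, a minimal Job.Service subclass looks roughly like this; the service itself (a toy connection-string provider) is purely illustrative:

```python
from toil.job import Job

class TinyService(Job.Service):
    """Illustrative service that hands its clients a connection string."""

    def __init__(self):
        super().__init__(memory="256M", cores=0.5, disk="100M")

    def start(self, job):
        # Runs on the worker hosting the service; the return value is delivered
        # to client jobs through the promise returned by addService().
        return "tcp://localhost:9999"

    def check(self):
        # Return True while healthy; failure is signalled by raising.
        return True

    def stop(self, job):
        pass

root = Job()
connection_promise = root.addService(TinyService())
```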
@@ -2313,15 +2669,19 @@ class Job:

  def filter_main(module_name, class_name):
  try:
- if module_name ==
+ if module_name == "__main__":
  return getattr(userModule, class_name)
  else:
  return getattr(importlib.import_module(module_name), class_name)
  except:
- if module_name ==
- logger.debug(
+ if module_name == "__main__":
+ logger.debug(
+ "Failed getting %s from module %s.", class_name, userModule
+ )
  else:
- logger.debug(
+ logger.debug(
+ "Failed getting %s from module %s.", class_name, module_name
+ )
  raise

  class FilteredUnpickler(pickle.Unpickler):
@@ -2331,7 +2691,9 @@ class Job:
  unpickler = FilteredUnpickler(fileHandle)

  runnable = unpickler.load()
- if requireInstanceOf is not None and not isinstance(
+ if requireInstanceOf is not None and not isinstance(
+ runnable, requireInstanceOf
+ ):
  raise RuntimeError(f"Did not find a {requireInstanceOf} when expected")

  return runnable
@@ -2364,15 +2726,28 @@ class Job:
  # File may be gone if the job is a service being re-run and the accessing job is
  # already complete.
  if jobStore.file_exists(promiseFileStoreID):
- logger.debug(
+ logger.debug(
+ "Resolve promise %s from %s with a %s",
+ promiseFileStoreID,
+ self,
+ type(promisedValue),
+ )
  with jobStore.update_file_stream(promiseFileStoreID) as fileHandle:
  try:
- pickle.dump(
+ pickle.dump(
+ promisedValue, fileHandle, pickle.HIGHEST_PROTOCOL
+ )
  except AttributeError:
- logger.exception(
+ logger.exception(
+ "Could not pickle promise result %s", promisedValue
+ )
  raise
  else:
- logger.debug(
+ logger.debug(
+ "Do not resolve promise %s from %s because it is no longer needed",
+ promiseFileStoreID,
+ self,
+ )

  # Functions associated with Job.checkJobGraphAcyclic to establish that the job graph does not
  # contain any cycles of dependencies:
@@ -2397,7 +2772,7 @@ class Job:
  # We added this successor locally
  todo.append(self._registry[successorID])

- def getTopologicalOrderingOfJobs(self) ->
+ def getTopologicalOrderingOfJobs(self) -> list["Job"]:
  """
  :returns: a list of jobs such that for all pairs of indices i, j for which i < j, \
  the job at index i can be run before the job at index j.
@@ -2419,8 +2794,8 @@ class Job:
  job = todo[-1]
  todo.pop()

- #Do not add the job to the ordering until all its predecessors have been
- #added to the ordering
+ # Do not add the job to the ordering until all its predecessors have been
+ # added to the ordering
  outstandingPredecessor = False
  for predJob in job._directPredecessors:
  if predJob.jobStoreID not in visited:
@@ -2445,7 +2820,7 @@ class Job:
  # Storing Jobs into the JobStore
  ####################################################

- def _register(self, jobStore) ->
+ def _register(self, jobStore) -> list[tuple[TemporaryID, str]]:
  """
  If this job lacks a JobStore-assigned ID, assign this job an ID.
  Must be called for each job before it is saved to the JobStore for the first time.
@@ -2474,7 +2849,7 @@ class Job:
  # We already have an ID. No assignment or reference rewrite necessary.
  return []

- def _renameReferences(self, renames:
+ def _renameReferences(self, renames: dict[TemporaryID, str]) -> None:
  """
  Apply the given dict of ID renames to all references to other jobs.

@@ -2510,8 +2885,8 @@ class Job:

  # Clear out old Cactus compatibility fields that don't need to be
  # preserved and shouldn't be serialized.
- if hasattr(self,
- delattr(self,
+ if hasattr(self, "_services"):
+ delattr(self, "_services")

  # Remember fields we will overwrite
  description = self._description
@@ -2529,7 +2904,9 @@ class Job:
  self._directPredecessors = set()

  # Save the body of the job
- with jobStore.write_file_stream(
+ with jobStore.write_file_stream(
+ description.jobStoreID, cleanup=True
+ ) as (fileHandle, fileStoreID):
  pickle.dump(self, fileHandle, pickle.HIGHEST_PROTOCOL)
  finally:
  # Restore important fields (before handling errors)
@@ -2552,10 +2929,15 @@ class Job:
  # filter_main() in _unpickle( ) do its job of resolving any user-defined type or function.
  userScript = self.getUserScript().globalize()

- #
- self._description.
+ # Connect the body of the job to the JobDescription
+ self._description.attach_body(fileStoreID, userScript)

- def _saveJobGraph(
+ def _saveJobGraph(
+ self,
+ jobStore: "AbstractJobStore",
+ saveSelf: bool = False,
+ returnValues: bool = None,
+ ):
  """
  Save job data and new JobDescriptions to the given job store for this
  job and all descending jobs, including services.
@@ -2606,7 +2988,12 @@ class Job:
  # Set up to save last job first, so promises flow the right way
  ordering.reverse()

- logger.debug(
+ logger.debug(
+ "Saving graph of %d jobs, %d non-service, %d new",
+ len(allJobs),
+ len(ordering),
+ len(fakeToReal),
+ )

  # Make sure we're the root
  if ordering[-1] != self:
@@ -2619,15 +3006,15 @@ class Job:
  if not isinstance(j, ServiceHostJob) and j.jobStoreID not in ordered_ids:
  raise RuntimeError(f"{j} not found in ordering {ordering}")

-
-
  if not saveSelf:
  # Fulfil promises for return values (even if value is None)
  self._fulfillPromises(returnValues, jobStore)

  for job in ordering:
  logger.debug("Processing job %s", job.description)
- for serviceBatch in reversed(
+ for serviceBatch in reversed(
+ list(job.description.serviceHostIDsInBatches())
+ ):
  # For each batch of service host jobs in reverse order they start
  for serviceID in serviceBatch:
  logger.debug("Processing service %s", serviceID)
@@ -2665,7 +3052,8 @@ class Job:
  # All other job vertices in the graph are checked by checkNewCheckpointsAreLeafVertices
  if self.checkpoint and not Job._isLeafVertex(self):
  raise JobGraphDeadlockException(
-
+ "New checkpoint job %s is not a leaf in the job graph" % self
+ )

  # Save the root job and all descendants and services
  self._saveJobGraph(jobStore, saveSelf=True)
@@ -2682,45 +3070,39 @@ class Job:

  @classmethod
  def loadJob(
- cls,
+ cls, job_store: "AbstractJobStore", job_description: JobDescription
  ) -> "Job":
  """
  Retrieves a :class:`toil.job.Job` instance from a JobStore

- :param
- :param
+ :param job_store: The job store.
+ :param job_description: the JobDescription of the job to retrieve.
  :returns: The job referenced by the JobDescription.
  """
- # Grab the command that connects the description to the job body
- command = jobDescription.command

-
-
-
- userModule = ModuleDescriptor.fromCommand(commandTokens[2:])
- logger.debug('Loading user module %s.', userModule)
- userModule = cls._loadUserModule(userModule)
- pickleFile = commandTokens[1]
+ file_store_id, user_module_descriptor = job_description.get_body()
+ logger.debug("Loading user module %s.", user_module_descriptor)
+ user_module = cls._loadUserModule(user_module_descriptor)

- #Loads context manager using file stream
- if
-
+ # Loads context manager using file stream
+ if file_store_id == "firstJob":
+ # This one is actually a shared file name and not a file ID.
+ manager = job_store.read_shared_file_stream(file_store_id)
  else:
- manager =
+ manager = job_store.read_file_stream(file_store_id)

- #Open and unpickle
- with manager as
+ # Open and unpickle
+ with manager as file_handle:

- job = cls._unpickle(
+ job = cls._unpickle(user_module, file_handle, requireInstanceOf=Job)
  # Fill in the current description
- job._description =
+ job._description = job_description

  # Set up the registry again, so children and follow-ons can be added on the worker
  job._registry = {job.jobStoreID: job}

  return job

-
  def _run(self, jobGraph=None, fileStore=None, **kwargs):
  """
  Function which worker calls to ultimately invoke
@@ -2756,11 +3138,16 @@ class Job:
  """
  if stats is not None:
  startTime = time.time()
- startClock = get_total_cpu_time()
+ startClock = ResourceMonitor.get_total_cpu_time()
  baseDir = os.getcwd()

  yield

+ if "download_only" in self._debug_flags:
+ # We should stop right away
+ logger.debug("Job did not stop itself after downloading files; stopping.")
+ raise DebugStoppingPointReached()
+
  # If the job is not a checkpoint job, add the promise files to delete
  # to the list of jobStoreFileIDs to delete
  # TODO: why is Promise holding a global list here???
@@ -2780,14 +3167,17 @@ class Job:
  os.chdir(baseDir)
  # Finish up the stats
  if stats is not None:
- totalCpuTime, totalMemoryUsage =
+ totalCpuTime, totalMemoryUsage = (
+ ResourceMonitor.get_total_cpu_time_and_memory_usage()
+ )
  stats.jobs.append(
  Expando(
  time=str(time.time() - startTime),
  clock=str(totalCpuTime - startClock),
  class_name=self._jobName(),
  memory=str(totalMemoryUsage),
- requested_cores=str(self.cores)
+ requested_cores=str(self.cores),
+ disk=str(fileStore.get_disk_usage()),
  )
  )

@@ -2801,7 +3191,7 @@ class Job:
  """
  Run the job, and serialise the next jobs.

- It marks the job as completed (by clearing its
+ It marks the job as completed (by clearing its body) and creates the
  successor relationships to new successors, but it doesn't actually
  commit those updates to the current job into the JobStore.

@@ -2832,12 +3222,11 @@ class Job:
  self._defer = None
  self._fileStore = None

-
  # Serialize the new Jobs defined by the run method to the jobStore
  self._saveJobGraph(jobStore, saveSelf=False, returnValues=returnValues)

- # Clear out the
- self.description.
+ # Clear out the body, because the job is done.
+ self.description.detach_body()

  # That and the new child/follow-on relationships will need to be
  # recorded later by an update() of the JobDescription.
@@ -2848,6 +3237,40 @@ class Job:
  """
  return self._description.displayName

+ def set_debug_flag(self, flag: str) -> None:
+ """
+ Enable the given debug option on the job.
+ """
+ self._debug_flags.add(flag)
+
+ def has_debug_flag(self, flag: str) -> bool:
+ """
+ Return true if the given debug flag is set.
+ """
+
+ return flag in self._debug_flags
+
+ def files_downloaded_hook(
+ self, host_and_job_paths: Optional[list[tuple[str, str]]] = None
+ ) -> None:
+ """
+ Function that subclasses can call when they have downloaded their input files.
+
+ Will abort the job if the "download_only" debug flag is set.
+
+ Can be hinted a list of file path pairs outside and inside the job
+ container, in which case the container environment can be
+ reconstructed.
+ """
+
+ if self.has_debug_flag("download_only"):
+ # Stop the worker!
+ logger.info("Job has downloaded its files. Stopping.")
+ # Send off the path mapping for the debugging wrapper.
+ raise FilesDownloadedStoppingPointReached(
+ "Files downloaded", host_and_job_paths=host_and_job_paths
+ )
+

  class JobException(Exception):
  """General job exception."""
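The debug-flag hooks added above let a caller flag a job so that it stops as soon as its inputs are staged. A hedged sketch of how the pieces fit together (MyDownloadingJob is a hypothetical subclass; the exceptions named are the ones the new code raises):

```python
# Sketch only: assumes a Job subclass whose run() calls files_downloaded_hook()
# once its inputs have been localized.
job = MyDownloadingJob()               # hypothetical Job subclass
job.set_debug_flag("download_only")

# Inside MyDownloadingJob.run(), after staging inputs:
#   self.files_downloaded_hook(
#       host_and_job_paths=[("/host/data.txt", "/container/data.txt")]
#   )
# With the flag set, the hook raises FilesDownloadedStoppingPointReached, and
# the surrounding _run() machinery raises DebugStoppingPointReached if the job
# never stopped itself after downloading.
```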
@@ -2861,6 +3284,7 @@ class JobGraphDeadlockException(JobException):
  An exception raised in the event that a workflow contains an unresolvable \
  dependency, such as a cycle. See :func:`toil.job.Job.checkJobGraphForDeadlocks`.
  """
+
  def __init__(self, string):
  super().__init__(string)

@@ -2869,6 +3293,7 @@ class FunctionWrappingJob(Job):
  """
  Job used to wrap a function. In its `run` method the wrapped function is called.
  """
+
  def __init__(self, userFunction, *args, **kwargs):
  """
  :param callable userFunction: The function to wrap. It will be called with ``*args`` and
@@ -2888,7 +3313,9 @@ class FunctionWrappingJob(Job):
  if argSpec.defaults is None:
  argDict = {}
  else:
- argDict = dict(
+ argDict = dict(
+ list(zip(argSpec.args[-len(argSpec.defaults) :], argSpec.defaults))
+ )

  def resolve(key, default=None, dehumanize=False):
  try:
@@ -2906,36 +3333,48 @@ class FunctionWrappingJob(Job):
  value = human2bytes(value)
  return value

- super().__init__(
-
-
-
-
-
-
+ super().__init__(
+ memory=resolve("memory", dehumanize=True),
+ cores=resolve("cores", dehumanize=True),
+ disk=resolve("disk", dehumanize=True),
+ accelerators=resolve("accelerators"),
+ preemptible=resolve("preemptible"),
+ checkpoint=resolve("checkpoint", default=False),
+ unitName=resolve("name", default=None),
+ )

- self.userFunctionModule = ModuleDescriptor.forModule(
+ self.userFunctionModule = ModuleDescriptor.forModule(
+ userFunction.__module__
+ ).globalize()
  self.userFunctionName = str(userFunction.__name__)
  self.description.jobName = self.userFunctionName
  self._args = args
  self._kwargs = kwargs

  def _getUserFunction(self):
- logger.debug(
-
-
+ logger.debug(
+ "Loading user function %s from module %s.",
+ self.userFunctionName,
+ self.userFunctionModule,
+ )
  userFunctionModule = self._loadUserModule(self.userFunctionModule)
  return getattr(userFunctionModule, self.userFunctionName)

- def run(self,fileStore):
- userFunction = self._getUserFunction(
+ def run(self, fileStore):
+ userFunction = self._getUserFunction()
  return userFunction(*self._args, **self._kwargs)

  def getUserScript(self):
  return self.userFunctionModule

  def _jobName(self):
- return ".".join(
+ return ".".join(
+ (
+ self.__class__.__name__,
+ self.userFunctionModule.name,
+ self.userFunctionName,
+ )
+ )


  class JobFunctionWrappingJob(FunctionWrappingJob):
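The resolve() helper above is what lets per-call requirement keywords flow from the function-wrapping helpers into the job description. Roughly, usage looks like the following sketch (the analyze function is illustrative only):

```python
from toil.job import Job

def analyze(job, sample):
    # Job-function jobs receive the wrapping Job as their first argument.
    return sample.upper()

# Requirement keywords are picked out of **kwargs by resolve(), string sizes
# are run through human2bytes, and the remaining kwargs go to the function.
j = Job.wrapJobFn(analyze, "s1", memory="2G", cores=1, disk="3G", preemptible=True)
```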
@@ -2981,10 +3420,20 @@ class PromisedRequirementFunctionWrappingJob(FunctionWrappingJob):
  Spawns child function using parent function parameters and fulfilled promised
  resource requirements.
  """
+
  def __init__(self, userFunction, *args, **kwargs):
  self._promisedKwargs = kwargs.copy()
  # Replace resource requirements in intermediate job with small values.
- kwargs.update(
+ kwargs.update(
+ dict(
+ disk="1M",
+ memory="32M",
+ cores=0.1,
+ accelerators=[],
+ preemptible=True,
+ preemptable=True,
+ )
+ )
  super().__init__(userFunction, *args, **kwargs)

  @classmethod
@@ -3009,7 +3458,9 @@ class PromisedRequirementFunctionWrappingJob(FunctionWrappingJob):
  for requirement in REQUIREMENT_NAMES:
  try:
  if isinstance(self._promisedKwargs[requirement], PromisedRequirement):
- self._promisedKwargs[requirement] = self._promisedKwargs[
+ self._promisedKwargs[requirement] = self._promisedKwargs[
+ requirement
+ ].getValue()
  except KeyError:
  pass

@@ -3023,7 +3474,9 @@ class PromisedRequirementJobFunctionWrappingJob(PromisedRequirementFunctionWrapp
  def run(self, fileStore):
  self.evaluatePromisedRequirements()
  userFunction = self._getUserFunction()
- return self.addChildJobFn(
+ return self.addChildJobFn(
+ userFunction, *self._args, **self._promisedKwargs
+ ).rv()


  class EncapsulatedJob(Job):
@@ -3050,6 +3503,7 @@ class EncapsulatedJob(Job):
  is the return value of the root job, e.g. A().encapsulate().rv() and A().rv() will resolve to
  the same value after A or A.encapsulate() has been run.
  """
+
  def __init__(self, job, unitName=None):
  """
  :param toil.job.Job job: the job to encapsulate.
@@ -3069,7 +3523,12 @@ class EncapsulatedJob(Job):
  Job.addChild(self, job)
  # Use small resource requirements for dummy Job instance.
  # But not too small, or the job won't have enough resources to safely start up Toil.
- self.encapsulatedFollowOn = Job(
+ self.encapsulatedFollowOn = Job(
+ disk="100M",
+ memory="512M",
+ cores=0.1,
+ unitName=None if unitName is None else unitName + "-followOn",
+ )
  Job.addFollowOn(self, self.encapsulatedFollowOn)
  else:
  # Unpickling on the worker, to be run as a no-op.
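The dummy follow-on set up above is the heart of encapsulation; in user code the pattern is roughly:

```python
from toil.job import Job

def step(job):
    return None

a = Job.wrapJobFn(step)          # stand-in for the root of a larger subgraph
wrapper = a.encapsulate()

# Children and follow-ons added to the wrapper only run after everything in
# the encapsulated subgraph has finished, and wrapper.rv() resolves to a.rv().
wrapper.addFollowOnJobFn(step)
```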
@@ -3081,17 +3540,25 @@ class EncapsulatedJob(Job):

  def addChild(self, childJob):
  if self.encapsulatedFollowOn is None:
- raise RuntimeError(
+ raise RuntimeError(
+ "Children cannot be added to EncapsulatedJob while it is running"
+ )
  return Job.addChild(self.encapsulatedFollowOn, childJob)

  def addService(self, service, parentService=None):
  if self.encapsulatedFollowOn is None:
- raise RuntimeError(
-
+ raise RuntimeError(
+ "Services cannot be added to EncapsulatedJob while it is running"
+ )
+ return Job.addService(
+ self.encapsulatedFollowOn, service, parentService=parentService
+ )

  def addFollowOn(self, followOnJob):
  if self.encapsulatedFollowOn is None:
- raise RuntimeError(
+ raise RuntimeError(
+ "Follow-ons cannot be added to EncapsulatedJob while it is running"
+ )
  return Job.addFollowOn(self.encapsulatedFollowOn, followOnJob)

  def rv(self, *path) -> "Promise":
@@ -3134,6 +3601,7 @@ class ServiceHostJob(Job):
  """
  Job that runs a service. Used internally by Toil. Users should subclass Service instead of using this.
  """
+
  def __init__(self, service):
  """
  This constructor should not be called by a user.
@@ -3144,12 +3612,17 @@ class ServiceHostJob(Job):

  # Make sure the service hasn't been given a host already.
  if service.hostID is not None:
- raise RuntimeError(
+ raise RuntimeError(
+ "Cannot set the host. The service has already been given a host."
+ )

  # Make ourselves with name info from the Service and a
  # ServiceJobDescription that has the service control flags.
- super().__init__(
-
+ super().__init__(
+ **service.requirements,
+ unitName=service.unitName,
+ descriptionClass=ServiceJobDescription,
+ )

  # Make sure the service knows it has a host now
  service.hostID = self.jobStoreID
@@ -3187,13 +3660,19 @@ class ServiceHostJob(Job):
  # stuff onto us.

  def addChild(self, child):
- raise RuntimeError(
+ raise RuntimeError(
+ "Service host jobs cannot have children, follow-ons, or services"
+ )

  def addFollowOn(self, followOn):
- raise RuntimeError(
+ raise RuntimeError(
+ "Service host jobs cannot have children, follow-ons, or services"
+ )

  def addService(self, service, parentService=None):
- raise RuntimeError(
+ raise RuntimeError(
+ "Service host jobs cannot have children, follow-ons, or services"
+ )

  def saveBody(self, jobStore):
  """
@@ -3202,7 +3681,9 @@ class ServiceHostJob(Job):
  # Save unpickled service
  service = self.service
  # Serialize service
- self.pickledService = pickle.dumps(
+ self.pickledService = pickle.dumps(
+ self.service, protocol=pickle.HIGHEST_PROTOCOL
+ )
  # Clear real service until we have the module to load it back
  self.service = None
  # Save body as normal
@@ -3213,24 +3694,30 @@ class ServiceHostJob(Job):

  def run(self, fileStore):
  # Unpickle the service
- logger.debug(
+ logger.debug("Loading service module %s.", self.serviceModule)
  userModule = self._loadUserModule(self.serviceModule)
- service = self._unpickle(
+ service = self._unpickle(
+ userModule, BytesIO(self.pickledService), requireInstanceOf=Job.Service
+ )
  self.pickledService = None
  # Make sure it has the config, since it wasn't load()-ed via the JobStore
  service.assignConfig(fileStore.jobStore.config)
- #Start the service
+ # Start the service
  startCredentials = service.start(self)
  try:
- #The start credentials must be communicated to processes connecting to
- #the service, to do this while the run method is running we
- #cheat and set the return value promise within the run method
+ # The start credentials must be communicated to processes connecting to
+ # the service, to do this while the run method is running we
+ # cheat and set the return value promise within the run method
  self._fulfillPromises(startCredentials, fileStore.jobStore)
- self._rvs =
-
-
- #
-
+ self._rvs = (
+ {}
+ )  # Set this to avoid the return values being updated after the
+ # run method has completed!
+
+ # Now flag that the service is running jobs can connect to it
+ logger.debug(
+ "Removing the start jobStoreID to indicate that establishment of the service"
+ )
  if self.description.startJobStoreID is None:
  raise RuntimeError("No start jobStoreID to remove.")
  if fileStore.jobStore.file_exists(self.description.startJobStoreID):
@@ -3238,23 +3725,33 @@ class ServiceHostJob(Job):
  if fileStore.jobStore.file_exists(self.description.startJobStoreID):
  raise RuntimeError("The start jobStoreID is not a file.")

- #Now block until we are told to stop, which is indicated by the removal
- #of a file
+ # Now block until we are told to stop, which is indicated by the removal
+ # of a file
  if self.description.terminateJobStoreID is None:
  raise RuntimeError("No terminate jobStoreID to use.")
  while True:
  # Check for the terminate signal
- if not fileStore.jobStore.file_exists(
-
-
-
+ if not fileStore.jobStore.file_exists(
+ self.description.terminateJobStoreID
+ ):
+ logger.debug(
+ "Detected that the terminate jobStoreID has been removed so exiting"
+ )
+ if not fileStore.jobStore.file_exists(
+ self.description.errorJobStoreID
+ ):
+ raise RuntimeError(
+ "Detected the error jobStoreID has been removed so exiting with an error"
+ )
  break

  # Check the service's status and exit if failed or complete
  try:
  if not service.check():
- logger.debug(
-
+ logger.debug(
+ "The service has finished okay, but we have not been told to terminate. "
+ "Waiting for leader to tell us to come back."
+ )
  # TODO: Adjust leader so that it keys on something
  # other than the services finishing (assumed to be
  # after the children) to know when to run follow-on
@@ -3265,7 +3762,9 @@ class ServiceHostJob(Job):
  logger.debug("Detected abnormal termination of the service")
  raise

- time.sleep(
+ time.sleep(
+ fileStore.jobStore.config.servicePollingInterval
+ )  # Avoid excessive polling

  logger.debug("Service is done")
  finally:
@@ -3276,6 +3775,354 @@ class ServiceHostJob(Job):
|
|
|
3276
3775
|
return self.serviceModule
|
|
3277
3776
|
|
|
3278
3777
|
|
|
3778
|
+
class FileMetadata(NamedTuple):
|
|
3779
|
+
"""
|
|
3780
|
+
Metadata for a file.
|
|
3781
|
+
source is the URL to grab the file from
|
|
3782
|
+
parent_dir is parent directory of the source
|
|
3783
|
+
size is the size of the file. Is none if the filesize cannot be retrieved.
|
|
3784
|
+
"""
|
|
3785
|
+
|
|
3786
|
+
source: str
|
|
3787
|
+
parent_dir: str
|
|
3788
|
+
size: Optional[int]
|
|
3789
|
+
|
|
3790
|
+
|
|
3791
|
+
def potential_absolute_uris(
|
|
3792
|
+
uri: str,
|
|
3793
|
+
path: list[str],
|
|
3794
|
+
importer: Optional[str] = None,
|
|
3795
|
+
execution_dir: Optional[str] = None,
|
|
3796
|
+
) -> Iterator[str]:
|
|
3797
|
+
"""
|
|
3798
|
+
Get potential absolute URIs to check for an imported file.
|
|
3799
|
+
|
|
3800
|
+
Given a URI or bare path, yield in turn all the URIs, with schemes, where we
|
|
3801
|
+
should actually try to find it, given that we want to search under/against
|
|
3802
|
+
the given paths or URIs, the current directory, and the given importing WDL
|
|
3803
|
+
document if any.
|
|
3804
|
+
"""
|
|
3805
|
+
|
|
3806
|
+
if uri == "":
|
|
3807
|
+
# Empty URIs can't come from anywhere.
|
|
3808
|
+
return
|
|
3809
|
+
|
|
3810
|
+
# We need to brute-force find this URI relative to:
|
|
3811
|
+
#
|
|
3812
|
+
# 1. Itself if a full URI.
|
|
3813
|
+
#
|
|
3814
|
+
# 2. Importer's URL, if importer is a URL and this is a
|
|
3815
|
+
# host-root-relative URL starting with / or scheme-relative
|
|
3816
|
+
# starting with //, or just plain relative.
|
|
3817
|
+
#
|
|
3818
|
+
# 3. Current directory, if a relative path.
|
|
3819
|
+
#
|
|
3820
|
+
# 4. All the prefixes in "path".
|
|
3821
|
+
#
|
|
3822
|
+
# If it can't be found anywhere, we ought to (probably) throw
|
|
3823
|
+
# FileNotFoundError like the MiniWDL implementation does, with a
|
|
3824
|
+
# correct errno.
|
|
3825
|
+
#
|
|
3826
|
+
# To do this, we have AbstractFileStore.read_from_url, which can read a
|
|
3827
|
+
# URL into a binary-mode writable, or throw some kind of unspecified
|
|
3828
|
+
# exception if the source doesn't exist or can't be fetched.
|
|
3829
|
+
|
|
3830
|
+
# This holds scheme-applied full URIs for all the places to search.
|
|
3831
|
+
full_path_list = []
|
|
3832
|
+
|
|
3833
|
+
if importer is not None:
|
|
3834
|
+
# Add the place the imported file came form, to search first.
|
|
3835
|
+
full_path_list.append(Toil.normalize_uri(importer))
|
|
3836
|
+
|
|
3837
|
+
# Then the current directory. We need to make sure to include a filename component here or it will treat the current directory with no trailing / as a document and relative paths will look 1 level up.
|
|
3838
|
+
# When importing on a worker, the cwd will be a tmpdir and will result in FileNotFoundError after os.path.abspath, so override with the execution dir
|
|
3839
|
+
full_path_list.append(Toil.normalize_uri(execution_dir or ".") + "/.")
|
|
3840
|
+
|
|
3841
|
+
# Then the specified paths.
|
|
3842
|
+
# TODO:
|
|
3843
|
+
# https://github.com/chanzuckerberg/miniwdl/blob/e3e8ef74e80fbe59f137b0ad40b354957915c345/WDL/Tree.py#L1479-L1482
|
|
3844
|
+
# seems backward actually and might do these first!
|
|
3845
|
+
full_path_list += [Toil.normalize_uri(p) for p in path]
|
|
3846
|
+
|
|
3847
|
+
# This holds all the URIs we tried and failed with.
|
|
3848
|
+
failures: set[str] = set()
|
|
3849
|
+
|
|
3850
|
+
for candidate_base in full_path_list:
|
|
3851
|
+
# Try fetching based off each base URI
|
|
3852
|
+
candidate_uri = urljoin(candidate_base, uri)
|
|
3853
|
+
if candidate_uri in failures:
|
|
3854
|
+
# Already tried this one, maybe we have an absolute uri input.
|
|
3855
|
+
continue
|
|
3856
|
+
logger.debug(
|
|
3857
|
+
"Consider %s which is %s off of %s", candidate_uri, uri, candidate_base
|
|
3858
|
+
)
|
|
3859
|
+
|
|
3860
|
+
# Try it
|
|
3861
|
+
yield candidate_uri
|
|
3862
|
+
# If we come back it didn't work
|
|
3863
|
+
failures.add(candidate_uri)
|
|
3864
|
+
|
|
3865
|
+
|
|
3866
|
+
def get_file_sizes(
|
|
3867
|
+
filenames: List[str],
|
|
3868
|
+
file_source: AbstractJobStore,
|
|
3869
|
+
search_paths: Optional[List[str]] = None,
|
|
3870
|
+
include_remote_files: bool = True,
|
|
3871
|
+
execution_dir: Optional[str] = None,
|
|
3872
|
+
) -> Dict[str, FileMetadata]:
|
|
3873
|
+
"""
|
|
3874
|
+
Resolve relative-URI files in the given environment and turn them into absolute normalized URIs. Returns a dictionary of the *string values* from the WDL file values
|
|
3875
|
+
to a tuple of the normalized URI, parent directory ID, and size of the file. The size of the file may be None, which means unknown size.
|
|
3876
|
+
|
|
3877
|
+
:param filenames: list of filenames to evaluate on
|
|
3878
|
+
:param file_source: Context to search for files with
|
|
3879
|
+
:param task_path: Dotted WDL name of the user-level code doing the
|
|
3880
|
+
importing (probably the workflow name).
|
|
3881
|
+
:param search_paths: If set, try resolving input location relative to the URLs or
|
|
3882
|
+
directories in this list.
|
|
3883
|
+
:param include_remote_files: If set, import files from remote locations. Else leave them as URI references.
|
|
3884
|
+
"""
|
|
3885
|
+
|
|
3886
|
+
@memoize
|
|
3887
|
+
def get_filename_size(filename: str) -> FileMetadata:
|
|
3888
|
+
tried = []
|
|
3889
|
+
for candidate_uri in potential_absolute_uris(
|
|
3890
|
+
filename,
|
|
3891
|
+
search_paths if search_paths is not None else [],
|
|
3892
|
+
execution_dir=execution_dir,
|
|
3893
|
+
):
|
|
3894
|
+
tried.append(candidate_uri)
|
|
3895
|
+
try:
|
|
3896
|
+
if not include_remote_files and is_remote_url(candidate_uri):
|
|
3897
|
+
# Use remote URIs in place. But we need to find the one that exists.
|
|
3898
|
+
if not file_source.url_exists(candidate_uri):
|
|
3899
|
+
# Wasn't found there
|
|
3900
|
+
continue
|
|
3901
|
+
|
|
3902
|
+
# Now we know this exists, so pass it through
|
|
3903
|
+
# Get filesizes
|
|
3904
|
+
filesize = file_source.get_size(candidate_uri)
|
|
3905
|
+
except UnimplementedURLException as e:
|
|
3906
|
+
# We can't find anything that can even support this URL scheme.
|
|
3907
|
+
# Report to the user, they are probably missing an extra.
|
|
3908
|
+
logger.critical("Error: " + str(e))
|
|
3909
|
+
raise
|
|
3910
|
+
except HTTPError as e:
|
|
3911
|
+
# Something went wrong looking for it there.
|
|
3912
|
+
logger.warning(
|
|
3913
|
+
"Checked URL %s but got HTTP status %s", candidate_uri, e.code
|
|
3914
|
+
)
|
|
3915
|
+
if e.code == 405:
|
|
3916
|
+
# 405 Method not allowed, maybe HEAD requests are not supported
|
|
3917
|
+
filesize = None
|
|
3918
|
+
else:
|
|
3919
|
+
# Try the next location.
|
|
3920
|
+
continue
|
|
3921
|
+
except FileNotFoundError:
|
|
3922
|
+
# Wasn't found there
|
|
3923
|
+
continue
|
|
3924
|
+
except Exception:
|
|
3925
|
+
# Something went wrong besides the file not being found. Maybe
|
|
3926
|
+
# we have no auth.
|
|
3927
|
+
logger.error(
|
|
3928
|
+
"Something went wrong when testing for existence of %s",
|
|
3929
|
+
candidate_uri,
|
|
3930
|
+
)
|
|
3931
|
+
raise
|
|
3932
|
+
|
|
3933
|
+
# Work out what the basename for the file was
|
|
3934
|
+
file_basename = os.path.basename(urlsplit(candidate_uri).path)
|
|
3935
|
+
|
|
3936
|
+
if file_basename == "":
|
|
3937
|
+
# We can't have files with no basename because we need to
|
|
3938
|
+
# download them at that basename later in WDL.
|
|
3939
|
+
raise RuntimeError(
|
|
3940
|
+
f"File {candidate_uri} has no basename"
|
|
3941
|
+
)
|
|
3942
|
+
|
|
3943
|
+
# Was actually found
|
|
3944
|
+
if is_remote_url(candidate_uri):
|
|
3945
|
+
# Might be a file URI or other URI.
|
|
3946
|
+
# We need to make sure file URIs and local paths that point to
|
|
3947
|
+
# the same place are treated the same.
|
|
3948
|
+
parsed = urlsplit(candidate_uri)
|
|
3949
|
+
if parsed.scheme == "file:":
|
|
3950
|
+
# This is a local file URI. Convert to a path for source directory tracking.
|
|
3951
|
+
parent_dir = os.path.dirname(unquote(parsed.path))
|
|
3952
|
+
else:
|
|
3953
|
+
# This is some other URL. Get the URL to the parent directory and use that.
|
|
3954
|
+
parent_dir = urljoin(candidate_uri, ".")
|
|
3955
|
+
else:
|
|
3956
|
+
# Must be a local path
|
|
3957
|
+
parent_dir = os.path.dirname(candidate_uri)
|
|
3958
|
+
|
|
3959
|
+
return cast(FileMetadata, (candidate_uri, parent_dir, filesize))
|
|
3960
|
+
# Not found
|
|
3961
|
+
raise RuntimeError(
|
|
3962
|
+
f"Could not find {filename} at any of: {list(potential_absolute_uris(filename, search_paths if search_paths is not None else []))}"
|
|
3963
|
+
)
|
|
3964
|
+
|
|
3965
|
+
return {k: get_filename_size(k) for k in filenames}
|
|
3966
|
+
|
|
3967
|
+
|
|
3968
|
+
```diff
+class CombineImportsJob(Job):
+    """
+    Combine the outputs of multiple WorkerImportsJob into one promise
+    """
+
+    def __init__(self, d: Sequence[Promised[Dict[str, FileID]]], **kwargs):
+        """
+        :param d: Sequence of dictionaries to merge
+        """
+        self._d = d
+        super().__init__(**kwargs)
+
+    def run(self, file_store: AbstractFileStore) -> Promised[Dict[str, FileID]]:
+        """
+        Merge the dicts
+        """
+        d = unwrap_all(self._d)
+        return {k: v for item in d for k, v in item.items()}
+
+
```
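CombineImportsJob.run() flattens the per-batch dictionaries with a single dict comprehension, so if the same key shows up in more than one batch the later batch wins. A tiny standalone illustration of that merge, using plain dicts in place of the unwrapped promises:

```python
batches = [
    {"a.txt": "file-1", "b.txt": "file-2"},
    {"c.txt": "file-3", "a.txt": "file-4"},  # duplicate key: later value wins
]

merged = {k: v for item in batches for k, v in item.items()}
print(merged)  # {'a.txt': 'file-4', 'b.txt': 'file-2', 'c.txt': 'file-3'}
```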
```diff
+class WorkerImportJob(Job):
+    """
+    Job to do file imports on a worker instead of a leader. Assumes all local and cloud files are accessible.
+
+    For the CWL/WDL runners, this class is only used when runImportsOnWorkers is enabled.
+    """
+
+    def __init__(
+        self,
+        filenames: List[str],
+        local: bool = False,
+        **kwargs: Any
+    ):
+        """
+        Setup importing files on a worker.
+        :param filenames: List of file URIs to import
+        :param kwargs: args for the superclass
+        """
+        self.filenames = filenames
+        super().__init__(local=local, **kwargs)
+
+    @staticmethod
+    def import_files(
+        files: List[str], file_source: "AbstractJobStore"
+    ) -> Dict[str, FileID]:
+        """
+        Import a list of files into the jobstore. Returns a mapping of the filename to the associated FileIDs
+
+        When stream is true but the import is not streamable, the worker will run out of
+        disk space and run a new import job with enough disk space instead.
+        :param files: list of files to import
+        :param file_source: AbstractJobStore
+        :return: Dictionary mapping filenames to associated jobstore FileID
+        """
+        # todo: make the import ensure streaming is done instead of relying on running out of disk space
+        path_to_fileid = {}
+
+        @memoize
+        def import_filename(filename: str) -> Optional[FileID]:
+            return file_source.import_file(filename, symlink=True)
+
+        for file in files:
+            imported = import_filename(file)
+            if imported is not None:
+                path_to_fileid[file] = imported
+        return path_to_fileid
+
+    def run(self, file_store: AbstractFileStore) -> Promised[Dict[str, FileID]]:
+        """
+        Import the workflow inputs and then create and run the workflow.
+        :return: Promise of workflow outputs
+        """
+        return self.import_files(self.filenames, file_store.jobStore)
+
+
```
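import_files() memoizes the per-file import, so a URI that appears more than once in a batch is only imported into the job store once. A rough sketch of that behaviour, using functools.lru_cache as a stand-in for toil's @memoize and a counting fake instead of a real job store:

```python
from functools import lru_cache

calls = 0

@lru_cache(maxsize=None)  # stand-in for toil's @memoize decorator
def import_filename(filename: str) -> str:
    global calls
    calls += 1
    return f"fileid-{calls}"  # pretend this is the FileID the job store would return

files = ["s3://bucket/a.txt", "s3://bucket/b.txt", "s3://bucket/a.txt"]
path_to_fileid = {f: import_filename(f) for f in files}

print(path_to_fileid)  # a.txt and its duplicate share one FileID
print(calls)           # 2: the duplicate URI did not trigger a second import
```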
```diff
+class ImportsJob(Job):
+    """
+    Job to organize and delegate files to individual WorkerImportJobs.
+
+    For the CWL/WDL runners, this is only used when runImportsOnWorkers is enabled
+    """
+
+    def __init__(
+        self,
+        file_to_data: Dict[str, FileMetadata],
+        max_batch_size: ParseableIndivisibleResource,
+        import_worker_disk: ParseableIndivisibleResource,
+        **kwargs: Any,
+    ):
+        """
+        Job to take the inputs for a workflow and import them on a worker instead of a leader. Assumes all local and cloud files are accessible.
+
+        This class is only used when runImportsOnWorkers is enabled.
+
+        :param file_to_data: mapping of file source name to file metadata
+        :param max_batch_size: maximum cumulative file size of a batched import
+        """
+        super().__init__(local=True, **kwargs)
+        self._file_to_data = file_to_data
+        self._max_batch_size = max_batch_size
+        self._import_worker_disk = import_worker_disk
+
+    def run(
+        self, file_store: AbstractFileStore
+    ) -> Tuple[Promised[Dict[str, FileID]], Dict[str, FileMetadata]]:
+        """
+        Import the workflow inputs and then create and run the workflow.
+        :return: Tuple of a mapping from the candidate uri to the file id and a mapping of the source filenames to its metadata. The candidate uri is a field in the file metadata
+        """
+        max_batch_size = self._max_batch_size
+        file_to_data = self._file_to_data
+        # Run WDL imports on a worker instead
+
+        filenames = list(file_to_data.keys())
+
+        import_jobs = []
+
+        # This list will hold lists of batched filenames
+        file_batches = []
+
+        # List of filenames for each batch
+        per_batch_files = []
+        per_batch_size = 0
+        while len(filenames) > 0:
+            filename = filenames.pop(0)
+            # See if adding this to the queue will make the batch job too big
+            filesize = file_to_data[filename][2]
+            if per_batch_size + filesize >= max_batch_size:
+                # batch is too big now, store to schedule the batch
+                if len(per_batch_files) == 0:
+                    # schedule the individual file
+                    per_batch_files.append(filename)
+                file_batches.append(per_batch_files)
+                # reset batching calculation
+                per_batch_size = 0
+            else:
+                per_batch_size += filesize
+                per_batch_files.append(filename)
+
+        if per_batch_files:
+            file_batches.append(per_batch_files)
+
+        # Create batch import jobs for each group of files
+        for batch in file_batches:
+            candidate_uris = [file_to_data[filename][0] for filename in batch]
+            import_jobs.append(WorkerImportJob(candidate_uris, disk=self._import_worker_disk))
+
+        for job in import_jobs:
+            self.addChild(job)
+
+        combine_imports_job = CombineImportsJob([job.rv() for job in import_jobs])
+        for job in import_jobs:
+            job.addFollowOn(combine_imports_job)
+        self.addChild(combine_imports_job)
+
+        return combine_imports_job.rv(), file_to_data
+
+
```
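ImportsJob.run() packs files into batches greedily by cumulative size: files keep joining the current batch until the next one would push the running total past max_batch_size, and a file that is larger than the limit on its own still gets a batch of one. A simplified, standalone sketch of that grouping (not a line-for-line copy of the method above; the filenames, sizes, and limit are made up):

```python
from typing import Dict, List

def batch_by_size(sizes: Dict[str, int], max_batch_size: int) -> List[List[str]]:
    """Greedily pack filenames into batches whose cumulative size stays under the limit."""
    batches: List[List[str]] = []
    current: List[str] = []
    current_size = 0
    for filename, filesize in sizes.items():
        if current and current_size + filesize >= max_batch_size:
            # Close the current batch and start a new one with this file.
            batches.append(current)
            current = []
            current_size = 0
        current.append(filename)
        current_size += filesize
    if current:
        batches.append(current)
    return batches

print(batch_by_size({"a": 40, "b": 30, "c": 50, "d": 10}, max_batch_size=100))
# [['a', 'b'], ['c', 'd']] with these made-up sizes
```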
```diff
 class Promise:
     """
     References a return value from a method as a *promise* before the method itself is run.
@@ -3336,7 +4183,9 @@ class Promise:
     def __new__(cls, *args) -> "Promise":
         """Instantiate this Promise."""
         if len(args) != 2:
-            raise RuntimeError(
+            raise RuntimeError(
+                "Cannot instantiate promise. Invalid number of arguments given (Expected 2)."
+            )
         if isinstance(args[0], Job):
             # Regular instantiation when promise is created, before it is being pickled
             return super().__new__(cls)
@@ -3357,6 +4206,7 @@ class Promise:
         value = safeUnpickleFromStream(fileHandle)
         return value
 
+
 # Machinery for type-safe-ish Toil Python workflows.
 #
 # TODO: Until we make Promise generic on the promised type, and work out how to
@@ -3364,12 +4214,13 @@ class Promise:
 # method returns, this won't actually be type-safe, because any Promise will be
 # a Promised[] for any type.
 
-T = TypeVar(
+T = TypeVar("T")
 # We have type shorthand for a promised value.
 # Uses a generic type alias, so you can have a Promised[T]. See <https://github.com/python/mypy/pull/2378>.
 
 Promised = Union[Promise, T]
 
+
 def unwrap(p: Promised[T]) -> T:
     """
     Function for ensuring you actually have a promised value, and not just a promise.
@@ -3378,9 +4229,10 @@ def unwrap(p: Promised[T]) -> T:
     The "unwrap" terminology is borrowed from Rust.
     """
     if isinstance(p, Promise):
-        raise TypeError(f
+        raise TypeError(f"Attempted to unwrap a value that is still a Promise: {p}")
     return p
 
+
 def unwrap_all(p: Sequence[Promised[T]]) -> Sequence[T]:
     """
     Function for ensuring you actually have a collection of promised values,
@@ -3390,9 +4242,12 @@ def unwrap_all(p: Sequence[Promised[T]]) -> Sequence[T]:
     """
     for i, item in enumerate(p):
         if isinstance(item, Promise):
-            raise TypeError(
+            raise TypeError(
+                f"Attempted to unwrap a value at index {i} that is still a Promise: {item}"
+            )
     return p
 
+
 class PromisedRequirement:
     """
     Class for dynamically allocating job function resource requirements.
```
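The hunks above tighten the error messages in unwrap() and unwrap_all(), the helpers that let type-annotated workflow code assert that a Promised[T] has already been resolved by the time a job runs. A hedged sketch of the intended usage pattern; the job functions and values here are illustrative, not taken from the toil source:

```python
from typing import Sequence

from toil.job import Job, Promised, unwrap_all

def make_part(job: Job, n: int) -> int:
    # Pretend this computes something expensive.
    return n * n

def total(job: Job, parts: Sequence[Promised[int]]) -> int:
    # Each element was a child's .rv() promise when the graph was built;
    # by the time this job runs they are concrete ints, and unwrap_all()
    # both asserts that and narrows the type for the type checker.
    return sum(unwrap_all(parts))

def root(job: Job) -> Promised[int]:
    children = [job.addChildJobFn(make_part, n) for n in range(3)]
    follow_on = job.addFollowOnJobFn(total, [c.rv() for c in children])
    return follow_on.rv()
```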
```diff
@@ -3419,13 +4274,15 @@ class PromisedRequirement:
         :param args: variable length argument list
         :type args: int or .Promise
         """
-        if hasattr(valueOrCallable,
+        if hasattr(valueOrCallable, "__call__"):
             if len(args) == 0:
-                raise RuntimeError(
+                raise RuntimeError("Need parameters for PromisedRequirement function.")
             func = valueOrCallable
         else:
             if len(args) != 0:
-                raise RuntimeError(
+                raise RuntimeError(
+                    "Define a PromisedRequirement function to handle multiple arguments."
+                )
             func = lambda x: x
             args = [valueOrCallable]
 
@@ -3438,7 +4295,7 @@ class PromisedRequirement:
         return func(*self._args)
 
     @staticmethod
-    def convertPromises(kwargs:
+    def convertPromises(kwargs: dict[str, Any]) -> bool:
         """
         Return True if reserved resource keyword is a Promise or PromisedRequirement instance.
 
```
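PromisedRequirement lets a resource requirement (memory, disk, cores) be computed from another job's promised return value rather than a constant. A hedged usage sketch; the job functions and the doubling factor are illustrative:

```python
from toil.job import Job, PromisedRequirement

def measure_input(job: Job) -> int:
    # Pretend we inspected the input and report its size in bytes.
    return 2 * 1024 ** 3

def heavy_step(job: Job, size: int) -> int:
    # Pretend this does work proportional to the measured size.
    return size

def root(job: Job) -> None:
    sizer = job.addChildJobFn(measure_input)
    # Ask for twice the measured size as disk; the callable runs once the
    # promise from measure_input has been fulfilled.
    job.addFollowOnJobFn(
        heavy_step,
        sizer.rv(),
        disk=PromisedRequirement(lambda size: 2 * size, sizer.rv()),
    )
```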
```diff
@@ -3467,15 +4324,15 @@ class UnfulfilledPromiseSentinel:
         self.file_id = file_id
 
     @staticmethod
-    def __setstate__(stateDict:
+    def __setstate__(stateDict: dict[str, Any]) -> None:
         """
         Only called when unpickling.
 
         This won't be unpickled unless the promise wasn't resolved, so we throw
         an exception.
         """
-        jobName = stateDict[
-        file_id = stateDict[
+        jobName = stateDict["fulfillingJobName"]
+        file_id = stateDict["file_id"]
         raise RuntimeError(
             f"This job was passed promise {file_id} that wasn't yet resolved when it "
             f"ran. The job {jobName} that fulfills this promise hasn't yet "
```
|