PyPI - toil - Versions diffs - 8.2.0__py3-none-any.whl → 9.0.0__py3-none-any.whl - Mend

toil-8.2.0-py3-none-any.whl → toil-9.0.0-py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (47)

toil/batchSystems/registry.py +15 -118
toil/common.py +20 -1
toil/cwl/cwltoil.py +80 -37
toil/cwl/utils.py +103 -3
toil/jobStores/abstractJobStore.py +11 -236
toil/jobStores/aws/jobStore.py +2 -1
toil/jobStores/fileJobStore.py +2 -1
toil/jobStores/googleJobStore.py +7 -4
toil/lib/accelerators.py +1 -1
toil/lib/generatedEC2Lists.py +81 -19
toil/lib/misc.py +1 -1
toil/lib/plugins.py +106 -0
toil/lib/url.py +320 -0
toil/options/cwl.py +13 -1
toil/options/runner.py +17 -10
toil/options/wdl.py +12 -1
toil/provisioners/aws/awsProvisioner.py +25 -2
toil/server/app.py +12 -6
toil/server/cli/wes_cwl_runner.py +2 -2
toil/server/wes/abstract_backend.py +21 -43
toil/server/wes/toil_backend.py +2 -2
toil/test/__init__.py +2 -2
toil/test/batchSystems/batchSystemTest.py +2 -9
toil/test/batchSystems/batch_system_plugin_test.py +7 -0
toil/test/cwl/cwlTest.py +181 -8
toil/test/docs/scriptsTest.py +2 -1
toil/test/lib/test_url.py +69 -0
toil/test/lib/url_plugin_test.py +105 -0
toil/test/provisioners/aws/awsProvisionerTest.py +1 -1
toil/test/provisioners/clusterTest.py +15 -2
toil/test/provisioners/gceProvisionerTest.py +1 -1
toil/test/server/serverTest.py +78 -36
toil/test/wdl/md5sum/md5sum-gs.json +1 -1
toil/test/wdl/testfiles/read_file.wdl +18 -0
toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
toil/test/wdl/wdltoil_test.py +74 -125
toil/utils/toilSshCluster.py +23 -0
toil/utils/toilUpdateEC2Instances.py +1 -0
toil/version.py +9 -9
toil/wdl/wdltoil.py +182 -314
toil/worker.py +11 -6
{toil-8.2.0.dist-info → toil-9.0.0.dist-info}/METADATA +23 -23
{toil-8.2.0.dist-info → toil-9.0.0.dist-info}/RECORD +47 -42
{toil-8.2.0.dist-info → toil-9.0.0.dist-info}/WHEEL +1 -1
{toil-8.2.0.dist-info → toil-9.0.0.dist-info}/entry_points.txt +0 -0
{toil-8.2.0.dist-info → toil-9.0.0.dist-info}/licenses/LICENSE +0 -0
{toil-8.2.0.dist-info → toil-9.0.0.dist-info}/top_level.txt +0 -0

toil/wdl/wdltoil.py CHANGED

@@ -15,6 +15,7 @@
 from __future__ import annotations
 import asyncio
+import copy
 import errno
 import hashlib
 import io
@@ -111,6 +112,7 @@ from toil.lib.misc import get_user_name
 from toil.lib.resources import ResourceMonitor
 from toil.lib.threading import global_mutex
 from toil.provisioners.clusterScaler import JobTooBigError
+from toil.lib.url import URLAccess
 logger = logging.getLogger(__name__)
@@ -293,207 +295,6 @@ def report_wdl_errors(
     return decorator
-def remove_common_leading_whitespace(
-    expression: WDL.Expr.String,
-    tolerate_blanks: bool = True,
-    tolerate_dedents: bool = False,
-    tolerate_all_whitespace: bool = True,
-    debug: bool = False,
-) -> WDL.Expr.String:
-    """
-    Remove "common leading whitespace" as defined in the WDL 1.1 spec.
-    See <https://github.com/openwdl/wdl/blob/main/versions/1.1/SPEC.md#stripping-leading-whitespace>.
-    Operates on a WDL.Expr.String expression that has already been parsed.
-    :param tolerate_blanks: If True, don't allow totally blank lines to zero
-        the common whitespace.
-    :param tolerate_dedents: If True, remove as much of the whitespace on the
-        first indented line as is found on subesquent lines, regardless of
-        whether later lines are out-dented relative to it.
-    :param tolerate_all_whitespace: If True, don't allow all-whitespace lines
-        to reduce the common whitespace prefix.
-    :param debug: If True, the function will show its work by logging at debug
-        level.
-    """
-    # The expression has a "parts" list consisting of interleaved string
-    # literals and placeholder expressions.
-    #
-    # TODO: We assume that there are no newlines in the placeholders.
-    #
-    # TODO: Look at the placeholders and their line and end_line values and try
-    # and guess if they should reduce the amount of common whitespace.
-    if debug:
-        logger.debug("Parts: %s", expression.parts)
-    # We split the parts list into lines, which are also interleaved string
-    # literals and placeholder expressions.
-    lines: list[list[str | WDL.Expr.Placeholder]] = [[]]
-    for part in expression.parts:
-        if isinstance(part, str):
-            # It's a string. Split it into lines.
-            part_lines = part.split("\n")
-            # Part before any newline goes at the end of the current line
-            lines[-1].append(part_lines[0])
-            for part_line in part_lines[1:]:
-                # Any part after a newline starts a new line
-                lines.append([part_line])
-        else:
-            # It's a placeholder. Put it at the end of the current line.
-            lines[-1].append(part)
-    if debug:
-        logger.debug("Lines: %s", lines)
-    # Then we compute the common amount of leading whitespace on all the lines,
-    # looking at the first string literal.
-    # This will be the longest common whitespace prefix, or None if not yet detected.
-    common_whitespace_prefix: str | None = None
-    for line in lines:
-        if len(line) == 0:
-            # TODO: how should totally empty lines be handled? Not in the spec!
-            if not tolerate_blanks:
-                # There's no leading whitespace here!
-                common_whitespace_prefix = ""
-            continue
-        elif isinstance(line[0], WDL.Expr.Placeholder):
-            # TODO: How can we convert MiniWDL's column numbers into space/tab counts or sequences?
-            #
-            # For now just skip these too.
-            continue
-        else:
-            # The line starts with a string
-            assert isinstance(line[0], str)
-            if len(line[0]) == 0:
-                # Still totally empty though!
-                if not tolerate_blanks:
-                    # There's no leading whitespace here!
-                    common_whitespace_prefix = ""
-                continue
-            if (
-                len(line) == 1
-                and tolerate_all_whitespace
-                and all(x in (" ", "\t") for x in line[0])
-            ):
-                # All-whitespace lines shouldn't count
-                continue
-            # TODO: There are good algorithms for common prefixes. This is a bad one.
-            # Find the number of leading whitespace characters
-            line_whitespace_end = 0
-            while line_whitespace_end < len(line[0]) and line[0][
-                line_whitespace_end
-            ] in (" ", "\t"):
-                line_whitespace_end += 1
-            # Find the string of leading whitespace characters
-            line_whitespace_prefix = line[0][:line_whitespace_end]
-            if " " in line_whitespace_prefix and "\t" in line_whitespace_prefix:
-                # Warn and don't change anything if spaces and tabs are mixed, per the spec.
-                logger.warning(
-                    "Line in command at %s mixes leading spaces and tabs! Not removing leading whitespace!",
-                    expression.pos,
-                )
-                return expression
-            if common_whitespace_prefix is None:
-                # This is the first line we found, so it automatically has the common prefic
-                common_whitespace_prefix = line_whitespace_prefix
-            elif not tolerate_dedents:
-                # Trim the common prefix down to what we have for this line
-                if not line_whitespace_prefix.startswith(common_whitespace_prefix):
-                    # Shorten to the real shared prefix.
-                    # Hackily make os.path do it for us,
-                    # character-by-character. See
-                    # <https://stackoverflow.com/a/6718435>
-                    common_whitespace_prefix = os.path.commonprefix(
-                        [common_whitespace_prefix, line_whitespace_prefix]
-                    )
-    if common_whitespace_prefix is None:
-        common_whitespace_prefix = ""
-    if debug:
-        logger.debug("Common Prefix: '%s'", common_whitespace_prefix)
-    # Then we trim that much whitespace off all the leading strings.
-    # We tolerate the common prefix not *actually* being common and remove as
-    # much of it as is there, to support tolerate_dedents.
-    def first_mismatch(prefix: str, value: str) -> int:
-        """
-        Get the index of the first character in value that does not match the corresponding character in prefix, or the length of the shorter string.
-        """
-        for n, (c1, c2) in enumerate(zip(prefix, value)):
-            if c1 != c2:
-                return n
-        return min(len(prefix), len(value))
-    # Trim up to the first mismatch vs. the common prefix if the line starts with a string literal.
-    stripped_lines = [
-        (
-            (
-                cast(
-                    list[Union[str, WDL.Expr.Placeholder]],
-                    [line[0][first_mismatch(common_whitespace_prefix, line[0]) :]],
-                )
-                + line[1:]
-            )
-            if len(line) > 0 and isinstance(line[0], str)
-            else line
-        )
-        for line in lines
-    ]
-    if debug:
-        logger.debug("Stripped Lines: %s", stripped_lines)
-    # Then we reassemble the parts and make a new expression.
-    # Build lists and turn the lists into strings later
-    new_parts: list[list[str] | WDL.Expr.Placeholder] = []
-    for i, line in enumerate(stripped_lines):
-        if i > 0:
-            # This is a second line, so we need to tack on a newline.
-            if len(new_parts) > 0 and isinstance(new_parts[-1], list):
-                # Tack on to existing string collection
-                new_parts[-1].append("\n")
-            else:
-                # Make a new string collection
-                new_parts.append(["\n"])
-        if len(line) > 0 and isinstance(line[0], str) and i > 0:
-            # Line starts with a string we need to merge with the last string.
-            # We know the previous line now ends with a string collection, so tack it on.
-            assert isinstance(new_parts[-1], list)
-            new_parts[-1].append(line[0])
-            # Make all the strings into string collections in the rest of the line
-            new_parts += [([x] if isinstance(x, str) else x) for x in line[1:]]
-        else:
-            # No string merge necessary
-            # Make all the strings into string collections in the whole line
-            new_parts += [([x] if isinstance(x, str) else x) for x in line]
-    if debug:
-        logger.debug("New Parts: %s", new_parts)
-    # Now go back to the alternating strings and placeholders that MiniWDL wants
-    new_parts_merged: list[str | WDL.Expr.Placeholder] = [
-        ("".join(x) if isinstance(x, list) else x) for x in new_parts
-    ]
-    if debug:
-        logger.debug("New Parts Merged: %s", new_parts_merged)
-    modified = WDL.Expr.String(expression.pos, new_parts_merged, expression.command)
-    # Fake the type checking of the modified expression.
-    # TODO: Make MiniWDL expose a real way to do this?
-    modified._type = expression._type
-    return modified
 async def toil_read_source(
     uri: str, path: list[str], importer: WDL.Tree.Document | None
 ) -> ReadSourceResult:
@@ -514,7 +315,7 @@ async def toil_read_source(
         tried.append(candidate_uri)
         try:
             # TODO: this is probably sync work that would be better as async work here
-            AbstractJobStore.read_from_url(candidate_uri, destination_buffer)
+            URLAccess.read_from_url(candidate_uri, destination_buffer)
         except Exception as e:
             if isinstance(e, SyntaxError) or isinstance(e, NameError):
                 # These are probably actual problems with the code and not
@@ -1142,17 +943,29 @@ def evaluate_decls_to_bindings(
     standard_library: ToilWDLStdLibBase,
     include_previous: bool = False,
     drop_missing_files: bool = False,
+    expressions_are_defaults: bool = False,
 ) -> WDLBindings:
     """
     Evaluate decls with a given bindings environment and standard library.
     Creates a new bindings object that only contains the bindings from the given decls.
     Guarantees that each decl in `decls` can access the variables defined by the previous ones.
     :param all_bindings: Environment to use when evaluating decls
     :param decls: Decls to evaluate
     :param standard_library: Standard library
-    :param include_previous: Whether to include the existing environment in the new returned environment. This will be false for outputs where only defined decls should be included
-    :param drop_missing_files: Whether to coerce nonexistent files to null. The coerced elements will be checked that the transformation is valid.
-    Currently should only be enabled in output sections, see https://github.com/openwdl/wdl/issues/673#issuecomment-2248828116
+    :param include_previous: Whether to include the existing environment in the
+        new returned environment. This will be false for outputs where only
+        defined decls should be included
+    :param drop_missing_files: Whether to coerce nonexistent files to null. The
+        coerced elements will be checked that the transformation is valid.
+        Currently should only be enabled in output sections, see
+        https://github.com/openwdl/wdl/issues/673#issuecomment-2248828116.
+    :param expressions_are_defaults: If True, value expressions in decls are
+        treated as default values, and there may be existing values in the
+        incoming environment that take precedence. If False, each decl is taken
+        to be a fresh definition, and expressions are always evaluated and
+        used.
     :return: New bindings object
     """
     # all_bindings contains current bindings + previous all_bindings
@@ -1162,9 +975,14 @@ def evaluate_decls_to_bindings(
         drop_if_missing, standard_library=standard_library
     )
     for each_decl in decls:
-        output_value = evaluate_defaultable_decl(
-            each_decl, all_bindings, standard_library
-        )
+        if expressions_are_defaults:
+            output_value = evaluate_defaultable_decl(
+                each_decl, all_bindings, standard_library
+            )
+        else:
+            output_value = evaluate_decl(
+                each_decl, all_bindings, standard_library
+            )
         if drop_missing_files:
             dropped_output_value = map_over_typed_files_in_value(
                 output_value, drop_if_missing_with_workdir
@@ -1223,7 +1041,7 @@ class NonDownloadingSize(WDL.StdLib._Size):
                 else:
                     # This is some other kind of remote file.
                     # We need to get its size from the URI.
-                    item_size = AbstractJobStore.get_size(uri)
+                    item_size = URLAccess.get_size(uri)
                     if item_size is None:
                         # User asked for the size and we can't figure it out efficiently, so bail out.
                         raise RuntimeError(f"Attempt to check the size of {uri} failed")
@@ -1374,7 +1192,7 @@ def convert_remote_files(
             tried.append(candidate_uri)
             try:
                 # Try polling existence first.
-                polled_existence = file_source.url_exists(candidate_uri)
+                polled_existence = URLAccess.url_exists(candidate_uri)
                 if polled_existence is False:
                     # Known not to exist
                     logger.debug("URL does not exist: %s", candidate_uri)
@@ -1772,7 +1590,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
             # Open it exclusively
             with open(dest_path, "xb") as dest_file:
                 # And save to it
-                size, executable = AbstractJobStore.read_from_url(filename, dest_file)
+                size, executable = URLAccess.read_from_url(filename, dest_file)
                 if executable:
                     # Set the execute bit in the file's permissions
                     os.chmod(dest_path, os.stat(dest_path).st_mode | stat.S_IXUSR)
@@ -2534,11 +2352,15 @@ def evaluate_decl(
     """
     Evaluate the expression of a declaration node, or raise an error.
     """
-    return evaluate_named_expression(
-        node, node.name, node.type, node.expr, environment, stdlib
-    )
+    try:
+        return evaluate_named_expression(
+            node, node.name, node.type, node.expr, environment, stdlib
+        )
+    except Exception:
+        # If something goes wrong, dump.
+        logger.exception("Evaluation failed for %s", node)
+        log_bindings(logger.error, "Statement was evaluated in:", [environment])
+        raise
 def evaluate_call_inputs(
     context: WDL.Error.SourceNode | WDL.Error.SourcePosition,
@@ -2581,33 +2403,28 @@ def evaluate_defaultable_decl(
     If the name of the declaration is already defined in the environment, return its value. Otherwise, return the evaluated expression.
     """
-    try:
-        if (
-            node.name in environment
-            and not isinstance(environment[node.name], WDL.Value.Null)
-        ) or (
-            isinstance(environment.get(node.name), WDL.Value.Null)
-            and node.type.optional
-        ):
-            logger.debug("Name %s is already defined, not using default", node.name)
-            if not isinstance(environment[node.name].type, type(node.type)):
-                return environment[node.name].coerce(node.type)
-            else:
-                return environment[node.name]
+    if (
+        node.name in environment
+        and not isinstance(environment[node.name], WDL.Value.Null)
+    ) or (
+        isinstance(environment.get(node.name), WDL.Value.Null)
+        and node.type.optional
+    ):
+        logger.debug("Name %s is already defined, not using default", node.name)
+        if not isinstance(environment[node.name].type, type(node.type)):
+            return environment[node.name].coerce(node.type)
         else:
-            if node.type is not None and not node.type.optional and node.expr is None:
-                # We need a value for this but there isn't one.
-                raise WDL.Error.EvalError(
-                    node,
-                    f"Value for {node.name} was not provided and no default value is available",
-                )
-            logger.info("Defaulting %s to %s", node.name, node.expr)
-            return evaluate_decl(node, environment, stdlib)
-    except Exception:
-        # If something goes wrong, dump.
-        logger.exception("Evaluation failed for %s", node)
-        log_bindings(logger.error, "Statement was evaluated in:", [environment])
-        raise
+            return environment[node.name]
+    else:
+        if node.type is not None and not node.type.optional and node.expr is None:
+            # We need a value for this but there isn't one.
+            raise WDL.Error.EvalError(
+                node,
+                f"Value for {node.name} was not provided and no default value is available",
+            )
+        logger.info("Defaulting %s to %s", node.name, node.expr)
+        return evaluate_decl(node, environment, stdlib)
 # TODO: make these stdlib methods???
@@ -2719,7 +2536,7 @@ def drop_if_missing(
     if filename is not None and is_any_url(filename):
         try:
-            if filename.startswith(TOIL_URI_SCHEME) or AbstractJobStore.url_exists(
+            if filename.startswith(TOIL_URI_SCHEME) or URLAccess.url_exists(
                 filename
             ):
                 # We assume anything in the filestore actually exists.
@@ -2835,64 +2652,52 @@ def map_over_files_in_binding(
         binding.info,
     )
+def remove_expr_from_value(value: WDL.Value.Base) -> WDL.Value.Base:
+    """
+    Remove the expression from a WDL value
+    :param value: Original WDL value
+    :return: New WDL value without the expr field
+    """
+    # TODO: This is an extra copy that we could get rid of by dropping the immutability idea
+    def predicate(value: WDL.Value.Base) -> WDL.Value.Base:
+        # Do a shallow copy to preserve immutability
+        new_value = copy.copy(value)
+        if value.expr:
+            # We use a Null expr instead of None here, because when evaluating an expression,
+            # MiniWDL applies that expression to the result value *and* all values it contains that
+            # have None expressions. Using a Null expression here protects nested values that
+            # didn't really get created by the current expression from being attributed to it, while
+            # still cutting the reference to the parsed WDL document.
+            new_value._expr = WDL.Expr.Null(value.expr.pos)
+        else:
+            new_value._expr = value.expr
+        return new_value
+    return map_over_typed_value(value, predicate)
-# TODO: We want to type this to say, for anything descended from a WDL type, we
-# return something descended from the same WDL type or a null. But I can't
-# quite do that with generics, since you could pass in some extended WDL value
-# type we've never heard of and expect to get one of those out.
-#
-# For now we assume that any types extending the WDL value types will implement
-# compatible constructors.
-def map_over_typed_files_in_value(
-    value: WDL.Value.Base, transform: Callable[[WDL.Value.File], WDL.Value.File | None]
-) -> WDL.Value.Base:
-    """
-    Run all File values embedded in the given value through the given
-    transformation function.
-    The transformation function must not mutate the original File.
-    If the transform returns None, the file value is changed to Null.
-    The transform has access to the type information for the value, so it knows
-    if it may return None, depending on if the value is optional or not.
-    The transform is *allowed* to return None only if the mapping result won't
-    actually be used, to allow for scans. So error checking needs to be part of
-    the transform itself.
+def map_over_typed_value(value: WDL.Value.Base, transform: Callable[[WDL.Value.Base], WDL.Value.Base]) -> WDL.Value.Base:
     """
-    if isinstance(value, WDL.Value.File):
-        # This is a file so we need to process it
-        orig_file_value = value.value
-        new_file = transform(value)
-        assert (
-            value.value == orig_file_value
-        ), "Transformation mutated the original File"
-        if new_file is None:
-            # Assume the transform checked types if we actually care about the
-            # result.
-            logger.warning("File %s became Null", value)
-            return WDL.Value.Null()
-        else:
-            # Make whatever the value is around the new path.
-            # TODO: why does this need casting?
-            return new_file
-    elif isinstance(value, WDL.Value.Array):
+    Apply a transform to a WDL value and all contained WDL values.
+    :param value: WDL value to transform
+    :param transform: Function that takes a WDL value and returns a new WDL value
+    :return: New transformed WDL value
+    """
+    if isinstance(value, WDL.Value.Array):
         # This is an array, so recurse on the items
-        return WDL.Value.Array(
+        value = WDL.Value.Array(
             value.type.item_type,
-            [map_over_typed_files_in_value(v, transform) for v in value.value],
+            [map_over_typed_value(v, transform) for v in value.value],
             value.expr,
         )
     elif isinstance(value, WDL.Value.Map):
         # This is a map, so recurse on the members of the items, which are tuples (but not wrapped as WDL Pair objects)
         # TODO: Can we avoid a cast in a comprehension if we get MyPy to know that each pair is always a 2-element tuple?
-        return WDL.Value.Map(
+        value = WDL.Value.Map(
             value.type.item_type,
             [
                 cast(
                     tuple[WDL.Value.Base, WDL.Value.Base],
-                    tuple(map_over_typed_files_in_value(v, transform) for v in pair),
+                    tuple(map_over_typed_value(v, transform) for v in pair),
                 )
                 for pair in value.value
             ],
@@ -2900,29 +2705,74 @@ def map_over_typed_files_in_value(
         )
     elif isinstance(value, WDL.Value.Pair):
         # This is a pair, so recurse on the left and right items
-        return WDL.Value.Pair(
+        value = WDL.Value.Pair(
             value.type.left_type,
             value.type.right_type,
             cast(
                 tuple[WDL.Value.Base, WDL.Value.Base],
-                tuple(map_over_typed_files_in_value(v, transform) for v in value.value),
+                tuple(map_over_typed_value(v, transform) for v in value.value),
             ),
             value.expr,
         )
     elif isinstance(value, WDL.Value.Struct):
         # This is a struct, so recurse on the values in the backing dict
-        return WDL.Value.Struct(
+        value = WDL.Value.Struct(
             cast(Union[WDL.Type.StructInstance, WDL.Type.Object], value.type),
             {
-                k: map_over_typed_files_in_value(v, transform)
+                k: map_over_typed_value(v, transform)
                 for k, v in value.value.items()
             },
             value.expr,
         )
-    else:
-        # All other kinds of value can be passed through unmodified.
+    # Run the predicate on the final value
+    return transform(value)
+# TODO: We want to type this to say, for anything descended from a WDL type, we
+# return something descended from the same WDL type or a null. But I can't
+# quite do that with generics, since you could pass in some extended WDL value
+# type we've never heard of and expect to get one of those out.
+#
+# For now we assume that any types extending the WDL value types will implement
+# compatible constructors.
+def map_over_typed_files_in_value(
+    value: WDL.Value.Base, transform: Callable[[WDL.Value.File], WDL.Value.File | None]
+) -> WDL.Value.Base:
+    """
+    Run all File values embedded in the given value through the given
+    transformation function.
+    The transformation function must not mutate the original File.
+    If the transform returns None, the file value is changed to Null.
+    The transform has access to the type information for the value, so it knows
+    if it may return None, depending on if the value is optional or not.
+    The transform is *allowed* to return None only if the mapping result won't
+    actually be used, to allow for scans. So error checking needs to be part of
+    the transform itself.
+    """
+    def predicate(value: WDL.Value.Base) -> WDL.Value.Base:
+        if isinstance(value, WDL.Value.File):
+            # This is a file so we need to process it
+            orig_file_value = value.value
+            new_file = transform(value)
+            assert (
+                value.value == orig_file_value
+            ), "Transformation mutated the original File"
+            if new_file is None:
+                # Assume the transform checked types if we actually care about the
+                # result.
+                logger.warning("File %s became Null", value)
+                return WDL.Value.Null()
+            else:
+                # Make whatever the value is around the new path.
+                return new_file
         return value
+    return map_over_typed_value(value, predicate)
 def ensure_null_files_are_nullable(
     value: WDL.Value.Base, original_value: WDL.Value.Base, expected_type: WDL.Type.Base
@@ -3065,6 +2915,11 @@ class WDLBaseJob(Job):
         logger.debug("Overlay %s after %s", overlay, self)
         self._postprocessing_steps.append(("overlay", overlay))
+    def remove_expr_from_bindings(self, bindings: WDLBindings) -> WDLBindings:
+        # We have to throw out the expressions because they drag the entire WDL document into the WDL outputs
+        # which causes duplicate pickling and linear growth in scatter memory usage
+        return bindings.map(lambda b: WDL.Env.Binding(b.name, remove_expr_from_value(b.value), b.info))
     def postprocess(self, bindings: WDLBindings) -> WDLBindings:
         """
         Apply queued changes to bindings.
@@ -3101,7 +2956,7 @@ class WDLBaseJob(Job):
                 bindings = combine_bindings([bindings.subtract(argument), argument])
             else:
                 raise RuntimeError(f"Unknown postprocessing action {action}")
+        bindings = self.remove_expr_from_bindings(bindings)
         return bindings
     def defer_postprocessing(self, other: WDLBaseJob) -> None:
@@ -3228,7 +3083,11 @@ class WDLTaskWrapperJob(WDLBaseJob):
             logger.debug("Evaluating task code")
             # Evaluate all the inputs that aren't pre-set
             bindings = evaluate_decls_to_bindings(
-                self._task.inputs, bindings, standard_library, include_previous=True
+                self._task.inputs,
+                bindings,
+                standard_library,
+                include_previous=True,
+                expressions_are_defaults=True
             )
         if self._task.postinputs:
             # Evaluate all the postinput decls.
@@ -3779,6 +3638,8 @@ class WDLTaskJob(WDLBaseJob):
                     "is not yet implemented in the MiniWDL Docker "
                     "containerization implementation."
                 )
+            if runtime_bindings.has_binding("memory") and human2bytes(runtime_bindings.resolve("memory").value) < human2bytes("4MiB"):
+                    runtime_bindings.resolve("memory").value = "4MiB"
         else:
             raise RuntimeError(
                 f"Could not find a working container engine to use; told to use {self._wdl_options.get('container')}"
@@ -4011,7 +3872,7 @@ class WDLTaskJob(WDLBaseJob):
                     self._task,
                     "command",
                     WDL.Type.String(),
-                    remove_common_leading_whitespace(self._task.command),
+                    self._task.command,
                     contained_bindings,
                     command_library,
                 )
@@ -4943,6 +4804,12 @@ class WDLScatterJob(WDLSectionJob):
                 [(p, p) for p in standard_library.get_local_paths()]
             )
+        # Set the exprs of the WDL values to WDL.Expr.Null to reduce the memory footprint. This got set from evaluate_named_expression
+        # because any evaluation on an expression will mutate child values of the result values of the expression, and we had not
+        # processed it yet by this point as the bindings from input environment and WDLWorkflowJob do not get processing and postprocessing
+        # ran respectively
+        bindings = self.remove_expr_from_bindings(bindings)
         if not isinstance(scatter_value, WDL.Value.Array):
             raise RuntimeError(
                 "The returned value from a scatter is not an Array type."
@@ -4955,6 +4822,8 @@ class WDLScatterJob(WDLSectionJob):
             # duration of the body.
             local_bindings: WDLBindings = WDL.Env.Bindings()
             local_bindings = local_bindings.bind(self._scatter.variable, item)
+            # Remove expr from new scatter binding
+            local_bindings = self.remove_expr_from_bindings(local_bindings)
             # TODO: We need to turn values() into a list because MyPy seems to
             # think a dict_values isn't a Sequence. This is a waste of time to
             # appease MyPy but probably better than a cast?
@@ -5244,6 +5113,7 @@ class WDLWorkflowJob(WDLSectionJob):
                     bindings,
                     standard_library,
                     include_previous=True,
+                    expressions_are_defaults=True,
                 )
             finally:
                 # Report all files are downloaded now that all expressions are evaluated.
@@ -5319,9 +5189,8 @@ class WDLOutputsJob(WDLBaseJob):
         try:
             if self._workflow.outputs is not None:
-                # Output section is declared and is nonempty, so evaluate normally
-                # Combine the bindings from the previous job
+                # Output section is declared and is nonempty, so evaluate normally.
+                # Don't drop nonexistent files here; we do that later.
                 output_bindings = evaluate_decls_to_bindings(
                     self._workflow.outputs, unwrap(self._bindings), standard_library
                 )
@@ -5332,7 +5201,8 @@ class WDLOutputsJob(WDLBaseJob):
             if self._workflow.outputs is None or self._wdl_options.get(
                 "all_call_outputs", False
             ):
-                # The output section is not declared, or we want to keep task outputs anyway.
+                # The output section is not declared, or we want to keep task
+                # outputs anyway on top of an already-evaluated output section.
                 # Get all task outputs and return that
                 # First get all task output names
@@ -5363,16 +5233,6 @@ class WDLOutputsJob(WDLBaseJob):
                         output_bindings = output_bindings.bind(
                             binding.name, binding.value
                         )
-            else:
-                # Output section is declared and is nonempty, so evaluate normally
-                # Combine the bindings from the previous job
-                output_bindings = evaluate_decls_to_bindings(
-                    self._workflow.outputs,
-                    unwrap(self._bindings),
-                    standard_library,
-                    drop_missing_files=True,
-                )
         finally:
             # We don't actually know when all our files are downloaded since
             # anything we evaluate might devirtualize inside any expression.
@@ -5391,6 +5251,13 @@ class WDLOutputsJob(WDLBaseJob):
             output_bindings, standard_library=standard_library
         )
+        # TODO: Unify the rest of this with task output management somehow
+        # Upload any files in the outputs if not uploaded already.
+        # We need this because it's possible to create new files in a workflow
+        # outputs section.
+        output_bindings = virtualize_files(output_bindings, standard_library)
         if self._cache_key is not None:
             output_bindings = fill_execution_cache(
                 self._cache_key, output_bindings, file_store, self._wdl_options
@@ -5512,7 +5379,7 @@ class WDLImportWrapper(WDLSectionJob):
         wdl_options: WDLContext,
         inputs_search_path: list[str],
         import_remote_files: bool,
-        import_workers_threshold: ParseableIndivisibleResource,
+        import_workers_batchsize: ParseableIndivisibleResource,
         import_workers_disk: ParseableIndivisibleResource,
         **kwargs: Any,
     ):
@@ -5526,7 +5393,7 @@ class WDLImportWrapper(WDLSectionJob):
         self._target = target
         self._inputs_search_path = inputs_search_path
         self._import_remote_files = import_remote_files
-        self._import_workers_threshold = import_workers_threshold
+        self._import_workers_batchsize = import_workers_batchsize
         self._import_workers_disk = import_workers_disk
     def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
@@ -5538,7 +5405,7 @@ class WDLImportWrapper(WDLSectionJob):
             include_remote_files=self._import_remote_files,
             execution_dir=self._wdl_options.get("execution_dir")
         )
-        imports_job = ImportsJob(file_to_data, self._import_workers_threshold, self._import_workers_disk)
+        imports_job = ImportsJob(file_to_data, self._import_workers_batchsize, self._import_workers_disk)
         self.addChild(imports_job)
         install_imports_job = WDLInstallImportsJob(
             self._target.name, self._inputs, imports_job.rv()
@@ -5570,7 +5437,7 @@ def make_root_job(
             wdl_options=wdl_options,
             inputs_search_path=inputs_search_path,
             import_remote_files=options.reference_inputs,
-            import_workers_threshold=options.import_workers_threshold,
+            import_workers_batchsize=options.import_workers_batchsize,
             import_workers_disk=options.import_workers_disk
         )
     else:
@@ -5644,6 +5511,7 @@ def main() -> None:
             document: WDL.Tree.Document = WDL.load(
                 wdl_uri,
                 read_source=toil_read_source,
+                check_quant=options.quant_check
             )
             # See if we're going to run a workflow or a task

toil 8.2.0__py3-none-any.whl → 9.0.0__py3-none-any.whl

toil 8.2.0py3-none-any.whl → 9.0.0py3-none-any.whl