toil 8.2.0__py3-none-any.whl → 9.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/batchSystems/registry.py +15 -118
- toil/common.py +20 -1
- toil/cwl/cwltoil.py +80 -37
- toil/cwl/utils.py +103 -3
- toil/jobStores/abstractJobStore.py +11 -236
- toil/jobStores/aws/jobStore.py +2 -1
- toil/jobStores/fileJobStore.py +2 -1
- toil/jobStores/googleJobStore.py +7 -4
- toil/lib/accelerators.py +1 -1
- toil/lib/generatedEC2Lists.py +81 -19
- toil/lib/misc.py +1 -1
- toil/lib/plugins.py +106 -0
- toil/lib/url.py +320 -0
- toil/options/cwl.py +13 -1
- toil/options/runner.py +17 -10
- toil/options/wdl.py +12 -1
- toil/provisioners/aws/awsProvisioner.py +25 -2
- toil/server/app.py +12 -6
- toil/server/cli/wes_cwl_runner.py +2 -2
- toil/server/wes/abstract_backend.py +21 -43
- toil/server/wes/toil_backend.py +2 -2
- toil/test/__init__.py +2 -2
- toil/test/batchSystems/batchSystemTest.py +2 -9
- toil/test/batchSystems/batch_system_plugin_test.py +7 -0
- toil/test/cwl/cwlTest.py +181 -8
- toil/test/docs/scriptsTest.py +2 -1
- toil/test/lib/test_url.py +69 -0
- toil/test/lib/url_plugin_test.py +105 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +1 -1
- toil/test/provisioners/clusterTest.py +15 -2
- toil/test/provisioners/gceProvisionerTest.py +1 -1
- toil/test/server/serverTest.py +78 -36
- toil/test/wdl/md5sum/md5sum-gs.json +1 -1
- toil/test/wdl/testfiles/read_file.wdl +18 -0
- toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
- toil/test/wdl/wdltoil_test.py +74 -125
- toil/utils/toilSshCluster.py +23 -0
- toil/utils/toilUpdateEC2Instances.py +1 -0
- toil/version.py +9 -9
- toil/wdl/wdltoil.py +182 -314
- toil/worker.py +11 -6
- {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/METADATA +23 -23
- {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/RECORD +47 -42
- {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/WHEEL +1 -1
- {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/entry_points.txt +0 -0
- {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/licenses/LICENSE +0 -0
- {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/top_level.txt +0 -0
toil/wdl/wdltoil.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
from __future__ import annotations
|
|
16
16
|
|
|
17
17
|
import asyncio
|
|
18
|
+
import copy
|
|
18
19
|
import errno
|
|
19
20
|
import hashlib
|
|
20
21
|
import io
|
|
@@ -111,6 +112,7 @@ from toil.lib.misc import get_user_name
|
|
|
111
112
|
from toil.lib.resources import ResourceMonitor
|
|
112
113
|
from toil.lib.threading import global_mutex
|
|
113
114
|
from toil.provisioners.clusterScaler import JobTooBigError
|
|
115
|
+
from toil.lib.url import URLAccess
|
|
114
116
|
|
|
115
117
|
logger = logging.getLogger(__name__)
|
|
116
118
|
|
|
@@ -293,207 +295,6 @@ def report_wdl_errors(
|
|
|
293
295
|
return decorator
|
|
294
296
|
|
|
295
297
|
|
|
296
|
-
def remove_common_leading_whitespace(
|
|
297
|
-
expression: WDL.Expr.String,
|
|
298
|
-
tolerate_blanks: bool = True,
|
|
299
|
-
tolerate_dedents: bool = False,
|
|
300
|
-
tolerate_all_whitespace: bool = True,
|
|
301
|
-
debug: bool = False,
|
|
302
|
-
) -> WDL.Expr.String:
|
|
303
|
-
"""
|
|
304
|
-
Remove "common leading whitespace" as defined in the WDL 1.1 spec.
|
|
305
|
-
|
|
306
|
-
See <https://github.com/openwdl/wdl/blob/main/versions/1.1/SPEC.md#stripping-leading-whitespace>.
|
|
307
|
-
|
|
308
|
-
Operates on a WDL.Expr.String expression that has already been parsed.
|
|
309
|
-
|
|
310
|
-
:param tolerate_blanks: If True, don't allow totally blank lines to zero
|
|
311
|
-
the common whitespace.
|
|
312
|
-
|
|
313
|
-
:param tolerate_dedents: If True, remove as much of the whitespace on the
|
|
314
|
-
first indented line as is found on subesquent lines, regardless of
|
|
315
|
-
whether later lines are out-dented relative to it.
|
|
316
|
-
|
|
317
|
-
:param tolerate_all_whitespace: If True, don't allow all-whitespace lines
|
|
318
|
-
to reduce the common whitespace prefix.
|
|
319
|
-
|
|
320
|
-
:param debug: If True, the function will show its work by logging at debug
|
|
321
|
-
level.
|
|
322
|
-
"""
|
|
323
|
-
|
|
324
|
-
# The expression has a "parts" list consisting of interleaved string
|
|
325
|
-
# literals and placeholder expressions.
|
|
326
|
-
#
|
|
327
|
-
# TODO: We assume that there are no newlines in the placeholders.
|
|
328
|
-
#
|
|
329
|
-
# TODO: Look at the placeholders and their line and end_line values and try
|
|
330
|
-
# and guess if they should reduce the amount of common whitespace.
|
|
331
|
-
|
|
332
|
-
if debug:
|
|
333
|
-
logger.debug("Parts: %s", expression.parts)
|
|
334
|
-
|
|
335
|
-
# We split the parts list into lines, which are also interleaved string
|
|
336
|
-
# literals and placeholder expressions.
|
|
337
|
-
lines: list[list[str | WDL.Expr.Placeholder]] = [[]]
|
|
338
|
-
for part in expression.parts:
|
|
339
|
-
if isinstance(part, str):
|
|
340
|
-
# It's a string. Split it into lines.
|
|
341
|
-
part_lines = part.split("\n")
|
|
342
|
-
# Part before any newline goes at the end of the current line
|
|
343
|
-
lines[-1].append(part_lines[0])
|
|
344
|
-
for part_line in part_lines[1:]:
|
|
345
|
-
# Any part after a newline starts a new line
|
|
346
|
-
lines.append([part_line])
|
|
347
|
-
else:
|
|
348
|
-
# It's a placeholder. Put it at the end of the current line.
|
|
349
|
-
lines[-1].append(part)
|
|
350
|
-
|
|
351
|
-
if debug:
|
|
352
|
-
logger.debug("Lines: %s", lines)
|
|
353
|
-
|
|
354
|
-
# Then we compute the common amount of leading whitespace on all the lines,
|
|
355
|
-
# looking at the first string literal.
|
|
356
|
-
# This will be the longest common whitespace prefix, or None if not yet detected.
|
|
357
|
-
common_whitespace_prefix: str | None = None
|
|
358
|
-
for line in lines:
|
|
359
|
-
if len(line) == 0:
|
|
360
|
-
# TODO: how should totally empty lines be handled? Not in the spec!
|
|
361
|
-
if not tolerate_blanks:
|
|
362
|
-
# There's no leading whitespace here!
|
|
363
|
-
common_whitespace_prefix = ""
|
|
364
|
-
continue
|
|
365
|
-
elif isinstance(line[0], WDL.Expr.Placeholder):
|
|
366
|
-
# TODO: How can we convert MiniWDL's column numbers into space/tab counts or sequences?
|
|
367
|
-
#
|
|
368
|
-
# For now just skip these too.
|
|
369
|
-
continue
|
|
370
|
-
else:
|
|
371
|
-
# The line starts with a string
|
|
372
|
-
assert isinstance(line[0], str)
|
|
373
|
-
if len(line[0]) == 0:
|
|
374
|
-
# Still totally empty though!
|
|
375
|
-
if not tolerate_blanks:
|
|
376
|
-
# There's no leading whitespace here!
|
|
377
|
-
common_whitespace_prefix = ""
|
|
378
|
-
continue
|
|
379
|
-
if (
|
|
380
|
-
len(line) == 1
|
|
381
|
-
and tolerate_all_whitespace
|
|
382
|
-
and all(x in (" ", "\t") for x in line[0])
|
|
383
|
-
):
|
|
384
|
-
# All-whitespace lines shouldn't count
|
|
385
|
-
continue
|
|
386
|
-
# TODO: There are good algorithms for common prefixes. This is a bad one.
|
|
387
|
-
# Find the number of leading whitespace characters
|
|
388
|
-
line_whitespace_end = 0
|
|
389
|
-
while line_whitespace_end < len(line[0]) and line[0][
|
|
390
|
-
line_whitespace_end
|
|
391
|
-
] in (" ", "\t"):
|
|
392
|
-
line_whitespace_end += 1
|
|
393
|
-
# Find the string of leading whitespace characters
|
|
394
|
-
line_whitespace_prefix = line[0][:line_whitespace_end]
|
|
395
|
-
|
|
396
|
-
if " " in line_whitespace_prefix and "\t" in line_whitespace_prefix:
|
|
397
|
-
# Warn and don't change anything if spaces and tabs are mixed, per the spec.
|
|
398
|
-
logger.warning(
|
|
399
|
-
"Line in command at %s mixes leading spaces and tabs! Not removing leading whitespace!",
|
|
400
|
-
expression.pos,
|
|
401
|
-
)
|
|
402
|
-
return expression
|
|
403
|
-
|
|
404
|
-
if common_whitespace_prefix is None:
|
|
405
|
-
# This is the first line we found, so it automatically has the common prefic
|
|
406
|
-
common_whitespace_prefix = line_whitespace_prefix
|
|
407
|
-
elif not tolerate_dedents:
|
|
408
|
-
# Trim the common prefix down to what we have for this line
|
|
409
|
-
if not line_whitespace_prefix.startswith(common_whitespace_prefix):
|
|
410
|
-
# Shorten to the real shared prefix.
|
|
411
|
-
# Hackily make os.path do it for us,
|
|
412
|
-
# character-by-character. See
|
|
413
|
-
# <https://stackoverflow.com/a/6718435>
|
|
414
|
-
common_whitespace_prefix = os.path.commonprefix(
|
|
415
|
-
[common_whitespace_prefix, line_whitespace_prefix]
|
|
416
|
-
)
|
|
417
|
-
|
|
418
|
-
if common_whitespace_prefix is None:
|
|
419
|
-
common_whitespace_prefix = ""
|
|
420
|
-
|
|
421
|
-
if debug:
|
|
422
|
-
logger.debug("Common Prefix: '%s'", common_whitespace_prefix)
|
|
423
|
-
|
|
424
|
-
# Then we trim that much whitespace off all the leading strings.
|
|
425
|
-
# We tolerate the common prefix not *actually* being common and remove as
|
|
426
|
-
# much of it as is there, to support tolerate_dedents.
|
|
427
|
-
|
|
428
|
-
def first_mismatch(prefix: str, value: str) -> int:
|
|
429
|
-
"""
|
|
430
|
-
Get the index of the first character in value that does not match the corresponding character in prefix, or the length of the shorter string.
|
|
431
|
-
"""
|
|
432
|
-
for n, (c1, c2) in enumerate(zip(prefix, value)):
|
|
433
|
-
if c1 != c2:
|
|
434
|
-
return n
|
|
435
|
-
return min(len(prefix), len(value))
|
|
436
|
-
|
|
437
|
-
# Trim up to the first mismatch vs. the common prefix if the line starts with a string literal.
|
|
438
|
-
stripped_lines = [
|
|
439
|
-
(
|
|
440
|
-
(
|
|
441
|
-
cast(
|
|
442
|
-
list[Union[str, WDL.Expr.Placeholder]],
|
|
443
|
-
[line[0][first_mismatch(common_whitespace_prefix, line[0]) :]],
|
|
444
|
-
)
|
|
445
|
-
+ line[1:]
|
|
446
|
-
)
|
|
447
|
-
if len(line) > 0 and isinstance(line[0], str)
|
|
448
|
-
else line
|
|
449
|
-
)
|
|
450
|
-
for line in lines
|
|
451
|
-
]
|
|
452
|
-
if debug:
|
|
453
|
-
logger.debug("Stripped Lines: %s", stripped_lines)
|
|
454
|
-
|
|
455
|
-
# Then we reassemble the parts and make a new expression.
|
|
456
|
-
# Build lists and turn the lists into strings later
|
|
457
|
-
new_parts: list[list[str] | WDL.Expr.Placeholder] = []
|
|
458
|
-
for i, line in enumerate(stripped_lines):
|
|
459
|
-
if i > 0:
|
|
460
|
-
# This is a second line, so we need to tack on a newline.
|
|
461
|
-
if len(new_parts) > 0 and isinstance(new_parts[-1], list):
|
|
462
|
-
# Tack on to existing string collection
|
|
463
|
-
new_parts[-1].append("\n")
|
|
464
|
-
else:
|
|
465
|
-
# Make a new string collection
|
|
466
|
-
new_parts.append(["\n"])
|
|
467
|
-
if len(line) > 0 and isinstance(line[0], str) and i > 0:
|
|
468
|
-
# Line starts with a string we need to merge with the last string.
|
|
469
|
-
# We know the previous line now ends with a string collection, so tack it on.
|
|
470
|
-
assert isinstance(new_parts[-1], list)
|
|
471
|
-
new_parts[-1].append(line[0])
|
|
472
|
-
# Make all the strings into string collections in the rest of the line
|
|
473
|
-
new_parts += [([x] if isinstance(x, str) else x) for x in line[1:]]
|
|
474
|
-
else:
|
|
475
|
-
# No string merge necessary
|
|
476
|
-
# Make all the strings into string collections in the whole line
|
|
477
|
-
new_parts += [([x] if isinstance(x, str) else x) for x in line]
|
|
478
|
-
|
|
479
|
-
if debug:
|
|
480
|
-
logger.debug("New Parts: %s", new_parts)
|
|
481
|
-
|
|
482
|
-
# Now go back to the alternating strings and placeholders that MiniWDL wants
|
|
483
|
-
new_parts_merged: list[str | WDL.Expr.Placeholder] = [
|
|
484
|
-
("".join(x) if isinstance(x, list) else x) for x in new_parts
|
|
485
|
-
]
|
|
486
|
-
|
|
487
|
-
if debug:
|
|
488
|
-
logger.debug("New Parts Merged: %s", new_parts_merged)
|
|
489
|
-
|
|
490
|
-
modified = WDL.Expr.String(expression.pos, new_parts_merged, expression.command)
|
|
491
|
-
# Fake the type checking of the modified expression.
|
|
492
|
-
# TODO: Make MiniWDL expose a real way to do this?
|
|
493
|
-
modified._type = expression._type
|
|
494
|
-
return modified
|
|
495
|
-
|
|
496
|
-
|
|
497
298
|
async def toil_read_source(
|
|
498
299
|
uri: str, path: list[str], importer: WDL.Tree.Document | None
|
|
499
300
|
) -> ReadSourceResult:
|
|
@@ -514,7 +315,7 @@ async def toil_read_source(
|
|
|
514
315
|
tried.append(candidate_uri)
|
|
515
316
|
try:
|
|
516
317
|
# TODO: this is probably sync work that would be better as async work here
|
|
517
|
-
|
|
318
|
+
URLAccess.read_from_url(candidate_uri, destination_buffer)
|
|
518
319
|
except Exception as e:
|
|
519
320
|
if isinstance(e, SyntaxError) or isinstance(e, NameError):
|
|
520
321
|
# These are probably actual problems with the code and not
|
|
@@ -1142,17 +943,29 @@ def evaluate_decls_to_bindings(
|
|
|
1142
943
|
standard_library: ToilWDLStdLibBase,
|
|
1143
944
|
include_previous: bool = False,
|
|
1144
945
|
drop_missing_files: bool = False,
|
|
946
|
+
expressions_are_defaults: bool = False,
|
|
1145
947
|
) -> WDLBindings:
|
|
1146
948
|
"""
|
|
1147
949
|
Evaluate decls with a given bindings environment and standard library.
|
|
950
|
+
|
|
1148
951
|
Creates a new bindings object that only contains the bindings from the given decls.
|
|
1149
952
|
Guarantees that each decl in `decls` can access the variables defined by the previous ones.
|
|
953
|
+
|
|
1150
954
|
:param all_bindings: Environment to use when evaluating decls
|
|
1151
955
|
:param decls: Decls to evaluate
|
|
1152
956
|
:param standard_library: Standard library
|
|
1153
|
-
:param include_previous: Whether to include the existing environment in the
|
|
1154
|
-
|
|
1155
|
-
|
|
957
|
+
:param include_previous: Whether to include the existing environment in the
|
|
958
|
+
new returned environment. This will be false for outputs where only
|
|
959
|
+
defined decls should be included
|
|
960
|
+
:param drop_missing_files: Whether to coerce nonexistent files to null. The
|
|
961
|
+
coerced elements will be checked that the transformation is valid.
|
|
962
|
+
Currently should only be enabled in output sections, see
|
|
963
|
+
https://github.com/openwdl/wdl/issues/673#issuecomment-2248828116.
|
|
964
|
+
:param expressions_are_defaults: If True, value expressions in decls are
|
|
965
|
+
treated as default values, and there may be existing values in the
|
|
966
|
+
incoming environment that take precedence. If False, each decl is taken
|
|
967
|
+
to be a fresh definition, and expressions are always evaluated and
|
|
968
|
+
used.
|
|
1156
969
|
:return: New bindings object
|
|
1157
970
|
"""
|
|
1158
971
|
# all_bindings contains current bindings + previous all_bindings
|
|
@@ -1162,9 +975,14 @@ def evaluate_decls_to_bindings(
|
|
|
1162
975
|
drop_if_missing, standard_library=standard_library
|
|
1163
976
|
)
|
|
1164
977
|
for each_decl in decls:
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
978
|
+
if expressions_are_defaults:
|
|
979
|
+
output_value = evaluate_defaultable_decl(
|
|
980
|
+
each_decl, all_bindings, standard_library
|
|
981
|
+
)
|
|
982
|
+
else:
|
|
983
|
+
output_value = evaluate_decl(
|
|
984
|
+
each_decl, all_bindings, standard_library
|
|
985
|
+
)
|
|
1168
986
|
if drop_missing_files:
|
|
1169
987
|
dropped_output_value = map_over_typed_files_in_value(
|
|
1170
988
|
output_value, drop_if_missing_with_workdir
|
|
@@ -1223,7 +1041,7 @@ class NonDownloadingSize(WDL.StdLib._Size):
|
|
|
1223
1041
|
else:
|
|
1224
1042
|
# This is some other kind of remote file.
|
|
1225
1043
|
# We need to get its size from the URI.
|
|
1226
|
-
item_size =
|
|
1044
|
+
item_size = URLAccess.get_size(uri)
|
|
1227
1045
|
if item_size is None:
|
|
1228
1046
|
# User asked for the size and we can't figure it out efficiently, so bail out.
|
|
1229
1047
|
raise RuntimeError(f"Attempt to check the size of {uri} failed")
|
|
@@ -1374,7 +1192,7 @@ def convert_remote_files(
|
|
|
1374
1192
|
tried.append(candidate_uri)
|
|
1375
1193
|
try:
|
|
1376
1194
|
# Try polling existence first.
|
|
1377
|
-
polled_existence =
|
|
1195
|
+
polled_existence = URLAccess.url_exists(candidate_uri)
|
|
1378
1196
|
if polled_existence is False:
|
|
1379
1197
|
# Known not to exist
|
|
1380
1198
|
logger.debug("URL does not exist: %s", candidate_uri)
|
|
@@ -1772,7 +1590,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1772
1590
|
# Open it exclusively
|
|
1773
1591
|
with open(dest_path, "xb") as dest_file:
|
|
1774
1592
|
# And save to it
|
|
1775
|
-
size, executable =
|
|
1593
|
+
size, executable = URLAccess.read_from_url(filename, dest_file)
|
|
1776
1594
|
if executable:
|
|
1777
1595
|
# Set the execute bit in the file's permissions
|
|
1778
1596
|
os.chmod(dest_path, os.stat(dest_path).st_mode | stat.S_IXUSR)
|
|
@@ -2534,11 +2352,15 @@ def evaluate_decl(
|
|
|
2534
2352
|
"""
|
|
2535
2353
|
Evaluate the expression of a declaration node, or raise an error.
|
|
2536
2354
|
"""
|
|
2537
|
-
|
|
2538
|
-
|
|
2539
|
-
|
|
2540
|
-
|
|
2541
|
-
|
|
2355
|
+
try:
|
|
2356
|
+
return evaluate_named_expression(
|
|
2357
|
+
node, node.name, node.type, node.expr, environment, stdlib
|
|
2358
|
+
)
|
|
2359
|
+
except Exception:
|
|
2360
|
+
# If something goes wrong, dump.
|
|
2361
|
+
logger.exception("Evaluation failed for %s", node)
|
|
2362
|
+
log_bindings(logger.error, "Statement was evaluated in:", [environment])
|
|
2363
|
+
raise
|
|
2542
2364
|
|
|
2543
2365
|
def evaluate_call_inputs(
|
|
2544
2366
|
context: WDL.Error.SourceNode | WDL.Error.SourcePosition,
|
|
@@ -2581,33 +2403,28 @@ def evaluate_defaultable_decl(
|
|
|
2581
2403
|
If the name of the declaration is already defined in the environment, return its value. Otherwise, return the evaluated expression.
|
|
2582
2404
|
"""
|
|
2583
2405
|
|
|
2584
|
-
|
|
2585
|
-
|
|
2586
|
-
|
|
2587
|
-
|
|
2588
|
-
)
|
|
2589
|
-
|
|
2590
|
-
|
|
2591
|
-
)
|
|
2592
|
-
|
|
2593
|
-
|
|
2594
|
-
return environment[node.name].coerce(node.type)
|
|
2595
|
-
else:
|
|
2596
|
-
return environment[node.name]
|
|
2406
|
+
if (
|
|
2407
|
+
node.name in environment
|
|
2408
|
+
and not isinstance(environment[node.name], WDL.Value.Null)
|
|
2409
|
+
) or (
|
|
2410
|
+
isinstance(environment.get(node.name), WDL.Value.Null)
|
|
2411
|
+
and node.type.optional
|
|
2412
|
+
):
|
|
2413
|
+
logger.debug("Name %s is already defined, not using default", node.name)
|
|
2414
|
+
if not isinstance(environment[node.name].type, type(node.type)):
|
|
2415
|
+
return environment[node.name].coerce(node.type)
|
|
2597
2416
|
else:
|
|
2598
|
-
|
|
2599
|
-
|
|
2600
|
-
|
|
2601
|
-
|
|
2602
|
-
|
|
2603
|
-
|
|
2604
|
-
|
|
2605
|
-
|
|
2606
|
-
|
|
2607
|
-
|
|
2608
|
-
|
|
2609
|
-
log_bindings(logger.error, "Statement was evaluated in:", [environment])
|
|
2610
|
-
raise
|
|
2417
|
+
return environment[node.name]
|
|
2418
|
+
else:
|
|
2419
|
+
if node.type is not None and not node.type.optional and node.expr is None:
|
|
2420
|
+
# We need a value for this but there isn't one.
|
|
2421
|
+
raise WDL.Error.EvalError(
|
|
2422
|
+
node,
|
|
2423
|
+
f"Value for {node.name} was not provided and no default value is available",
|
|
2424
|
+
)
|
|
2425
|
+
logger.info("Defaulting %s to %s", node.name, node.expr)
|
|
2426
|
+
return evaluate_decl(node, environment, stdlib)
|
|
2427
|
+
|
|
2611
2428
|
|
|
2612
2429
|
|
|
2613
2430
|
# TODO: make these stdlib methods???
|
|
@@ -2719,7 +2536,7 @@ def drop_if_missing(
|
|
|
2719
2536
|
|
|
2720
2537
|
if filename is not None and is_any_url(filename):
|
|
2721
2538
|
try:
|
|
2722
|
-
if filename.startswith(TOIL_URI_SCHEME) or
|
|
2539
|
+
if filename.startswith(TOIL_URI_SCHEME) or URLAccess.url_exists(
|
|
2723
2540
|
filename
|
|
2724
2541
|
):
|
|
2725
2542
|
# We assume anything in the filestore actually exists.
|
|
@@ -2835,64 +2652,52 @@ def map_over_files_in_binding(
|
|
|
2835
2652
|
binding.info,
|
|
2836
2653
|
)
|
|
2837
2654
|
|
|
2655
|
+
def remove_expr_from_value(value: WDL.Value.Base) -> WDL.Value.Base:
|
|
2656
|
+
"""
|
|
2657
|
+
Remove the expression from a WDL value
|
|
2658
|
+
:param value: Original WDL value
|
|
2659
|
+
:return: New WDL value without the expr field
|
|
2660
|
+
"""
|
|
2661
|
+
# TODO: This is an extra copy that we could get rid of by dropping the immutability idea
|
|
2662
|
+
def predicate(value: WDL.Value.Base) -> WDL.Value.Base:
|
|
2663
|
+
# Do a shallow copy to preserve immutability
|
|
2664
|
+
new_value = copy.copy(value)
|
|
2665
|
+
if value.expr:
|
|
2666
|
+
# We use a Null expr instead of None here, because when evaluating an expression,
|
|
2667
|
+
# MiniWDL applies that expression to the result value *and* all values it contains that
|
|
2668
|
+
# have None expressions. Using a Null expression here protects nested values that
|
|
2669
|
+
# didn't really get created by the current expression from being attributed to it, while
|
|
2670
|
+
# still cutting the reference to the parsed WDL document.
|
|
2671
|
+
new_value._expr = WDL.Expr.Null(value.expr.pos)
|
|
2672
|
+
else:
|
|
2673
|
+
new_value._expr = value.expr
|
|
2674
|
+
return new_value
|
|
2675
|
+
return map_over_typed_value(value, predicate)
|
|
2838
2676
|
|
|
2839
|
-
# TODO: We want to type this to say, for anything descended from a WDL type, we
|
|
2840
|
-
# return something descended from the same WDL type or a null. But I can't
|
|
2841
|
-
# quite do that with generics, since you could pass in some extended WDL value
|
|
2842
|
-
# type we've never heard of and expect to get one of those out.
|
|
2843
|
-
#
|
|
2844
|
-
# For now we assume that any types extending the WDL value types will implement
|
|
2845
|
-
# compatible constructors.
|
|
2846
|
-
def map_over_typed_files_in_value(
|
|
2847
|
-
value: WDL.Value.Base, transform: Callable[[WDL.Value.File], WDL.Value.File | None]
|
|
2848
|
-
) -> WDL.Value.Base:
|
|
2849
|
-
"""
|
|
2850
|
-
Run all File values embedded in the given value through the given
|
|
2851
|
-
transformation function.
|
|
2852
|
-
|
|
2853
|
-
The transformation function must not mutate the original File.
|
|
2854
|
-
|
|
2855
|
-
If the transform returns None, the file value is changed to Null.
|
|
2856
|
-
|
|
2857
|
-
The transform has access to the type information for the value, so it knows
|
|
2858
|
-
if it may return None, depending on if the value is optional or not.
|
|
2859
2677
|
|
|
2860
|
-
|
|
2861
|
-
actually be used, to allow for scans. So error checking needs to be part of
|
|
2862
|
-
the transform itself.
|
|
2678
|
+
def map_over_typed_value(value: WDL.Value.Base, transform: Callable[[WDL.Value.Base], WDL.Value.Base]) -> WDL.Value.Base:
|
|
2863
2679
|
"""
|
|
2864
|
-
|
|
2865
|
-
|
|
2866
|
-
|
|
2867
|
-
|
|
2868
|
-
|
|
2869
|
-
|
|
2870
|
-
), "Transformation mutated the original File"
|
|
2871
|
-
if new_file is None:
|
|
2872
|
-
# Assume the transform checked types if we actually care about the
|
|
2873
|
-
# result.
|
|
2874
|
-
logger.warning("File %s became Null", value)
|
|
2875
|
-
return WDL.Value.Null()
|
|
2876
|
-
else:
|
|
2877
|
-
# Make whatever the value is around the new path.
|
|
2878
|
-
# TODO: why does this need casting?
|
|
2879
|
-
return new_file
|
|
2880
|
-
elif isinstance(value, WDL.Value.Array):
|
|
2680
|
+
Apply a transform to a WDL value and all contained WDL values.
|
|
2681
|
+
:param value: WDL value to transform
|
|
2682
|
+
:param transform: Function that takes a WDL value and returns a new WDL value
|
|
2683
|
+
:return: New transformed WDL value
|
|
2684
|
+
"""
|
|
2685
|
+
if isinstance(value, WDL.Value.Array):
|
|
2881
2686
|
# This is an array, so recurse on the items
|
|
2882
|
-
|
|
2687
|
+
value = WDL.Value.Array(
|
|
2883
2688
|
value.type.item_type,
|
|
2884
|
-
[
|
|
2689
|
+
[map_over_typed_value(v, transform) for v in value.value],
|
|
2885
2690
|
value.expr,
|
|
2886
2691
|
)
|
|
2887
2692
|
elif isinstance(value, WDL.Value.Map):
|
|
2888
2693
|
# This is a map, so recurse on the members of the items, which are tuples (but not wrapped as WDL Pair objects)
|
|
2889
2694
|
# TODO: Can we avoid a cast in a comprehension if we get MyPy to know that each pair is always a 2-element tuple?
|
|
2890
|
-
|
|
2695
|
+
value = WDL.Value.Map(
|
|
2891
2696
|
value.type.item_type,
|
|
2892
2697
|
[
|
|
2893
2698
|
cast(
|
|
2894
2699
|
tuple[WDL.Value.Base, WDL.Value.Base],
|
|
2895
|
-
tuple(
|
|
2700
|
+
tuple(map_over_typed_value(v, transform) for v in pair),
|
|
2896
2701
|
)
|
|
2897
2702
|
for pair in value.value
|
|
2898
2703
|
],
|
|
@@ -2900,29 +2705,74 @@ def map_over_typed_files_in_value(
|
|
|
2900
2705
|
)
|
|
2901
2706
|
elif isinstance(value, WDL.Value.Pair):
|
|
2902
2707
|
# This is a pair, so recurse on the left and right items
|
|
2903
|
-
|
|
2708
|
+
value = WDL.Value.Pair(
|
|
2904
2709
|
value.type.left_type,
|
|
2905
2710
|
value.type.right_type,
|
|
2906
2711
|
cast(
|
|
2907
2712
|
tuple[WDL.Value.Base, WDL.Value.Base],
|
|
2908
|
-
tuple(
|
|
2713
|
+
tuple(map_over_typed_value(v, transform) for v in value.value),
|
|
2909
2714
|
),
|
|
2910
2715
|
value.expr,
|
|
2911
2716
|
)
|
|
2912
2717
|
elif isinstance(value, WDL.Value.Struct):
|
|
2913
2718
|
# This is a struct, so recurse on the values in the backing dict
|
|
2914
|
-
|
|
2719
|
+
value = WDL.Value.Struct(
|
|
2915
2720
|
cast(Union[WDL.Type.StructInstance, WDL.Type.Object], value.type),
|
|
2916
2721
|
{
|
|
2917
|
-
k:
|
|
2722
|
+
k: map_over_typed_value(v, transform)
|
|
2918
2723
|
for k, v in value.value.items()
|
|
2919
2724
|
},
|
|
2920
2725
|
value.expr,
|
|
2921
2726
|
)
|
|
2922
|
-
|
|
2923
|
-
|
|
2727
|
+
# Run the predicate on the final value
|
|
2728
|
+
return transform(value)
|
|
2729
|
+
|
|
2730
|
+
|
|
2731
|
+
# TODO: We want to type this to say, for anything descended from a WDL type, we
|
|
2732
|
+
# return something descended from the same WDL type or a null. But I can't
|
|
2733
|
+
# quite do that with generics, since you could pass in some extended WDL value
|
|
2734
|
+
# type we've never heard of and expect to get one of those out.
|
|
2735
|
+
#
|
|
2736
|
+
# For now we assume that any types extending the WDL value types will implement
|
|
2737
|
+
# compatible constructors.
|
|
2738
|
+
def map_over_typed_files_in_value(
|
|
2739
|
+
value: WDL.Value.Base, transform: Callable[[WDL.Value.File], WDL.Value.File | None]
|
|
2740
|
+
) -> WDL.Value.Base:
|
|
2741
|
+
"""
|
|
2742
|
+
Run all File values embedded in the given value through the given
|
|
2743
|
+
transformation function.
|
|
2744
|
+
|
|
2745
|
+
The transformation function must not mutate the original File.
|
|
2746
|
+
|
|
2747
|
+
If the transform returns None, the file value is changed to Null.
|
|
2748
|
+
|
|
2749
|
+
The transform has access to the type information for the value, so it knows
|
|
2750
|
+
if it may return None, depending on if the value is optional or not.
|
|
2751
|
+
|
|
2752
|
+
The transform is *allowed* to return None only if the mapping result won't
|
|
2753
|
+
actually be used, to allow for scans. So error checking needs to be part of
|
|
2754
|
+
the transform itself.
|
|
2755
|
+
"""
|
|
2756
|
+
def predicate(value: WDL.Value.Base) -> WDL.Value.Base:
|
|
2757
|
+
if isinstance(value, WDL.Value.File):
|
|
2758
|
+
# This is a file so we need to process it
|
|
2759
|
+
orig_file_value = value.value
|
|
2760
|
+
new_file = transform(value)
|
|
2761
|
+
assert (
|
|
2762
|
+
value.value == orig_file_value
|
|
2763
|
+
), "Transformation mutated the original File"
|
|
2764
|
+
if new_file is None:
|
|
2765
|
+
# Assume the transform checked types if we actually care about the
|
|
2766
|
+
# result.
|
|
2767
|
+
logger.warning("File %s became Null", value)
|
|
2768
|
+
return WDL.Value.Null()
|
|
2769
|
+
else:
|
|
2770
|
+
# Make whatever the value is around the new path.
|
|
2771
|
+
return new_file
|
|
2924
2772
|
return value
|
|
2925
2773
|
|
|
2774
|
+
return map_over_typed_value(value, predicate)
|
|
2775
|
+
|
|
2926
2776
|
|
|
2927
2777
|
def ensure_null_files_are_nullable(
|
|
2928
2778
|
value: WDL.Value.Base, original_value: WDL.Value.Base, expected_type: WDL.Type.Base
|
|
@@ -3065,6 +2915,11 @@ class WDLBaseJob(Job):
|
|
|
3065
2915
|
logger.debug("Overlay %s after %s", overlay, self)
|
|
3066
2916
|
self._postprocessing_steps.append(("overlay", overlay))
|
|
3067
2917
|
|
|
2918
|
+
def remove_expr_from_bindings(self, bindings: WDLBindings) -> WDLBindings:
|
|
2919
|
+
# We have to throw out the expressions because they drag the entire WDL document into the WDL outputs
|
|
2920
|
+
# which causes duplicate pickling and linear growth in scatter memory usage
|
|
2921
|
+
return bindings.map(lambda b: WDL.Env.Binding(b.name, remove_expr_from_value(b.value), b.info))
|
|
2922
|
+
|
|
3068
2923
|
def postprocess(self, bindings: WDLBindings) -> WDLBindings:
|
|
3069
2924
|
"""
|
|
3070
2925
|
Apply queued changes to bindings.
|
|
@@ -3101,7 +2956,7 @@ class WDLBaseJob(Job):
|
|
|
3101
2956
|
bindings = combine_bindings([bindings.subtract(argument), argument])
|
|
3102
2957
|
else:
|
|
3103
2958
|
raise RuntimeError(f"Unknown postprocessing action {action}")
|
|
3104
|
-
|
|
2959
|
+
bindings = self.remove_expr_from_bindings(bindings)
|
|
3105
2960
|
return bindings
|
|
3106
2961
|
|
|
3107
2962
|
def defer_postprocessing(self, other: WDLBaseJob) -> None:
|
|
@@ -3228,7 +3083,11 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3228
3083
|
logger.debug("Evaluating task code")
|
|
3229
3084
|
# Evaluate all the inputs that aren't pre-set
|
|
3230
3085
|
bindings = evaluate_decls_to_bindings(
|
|
3231
|
-
self._task.inputs,
|
|
3086
|
+
self._task.inputs,
|
|
3087
|
+
bindings,
|
|
3088
|
+
standard_library,
|
|
3089
|
+
include_previous=True,
|
|
3090
|
+
expressions_are_defaults=True
|
|
3232
3091
|
)
|
|
3233
3092
|
if self._task.postinputs:
|
|
3234
3093
|
# Evaluate all the postinput decls.
|
|
@@ -3779,6 +3638,8 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
3779
3638
|
"is not yet implemented in the MiniWDL Docker "
|
|
3780
3639
|
"containerization implementation."
|
|
3781
3640
|
)
|
|
3641
|
+
if runtime_bindings.has_binding("memory") and human2bytes(runtime_bindings.resolve("memory").value) < human2bytes("4MiB"):
|
|
3642
|
+
runtime_bindings.resolve("memory").value = "4MiB"
|
|
3782
3643
|
else:
|
|
3783
3644
|
raise RuntimeError(
|
|
3784
3645
|
f"Could not find a working container engine to use; told to use {self._wdl_options.get('container')}"
|
|
@@ -4011,7 +3872,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
4011
3872
|
self._task,
|
|
4012
3873
|
"command",
|
|
4013
3874
|
WDL.Type.String(),
|
|
4014
|
-
|
|
3875
|
+
self._task.command,
|
|
4015
3876
|
contained_bindings,
|
|
4016
3877
|
command_library,
|
|
4017
3878
|
)
|
|
@@ -4943,6 +4804,12 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
4943
4804
|
[(p, p) for p in standard_library.get_local_paths()]
|
|
4944
4805
|
)
|
|
4945
4806
|
|
|
4807
|
+
# Set the exprs of the WDL values to WDL.Expr.Null to reduce the memory footprint. This got set from evaluate_named_expression
|
|
4808
|
+
# because any evaluation on an expression will mutate child values of the result values of the expression, and we had not
|
|
4809
|
+
# processed it yet by this point as the bindings from input environment and WDLWorkflowJob do not get processing and postprocessing
|
|
4810
|
+
# ran respectively
|
|
4811
|
+
bindings = self.remove_expr_from_bindings(bindings)
|
|
4812
|
+
|
|
4946
4813
|
if not isinstance(scatter_value, WDL.Value.Array):
|
|
4947
4814
|
raise RuntimeError(
|
|
4948
4815
|
"The returned value from a scatter is not an Array type."
|
|
@@ -4955,6 +4822,8 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
4955
4822
|
# duration of the body.
|
|
4956
4823
|
local_bindings: WDLBindings = WDL.Env.Bindings()
|
|
4957
4824
|
local_bindings = local_bindings.bind(self._scatter.variable, item)
|
|
4825
|
+
# Remove expr from new scatter binding
|
|
4826
|
+
local_bindings = self.remove_expr_from_bindings(local_bindings)
|
|
4958
4827
|
# TODO: We need to turn values() into a list because MyPy seems to
|
|
4959
4828
|
# think a dict_values isn't a Sequence. This is a waste of time to
|
|
4960
4829
|
# appease MyPy but probably better than a cast?
|
|
@@ -5244,6 +5113,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5244
5113
|
bindings,
|
|
5245
5114
|
standard_library,
|
|
5246
5115
|
include_previous=True,
|
|
5116
|
+
expressions_are_defaults=True,
|
|
5247
5117
|
)
|
|
5248
5118
|
finally:
|
|
5249
5119
|
# Report all files are downloaded now that all expressions are evaluated.
|
|
@@ -5319,9 +5189,8 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5319
5189
|
|
|
5320
5190
|
try:
|
|
5321
5191
|
if self._workflow.outputs is not None:
|
|
5322
|
-
# Output section is declared and is nonempty, so evaluate normally
|
|
5323
|
-
|
|
5324
|
-
# Combine the bindings from the previous job
|
|
5192
|
+
# Output section is declared and is nonempty, so evaluate normally.
|
|
5193
|
+
# Don't drop nonexistent files here; we do that later.
|
|
5325
5194
|
output_bindings = evaluate_decls_to_bindings(
|
|
5326
5195
|
self._workflow.outputs, unwrap(self._bindings), standard_library
|
|
5327
5196
|
)
|
|
@@ -5332,7 +5201,8 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5332
5201
|
if self._workflow.outputs is None or self._wdl_options.get(
|
|
5333
5202
|
"all_call_outputs", False
|
|
5334
5203
|
):
|
|
5335
|
-
# The output section is not declared, or we want to keep task
|
|
5204
|
+
# The output section is not declared, or we want to keep task
|
|
5205
|
+
# outputs anyway on top of an already-evaluated output section.
|
|
5336
5206
|
|
|
5337
5207
|
# Get all task outputs and return that
|
|
5338
5208
|
# First get all task output names
|
|
@@ -5363,16 +5233,6 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5363
5233
|
output_bindings = output_bindings.bind(
|
|
5364
5234
|
binding.name, binding.value
|
|
5365
5235
|
)
|
|
5366
|
-
else:
|
|
5367
|
-
# Output section is declared and is nonempty, so evaluate normally
|
|
5368
|
-
|
|
5369
|
-
# Combine the bindings from the previous job
|
|
5370
|
-
output_bindings = evaluate_decls_to_bindings(
|
|
5371
|
-
self._workflow.outputs,
|
|
5372
|
-
unwrap(self._bindings),
|
|
5373
|
-
standard_library,
|
|
5374
|
-
drop_missing_files=True,
|
|
5375
|
-
)
|
|
5376
5236
|
finally:
|
|
5377
5237
|
# We don't actually know when all our files are downloaded since
|
|
5378
5238
|
# anything we evaluate might devirtualize inside any expression.
|
|
@@ -5391,6 +5251,13 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5391
5251
|
output_bindings, standard_library=standard_library
|
|
5392
5252
|
)
|
|
5393
5253
|
|
|
5254
|
+
# TODO: Unify the rest of this with task output management somehow
|
|
5255
|
+
|
|
5256
|
+
# Upload any files in the outputs if not uploaded already.
|
|
5257
|
+
# We need this because it's possible to create new files in a workflow
|
|
5258
|
+
# outputs section.
|
|
5259
|
+
output_bindings = virtualize_files(output_bindings, standard_library)
|
|
5260
|
+
|
|
5394
5261
|
if self._cache_key is not None:
|
|
5395
5262
|
output_bindings = fill_execution_cache(
|
|
5396
5263
|
self._cache_key, output_bindings, file_store, self._wdl_options
|
|
@@ -5512,7 +5379,7 @@ class WDLImportWrapper(WDLSectionJob):
|
|
|
5512
5379
|
wdl_options: WDLContext,
|
|
5513
5380
|
inputs_search_path: list[str],
|
|
5514
5381
|
import_remote_files: bool,
|
|
5515
|
-
|
|
5382
|
+
import_workers_batchsize: ParseableIndivisibleResource,
|
|
5516
5383
|
import_workers_disk: ParseableIndivisibleResource,
|
|
5517
5384
|
**kwargs: Any,
|
|
5518
5385
|
):
|
|
@@ -5526,7 +5393,7 @@ class WDLImportWrapper(WDLSectionJob):
|
|
|
5526
5393
|
self._target = target
|
|
5527
5394
|
self._inputs_search_path = inputs_search_path
|
|
5528
5395
|
self._import_remote_files = import_remote_files
|
|
5529
|
-
self.
|
|
5396
|
+
self._import_workers_batchsize = import_workers_batchsize
|
|
5530
5397
|
self._import_workers_disk = import_workers_disk
|
|
5531
5398
|
|
|
5532
5399
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
@@ -5538,7 +5405,7 @@ class WDLImportWrapper(WDLSectionJob):
|
|
|
5538
5405
|
include_remote_files=self._import_remote_files,
|
|
5539
5406
|
execution_dir=self._wdl_options.get("execution_dir")
|
|
5540
5407
|
)
|
|
5541
|
-
imports_job = ImportsJob(file_to_data, self.
|
|
5408
|
+
imports_job = ImportsJob(file_to_data, self._import_workers_batchsize, self._import_workers_disk)
|
|
5542
5409
|
self.addChild(imports_job)
|
|
5543
5410
|
install_imports_job = WDLInstallImportsJob(
|
|
5544
5411
|
self._target.name, self._inputs, imports_job.rv()
|
|
@@ -5570,7 +5437,7 @@ def make_root_job(
|
|
|
5570
5437
|
wdl_options=wdl_options,
|
|
5571
5438
|
inputs_search_path=inputs_search_path,
|
|
5572
5439
|
import_remote_files=options.reference_inputs,
|
|
5573
|
-
|
|
5440
|
+
import_workers_batchsize=options.import_workers_batchsize,
|
|
5574
5441
|
import_workers_disk=options.import_workers_disk
|
|
5575
5442
|
)
|
|
5576
5443
|
else:
|
|
@@ -5644,6 +5511,7 @@ def main() -> None:
|
|
|
5644
5511
|
document: WDL.Tree.Document = WDL.load(
|
|
5645
5512
|
wdl_uri,
|
|
5646
5513
|
read_source=toil_read_source,
|
|
5514
|
+
check_quant=options.quant_check
|
|
5647
5515
|
)
|
|
5648
5516
|
|
|
5649
5517
|
# See if we're going to run a workflow or a task
|