toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +21 -10
- toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +3 -3
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +6 -8
- toil/common.py +532 -743
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +523 -520
- toil/cwl/utils.py +55 -10
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +36 -11
- toil/fileStores/cachingFileStore.py +607 -530
- toil/fileStores/nonCachingFileStore.py +43 -10
- toil/job.py +140 -75
- toil/jobStores/abstractJobStore.py +147 -79
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +1 -2
- toil/jobStores/fileJobStore.py +117 -19
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +71 -43
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +7 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +21 -0
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +1 -1
- toil/lib/threading.py +74 -26
- toil/options/common.py +738 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +32 -0
- toil/provisioners/abstractProvisioner.py +1 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +5 -1
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +3 -2
- toil/test/cwl/cwlTest.py +213 -90
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +0 -1
- toil/test/jobStores/jobStoreTest.py +27 -16
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +11 -16
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +14 -3
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +20 -0
- toil/test/wdl/wdltoil_test.py +148 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +240 -143
- toil/utils/toilStatus.py +1 -4
- toil/version.py +11 -11
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +999 -386
- toil/worker.py +25 -31
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
- toil-6.1.0a1.dist-info/RECORD +237 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/conftest.py +0 -23
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
toil/wdl/wdltoil.py
CHANGED
|
@@ -12,47 +12,123 @@
|
|
|
12
12
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
|
-
import argparse
|
|
16
15
|
import asyncio
|
|
17
|
-
import collections
|
|
18
|
-
import copy
|
|
19
16
|
import errno
|
|
20
|
-
import glob
|
|
21
17
|
import io
|
|
22
|
-
import itertools
|
|
23
18
|
import json
|
|
24
19
|
import logging
|
|
25
20
|
import os
|
|
26
21
|
import re
|
|
27
22
|
import shlex
|
|
28
23
|
import shutil
|
|
24
|
+
import stat
|
|
29
25
|
import subprocess
|
|
30
26
|
import sys
|
|
31
|
-
import tempfile
|
|
32
27
|
import uuid
|
|
33
|
-
|
|
34
|
-
from
|
|
35
|
-
from
|
|
36
|
-
from
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
28
|
+
from contextlib import ExitStack, contextmanager
|
|
29
|
+
from graphlib import TopologicalSorter
|
|
30
|
+
from tempfile import mkstemp
|
|
31
|
+
from typing import (Any,
|
|
32
|
+
Callable,
|
|
33
|
+
Dict,
|
|
34
|
+
Generator,
|
|
35
|
+
Iterable,
|
|
36
|
+
Iterator,
|
|
37
|
+
List,
|
|
38
|
+
Optional,
|
|
39
|
+
Sequence,
|
|
40
|
+
Set,
|
|
41
|
+
Tuple,
|
|
42
|
+
Type,
|
|
43
|
+
TypeVar,
|
|
44
|
+
Union,
|
|
45
|
+
cast)
|
|
46
|
+
from urllib.parse import quote, unquote, urljoin, urlsplit
|
|
47
|
+
|
|
48
|
+
import WDL.Error
|
|
43
49
|
import WDL.runtime.config
|
|
50
|
+
from configargparse import ArgParser, SUPPRESS
|
|
51
|
+
from WDL._util import byte_size_units, strip_leading_whitespace
|
|
52
|
+
from WDL.CLI import print_error
|
|
53
|
+
from WDL.runtime.backend.docker_swarm import SwarmContainer
|
|
54
|
+
from WDL.runtime.backend.singularity import SingularityContainer
|
|
55
|
+
from WDL.runtime.task_container import TaskContainer
|
|
44
56
|
|
|
45
|
-
from toil.common import
|
|
46
|
-
from toil.job import AcceleratorRequirement, Job, JobFunctionWrappingJob, Promise, Promised, accelerators_fully_satisfy, parse_accelerator, unwrap, unwrap_all
|
|
57
|
+
from toil.common import Toil, addOptions, check_and_create_default_config_file
|
|
47
58
|
from toil.fileStores import FileID
|
|
48
59
|
from toil.fileStores.abstractFileStore import AbstractFileStore
|
|
49
|
-
from toil.
|
|
60
|
+
from toil.job import (AcceleratorRequirement,
|
|
61
|
+
Job,
|
|
62
|
+
Promise,
|
|
63
|
+
Promised,
|
|
64
|
+
TemporaryID,
|
|
65
|
+
accelerators_fully_satisfy,
|
|
66
|
+
parse_accelerator,
|
|
67
|
+
unwrap,
|
|
68
|
+
unwrap_all)
|
|
69
|
+
from toil.jobStores.abstractJobStore import (AbstractJobStore,
|
|
70
|
+
UnimplementedURLException)
|
|
50
71
|
from toil.lib.conversions import convert_units, human2bytes
|
|
72
|
+
from toil.lib.io import mkdtemp
|
|
73
|
+
from toil.lib.memoize import memoize
|
|
51
74
|
from toil.lib.misc import get_user_name
|
|
52
75
|
from toil.lib.threading import global_mutex
|
|
53
76
|
|
|
54
77
|
logger = logging.getLogger(__name__)
|
|
55
78
|
|
|
79
|
+
|
|
80
|
+
@contextmanager
|
|
81
|
+
def wdl_error_reporter(task: str, exit: bool = False, log: Callable[[str], None] = logger.critical) -> Generator[None, None, None]:
|
|
82
|
+
"""
|
|
83
|
+
Run code in a context where WDL errors will be reported with pretty formatting.
|
|
84
|
+
"""
|
|
85
|
+
|
|
86
|
+
try:
|
|
87
|
+
yield
|
|
88
|
+
except (
|
|
89
|
+
WDL.Error.SyntaxError,
|
|
90
|
+
WDL.Error.ImportError,
|
|
91
|
+
WDL.Error.ValidationError,
|
|
92
|
+
WDL.Error.MultipleValidationErrors,
|
|
93
|
+
FileNotFoundError
|
|
94
|
+
) as e:
|
|
95
|
+
log("Could not " + task)
|
|
96
|
+
# These are the errors that MiniWDL's parser can raise and its reporter
|
|
97
|
+
# can report. See
|
|
98
|
+
# https://github.com/chanzuckerberg/miniwdl/blob/a780b1bf2db61f18de37616068968b2bb4c2d21c/WDL/CLI.py#L91-L97.
|
|
99
|
+
#
|
|
100
|
+
# We are going to use MiniWDL's pretty printer to print them.
|
|
101
|
+
print_error(e)
|
|
102
|
+
if exit:
|
|
103
|
+
# Stop right now
|
|
104
|
+
sys.exit(1)
|
|
105
|
+
else:
|
|
106
|
+
# Reraise the exception to stop
|
|
107
|
+
raise
|
|
108
|
+
|
|
109
|
+
F = TypeVar('F', bound=Callable[..., Any])
|
|
110
|
+
def report_wdl_errors(task: str, exit: bool = False, log: Callable[[str], None] = logger.critical) -> Callable[[F], F]:
|
|
111
|
+
"""
|
|
112
|
+
Create a decorator to report WDL errors with the given task message.
|
|
113
|
+
|
|
114
|
+
Decorator can then be applied to a function, and if a WDL error happens it
|
|
115
|
+
will say that it could not {task}.
|
|
116
|
+
"""
|
|
117
|
+
def decorator(decoratee: F) -> F:
|
|
118
|
+
"""
|
|
119
|
+
Decorate a function with WDL error reporting.
|
|
120
|
+
"""
|
|
121
|
+
def decorated(*args: Any, **kwargs: Any) -> Any:
|
|
122
|
+
"""
|
|
123
|
+
Run the decoratee and handle WDL errors.
|
|
124
|
+
"""
|
|
125
|
+
with wdl_error_reporter(task, exit=exit, log=log):
|
|
126
|
+
return decoratee(*args, **kwargs)
|
|
127
|
+
return cast(F, decorated)
|
|
128
|
+
return decorator
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
|
|
56
132
|
def potential_absolute_uris(uri: str, path: List[str], importer: Optional[WDL.Tree.Document] = None) -> Iterator[str]:
|
|
57
133
|
"""
|
|
58
134
|
Get potential absolute URIs to check for an imported file.
|
|
@@ -250,7 +326,8 @@ def get_supertype(types: Sequence[Optional[WDL.Type.Base]]) -> WDL.Type.Base:
|
|
|
250
326
|
if len(types) == 1:
|
|
251
327
|
# Only one type. It isn't None.
|
|
252
328
|
the_type = types[0]
|
|
253
|
-
|
|
329
|
+
if the_type is None:
|
|
330
|
+
raise RuntimeError("The supertype cannot be None.")
|
|
254
331
|
return the_type
|
|
255
332
|
else:
|
|
256
333
|
# Multiple types (or none). Assume Any
|
|
@@ -263,7 +340,6 @@ def for_each_node(root: WDL.Tree.WorkflowNode) -> Iterator[WDL.Tree.WorkflowNode
|
|
|
263
340
|
internal nodes of conditionals and scatters, and gather nodes.
|
|
264
341
|
"""
|
|
265
342
|
|
|
266
|
-
logger.debug('WorkflowNode: %s: %s %s', type(root), root, root.workflow_node_id)
|
|
267
343
|
yield root
|
|
268
344
|
for child_node in root.children:
|
|
269
345
|
if isinstance(child_node, WDL.Tree.WorkflowNode):
|
|
@@ -302,7 +378,7 @@ def recursive_dependencies(root: WDL.Tree.WorkflowNode) -> Set[str]:
|
|
|
302
378
|
|
|
303
379
|
TOIL_URI_SCHEME = 'toilfile:'
|
|
304
380
|
|
|
305
|
-
def pack_toil_uri(file_id: FileID, file_basename: str) -> str:
|
|
381
|
+
def pack_toil_uri(file_id: FileID, dir_id: uuid.UUID, file_basename: str) -> str:
|
|
306
382
|
"""
|
|
307
383
|
Encode a Toil file ID and its source path in a URI that starts with the scheme in TOIL_URI_SCHEME.
|
|
308
384
|
"""
|
|
@@ -310,9 +386,9 @@ def pack_toil_uri(file_id: FileID, file_basename: str) -> str:
|
|
|
310
386
|
# We urlencode everything, including any slashes. We need to use a slash to
|
|
311
387
|
# set off the actual filename, so the WDL standard library basename
|
|
312
388
|
# function works correctly.
|
|
313
|
-
return f"{TOIL_URI_SCHEME}{quote(file_id.pack(), safe='')}/{quote(file_basename, safe='')}"
|
|
389
|
+
return f"{TOIL_URI_SCHEME}{quote(file_id.pack(), safe='')}/{quote(str(dir_id))}/{quote(file_basename, safe='')}"
|
|
314
390
|
|
|
315
|
-
def unpack_toil_uri(toil_uri: str) -> Tuple[FileID, str]:
|
|
391
|
+
def unpack_toil_uri(toil_uri: str) -> Tuple[FileID, str, str]:
|
|
316
392
|
"""
|
|
317
393
|
Unpack a URI made by make_toil_uri to retrieve the FileID and the basename
|
|
318
394
|
(no path prefix) that the file is supposed to have.
|
|
@@ -326,12 +402,32 @@ def unpack_toil_uri(toil_uri: str) -> Tuple[FileID, str]:
|
|
|
326
402
|
raise ValueError(f"URI doesn't start with {TOIL_URI_SCHEME} and should: {toil_uri}")
|
|
327
403
|
# Split encoded file ID from filename
|
|
328
404
|
parts = parts[1].split('/')
|
|
329
|
-
if len(parts) !=
|
|
405
|
+
if len(parts) != 3:
|
|
330
406
|
raise ValueError(f"Wrong number of path segments in URI: {toil_uri}")
|
|
331
407
|
file_id = FileID.unpack(unquote(parts[0]))
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
408
|
+
parent_id = unquote(parts[1])
|
|
409
|
+
file_basename = unquote(parts[2])
|
|
410
|
+
|
|
411
|
+
return file_id, parent_id, file_basename
|
|
412
|
+
|
|
413
|
+
def evaluate_output_decls(output_decls: List[WDL.Tree.Decl], all_bindings: WDL.Env.Bindings[WDL.Value.Base], standard_library: WDL.StdLib.Base) -> WDL.Env.Bindings[WDL.Value.Base]:
|
|
414
|
+
"""
|
|
415
|
+
Evaluate output decls with a given bindings environment and standard library.
|
|
416
|
+
Creates a new bindings object that only contains the bindings from the given decls.
|
|
417
|
+
Guarantees that each decl in `output_decls` can access the variables defined by the previous ones.
|
|
418
|
+
:param all_bindings: Environment to use when evaluating decls
|
|
419
|
+
:param output_decls: Decls to evaluate
|
|
420
|
+
:param standard_library: Standard library
|
|
421
|
+
:return: New bindings object with only the output_decls
|
|
422
|
+
"""
|
|
423
|
+
# all_bindings contains output + previous bindings so that the output can reference its own declarations
|
|
424
|
+
# output_bindings only contains the output bindings themselves so that bindings from sections such as the input aren't included
|
|
425
|
+
output_bindings: WDL.Env.Bindings[WDL.Value.Base] = WDL.Env.Bindings()
|
|
426
|
+
for output_decl in output_decls:
|
|
427
|
+
output_value = evaluate_decl(output_decl, all_bindings, standard_library)
|
|
428
|
+
all_bindings = all_bindings.bind(output_decl.name, output_value)
|
|
429
|
+
output_bindings = output_bindings.bind(output_decl.name, output_value)
|
|
430
|
+
return output_bindings
|
|
335
431
|
|
|
336
432
|
class NonDownloadingSize(WDL.StdLib._Size):
|
|
337
433
|
"""
|
|
@@ -355,15 +451,25 @@ class NonDownloadingSize(WDL.StdLib._Size):
|
|
|
355
451
|
total_size = 0.0
|
|
356
452
|
for uri in file_uris:
|
|
357
453
|
# Sum up the sizes of all the files, if any.
|
|
358
|
-
if uri
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
454
|
+
if is_url(uri):
|
|
455
|
+
if uri.startswith(TOIL_URI_SCHEME):
|
|
456
|
+
# This is a Toil File ID we encoded; we have the size
|
|
457
|
+
# available.
|
|
458
|
+
file_id, _, _ = unpack_toil_uri(uri)
|
|
459
|
+
# Use the encoded size
|
|
460
|
+
total_size += file_id.size
|
|
461
|
+
else:
|
|
462
|
+
# This is some other kind of remote file.
|
|
463
|
+
# We need to get its size from the URI.
|
|
464
|
+
item_size = AbstractJobStore.get_size(uri)
|
|
465
|
+
if item_size is None:
|
|
466
|
+
# User asked for the size and we can't figure it out efficiently, so bail out.
|
|
467
|
+
raise RuntimeError(f"Attempt to check the size of {uri} failed")
|
|
468
|
+
total_size += item_size
|
|
364
469
|
else:
|
|
365
|
-
#
|
|
366
|
-
|
|
470
|
+
# This is actually a file we can use locally.
|
|
471
|
+
local_path = self.stdlib._devirtualize_filename(uri)
|
|
472
|
+
total_size += os.path.getsize(local_path)
|
|
367
473
|
|
|
368
474
|
if len(arguments) > 1:
|
|
369
475
|
# Need to convert units. See
|
|
@@ -377,6 +483,14 @@ class NonDownloadingSize(WDL.StdLib._Size):
|
|
|
377
483
|
# Return the result as a WDL float value
|
|
378
484
|
return WDL.Value.Float(total_size)
|
|
379
485
|
|
|
486
|
+
def is_url(filename: str, schemes: List[str] = ['http:', 'https:', 's3:', 'gs:', TOIL_URI_SCHEME]) -> bool:
|
|
487
|
+
"""
|
|
488
|
+
Decide if a filename is a known kind of URL
|
|
489
|
+
"""
|
|
490
|
+
for scheme in schemes:
|
|
491
|
+
if filename.startswith(scheme):
|
|
492
|
+
return True
|
|
493
|
+
return False
|
|
380
494
|
|
|
381
495
|
# Both the WDL code itself **and** the commands that it runs will deal in
|
|
382
496
|
# "virtualized" filenames.
|
|
@@ -407,8 +521,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
407
521
|
"""
|
|
408
522
|
Standard library implementation for WDL as run on Toil.
|
|
409
523
|
"""
|
|
410
|
-
|
|
411
|
-
def __init__(self, file_store: AbstractFileStore):
|
|
524
|
+
def __init__(self, file_store: AbstractFileStore, execution_dir: Optional[str] = None):
|
|
412
525
|
"""
|
|
413
526
|
Set up the standard library.
|
|
414
527
|
"""
|
|
@@ -424,17 +537,14 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
424
537
|
self.size = NonDownloadingSize(self)
|
|
425
538
|
|
|
426
539
|
# Keep the file store around so we can access files.
|
|
427
|
-
self._file_store = file_store
|
|
540
|
+
self._file_store = file_store
|
|
428
541
|
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
for scheme in schemes:
|
|
434
|
-
if filename.startswith(scheme):
|
|
435
|
-
return True
|
|
436
|
-
return False
|
|
542
|
+
# UUID to differentiate which node files are virtualized from
|
|
543
|
+
self._parent_dir_to_ids: Dict[str, uuid.UUID] = dict()
|
|
544
|
+
|
|
545
|
+
self._execution_dir = execution_dir
|
|
437
546
|
|
|
547
|
+
@memoize
|
|
438
548
|
def _devirtualize_filename(self, filename: str) -> str:
|
|
439
549
|
"""
|
|
440
550
|
'devirtualize' filename passed to a read_* function: return a filename that can be open()ed
|
|
@@ -443,32 +553,61 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
443
553
|
|
|
444
554
|
# TODO: Support people doing path operations (join, split, get parent directory) on the virtualized filenames.
|
|
445
555
|
# TODO: For task inputs, we are supposed to make sure to put things in the same directory if they came from the same directory. See <https://github.com/openwdl/wdl/blob/main/versions/1.0/SPEC.md#task-input-localization>
|
|
446
|
-
if filename
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
556
|
+
if is_url(filename):
|
|
557
|
+
if filename.startswith(TOIL_URI_SCHEME):
|
|
558
|
+
# This is a reference to the Toil filestore.
|
|
559
|
+
# Deserialize the FileID
|
|
560
|
+
file_id, parent_id, file_basename = unpack_toil_uri(filename)
|
|
561
|
+
|
|
562
|
+
# Decide where it should be put.
|
|
563
|
+
# This is a URI with the "parent" UUID attached to the filename.
|
|
564
|
+
# Use UUID as folder name rather than a new temp folder to reduce internal clutter.
|
|
565
|
+
# Put the UUID in the destination path in order for tasks to
|
|
566
|
+
# see where to put files depending on their parents.
|
|
567
|
+
dir_path = os.path.join(self._file_store.localTempDir, parent_id)
|
|
568
|
+
|
|
569
|
+
else:
|
|
570
|
+
# Parse the URL and extract the basename
|
|
571
|
+
file_basename = os.path.basename(urlsplit(filename).path)
|
|
572
|
+
# Get the URL to the directory this thing came from. Remember
|
|
573
|
+
# URLs are interpreted relative to the directory the thing is
|
|
574
|
+
# in, not relative to the thing.
|
|
575
|
+
parent_url = urljoin(filename, ".")
|
|
576
|
+
# Turn it into a string we can make a directory for
|
|
577
|
+
dir_path = os.path.join(self._file_store.localTempDir, quote(parent_url, safe=''))
|
|
578
|
+
|
|
579
|
+
if not os.path.exists(dir_path):
|
|
580
|
+
# Make sure the chosen directory exists
|
|
581
|
+
os.mkdir(dir_path)
|
|
582
|
+
# And decide the file goes in it.
|
|
583
|
+
dest_path = os.path.join(dir_path, file_basename)
|
|
584
|
+
|
|
585
|
+
if filename.startswith(TOIL_URI_SCHEME):
|
|
586
|
+
# Get a local path to the file
|
|
587
|
+
result = self._file_store.readGlobalFile(file_id, dest_path)
|
|
588
|
+
else:
|
|
589
|
+
# Download to a local file with the right name and execute bit.
|
|
590
|
+
# Open it exclusively
|
|
591
|
+
with open(dest_path, 'xb') as dest_file:
|
|
592
|
+
# And save to it
|
|
593
|
+
size, executable = AbstractJobStore.read_from_url(filename, dest_file)
|
|
594
|
+
if executable:
|
|
595
|
+
# Set the execute bit in the file's permissions
|
|
596
|
+
os.chmod(dest_path, os.stat(dest_path).st_mode | stat.S_IXUSR)
|
|
597
|
+
|
|
598
|
+
result = dest_path
|
|
466
599
|
else:
|
|
467
600
|
# This is a local file
|
|
468
|
-
|
|
601
|
+
# To support relative paths, join the execution dir and filename
|
|
602
|
+
# if filename is already an abs path, join() will do nothing
|
|
603
|
+
if self._execution_dir is not None:
|
|
604
|
+
result = os.path.join(self._execution_dir, filename)
|
|
605
|
+
else:
|
|
606
|
+
result = filename
|
|
469
607
|
|
|
470
608
|
logger.debug('Devirtualized %s as openable file %s', filename, result)
|
|
471
|
-
|
|
609
|
+
if not os.path.exists(result):
|
|
610
|
+
raise RuntimeError(f"Virtualized file {filename} looks like a local file but isn't!")
|
|
472
611
|
return result
|
|
473
612
|
|
|
474
613
|
def _virtualize_filename(self, filename: str) -> str:
|
|
@@ -477,15 +616,22 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
477
616
|
File value
|
|
478
617
|
"""
|
|
479
618
|
|
|
480
|
-
|
|
481
|
-
if self._is_url(filename):
|
|
619
|
+
if is_url(filename):
|
|
482
620
|
# Already virtual
|
|
483
|
-
logger.debug('
|
|
621
|
+
logger.debug('Already virtualized %s as WDL file %s', filename, filename)
|
|
484
622
|
return filename
|
|
485
623
|
|
|
486
624
|
# Otherwise this is a local file and we want to fake it as a Toil file store file
|
|
487
|
-
|
|
488
|
-
|
|
625
|
+
|
|
626
|
+
# To support relative paths from execution directory, join the execution dir and filename
|
|
627
|
+
# If filename is already an abs path, join() will not do anything
|
|
628
|
+
if self._execution_dir is not None:
|
|
629
|
+
file_id = self._file_store.writeGlobalFile(os.path.join(self._execution_dir, filename))
|
|
630
|
+
else:
|
|
631
|
+
file_id = self._file_store.writeGlobalFile(filename)
|
|
632
|
+
dir = os.path.dirname(os.path.abspath(filename)) # is filename always an abspath?
|
|
633
|
+
parent_id = self._parent_dir_to_ids.setdefault(dir, uuid.uuid4())
|
|
634
|
+
result = pack_toil_uri(file_id, parent_id, os.path.basename(filename))
|
|
489
635
|
logger.debug('Virtualized %s as WDL file %s', filename, result)
|
|
490
636
|
return result
|
|
491
637
|
|
|
@@ -507,18 +653,19 @@ class ToilWDLStdLibTaskCommand(ToilWDLStdLibBase):
|
|
|
507
653
|
super().__init__(file_store)
|
|
508
654
|
self.container = container
|
|
509
655
|
|
|
656
|
+
@memoize
|
|
510
657
|
def _devirtualize_filename(self, filename: str) -> str:
|
|
511
658
|
"""
|
|
512
659
|
Go from a virtualized WDL-side filename to a local disk filename.
|
|
513
660
|
|
|
514
|
-
Any WDL-side filenames which are paths will be paths in the container.
|
|
661
|
+
Any WDL-side filenames which are paths will be paths in the container.
|
|
515
662
|
"""
|
|
516
|
-
if
|
|
663
|
+
if is_url(filename):
|
|
517
664
|
# We shouldn't have to deal with URLs here; we want to have exactly
|
|
518
665
|
# two nicely stacked/back-to-back layers of virtualization, joined
|
|
519
666
|
# on the out-of-container paths.
|
|
520
667
|
raise RuntimeError(f"File {filename} is a URL but should already be an in-container-virtualized filename")
|
|
521
|
-
|
|
668
|
+
|
|
522
669
|
# If this is a local path it will be in the container. Make sure we
|
|
523
670
|
# use the out-of-container equivalent.
|
|
524
671
|
result = self.container.host_path(filename)
|
|
@@ -542,7 +689,7 @@ class ToilWDLStdLibTaskCommand(ToilWDLStdLibBase):
|
|
|
542
689
|
self.container.add_paths([filename])
|
|
543
690
|
|
|
544
691
|
result = self.container.input_path_map[filename]
|
|
545
|
-
|
|
692
|
+
|
|
546
693
|
logger.debug('Virtualized %s as WDL file %s', filename, result)
|
|
547
694
|
return result
|
|
548
695
|
|
|
@@ -645,6 +792,7 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
645
792
|
# Just turn them all into WDL File objects with local disk out-of-container names.
|
|
646
793
|
return WDL.Value.Array(WDL.Type.File(), [WDL.Value.File(x) for x in results])
|
|
647
794
|
|
|
795
|
+
@memoize
|
|
648
796
|
def _devirtualize_filename(self, filename: str) -> str:
|
|
649
797
|
"""
|
|
650
798
|
Go from a virtualized WDL-side filename to a local disk filename.
|
|
@@ -652,7 +800,7 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
652
800
|
Any WDL-side filenames which are relative will be relative to the
|
|
653
801
|
current directory override, if set.
|
|
654
802
|
"""
|
|
655
|
-
if not
|
|
803
|
+
if not is_url(filename) and not filename.startswith('/'):
|
|
656
804
|
# We are getting a bare relative path from the WDL side.
|
|
657
805
|
# Find a real path to it relative to the current directory override.
|
|
658
806
|
work_dir = '.' if not self._current_directory_override else self._current_directory_override
|
|
@@ -669,7 +817,7 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
669
817
|
filenames.
|
|
670
818
|
"""
|
|
671
819
|
|
|
672
|
-
if not
|
|
820
|
+
if not is_url(filename) and not filename.startswith('/'):
|
|
673
821
|
# We are getting a bare relative path the supposedly devirtualized side.
|
|
674
822
|
# Find a real path to it relative to the current directory override.
|
|
675
823
|
work_dir = '.' if not self._current_directory_override else self._current_directory_override
|
|
@@ -697,10 +845,11 @@ def evaluate_named_expression(context: Union[WDL.Error.SourceNode, WDL.Error.Sou
|
|
|
697
845
|
|
|
698
846
|
# Do the actual evaluation
|
|
699
847
|
value = expression.eval(environment, stdlib)
|
|
848
|
+
logger.debug("Got value %s of type %s", value, value.type)
|
|
700
849
|
except Exception:
|
|
701
850
|
# If something goes wrong, dump.
|
|
702
851
|
logger.exception("Expression evaluation failed for %s: %s", name, expression)
|
|
703
|
-
log_bindings(logger.
|
|
852
|
+
log_bindings(logger.error, "Expression was evaluated in:", [environment])
|
|
704
853
|
raise
|
|
705
854
|
|
|
706
855
|
if expected_type:
|
|
@@ -716,15 +865,24 @@ def evaluate_decl(node: WDL.Tree.Decl, environment: WDLBindings, stdlib: WDL.Std
|
|
|
716
865
|
|
|
717
866
|
return evaluate_named_expression(node, node.name, node.type, node.expr, environment, stdlib)
|
|
718
867
|
|
|
719
|
-
def evaluate_call_inputs(context: Union[WDL.Error.SourceNode, WDL.Error.SourcePosition], expressions: Dict[str, WDL.Expr.Base], environment: WDLBindings, stdlib: WDL.StdLib.Base) -> WDLBindings:
|
|
868
|
+
def evaluate_call_inputs(context: Union[WDL.Error.SourceNode, WDL.Error.SourcePosition], expressions: Dict[str, WDL.Expr.Base], environment: WDLBindings, stdlib: WDL.StdLib.Base, inputs_dict: Optional[Dict[str, WDL.Type.Base]] = None) -> WDLBindings:
|
|
720
869
|
"""
|
|
721
|
-
Evaluate a bunch of expressions with names, and make them into a fresh set of bindings.
|
|
870
|
+
Evaluate a bunch of expressions with names, and make them into a fresh set of bindings. `inputs_dict` is a mapping of
|
|
871
|
+
variable names to their expected type for the input decls in a task.
|
|
722
872
|
"""
|
|
723
|
-
|
|
724
873
|
new_bindings: WDLBindings = WDL.Env.Bindings()
|
|
725
874
|
for k, v in expressions.items():
|
|
726
875
|
# Add each binding in turn
|
|
727
|
-
|
|
876
|
+
# If the expected type is optional, then don't type check the lhs and rhs as miniwdl will return a StaticTypeMismatch error, so pass in None
|
|
877
|
+
expected_type = None
|
|
878
|
+
if not v.type.optional and inputs_dict is not None:
|
|
879
|
+
# This is done to enable passing in a string into a task input of file type
|
|
880
|
+
expected_type = inputs_dict.get(k, None)
|
|
881
|
+
try:
|
|
882
|
+
new_bindings = new_bindings.bind(k, evaluate_named_expression(context, k, expected_type, v, environment, stdlib))
|
|
883
|
+
except FileNotFoundError as e:
|
|
884
|
+
# MiniWDL's type coercion will raise this when trying to make a File out of Null.
|
|
885
|
+
raise WDL.Error.EvalError(context, f"Cannot evaluate expression for {k} with value {v}")
|
|
728
886
|
return new_bindings
|
|
729
887
|
|
|
730
888
|
def evaluate_defaultable_decl(node: WDL.Tree.Decl, environment: WDLBindings, stdlib: WDL.StdLib.Base) -> WDL.Value.Base:
|
|
@@ -735,7 +893,10 @@ def evaluate_defaultable_decl(node: WDL.Tree.Decl, environment: WDLBindings, std
|
|
|
735
893
|
try:
|
|
736
894
|
if node.name in environment and not isinstance(environment[node.name], WDL.Value.Null):
|
|
737
895
|
logger.debug('Name %s is already defined with a non-null value, not using default', node.name)
|
|
738
|
-
|
|
896
|
+
if not isinstance(environment[node.name], type(node.type)):
|
|
897
|
+
return environment[node.name].coerce(node.type)
|
|
898
|
+
else:
|
|
899
|
+
return environment[node.name]
|
|
739
900
|
else:
|
|
740
901
|
if node.type is not None and not node.type.optional and node.expr is None:
|
|
741
902
|
# We need a value for this but there isn't one.
|
|
@@ -745,7 +906,7 @@ def evaluate_defaultable_decl(node: WDL.Tree.Decl, environment: WDLBindings, std
|
|
|
745
906
|
except Exception:
|
|
746
907
|
# If something goes wrong, dump.
|
|
747
908
|
logger.exception("Evaluation failed for %s", node)
|
|
748
|
-
log_bindings(logger.
|
|
909
|
+
log_bindings(logger.error, "Statement was evaluated in:", [environment])
|
|
749
910
|
raise
|
|
750
911
|
|
|
751
912
|
# TODO: make these stdlib methods???
|
|
@@ -753,8 +914,8 @@ def devirtualize_files(environment: WDLBindings, stdlib: WDL.StdLib.Base) -> WDL
|
|
|
753
914
|
"""
|
|
754
915
|
Make sure all the File values embedded in the given bindings point to files
|
|
755
916
|
that are actually available to command line commands.
|
|
917
|
+
The same virtual file always maps to the same devirtualized filename even with duplicates
|
|
756
918
|
"""
|
|
757
|
-
|
|
758
919
|
return map_over_files_in_bindings(environment, stdlib._devirtualize_filename)
|
|
759
920
|
|
|
760
921
|
def virtualize_files(environment: WDLBindings, stdlib: WDL.StdLib.Base) -> WDLBindings:
|
|
@@ -765,15 +926,52 @@ def virtualize_files(environment: WDLBindings, stdlib: WDL.StdLib.Base) -> WDLBi
|
|
|
765
926
|
|
|
766
927
|
return map_over_files_in_bindings(environment, stdlib._virtualize_filename)
|
|
767
928
|
|
|
768
|
-
def
|
|
929
|
+
def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:
    """
    Assign in-container paths to host input paths for a task container.

    Based off of WDL.runtime.task_container.add_paths from miniwdl.

    Every host path that is not already mapped gets an entry in
    ``task_container.input_path_map`` (host path -> container path) and a
    matching reverse entry in ``task_container.input_path_map_rev``.
    Container paths have the form
    ``<container_dir>/work/_miniwdl_inputs/<N>/<basename>``, where ``N`` is a
    numbered subdirectory shared by all host paths whose parent directory has
    the same name. A trailing slash on a host path is carried over to its
    container path.

    :param task_container: Container whose path maps are updated in place.
    :param host_paths: Host-side paths to map. Paths already present in the
           map, with or without their trailing slash, are left alone.
    :raises WDL.Error.InputError: If a not-yet-mapped host path does not exist.
    :raises AssertionError: If a computed container path is already in use.
    """
    # Partition the not-yet-mapped paths by host directory, making sure each
    # one actually exists.
    host_paths_by_dir: Dict[str, Set[str]] = {}
    for host_path in host_paths:
        host_path_strip = host_path.rstrip("/")
        if host_path not in task_container.input_path_map and host_path_strip not in task_container.input_path_map:
            if not os.path.exists(host_path_strip):
                raise WDL.Error.InputError("input path not found: " + host_path)
            host_paths_by_dir.setdefault(os.path.dirname(host_path_strip), set()).add(host_path)

    # Everything is mounted below this directory; it is the same for every path.
    based = os.path.join(task_container.container_dir, "work/_miniwdl_inputs")

    # Each distinct parent directory name gets its own numbered subdirectory,
    # handed out in order of first appearance (0, 1, 2, ...).
    id_to_subd: Dict[str, str] = {}
    for paths in host_paths_by_dir.values():
        for host_path in paths:
            # NOTE(review): dirname() is applied to the unstripped path, so
            # for a path with a trailing slash this is the directory's own
            # name, not its parent's. Confirm that is intended.
            parent_id = os.path.basename(os.path.dirname(host_path))
            host_path_subd = id_to_subd.setdefault(parent_id, str(len(id_to_subd)))
            container_path = os.path.join(based, host_path_subd, os.path.basename(host_path.rstrip("/")))
            if host_path.endswith("/"):
                container_path += "/"
            if container_path in task_container.input_path_map_rev:
                # Raised explicitly instead of using an assert statement so
                # the check still runs under ``python -O``; otherwise the
                # reverse map entry would be silently overwritten.
                raise AssertionError(f"{container_path}, {task_container.input_path_map_rev}")
            task_container.input_path_map[host_path] = container_path
            task_container.input_path_map_rev[container_path] = host_path
|
|
961
|
+
|
|
962
|
+
def import_files(environment: WDLBindings, toil: Toil, path: Optional[List[str]] = None, skip_remote: bool = False) -> WDLBindings:
|
|
769
963
|
"""
|
|
770
964
|
Make sure all File values embedded in the given bindings are imported,
|
|
771
965
|
using the given Toil object.
|
|
772
966
|
|
|
773
967
|
:param path: If set, try resolving input location relative to the URLs or
|
|
774
|
-
|
|
775
|
-
"""
|
|
968
|
+
directories in this list.
|
|
776
969
|
|
|
970
|
+
:param skip_remote: If set, don't try to import files from remote
|
|
971
|
+
locations. Leave them as URIs.
|
|
972
|
+
"""
|
|
973
|
+
path_to_id: Dict[str, uuid.UUID] = {}
|
|
974
|
+
@memoize
|
|
777
975
|
def import_file_from_uri(uri: str) -> str:
|
|
778
976
|
"""
|
|
779
977
|
Import a file from a URI and return a virtualized filename for it.
|
|
@@ -784,9 +982,23 @@ def import_files(environment: WDLBindings, toil: Toil, path: Optional[List[str]]
|
|
|
784
982
|
# Try each place it could be according to WDL finding logic.
|
|
785
983
|
tried.append(candidate_uri)
|
|
786
984
|
try:
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
985
|
+
if skip_remote and is_url(candidate_uri):
|
|
986
|
+
# Use remote URIs in place. But we need to find the one that exists.
|
|
987
|
+
if not AbstractJobStore.url_exists(candidate_uri):
|
|
988
|
+
# Wasn't found there
|
|
989
|
+
continue
|
|
990
|
+
# Now we know this exists, so pass it through
|
|
991
|
+
return candidate_uri
|
|
992
|
+
else:
|
|
993
|
+
# Actually import
|
|
994
|
+
# Try to import the file. Don't raise if we can't find it, just
|
|
995
|
+
# return None!
|
|
996
|
+
imported = toil.import_file(candidate_uri, check_existence=False)
|
|
997
|
+
if imported is None:
|
|
998
|
+
# Wasn't found there
|
|
999
|
+
continue
|
|
1000
|
+
logger.info('Imported %s', candidate_uri)
|
|
1001
|
+
|
|
790
1002
|
except UnimplementedURLException as e:
|
|
791
1003
|
# We can't find anything that can even support this URL scheme.
|
|
792
1004
|
# Report to the user, they are probably missing an extra.
|
|
@@ -797,6 +1009,7 @@ def import_files(environment: WDLBindings, toil: Toil, path: Optional[List[str]]
|
|
|
797
1009
|
# we have no auth.
|
|
798
1010
|
logger.error("Something went wrong importing %s", candidate_uri)
|
|
799
1011
|
raise
|
|
1012
|
+
|
|
800
1013
|
if imported is None:
|
|
801
1014
|
# Wasn't found there
|
|
802
1015
|
continue
|
|
@@ -809,9 +1022,27 @@ def import_files(environment: WDLBindings, toil: Toil, path: Optional[List[str]]
|
|
|
809
1022
|
# We can't have files with no basename because we need to
|
|
810
1023
|
# download them at that basename later.
|
|
811
1024
|
raise RuntimeError(f"File {candidate_uri} has no basename and so cannot be a WDL File")
|
|
812
|
-
|
|
1025
|
+
|
|
813
1026
|
# Was actually found
|
|
814
|
-
|
|
1027
|
+
if is_url(candidate_uri):
|
|
1028
|
+
# Might be a file URI or other URI.
|
|
1029
|
+
# We need to make sure file URIs and local paths that point to
|
|
1030
|
+
# the same place are treated the same.
|
|
1031
|
+
parsed = urlsplit(candidate_uri)
|
|
1032
|
+
if parsed.scheme == "file:":
|
|
1033
|
+
# This is a local file URI. Convert to a path for source directory tracking.
|
|
1034
|
+
parent_dir = os.path.dirname(unquote(parsed.path))
|
|
1035
|
+
else:
|
|
1036
|
+
# This is some other URL. Get the URL to the parent directory and use that.
|
|
1037
|
+
parent_dir = urljoin(candidate_uri, ".")
|
|
1038
|
+
else:
|
|
1039
|
+
# Must be a local path
|
|
1040
|
+
parent_dir = os.path.dirname(candidate_uri)
|
|
1041
|
+
|
|
1042
|
+
# Pack a UUID of the parent directory
|
|
1043
|
+
dir_id = path_to_id.setdefault(parent_dir, uuid.uuid4())
|
|
1044
|
+
|
|
1045
|
+
return pack_toil_uri(imported, dir_id, file_basename)
|
|
815
1046
|
|
|
816
1047
|
# If we get here we tried all the candidates
|
|
817
1048
|
raise RuntimeError(f"Could not find {uri} at any of: {tried}")
|
|
@@ -833,12 +1064,22 @@ def drop_missing_files(environment: WDLBindings, current_directory_override: Opt
|
|
|
833
1064
|
"""
|
|
834
1065
|
Return None if a file doesn't exist, or its path if it does.
|
|
835
1066
|
"""
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
1067
|
+
logger.debug("Consider file %s", filename)
|
|
1068
|
+
|
|
1069
|
+
if is_url(filename):
|
|
1070
|
+
if filename.startswith(TOIL_URI_SCHEME) or AbstractJobStore.url_exists(filename):
|
|
1071
|
+
# We assume anything in the filestore actually exists.
|
|
1072
|
+
return filename
|
|
1073
|
+
else:
|
|
1074
|
+
logger.warning('File %s with type %s does not actually exist at its URI', filename, value_type)
|
|
1075
|
+
return None
|
|
839
1076
|
else:
|
|
840
|
-
|
|
841
|
-
|
|
1077
|
+
effective_path = os.path.abspath(os.path.join(work_dir, filename))
|
|
1078
|
+
if os.path.exists(effective_path):
|
|
1079
|
+
return filename
|
|
1080
|
+
else:
|
|
1081
|
+
logger.warning('File %s with type %s does not actually exist at %s', filename, value_type, effective_path)
|
|
1082
|
+
return None
|
|
842
1083
|
|
|
843
1084
|
return map_over_typed_files_in_bindings(environment, drop_if_missing)
|
|
844
1085
|
|
|
@@ -912,6 +1153,7 @@ def map_over_typed_files_in_value(value: WDL.Value.Base, transform: Callable[[WD
|
|
|
912
1153
|
if new_path is None:
|
|
913
1154
|
# Assume the transform checked types if we actually care about the
|
|
914
1155
|
# result.
|
|
1156
|
+
logger.warning("File %s became Null", value)
|
|
915
1157
|
return WDL.Value.Null()
|
|
916
1158
|
else:
|
|
917
1159
|
# Make whatever the value is around the new path.
|
|
@@ -937,9 +1179,14 @@ def map_over_typed_files_in_value(value: WDL.Value.Base, transform: Callable[[WD
|
|
|
937
1179
|
class WDLBaseJob(Job):
|
|
938
1180
|
"""
|
|
939
1181
|
Base job class for all WDL-related jobs.
|
|
1182
|
+
|
|
1183
|
+
Responsible for post-processing returned bindings, to do things like add in
|
|
1184
|
+
null values for things not defined in a section. Post-processing operations
|
|
1185
|
+
can be added onto any job before it is saved, and will be applied as long
|
|
1186
|
+
as the job's run method calls postprocess().
|
|
940
1187
|
"""
|
|
941
1188
|
|
|
942
|
-
def __init__(self, **kwargs: Any) -> None:
|
|
1189
|
+
def __init__(self, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
|
|
943
1190
|
"""
|
|
944
1191
|
Make a WDL-related job.
|
|
945
1192
|
|
|
@@ -961,17 +1208,106 @@ class WDLBaseJob(Job):
|
|
|
961
1208
|
# TODO: Make sure C-level stack size is also big enough for this.
|
|
962
1209
|
sys.setrecursionlimit(10000)
|
|
963
1210
|
|
|
1211
|
+
# We need an ordered list of postprocessing steps to apply, because we
|
|
1212
|
+
# may have coalesced postprocessing steps deferred by several levels of
|
|
1213
|
+
# jobs returning other jobs' promised RVs.
|
|
1214
|
+
self._postprocessing_steps: List[Tuple[str, Union[str, Promised[WDLBindings]]]] = []
|
|
1215
|
+
|
|
1216
|
+
self._execution_dir = execution_dir
|
|
1217
|
+
|
|
964
1218
|
# TODO: We're not allowed by MyPy to override a method and widen the return
|
|
965
1219
|
# type, so this has to be Any.
|
|
966
1220
|
def run(self, file_store: AbstractFileStore) -> Any:
    """
    Run a WDL-related job.

    This base implementation only makes sure the interpreter's recursion
    limit is at least 10000. It does not use the file store and returns
    nothing.

    Remember to decorate non-trivial overrides with :func:`report_wdl_errors`.

    :param file_store: File store for the running job.
    """
    # Make sure that pickle is prepared to save our return values, which
    # might take a lot of recursive calls. TODO: This might be because
    # bindings are actually linked lists or something?
    # Only ever raise the limit: if something has already set it higher than
    # we need, lowering it here could break that code.
    if sys.getrecursionlimit() < 10000:
        sys.setrecursionlimit(10000)
|
|
974
1230
|
|
|
1231
|
+
def then_underlay(self, underlay: Promised[WDLBindings]) -> None:
    """
    Queue an "underlay" postprocessing step.

    When postprocess() runs, values from the underlay are used for anything
    the result bindings do not already set.

    :param underlay: Backup bindings, or a promise for them.
    """
    step = ("underlay", underlay)
    logger.debug("Underlay %s after %s", underlay, self)
    self._postprocessing_steps.append(step)
|
|
1237
|
+
|
|
1238
|
+
def then_remove(self, remove: Promised[WDLBindings]) -> None:
    """
    Queue a "remove" postprocessing step.

    When postprocess() runs, the given bindings are subtracted from the
    result bindings.

    :param remove: Bindings to take out of the result, or a promise for them.
    """
    step = ("remove", remove)
    logger.debug("Remove %s after %s", remove, self)
    self._postprocessing_steps.append(step)
|
|
1244
|
+
|
|
1245
|
+
def then_namespace(self, namespace: str) -> None:
    """
    Queue a "namespace" postprocessing step.

    When postprocess() runs, the result bindings are wrapped in the given
    namespace.

    :param namespace: Name of the namespace to wrap the result in.
    """
    step = ("namespace", namespace)
    logger.debug("Namespace %s after %s", namespace, self)
    self._postprocessing_steps.append(step)
|
|
1251
|
+
|
|
1252
|
+
def then_overlay(self, overlay: Promised[WDLBindings]) -> None:
    """
    Queue an "overlay" postprocessing step.

    When postprocess() runs, values from the overlay take the place of any
    values the (possibly namespaced) result bindings set for the same names.

    :param overlay: Bindings to put on top of the result, or a promise for
           them.
    """
    step = ("overlay", overlay)
    logger.debug("Overlay %s after %s", overlay, self)
    self._postprocessing_steps.append(step)
|
|
1258
|
+
|
|
1259
|
+
def postprocess(self, bindings: WDLBindings) -> WDLBindings:
    """
    Apply queued changes to bindings.

    Should be applied by subclasses' run() implementations to their return
    values.

    The steps in the job's postprocessing queue are applied one after
    another, in the order they were queued.

    :param bindings: Bindings to transform.
    :returns: The bindings after all queued steps have been applied.
    :raises RuntimeError: If a step's argument does not have the type its
            action needs (for example, it is not a WDL.Env.Bindings), or if
            the action is not recognized.
    """

    for action, argument in self._postprocessing_steps:

        logger.debug("Apply postprocessing step: (%s, %s)", action, argument)

        # Interpret the mini language of postprocessing steps.
        # These are too small to justify being their own separate jobs.
        if action == "underlay":
            if not isinstance(argument, WDL.Env.Bindings):
                raise RuntimeError("Wrong postprocessing argument type")
            # We want to apply values from the underlay if not set in the bindings
            bindings = combine_bindings([bindings, argument.subtract(bindings)])
        elif action == "remove":
            if not isinstance(argument, WDL.Env.Bindings):
                raise RuntimeError("Wrong postprocessing argument type")
            # We need to take stuff out of scope
            bindings = bindings.subtract(argument)
        elif action == "namespace":
            if not isinstance(argument, str):
                raise RuntimeError("Wrong postprocessing argument type")
            # We are supposed to put all our results in a namespace
            bindings = bindings.wrap_namespace(argument)
        elif action == "overlay":
            if not isinstance(argument, WDL.Env.Bindings):
                raise RuntimeError("Wrong postprocessing argument type")
            # We want to apply values from the overlay over the bindings
            bindings = combine_bindings([bindings.subtract(argument), argument])
        else:
            raise RuntimeError(f"Unknown postprocessing action {action}")

    return bindings
|
|
1297
|
+
|
|
1298
|
+
def defer_postprocessing(self, other: "WDLBaseJob") -> None:
    """
    Hand all of our queued postprocessing steps over to another job.

    Our steps are added to the end of the other job's queue, and our own
    queue is left empty.

    Use this when you are returning a promise for bindings, on the job that issues the promise.

    :param other: The job that receives our steps.
    """
    handed_over = self._postprocessing_steps
    other._postprocessing_steps.extend(handed_over)
    self._postprocessing_steps = []

    logger.debug("Assigned postprocessing steps from %s to %s", self, other)
|
|
1309
|
+
|
|
1310
|
+
|
|
975
1311
|
class WDLTaskJob(WDLBaseJob):
|
|
976
1312
|
"""
|
|
977
1313
|
Job that runs a WDL task.
|
|
@@ -1023,6 +1359,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1023
1359
|
logger.warning('No subuids are assigned to %s; cannot fake root.', username)
|
|
1024
1360
|
return False
|
|
1025
1361
|
|
|
1362
|
+
@report_wdl_errors("run task")
|
|
1026
1363
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1027
1364
|
"""
|
|
1028
1365
|
Actually run the task.
|
|
@@ -1034,6 +1371,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1034
1371
|
# For a task we are only passed the inside-the-task namespace.
|
|
1035
1372
|
bindings = combine_bindings(unwrap_all(self._prev_node_results))
|
|
1036
1373
|
# Set up the WDL standard library
|
|
1374
|
+
# UUID to use for virtualizing files
|
|
1037
1375
|
standard_library = ToilWDLStdLibBase(file_store)
|
|
1038
1376
|
|
|
1039
1377
|
if self._task.inputs:
|
|
@@ -1154,6 +1492,10 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1154
1492
|
rescheduled = WDLTaskJob(self._task, self._prev_node_results, self._task_id, self._namespace, cores=runtime_cores or self.cores, memory=runtime_memory or self.memory, disk=runtime_disk or self.disk, accelerators=runtime_accelerators or self.accelerators)
|
|
1155
1493
|
# Run that as a child
|
|
1156
1494
|
self.addChild(rescheduled)
|
|
1495
|
+
|
|
1496
|
+
# Give it our postprocessing steps
|
|
1497
|
+
self.defer_postprocessing(rescheduled)
|
|
1498
|
+
|
|
1157
1499
|
# And return its result.
|
|
1158
1500
|
return rescheduled.rv()
|
|
1159
1501
|
|
|
@@ -1287,7 +1629,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1287
1629
|
# Tell the container to take up all these files. It will assign
|
|
1288
1630
|
# them all new paths in task_container.input_path_map which we can
|
|
1289
1631
|
# read. We also get a task_container.host_path() to go the other way.
|
|
1290
|
-
|
|
1632
|
+
add_paths(task_container, get_file_paths_in_bindings(bindings))
|
|
1291
1633
|
logger.debug("Using container path map: %s", task_container.input_path_map)
|
|
1292
1634
|
|
|
1293
1635
|
# Replace everything with in-container paths for the command.
|
|
@@ -1296,9 +1638,12 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1296
1638
|
|
|
1297
1639
|
# Make a new standard library for evaluating the command specifically, which only deals with in-container paths and out-of-container paths.
|
|
1298
1640
|
command_library = ToilWDLStdLibTaskCommand(file_store, task_container)
|
|
1641
|
+
|
|
1642
|
+
# Work around wrong types from MiniWDL. See <https://github.com/chanzuckerberg/miniwdl/issues/665>
|
|
1643
|
+
dedent = cast(Callable[[str], Tuple[int, str]], strip_leading_whitespace)
|
|
1299
1644
|
|
|
1300
1645
|
# Work out the command string, and unwrap it
|
|
1301
|
-
command_string: str = evaluate_named_expression(self._task, "command", WDL.Type.String(), self._task.command, contained_bindings, command_library).coerce(WDL.Type.String()).value
|
|
1646
|
+
command_string: str = dedent(evaluate_named_expression(self._task, "command", WDL.Type.String(), self._task.command, contained_bindings, command_library).coerce(WDL.Type.String()).value)[1]
|
|
1302
1647
|
|
|
1303
1648
|
# Grab the standard out and error paths. MyPy complains if we call
|
|
1304
1649
|
# them because in the current MiniWDL version they are untyped.
|
|
@@ -1343,9 +1688,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1343
1688
|
# objects, and like MiniWDL we can say we only support
|
|
1344
1689
|
# working-directory-based relative paths for globs.
|
|
1345
1690
|
outputs_library = ToilWDLStdLibTaskOutputs(file_store, host_stdout_txt, host_stderr_txt, current_directory_override=workdir_in_container)
|
|
1346
|
-
output_bindings
|
|
1347
|
-
for output_decl in self._task.outputs:
|
|
1348
|
-
output_bindings = output_bindings.bind(output_decl.name, evaluate_decl(output_decl, bindings, outputs_library))
|
|
1691
|
+
output_bindings = evaluate_output_decls(self._task.outputs, bindings, outputs_library)
|
|
1349
1692
|
|
|
1350
1693
|
# Drop any files from the output which don't actually exist
|
|
1351
1694
|
output_bindings = drop_missing_files(output_bindings, current_directory_override=workdir_in_container)
|
|
@@ -1358,6 +1701,9 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1358
1701
|
# Upload any files in the outputs if not uploaded already. Accounts for how relative paths may still need to be container-relative.
|
|
1359
1702
|
output_bindings = virtualize_files(output_bindings, outputs_library)
|
|
1360
1703
|
|
|
1704
|
+
# Do postprocessing steps to e.g. apply namespaces.
|
|
1705
|
+
output_bindings = self.postprocess(output_bindings)
|
|
1706
|
+
|
|
1361
1707
|
return output_bindings
|
|
1362
1708
|
|
|
1363
1709
|
class WDLWorkflowNodeJob(WDLBaseJob):
|
|
@@ -1365,11 +1711,11 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
1365
1711
|
Job that evaluates a WDL workflow node.
|
|
1366
1712
|
"""
|
|
1367
1713
|
|
|
1368
|
-
def __init__(self, node: WDL.Tree.WorkflowNode, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, **kwargs: Any) -> None:
|
|
1714
|
+
def __init__(self, node: WDL.Tree.WorkflowNode, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
|
|
1369
1715
|
"""
|
|
1370
1716
|
Make a new job to run a workflow node to completion.
|
|
1371
1717
|
"""
|
|
1372
|
-
super().__init__(unitName=node.workflow_node_id, displayName=node.workflow_node_id, **kwargs)
|
|
1718
|
+
super().__init__(unitName=node.workflow_node_id, displayName=node.workflow_node_id, execution_dir=execution_dir, **kwargs)
|
|
1373
1719
|
|
|
1374
1720
|
self._node = node
|
|
1375
1721
|
self._prev_node_results = prev_node_results
|
|
@@ -1378,6 +1724,7 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
1378
1724
|
if isinstance(self._node, WDL.Tree.Call):
|
|
1379
1725
|
logger.debug("Preparing job for call node %s", self._node.workflow_node_id)
|
|
1380
1726
|
|
|
1727
|
+
@report_wdl_errors("run workflow node")
|
|
1381
1728
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1382
1729
|
"""
|
|
1383
1730
|
Actually execute the workflow node.
|
|
@@ -1388,62 +1735,110 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
1388
1735
|
# Combine the bindings we get from previous jobs
|
|
1389
1736
|
incoming_bindings = combine_bindings(unwrap_all(self._prev_node_results))
|
|
1390
1737
|
# Set up the WDL standard library
|
|
1391
|
-
standard_library = ToilWDLStdLibBase(file_store)
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
#
|
|
1414
|
-
|
|
1738
|
+
standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._execution_dir)
|
|
1739
|
+
with monkeypatch_coerce(standard_library):
|
|
1740
|
+
if isinstance(self._node, WDL.Tree.Decl):
|
|
1741
|
+
# This is a variable assignment
|
|
1742
|
+
logger.info('Setting %s to %s', self._node.name, self._node.expr)
|
|
1743
|
+
value = evaluate_decl(self._node, incoming_bindings, standard_library)
|
|
1744
|
+
return self.postprocess(incoming_bindings.bind(self._node.name, value))
|
|
1745
|
+
elif isinstance(self._node, WDL.Tree.Call):
|
|
1746
|
+
# This is a call of a task or workflow
|
|
1747
|
+
|
|
1748
|
+
# Fetch all the inputs we are passing and bind them.
|
|
1749
|
+
# The call is only allowed to use these.
|
|
1750
|
+
logger.debug("Evaluating step inputs")
|
|
1751
|
+
if self._node.callee is None:
|
|
1752
|
+
# This should never be None, but mypy gets unhappy and this is better than an assert
|
|
1753
|
+
inputs_mapping = None
|
|
1754
|
+
else:
|
|
1755
|
+
inputs_mapping = {e.name: e.type for e in self._node.callee.inputs or []}
|
|
1756
|
+
input_bindings = evaluate_call_inputs(self._node, self._node.inputs, incoming_bindings, standard_library, inputs_mapping)
|
|
1757
|
+
|
|
1758
|
+
# Bindings may also be added in from the enclosing workflow inputs
|
|
1759
|
+
# TODO: this is letting us also inject them from the workflow body.
|
|
1760
|
+
# TODO: Can this result in picking up non-namespaced values that
|
|
1761
|
+
# aren't meant to be inputs, by not changing their names?
|
|
1762
|
+
passed_down_bindings = incoming_bindings.enter_namespace(self._node.name)
|
|
1763
|
+
|
|
1764
|
+
if isinstance(self._node.callee, WDL.Tree.Workflow):
|
|
1765
|
+
# This is a call of a workflow
|
|
1766
|
+
subjob: WDLBaseJob = WDLWorkflowJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}', self._execution_dir)
|
|
1767
|
+
self.addChild(subjob)
|
|
1768
|
+
elif isinstance(self._node.callee, WDL.Tree.Task):
|
|
1769
|
+
# This is a call of a task
|
|
1770
|
+
subjob = WDLTaskJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}')
|
|
1771
|
+
self.addChild(subjob)
|
|
1772
|
+
else:
|
|
1773
|
+
raise WDL.Error.InvalidType(self._node, "Cannot call a " + str(type(self._node.callee)))
|
|
1774
|
+
|
|
1775
|
+
# We need to aggregate outputs namespaced with our node name, and existing bindings
|
|
1776
|
+
subjob.then_namespace(self._node.name)
|
|
1777
|
+
subjob.then_overlay(incoming_bindings)
|
|
1778
|
+
self.defer_postprocessing(subjob)
|
|
1779
|
+
return subjob.rv()
|
|
1780
|
+
elif isinstance(self._node, WDL.Tree.Scatter):
|
|
1781
|
+
subjob = WDLScatterJob(self._node, [incoming_bindings], self._namespace, self._execution_dir)
|
|
1415
1782
|
self.addChild(subjob)
|
|
1416
|
-
|
|
1417
|
-
#
|
|
1418
|
-
|
|
1783
|
+
# Scatters don't really make a namespace, just kind of a scope?
|
|
1784
|
+
# TODO: Let stuff leave scope!
|
|
1785
|
+
self.defer_postprocessing(subjob)
|
|
1786
|
+
return subjob.rv()
|
|
1787
|
+
elif isinstance(self._node, WDL.Tree.Conditional):
|
|
1788
|
+
subjob = WDLConditionalJob(self._node, [incoming_bindings], self._namespace, self._execution_dir)
|
|
1419
1789
|
self.addChild(subjob)
|
|
1790
|
+
# Conditionals don't really make a namespace, just kind of a scope?
|
|
1791
|
+
# TODO: Let stuff leave scope!
|
|
1792
|
+
self.defer_postprocessing(subjob)
|
|
1793
|
+
return subjob.rv()
|
|
1420
1794
|
else:
|
|
1421
|
-
raise WDL.Error.InvalidType(self._node, "
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1795
|
+
raise WDL.Error.InvalidType(self._node, "Unimplemented WorkflowNode: " + str(type(self._node)))
|
|
1796
|
+
|
|
1797
|
+
class WDLWorkflowNodeListJob(WDLBaseJob):
    """
    Job that evaluates several WDL workflow nodes one after another.

    The nodes must be in the same scope and in a topological dependency
    order, and must not call out to any other workflows or tasks or
    sections.
    """

    def __init__(self, nodes: List[WDL.Tree.WorkflowNode], prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
        """
        Make a new job to run a list of workflow nodes to completion.

        The job is named after the first node, with a '+' appended.

        :raises RuntimeError: If any node is a call, scatter, or conditional.
        """
        job_name = nodes[0].workflow_node_id + '+'
        super().__init__(unitName=job_name, displayName=job_name, execution_dir=execution_dir, **kwargs)

        self._nodes = nodes
        self._prev_node_results = prev_node_results
        self._namespace = namespace

        # Calls and sections cannot be batched with other nodes; complain
        # about the first one we find.
        unbatchable = (WDL.Tree.Call, WDL.Tree.Scatter, WDL.Tree.Conditional)
        offender = next((n for n in self._nodes if isinstance(n, unbatchable)), None)
        if offender is not None:
            raise RuntimeError("Node cannot be evaluated with other nodes: " + str(offender))

    @report_wdl_errors("run workflow node list")
    def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
        """
        Actually execute the workflow nodes.

        Each declaration is evaluated against the bindings accumulated so
        far and then bound into them; the final bindings are postprocessed
        and returned.
        """
        super().run(file_store)

        # Start from everything the previous jobs produced
        environment = combine_bindings(unwrap_all(self._prev_node_results))
        # Set up the WDL standard library
        stdlib = ToilWDLStdLibBase(file_store, execution_dir=self._execution_dir)

        with monkeypatch_coerce(stdlib):
            for node in self._nodes:
                if not isinstance(node, WDL.Tree.Decl):
                    # Only variable assignments are supported here
                    raise WDL.Error.InvalidType(node, "Unimplemented WorkflowNode: " + str(type(node)))
                logger.info('Setting %s to %s', node.name, node.expr)
                environment = environment.bind(node.name, evaluate_decl(node, environment, stdlib))

        return self.postprocess(environment)
|
|
1841
|
+
|
|
1447
1842
|
|
|
1448
1843
|
class WDLCombineBindingsJob(WDLBaseJob):
|
|
1449
1844
|
"""
|
|
@@ -1451,7 +1846,7 @@ class WDLCombineBindingsJob(WDLBaseJob):
|
|
|
1451
1846
|
environment changes.
|
|
1452
1847
|
"""
|
|
1453
1848
|
|
|
1454
|
-
def __init__(self, prev_node_results: Sequence[Promised[WDLBindings]],
|
|
1849
|
+
def __init__(self, prev_node_results: Sequence[Promised[WDLBindings]], **kwargs: Any) -> None:
|
|
1455
1850
|
"""
|
|
1456
1851
|
Make a new job to combine the results of previous jobs.
|
|
1457
1852
|
|
|
@@ -1462,58 +1857,229 @@ class WDLCombineBindingsJob(WDLBaseJob):
|
|
|
1462
1857
|
super().__init__(**kwargs)
|
|
1463
1858
|
|
|
1464
1859
|
self._prev_node_results = prev_node_results
|
|
1465
|
-
self._underlay = underlay
|
|
1466
|
-
self._remove = remove
|
|
1467
1860
|
|
|
1861
|
+
@report_wdl_errors("combine bindings")
|
|
1468
1862
|
def run(self, file_store: AbstractFileStore) -> WDLBindings:
    """
    Aggregate incoming results.

    Unwraps the results of the previous jobs, combines them into one set of
    bindings, and returns those bindings after postprocessing.
    """
    super().run(file_store)
    resolved = unwrap_all(self._prev_node_results)
    # Make sure to run the universal postprocessing steps
    return self.postprocess(combine_bindings(resolved))
|
|
1481
1870
|
|
|
1482
|
-
class
|
|
1871
|
+
class WDLWorkflowGraph:
|
|
1483
1872
|
"""
|
|
1484
|
-
|
|
1873
|
+
Represents a graph of WDL WorkflowNodes.
|
|
1874
|
+
|
|
1875
|
+
Operates at a certain level of instantiation (i.e. sub-sections are
|
|
1876
|
+
represented by single nodes).
|
|
1877
|
+
|
|
1878
|
+
Assumes all relevant nodes are provided; dependencies outside the provided
|
|
1879
|
+
nodes are assumed to be satisfied already.
|
|
1485
1880
|
"""
|
|
1486
1881
|
|
|
1487
|
-
def __init__(self,
|
|
1882
|
+
def __init__(self, nodes: Sequence[WDL.Tree.WorkflowNode]) -> None:
|
|
1488
1883
|
"""
|
|
1489
|
-
Make a
|
|
1884
|
+
Make a graph for analyzing a set of workflow nodes.
|
|
1490
1885
|
"""
|
|
1491
|
-
super().__init__(**kwargs)
|
|
1492
1886
|
|
|
1493
|
-
|
|
1494
|
-
|
|
1887
|
+
# For Gather nodes, the Toil interpreter handles them as part of their
|
|
1888
|
+
# associated section. So make a map from gather ID to the section node
|
|
1889
|
+
# ID.
|
|
1890
|
+
self._gather_to_section: Dict[str, str] = {}
|
|
1891
|
+
for node in nodes:
|
|
1892
|
+
if isinstance(node, WDL.Tree.WorkflowSection):
|
|
1893
|
+
for gather_node in node.gathers.values():
|
|
1894
|
+
self._gather_to_section[gather_node.workflow_node_id] = node.workflow_node_id
|
|
1495
1895
|
|
|
1496
|
-
|
|
1896
|
+
# Store all the nodes by ID, except the gathers which we elide.
|
|
1897
|
+
self._nodes: Dict[str, WDL.Tree.WorkflowNode] = {node.workflow_node_id: node for node in nodes if not isinstance(node, WDL.Tree.Gather)}
|
|
1898
|
+
|
|
1899
|
+
def real_id(self, node_id: str) -> str:
|
|
1497
1900
|
"""
|
|
1498
|
-
|
|
1901
|
+
Map multiple IDs for what we consider the same node to one ID.
|
|
1902
|
+
|
|
1903
|
+
This elides/resolves gathers.
|
|
1499
1904
|
"""
|
|
1500
|
-
|
|
1501
|
-
|
|
1905
|
+
return self._gather_to_section.get(node_id, node_id)
|
|
1906
|
+
|
|
1907
|
+
def is_decl(self, node_id: str) -> bool:
|
|
1908
|
+
"""
|
|
1909
|
+
Return True if a node represents a WDL declaration, and false
|
|
1910
|
+
otherwise.
|
|
1911
|
+
"""
|
|
1912
|
+
return isinstance(self.get(node_id), WDL.Tree.Decl)
|
|
1913
|
+
|
|
1914
|
+
def get(self, node_id: str) -> WDL.Tree.WorkflowNode:
|
|
1915
|
+
"""
|
|
1916
|
+
Get a node by ID.
|
|
1917
|
+
"""
|
|
1918
|
+
return self._nodes[self.real_id(node_id)]
|
|
1919
|
+
|
|
1920
|
+
def get_dependencies(self, node_id: str) -> Set[str]:
|
|
1921
|
+
"""
|
|
1922
|
+
Get all the nodes that a node depends on, recursively (into the node if
|
|
1923
|
+
it has a body) but not transitively.
|
|
1924
|
+
|
|
1925
|
+
Produces dependencies after resolving gathers and internal-to-section
|
|
1926
|
+
dependencies, on nodes that are also in this graph.
|
|
1927
|
+
"""
|
|
1928
|
+
|
|
1929
|
+
# We need to make sure to bubble up dependencies from inside sections.
|
|
1930
|
+
# A conditional might only appear to depend on the variables in the
|
|
1931
|
+
# conditional expression, but its body can depend on other stuff, and
|
|
1932
|
+
# we need to make sure that that stuff has finished and updated the
|
|
1933
|
+
# environment before the conditional body runs. TODO: This is because
|
|
1934
|
+
# Toil can't go and get and add successors to the relevant jobs later,
|
|
1935
|
+
# while MiniWDL's engine apparently can. This ends up reducing
|
|
1936
|
+
# parallelism more than would strictly be necessary; nothing in the
|
|
1937
|
+
# conditional can start until the dependencies of everything in the
|
|
1938
|
+
# conditional are ready.
|
|
1939
|
+
|
|
1940
|
+
dependencies = set()
|
|
1941
|
+
|
|
1942
|
+
node = self.get(node_id)
|
|
1943
|
+
for dependency in recursive_dependencies(node):
|
|
1944
|
+
real_dependency = self.real_id(dependency)
|
|
1945
|
+
if real_dependency in self._nodes:
|
|
1946
|
+
dependencies.add(real_dependency)
|
|
1947
|
+
|
|
1948
|
+
return dependencies
|
|
1949
|
+
|
|
1950
|
+
def get_transitive_dependencies(self, node_id: str) -> Set[str]:
|
|
1951
|
+
"""
|
|
1952
|
+
Get all the nodes that a node depends on, transitively.
|
|
1953
|
+
"""
|
|
1954
|
+
|
|
1955
|
+
dependencies: Set[str] = set()
|
|
1956
|
+
visited: Set[str] = set()
|
|
1957
|
+
queue = [node_id]
|
|
1958
|
+
|
|
1959
|
+
while len(queue) > 0:
|
|
1960
|
+
# Grab the next thing off the queue
|
|
1961
|
+
here = queue[-1]
|
|
1962
|
+
queue.pop()
|
|
1963
|
+
if here in visited:
|
|
1964
|
+
# Skip if we got it already
|
|
1965
|
+
continue
|
|
1966
|
+
# Mark it got
|
|
1967
|
+
visited.add(here)
|
|
1968
|
+
# Get all its dependencies
|
|
1969
|
+
here_deps = self.get_dependencies(here)
|
|
1970
|
+
dependencies |= here_deps
|
|
1971
|
+
for dep in here_deps:
|
|
1972
|
+
if dep not in visited:
|
|
1973
|
+
# And queue all the ones we haven't visited.
|
|
1974
|
+
queue.append(dep)
|
|
1975
|
+
|
|
1976
|
+
return dependencies
|
|
1977
|
+
|
|
1978
|
+
def topological_order(self) -> List[str]:
|
|
1979
|
+
"""
|
|
1980
|
+
Get a topological order of the nodes, based on their dependencies.
|
|
1981
|
+
"""
|
|
1982
|
+
|
|
1983
|
+
sorter : TopologicalSorter[str] = TopologicalSorter()
|
|
1984
|
+
for node_id in self._nodes.keys():
|
|
1985
|
+
# Add all the edges
|
|
1986
|
+
sorter.add(node_id, *self.get_dependencies(node_id))
|
|
1987
|
+
return list(sorter.static_order())
|
|
1988
|
+
|
|
1989
|
+
def leaves(self) -> List[str]:
|
|
1990
|
+
"""
|
|
1991
|
+
Get all the workflow node IDs that have no dependents in the graph.
|
|
1992
|
+
"""
|
|
1993
|
+
|
|
1994
|
+
leaves = set(self._nodes.keys())
|
|
1995
|
+
for node_id in self._nodes.keys():
|
|
1996
|
+
for dependency in self.get_dependencies(node_id):
|
|
1997
|
+
if dependency in leaves:
|
|
1998
|
+
# Mark everything depended on as not a leaf
|
|
1999
|
+
leaves.remove(dependency)
|
|
2000
|
+
return list(leaves)
|
|
2001
|
+
|
|
1502
2002
|
|
|
1503
2003
|
class WDLSectionJob(WDLBaseJob):
|
|
1504
2004
|
"""
|
|
1505
2005
|
Job that can create more graph for a section of the workflow.
|
|
1506
2006
|
"""
|
|
1507
2007
|
|
|
1508
|
-
def __init__(self, namespace: str, **kwargs: Any) -> None:
|
|
2008
|
+
def __init__(self, namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
|
|
1509
2009
|
"""
|
|
1510
2010
|
Make a WDLSectionJob where the interior runs in the given namespace,
|
|
1511
2011
|
starting with the root workflow.
|
|
1512
2012
|
"""
|
|
1513
|
-
super().__init__(**kwargs)
|
|
2013
|
+
super().__init__(execution_dir, **kwargs)
|
|
1514
2014
|
self._namespace = namespace
|
|
1515
2015
|
|
|
1516
|
-
|
|
2016
|
+
@staticmethod
|
|
2017
|
+
def coalesce_nodes(order: List[str], section_graph: WDLWorkflowGraph) -> List[List[str]]:
|
|
2018
|
+
"""
|
|
2019
|
+
Given a topological order of WDL workflow node IDs, produce a list of
|
|
2020
|
+
lists of IDs, still in topological order, where each list of IDs can be
|
|
2021
|
+
run under a single Toil job.
|
|
2022
|
+
"""
|
|
2023
|
+
|
|
2024
|
+
# All the buckets of merged nodes
|
|
2025
|
+
to_return: List[List[str]] = []
|
|
2026
|
+
# The nodes we are currently merging, in topological order
|
|
2027
|
+
current_bucket: List[str] = []
|
|
2028
|
+
# All the non-decl transitive dependencies of nodes in the bucket
|
|
2029
|
+
current_bucket_dependencies: Set[str] = set()
|
|
2030
|
+
|
|
2031
|
+
for next_id in order:
|
|
2032
|
+
# Consider adding each node to the bucket
|
|
2033
|
+
# Get all the dependencies on things that aren't decls.
|
|
2034
|
+
next_dependencies = {dep for dep in section_graph.get_transitive_dependencies(next_id) if not section_graph.is_decl(dep)}
|
|
2035
|
+
if len(current_bucket) == 0:
|
|
2036
|
+
# This is the first thing for the bucket
|
|
2037
|
+
current_bucket.append(next_id)
|
|
2038
|
+
current_bucket_dependencies |= next_dependencies
|
|
2039
|
+
else:
|
|
2040
|
+
# Get a node already in the bucket
|
|
2041
|
+
current_id = current_bucket[0]
|
|
2042
|
+
|
|
2043
|
+
if not section_graph.is_decl(current_id) or not section_graph.is_decl(next_id):
|
|
2044
|
+
# We can only combine decls with decls, so we can't go in
|
|
2045
|
+
# the bucket.
|
|
2046
|
+
|
|
2047
|
+
# Finish the bucket.
|
|
2048
|
+
to_return.append(current_bucket)
|
|
2049
|
+
# Start a new one with this next node
|
|
2050
|
+
current_bucket = [next_id]
|
|
2051
|
+
current_bucket_dependencies = next_dependencies
|
|
2052
|
+
else:
|
|
2053
|
+
# We have a decl in the bucket and a decl we could maybe
|
|
2054
|
+
# add. We know they are part of the same section, so we
|
|
2055
|
+
# aren't jumping in and out of conditionals or scatters.
|
|
2056
|
+
|
|
2057
|
+
# We are going in a topological order, so we know the
|
|
2058
|
+
# bucket can't depend on the new node.
|
|
2059
|
+
|
|
2060
|
+
if next_dependencies == current_bucket_dependencies:
|
|
2061
|
+
# We can add this node without adding more dependencies on non-decls on either side.
|
|
2062
|
+
# Nothing in the bucket can be in the dependency set because the bucket is only decls.
|
|
2063
|
+
# Put it in
|
|
2064
|
+
current_bucket.append(next_id)
|
|
2065
|
+
# TODO: With this condition, this is redundant.
|
|
2066
|
+
current_bucket_dependencies |= next_dependencies
|
|
2067
|
+
else:
|
|
2068
|
+
# Finish the bucket.
|
|
2069
|
+
to_return.append(current_bucket)
|
|
2070
|
+
# Start a new one with this next node
|
|
2071
|
+
current_bucket = [next_id]
|
|
2072
|
+
current_bucket_dependencies = next_dependencies
|
|
2073
|
+
|
|
2074
|
+
if len(current_bucket) > 0:
|
|
2075
|
+
# Now finish the last bucket
|
|
2076
|
+
to_return.append(current_bucket)
|
|
2077
|
+
|
|
2078
|
+
return to_return
|
|
2079
|
+
|
|
2080
|
+
|
|
2081
|
+
|
|
2082
|
+
def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: Sequence[WDL.Tree.Gather], environment: WDLBindings, local_environment: Optional[WDLBindings] = None) -> WDLBaseJob:
|
|
1517
2083
|
"""
|
|
1518
2084
|
Make a Toil job to evaluate a subgraph inside a workflow or workflow
|
|
1519
2085
|
section.
|
|
@@ -1531,95 +2097,69 @@ class WDLSectionJob(WDLBaseJob):
|
|
|
1531
2097
|
at the end of the section.
|
|
1532
2098
|
"""
|
|
1533
2099
|
|
|
1534
|
-
# We need to track the dependency universe; some of our child nodes may
|
|
1535
|
-
# depend on nodes that are e.g. inputs to the workflow that encloses
|
|
1536
|
-
# the section that encloses this section, and we need to just assume
|
|
1537
|
-
# those are already available, even though we don't have access to the
|
|
1538
|
-
# complete list. So we make a set of everything we actually do need to
|
|
1539
|
-
# care about resolving, instead.
|
|
1540
|
-
dependabes: Set[str] = set()
|
|
1541
|
-
|
|
1542
2100
|
if local_environment is not None:
|
|
1543
2101
|
# Bring local environment into scope
|
|
1544
2102
|
environment = combine_bindings([environment, local_environment])
|
|
1545
2103
|
|
|
1546
|
-
#
|
|
1547
|
-
|
|
1548
|
-
dependabes |= set(wdl_id_to_wdl_node.keys())
|
|
1549
|
-
|
|
1550
|
-
# That doesn't include gather nodes, which in the Toil interpreter we
|
|
1551
|
-
# handle as part of their enclosing section, without individual Toil
|
|
1552
|
-
# jobs for each. So make a map from gather ID to the section node ID.
|
|
1553
|
-
gather_to_section: Dict[str, str] = {}
|
|
1554
|
-
for node in nodes:
|
|
1555
|
-
if isinstance(node, WDL.Tree.WorkflowSection):
|
|
1556
|
-
for gather_node in node.gathers.values():
|
|
1557
|
-
gather_to_section[gather_node.workflow_node_id] = node.workflow_node_id
|
|
1558
|
-
dependabes |= set(gather_to_section.keys())
|
|
2104
|
+
# Make a graph of all the nodes at this level
|
|
2105
|
+
section_graph = WDLWorkflowGraph(nodes)
|
|
1559
2106
|
|
|
1560
2107
|
# To make Toil jobs, we need all the jobs they depend on made so we can
|
|
1561
2108
|
# call .rv(). So we need to solve the workflow DAG ourselves to set it up
|
|
1562
2109
|
# properly.
|
|
1563
2110
|
|
|
1564
|
-
#
|
|
1565
|
-
|
|
1566
|
-
#
|
|
1567
|
-
#
|
|
1568
|
-
|
|
1569
|
-
# is because Toil can't go and get and add successors to the relevant
|
|
1570
|
-
# jobs later, while MiniWDL's engine apparently can. This ends up
|
|
1571
|
-
# reducing parallelism more than would strictly be necessary; nothing
|
|
1572
|
-
# in the conditional can start until the dependencies of everything in
|
|
1573
|
-
# the conditional are ready.
|
|
1574
|
-
|
|
1575
|
-
# What are the dependencies of all the body nodes on other body nodes?
|
|
1576
|
-
# Nodes can depend on other nodes actually in the tree, or on gathers
|
|
1577
|
-
# that belong to other nodes, but we rewrite the gather dependencies
|
|
1578
|
-
# through to the enclosing section node. Skip any dependencies on
|
|
1579
|
-
# anything not provided by another body node (such as on an input, or
|
|
1580
|
-
# something outside of the current section). TODO: This will need to
|
|
1581
|
-
# change if we let parallelism transcend sections.
|
|
1582
|
-
wdl_id_to_dependency_ids = {node_id: list({gather_to_section[dep] if dep in gather_to_section else dep for dep in recursive_dependencies(node) if dep in dependabes}) for node_id, node in wdl_id_to_wdl_node.items()}
|
|
1583
|
-
|
|
1584
|
-
# Which of those are outstanding?
|
|
1585
|
-
wdl_id_to_outstanding_dependency_ids = copy.deepcopy(wdl_id_to_dependency_ids)
|
|
1586
|
-
|
|
1587
|
-
# What nodes depend on each node?
|
|
1588
|
-
wdl_id_to_dependent_ids: Dict[str, Set[str]] = collections.defaultdict(set)
|
|
1589
|
-
for node_id, dependencies in wdl_id_to_dependency_ids.items():
|
|
1590
|
-
for dependency_id in dependencies:
|
|
1591
|
-
# Invert the dependency edges
|
|
1592
|
-
wdl_id_to_dependent_ids[dependency_id].add(node_id)
|
|
1593
|
-
|
|
1594
|
-
# This will hold all the Toil jobs by WDL node ID
|
|
1595
|
-
wdl_id_to_toil_job: Dict[str, Job] = {}
|
|
1596
|
-
|
|
1597
|
-
# And collect IDs of jobs with no successors to add a final sink job
|
|
1598
|
-
leaf_ids: Set[str] = set()
|
|
1599
|
-
|
|
1600
|
-
# What nodes are ready?
|
|
1601
|
-
ready_node_ids = {node_id for node_id, dependencies in wdl_id_to_outstanding_dependency_ids.items() if len(dependencies) == 0}
|
|
1602
|
-
|
|
1603
|
-
while len(wdl_id_to_outstanding_dependency_ids) > 0:
|
|
1604
|
-
logger.debug('Ready nodes: %s', ready_node_ids)
|
|
1605
|
-
logger.debug('Waiting nodes: %s', wdl_id_to_outstanding_dependency_ids)
|
|
1606
|
-
|
|
1607
|
-
# Find a node that we can do now
|
|
1608
|
-
node_id = next(iter(ready_node_ids))
|
|
1609
|
-
|
|
1610
|
-
# Say we are doing it
|
|
1611
|
-
ready_node_ids.remove(node_id)
|
|
1612
|
-
del wdl_id_to_outstanding_dependency_ids[node_id]
|
|
1613
|
-
logger.debug('Make Toil job for %s', node_id)
|
|
2111
|
+
# When a WDL node depends on another, we need to be able to find the Toil job we need an rv from.
|
|
2112
|
+
wdl_id_to_toil_job: Dict[str, WDLBaseJob] = {}
|
|
2113
|
+
# We need the set of Toil jobs not depended on so we can wire them up to the sink.
|
|
2114
|
+
# This maps from Toil job store ID to job.
|
|
2115
|
+
toil_leaves: Dict[Union[str, TemporaryID], WDLBaseJob] = {}
|
|
1614
2116
|
|
|
2117
|
+
def get_job_set_any(wdl_ids: Set[str]) -> List[WDLBaseJob]:
|
|
2118
|
+
"""
|
|
2119
|
+
Get the distinct Toil jobs executing any of the given WDL nodes.
|
|
2120
|
+
"""
|
|
2121
|
+
job_ids = set()
|
|
2122
|
+
jobs = []
|
|
2123
|
+
for job in (wdl_id_to_toil_job[wdl_id] for wdl_id in wdl_ids):
|
|
2124
|
+
# For each job that is registered under any of these WDL IDs
|
|
2125
|
+
if job.jobStoreID not in job_ids:
|
|
2126
|
+
# If we haven't taken it already, take it
|
|
2127
|
+
job_ids.add(job.jobStoreID)
|
|
2128
|
+
jobs.append(job)
|
|
2129
|
+
return jobs
|
|
2130
|
+
|
|
2131
|
+
creation_order = section_graph.topological_order()
|
|
2132
|
+
logger.debug('Creation order: %s', creation_order)
|
|
2133
|
+
|
|
2134
|
+
# Now we want to organize the linear list of nodes into collections of nodes that can be in the same Toil job.
|
|
2135
|
+
creation_jobs = self.coalesce_nodes(creation_order, section_graph)
|
|
2136
|
+
logger.debug('Creation jobs: %s', creation_jobs)
|
|
2137
|
+
|
|
2138
|
+
for node_ids in creation_jobs:
|
|
2139
|
+
logger.debug('Make Toil job for %s', node_ids)
|
|
1615
2140
|
# Collect the return values from previous jobs. Some nodes may have been inputs, without jobs.
|
|
1616
|
-
|
|
2141
|
+
# Don't include stuff in the current batch.
|
|
2142
|
+
prev_node_ids = {prev_node_id for node_id in node_ids for prev_node_id in section_graph.get_dependencies(node_id) if prev_node_id not in node_ids}
|
|
2143
|
+
|
|
2144
|
+
|
|
2145
|
+
# Get the Toil jobs we depend on
|
|
2146
|
+
prev_jobs = get_job_set_any(prev_node_ids)
|
|
2147
|
+
for prev_job in prev_jobs:
|
|
2148
|
+
if prev_job.jobStoreID in toil_leaves:
|
|
2149
|
+
# Mark them all as depended on
|
|
2150
|
+
del toil_leaves[prev_job.jobStoreID]
|
|
2151
|
+
|
|
2152
|
+
# Get their return values to feed into the new job
|
|
1617
2153
|
rvs: List[Union[WDLBindings, Promise]] = [prev_job.rv() for prev_job in prev_jobs]
|
|
1618
2154
|
# We also need access to section-level bindings like inputs
|
|
1619
2155
|
rvs.append(environment)
|
|
1620
2156
|
|
|
1621
|
-
|
|
1622
|
-
|
|
2157
|
+
if len(node_ids) == 1:
|
|
2158
|
+
# Make a one-node job
|
|
2159
|
+
job: WDLBaseJob = WDLWorkflowNodeJob(section_graph.get(node_ids[0]), rvs, self._namespace, self._execution_dir)
|
|
2160
|
+
else:
|
|
2161
|
+
# Make a multi-node job
|
|
2162
|
+
job = WDLWorkflowNodeListJob([section_graph.get(node_id) for node_id in node_ids], rvs, self._namespace, self._execution_dir)
|
|
1623
2163
|
for prev_job in prev_jobs:
|
|
1624
2164
|
# Connect up the happens-after relationships to make sure the
|
|
1625
2165
|
# return values are available.
|
|
@@ -1631,38 +2171,38 @@ class WDLSectionJob(WDLBaseJob):
|
|
|
1631
2171
|
# Nothing came before this job, so connect it to the workflow.
|
|
1632
2172
|
self.addChild(job)
|
|
1633
2173
|
|
|
1634
|
-
|
|
1635
|
-
|
|
2174
|
+
for node_id in node_ids:
|
|
2175
|
+
# Save the job for everything it executes
|
|
2176
|
+
wdl_id_to_toil_job[node_id] = job
|
|
1636
2177
|
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
#
|
|
1662
|
-
self.
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
wdl_id_to_toil_job[node_id].addFollowOn(sink)
|
|
2178
|
+
# It isn't depended on yet
|
|
2179
|
+
toil_leaves[job.jobStoreID] = job
|
|
2180
|
+
|
|
2181
|
+
if len(toil_leaves) == 1:
|
|
2182
|
+
# There's one final node so we can just tack postprocessing onto that.
|
|
2183
|
+
sink: WDLBaseJob = next(iter(toil_leaves.values()))
|
|
2184
|
+
else:
|
|
2185
|
+
# We need to bring together with a new sink
|
|
2186
|
+
# Make the sink job to collect all their results.
|
|
2187
|
+
leaf_rvs: List[Union[WDLBindings, Promise]] = [leaf_job.rv() for leaf_job in toil_leaves.values()]
|
|
2188
|
+
# Make sure to also send the section-level bindings
|
|
2189
|
+
leaf_rvs.append(environment)
|
|
2190
|
+
# And to fill in bindings from code not executed in this instantiation
|
|
2191
|
+
# with Null, and filter out stuff that should leave scope.
|
|
2192
|
+
sink = WDLCombineBindingsJob(leaf_rvs)
|
|
2193
|
+
# It runs inside us
|
|
2194
|
+
self.addChild(sink)
|
|
2195
|
+
for leaf_job in toil_leaves.values():
|
|
2196
|
+
# And after all the leaf jobs.
|
|
2197
|
+
leaf_job.addFollowOn(sink)
|
|
2198
|
+
|
|
2199
|
+
logger.debug("Sink job is: %s", sink)
|
|
2200
|
+
|
|
2201
|
+
|
|
2202
|
+
# Apply the final postprocessing for leaving the section.
|
|
2203
|
+
sink.then_underlay(self.make_gather_bindings(gather_nodes, WDL.Value.Null()))
|
|
2204
|
+
if local_environment is not None:
|
|
2205
|
+
sink.then_remove(local_environment)
|
|
1666
2206
|
|
|
1667
2207
|
return sink
|
|
1668
2208
|
|
|
@@ -1716,11 +2256,11 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
1716
2256
|
instance of the body. If an instance of the body doesn't create a binding,
|
|
1717
2257
|
it gets a null value in the corresponding array.
|
|
1718
2258
|
"""
|
|
1719
|
-
def __init__(self, scatter: WDL.Tree.Scatter, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, **kwargs: Any) -> None:
|
|
2259
|
+
def __init__(self, scatter: WDL.Tree.Scatter, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
|
|
1720
2260
|
"""
|
|
1721
2261
|
Create a subtree that will run a WDL scatter. The scatter itself and the contents live in the given namespace.
|
|
1722
2262
|
"""
|
|
1723
|
-
super().__init__(namespace, **kwargs, unitName=scatter.workflow_node_id, displayName=scatter.workflow_node_id)
|
|
2263
|
+
super().__init__(namespace, **kwargs, unitName=scatter.workflow_node_id, displayName=scatter.workflow_node_id, execution_dir=execution_dir)
|
|
1724
2264
|
|
|
1725
2265
|
# Because we need to return the return value of the workflow, we need
|
|
1726
2266
|
# to return a Toil promise for the last/sink job in the workflow's
|
|
@@ -1734,6 +2274,7 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
1734
2274
|
self._scatter = scatter
|
|
1735
2275
|
self._prev_node_results = prev_node_results
|
|
1736
2276
|
|
|
2277
|
+
@report_wdl_errors("run scatter")
|
|
1737
2278
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1738
2279
|
"""
|
|
1739
2280
|
Run the scatter.
|
|
@@ -1749,9 +2290,11 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
1749
2290
|
standard_library = ToilWDLStdLibBase(file_store)
|
|
1750
2291
|
|
|
1751
2292
|
# Get what to scatter over
|
|
1752
|
-
|
|
2293
|
+
with monkeypatch_coerce(standard_library):
|
|
2294
|
+
scatter_value = evaluate_named_expression(self._scatter, self._scatter.variable, None, self._scatter.expr, bindings, standard_library)
|
|
1753
2295
|
|
|
1754
|
-
|
|
2296
|
+
if not isinstance(scatter_value, WDL.Value.Array):
|
|
2297
|
+
raise RuntimeError("The returned value from a scatter is not an Array type.")
|
|
1755
2298
|
|
|
1756
2299
|
scatter_jobs = []
|
|
1757
2300
|
for item in scatter_value.value:
|
|
@@ -1787,6 +2330,7 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
1787
2330
|
self.addChild(gather_job)
|
|
1788
2331
|
for j in scatter_jobs:
|
|
1789
2332
|
j.addFollowOn(gather_job)
|
|
2333
|
+
self.defer_postprocessing(gather_job)
|
|
1790
2334
|
return gather_job.rv()
|
|
1791
2335
|
|
|
1792
2336
|
class WDLArrayBindingsJob(WDLBaseJob):
|
|
@@ -1813,6 +2357,7 @@ class WDLArrayBindingsJob(WDLBaseJob):
|
|
|
1813
2357
|
self._input_bindings = input_bindings
|
|
1814
2358
|
self._base_bindings = base_bindings
|
|
1815
2359
|
|
|
2360
|
+
@report_wdl_errors("create array bindings")
|
|
1816
2361
|
def run(self, file_store: AbstractFileStore) -> WDLBindings:
|
|
1817
2362
|
"""
|
|
1818
2363
|
Actually produce the array-ified bindings now that promised values are available.
|
|
@@ -1844,17 +2389,17 @@ class WDLArrayBindingsJob(WDLBaseJob):
|
|
|
1844
2389
|
result = result.bind(name, WDL.Value.Array(supertype, [env.resolve(name) if env.has_binding(name) else WDL.Value.Null() for env in new_bindings]))
|
|
1845
2390
|
|
|
1846
2391
|
# Base bindings are already included so return the result
|
|
1847
|
-
return result
|
|
2392
|
+
return self.postprocess(result)
|
|
1848
2393
|
|
|
1849
2394
|
class WDLConditionalJob(WDLSectionJob):
|
|
1850
2395
|
"""
|
|
1851
2396
|
Job that evaluates a conditional in a WDL workflow.
|
|
1852
2397
|
"""
|
|
1853
|
-
def __init__(self, conditional: WDL.Tree.Conditional, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, **kwargs: Any) -> None:
|
|
2398
|
+
def __init__(self, conditional: WDL.Tree.Conditional, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
|
|
1854
2399
|
"""
|
|
1855
2400
|
Create a subtree that will run a WDL conditional. The conditional itself and its contents live in the given namespace.
|
|
1856
2401
|
"""
|
|
1857
|
-
super().__init__(namespace, **kwargs, unitName=conditional.workflow_node_id, displayName=conditional.workflow_node_id)
|
|
2402
|
+
super().__init__(namespace, **kwargs, unitName=conditional.workflow_node_id, displayName=conditional.workflow_node_id, execution_dir=execution_dir)
|
|
1858
2403
|
|
|
1859
2404
|
# Once again we need to ship the whole body template to be instantiated
|
|
1860
2405
|
# into Toil jobs only if it will actually run.
|
|
@@ -1864,6 +2409,7 @@ class WDLConditionalJob(WDLSectionJob):
|
|
|
1864
2409
|
self._conditional = conditional
|
|
1865
2410
|
self._prev_node_results = prev_node_results
|
|
1866
2411
|
|
|
2412
|
+
@report_wdl_errors("run conditional")
|
|
1867
2413
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1868
2414
|
"""
|
|
1869
2415
|
Run the conditional.
|
|
@@ -1879,27 +2425,29 @@ class WDLConditionalJob(WDLSectionJob):
|
|
|
1879
2425
|
standard_library = ToilWDLStdLibBase(file_store)
|
|
1880
2426
|
|
|
1881
2427
|
# Get the expression value. Fake a name.
|
|
1882
|
-
|
|
2428
|
+
with monkeypatch_coerce(standard_library):
|
|
2429
|
+
expr_value = evaluate_named_expression(self._conditional, "<conditional expression>", WDL.Type.Boolean(), self._conditional.expr, bindings, standard_library)
|
|
1883
2430
|
|
|
1884
2431
|
if expr_value.value:
|
|
1885
2432
|
# Evaluated to true!
|
|
1886
2433
|
logger.info('Condition is true')
|
|
1887
2434
|
# Run the body and return its effects
|
|
1888
2435
|
body_job = self.create_subgraph(self._conditional.body, list(self._conditional.gathers.values()), bindings)
|
|
2436
|
+
self.defer_postprocessing(body_job)
|
|
1889
2437
|
return body_job.rv()
|
|
1890
2438
|
else:
|
|
1891
2439
|
logger.info('Condition is false')
|
|
1892
2440
|
# Return the input bindings and null bindings for all our gathers.
|
|
1893
2441
|
# Should not collide at all.
|
|
1894
2442
|
gather_bindings = self.make_gather_bindings(list(self._conditional.gathers.values()), WDL.Value.Null())
|
|
1895
|
-
return combine_bindings([bindings, gather_bindings])
|
|
2443
|
+
return self.postprocess(combine_bindings([bindings, gather_bindings]))
|
|
1896
2444
|
|
|
1897
2445
|
class WDLWorkflowJob(WDLSectionJob):
|
|
1898
2446
|
"""
|
|
1899
2447
|
Job that evaluates an entire WDL workflow.
|
|
1900
2448
|
"""
|
|
1901
2449
|
|
|
1902
|
-
def __init__(self, workflow: WDL.Tree.Workflow, prev_node_results: Sequence[Promised[WDLBindings]], workflow_id: List[str], namespace: str, **kwargs: Any) -> None:
|
|
2450
|
+
def __init__(self, workflow: WDL.Tree.Workflow, prev_node_results: Sequence[Promised[WDLBindings]], workflow_id: List[str], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
|
|
1903
2451
|
"""
|
|
1904
2452
|
Create a subtree that will run a WDL workflow. The job returns the
|
|
1905
2453
|
return value of the workflow.
|
|
@@ -1907,7 +2455,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
1907
2455
|
:param namespace: the namespace that the workflow's *contents* will be
|
|
1908
2456
|
in. Caller has already added the workflow's own name.
|
|
1909
2457
|
"""
|
|
1910
|
-
super().__init__(namespace, **kwargs)
|
|
2458
|
+
super().__init__(namespace, execution_dir, **kwargs)
|
|
1911
2459
|
|
|
1912
2460
|
# Because we need to return the return value of the workflow, we need
|
|
1913
2461
|
# to return a Toil promise for the last/sink job in the workflow's
|
|
@@ -1924,6 +2472,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
1924
2472
|
self._workflow_id = workflow_id
|
|
1925
2473
|
self._namespace = namespace
|
|
1926
2474
|
|
|
2475
|
+
@report_wdl_errors("run workflow")
|
|
1927
2476
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1928
2477
|
"""
|
|
1929
2478
|
Run the workflow. Return the result of the workflow.
|
|
@@ -1936,25 +2485,28 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
1936
2485
|
# For a task we only see the inside-the-task namespace.
|
|
1937
2486
|
bindings = combine_bindings(unwrap_all(self._prev_node_results))
|
|
1938
2487
|
# Set up the WDL standard library
|
|
1939
|
-
standard_library = ToilWDLStdLibBase(file_store)
|
|
2488
|
+
standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._execution_dir)
|
|
1940
2489
|
|
|
1941
2490
|
if self._workflow.inputs:
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
2491
|
+
with monkeypatch_coerce(standard_library):
|
|
2492
|
+
for input_decl in self._workflow.inputs:
|
|
2493
|
+
# Evaluate all the inputs that aren't pre-set
|
|
2494
|
+
bindings = bindings.bind(input_decl.name, evaluate_defaultable_decl(input_decl, bindings, standard_library))
|
|
1945
2495
|
|
|
1946
2496
|
# Make jobs to run all the parts of the workflow
|
|
1947
2497
|
sink = self.create_subgraph(self._workflow.body, [], bindings)
|
|
1948
2498
|
|
|
1949
|
-
if self._workflow.outputs:
|
|
2499
|
+
if self._workflow.outputs != []: # Compare against empty list as None means there should be outputs
|
|
2500
|
+
# Either the output section is declared and nonempty or it is not declared
|
|
1950
2501
|
# Add evaluating the outputs after the sink
|
|
1951
|
-
outputs_job = WDLOutputsJob(self._workflow
|
|
2502
|
+
outputs_job = WDLOutputsJob(self._workflow, sink.rv(), self._execution_dir)
|
|
1952
2503
|
sink.addFollowOn(outputs_job)
|
|
1953
|
-
# Caller
|
|
2504
|
+
# Caller is responsible for making sure namespaces are applied
|
|
2505
|
+
self.defer_postprocessing(outputs_job)
|
|
1954
2506
|
return outputs_job.rv()
|
|
1955
2507
|
else:
|
|
1956
2508
|
# No outputs from this workflow.
|
|
1957
|
-
return WDL.Env.Bindings()
|
|
2509
|
+
return self.postprocess(WDL.Env.Bindings())
|
|
1958
2510
|
|
|
1959
2511
|
class WDLOutputsJob(WDLBaseJob):
|
|
1960
2512
|
"""
|
|
@@ -1962,29 +2514,44 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
1962
2514
|
|
|
1963
2515
|
Returns an environment with just the outputs bound, in no namespace.
|
|
1964
2516
|
"""
|
|
1965
|
-
|
|
1966
|
-
def __init__(self, outputs: List[WDL.Tree.Decl], bindings: Promised[WDLBindings], **kwargs: Any):
|
|
2517
|
+
def __init__(self, workflow: WDL.Tree.Workflow, bindings: Promised[WDLBindings], execution_dir: Optional[str] = None, **kwargs: Any):
|
|
1967
2518
|
"""
|
|
1968
2519
|
Make a new WDLWorkflowOutputsJob for the given workflow, with the given set of bindings after its body runs.
|
|
1969
2520
|
"""
|
|
1970
|
-
super().__init__(**kwargs)
|
|
2521
|
+
super().__init__(execution_dir, **kwargs)
|
|
1971
2522
|
|
|
1972
|
-
self._outputs = outputs
|
|
1973
2523
|
self._bindings = bindings
|
|
2524
|
+
self._workflow = workflow
|
|
1974
2525
|
|
|
2526
|
+
@report_wdl_errors("evaluate outputs")
|
|
1975
2527
|
def run(self, file_store: AbstractFileStore) -> WDLBindings:
|
|
1976
2528
|
"""
|
|
1977
2529
|
Make bindings for the outputs.
|
|
1978
2530
|
"""
|
|
1979
2531
|
super().run(file_store)
|
|
1980
2532
|
|
|
1981
|
-
|
|
1982
|
-
|
|
1983
|
-
|
|
1984
|
-
|
|
1985
|
-
|
|
1986
|
-
|
|
1987
|
-
|
|
2533
|
+
if self._workflow.outputs is None:
|
|
2534
|
+
# The output section is not declared
|
|
2535
|
+
# So get all task outputs and return that
|
|
2536
|
+
# First get all task output names
|
|
2537
|
+
output_set = set()
|
|
2538
|
+
for call in self._workflow.body:
|
|
2539
|
+
if isinstance(call, WDL.Tree.Call):
|
|
2540
|
+
for type_binding in call.effective_outputs:
|
|
2541
|
+
output_set.add(type_binding.name)
|
|
2542
|
+
# Collect all bindings that are task outputs
|
|
2543
|
+
output_bindings: WDL.Env.Bindings[WDL.Value.Base] = WDL.Env.Bindings()
|
|
2544
|
+
for binding in unwrap(self._bindings):
|
|
2545
|
+
if binding.name in output_set:
|
|
2546
|
+
# The bindings will already be namespaced with the task namespaces
|
|
2547
|
+
output_bindings = output_bindings.bind(binding.name, binding.value)
|
|
2548
|
+
else:
|
|
2549
|
+
# Output section is declared and is nonempty, so evaluate normally
|
|
2550
|
+
# Evaluate all the outputs in the normal, non-task-outputs library context
|
|
2551
|
+
standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._execution_dir)
|
|
2552
|
+
# Combine the bindings from the previous job
|
|
2553
|
+
output_bindings = evaluate_output_decls(self._workflow.outputs, unwrap(self._bindings), standard_library)
|
|
2554
|
+
return self.postprocess(output_bindings)
|
|
1988
2555
|
|
|
1989
2556
|
class WDLRootJob(WDLSectionJob):
|
|
1990
2557
|
"""
|
|
@@ -1993,17 +2560,18 @@ class WDLRootJob(WDLSectionJob):
|
|
|
1993
2560
|
the workflow name; both forms are accepted.
|
|
1994
2561
|
"""
|
|
1995
2562
|
|
|
1996
|
-
def __init__(self, workflow: WDL.Tree.Workflow, inputs: WDLBindings, **kwargs: Any) -> None:
|
|
2563
|
+
def __init__(self, workflow: WDL.Tree.Workflow, inputs: WDLBindings, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
|
|
1997
2564
|
"""
|
|
1998
2565
|
Create a subtree to run the workflow and namespace the outputs.
|
|
1999
2566
|
"""
|
|
2000
2567
|
|
|
2001
2568
|
# The root workflow names the root namespace
|
|
2002
|
-
super().__init__(workflow.name, **kwargs)
|
|
2569
|
+
super().__init__(workflow.name, execution_dir, **kwargs)
|
|
2003
2570
|
|
|
2004
2571
|
self._workflow = workflow
|
|
2005
2572
|
self._inputs = inputs
|
|
2006
2573
|
|
|
2574
|
+
@report_wdl_errors("run root job")
|
|
2007
2575
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
2008
2576
|
"""
|
|
2009
2577
|
Actually build the subgraph.
|
|
@@ -2012,53 +2580,67 @@ class WDLRootJob(WDLSectionJob):
|
|
|
2012
2580
|
|
|
2013
2581
|
# Run the workflow. We rely on this to handle entering the input
|
|
2014
2582
|
# namespace if needed, or handling free-floating inputs.
|
|
2015
|
-
workflow_job = WDLWorkflowJob(self._workflow, [self._inputs], [self._workflow.name], self._namespace)
|
|
2583
|
+
workflow_job = WDLWorkflowJob(self._workflow, [self._inputs], [self._workflow.name], self._namespace, self._execution_dir)
|
|
2584
|
+
workflow_job.then_namespace(self._namespace)
|
|
2016
2585
|
self.addChild(workflow_job)
|
|
2017
|
-
|
|
2018
|
-
|
|
2019
|
-
|
|
2020
|
-
|
|
2021
|
-
|
|
2022
|
-
|
|
2023
|
-
|
|
2586
|
+
self.defer_postprocessing(workflow_job)
|
|
2587
|
+
return workflow_job.rv()
|
|
2588
|
+
|
|
2589
|
+
@contextmanager
|
|
2590
|
+
def monkeypatch_coerce(standard_library: ToilWDLStdLibBase) -> Generator[None, None, None]:
|
|
2591
|
+
"""
|
|
2592
|
+
Monkeypatch miniwdl's WDL.Value.Base.coerce() function to virtualize files when they are represented as Strings.
|
|
2593
|
+
Calls _virtualize_filename from a given standard library object.
|
|
2594
|
+
:param standard_library: a standard library object
|
|
2595
|
+
:return
|
|
2596
|
+
"""
|
|
2597
|
+
# We're doing this because while miniwdl recognizes when a string needs to be converted into a file, its method of
|
|
2598
|
+
# conversion is to just store the local filepath. Toil needs to virtualize the file into the jobstore so until
|
|
2599
|
+
# there is an internal entrypoint, monkeypatch it.
|
|
2600
|
+
def base_coerce(self: WDL.Value.Base, desired_type: Optional[WDL.Type.Base] = None) -> WDL.Value.Base:
|
|
2601
|
+
if isinstance(desired_type, WDL.Type.File):
|
|
2602
|
+
self.value = standard_library._virtualize_filename(self.value)
|
|
2603
|
+
return self
|
|
2604
|
+
return old_base_coerce(self, desired_type) # old_coerce will recurse back into this monkey patched coerce
|
|
2605
|
+
def string_coerce(self: WDL.Value.String, desired_type: Optional[WDL.Type.Base] = None) -> WDL.Value.Base:
|
|
2606
|
+
# Sometimes string coerce is called instead, so monkeypatch this one as well
|
|
2607
|
+
if isinstance(desired_type, WDL.Type.File) and not isinstance(self, WDL.Type.File):
|
|
2608
|
+
return WDL.Value.File(standard_library._virtualize_filename(self.value), self.expr)
|
|
2609
|
+
return old_str_coerce(self, desired_type)
|
|
2610
|
+
|
|
2611
|
+
old_base_coerce = WDL.Value.Base.coerce
|
|
2612
|
+
old_str_coerce = WDL.Value.String.coerce
|
|
2613
|
+
try:
|
|
2614
|
+
# Mypy does not like monkeypatching:
|
|
2615
|
+
# https://github.com/python/mypy/issues/2427#issuecomment-1419206807
|
|
2616
|
+
WDL.Value.Base.coerce = base_coerce # type: ignore[method-assign]
|
|
2617
|
+
WDL.Value.String.coerce = string_coerce # type: ignore[method-assign]
|
|
2618
|
+
yield
|
|
2619
|
+
finally:
|
|
2620
|
+
WDL.Value.Base.coerce = old_base_coerce # type: ignore[method-assign]
|
|
2621
|
+
WDL.Value.String.coerce = old_str_coerce # type: ignore[method-assign]
|
|
2622
|
+
|
|
2623
|
+
@report_wdl_errors("run workflow", exit=True)
|
|
2024
2624
|
def main() -> None:
|
|
2025
2625
|
"""
|
|
2026
2626
|
A Toil workflow to interpret WDL input files.
|
|
2027
2627
|
"""
|
|
2628
|
+
args = sys.argv[1:]
|
|
2028
2629
|
|
|
2029
|
-
parser =
|
|
2030
|
-
addOptions(parser, jobstore_as_flag=True)
|
|
2031
|
-
|
|
2032
|
-
parser.add_argument("wdl_uri", type=str,
|
|
2033
|
-
help="WDL document URI")
|
|
2034
|
-
parser.add_argument("inputs_uri", type=str, nargs='?',
|
|
2035
|
-
help="WDL input JSON URI")
|
|
2036
|
-
parser.add_argument("--input", "-i", dest="inputs_uri", type=str,
|
|
2037
|
-
help="WDL input JSON URI")
|
|
2038
|
-
parser.add_argument("--outputDialect", dest="output_dialect", type=str, default='cromwell', choices=['cromwell', 'miniwdl'],
|
|
2039
|
-
help=("JSON output format dialect. 'cromwell' just returns the workflow's output"
|
|
2040
|
-
"values as JSON, while 'miniwdl' nests that under an 'outputs' key, and "
|
|
2041
|
-
"includes a 'dir' key where files are written."))
|
|
2042
|
-
parser.add_argument("--outputDirectory", "-o", dest="output_directory", type=str, default=None,
|
|
2043
|
-
help=("Directory in which to save output files. By default a new directory is created in the current directory."))
|
|
2044
|
-
parser.add_argument("--outputFile", "-m", dest="output_file", type=argparse.FileType('w'), default=sys.stdout,
|
|
2045
|
-
help="File to save output JSON to.")
|
|
2630
|
+
parser = ArgParser(description='Runs WDL files with toil.')
|
|
2631
|
+
addOptions(parser, jobstore_as_flag=True, wdl=True)
|
|
2046
2632
|
|
|
2047
|
-
options = parser.parse_args(
|
|
2633
|
+
options = parser.parse_args(args)
|
|
2048
2634
|
|
|
2049
2635
|
# Make sure we have a jobStore
|
|
2050
2636
|
if options.jobStore is None:
|
|
2051
2637
|
# TODO: Move cwltoil's generate_default_job_store where we can use it
|
|
2052
|
-
options.jobStore = os.path.join(
|
|
2638
|
+
options.jobStore = os.path.join(mkdtemp(), 'tree')
|
|
2053
2639
|
|
|
2054
|
-
# Make sure we have an output directory and we don't need
|
|
2055
|
-
# about a None, and MyPy knows it.
|
|
2640
|
+
# Make sure we have an output directory (or URL prefix) and we don't need
|
|
2641
|
+
# to ever worry about a None, and MyPy knows it.
|
|
2056
2642
|
# If we don't have a directory assigned, make one in the current directory.
|
|
2057
|
-
output_directory: str = options.output_directory if options.output_directory else
|
|
2058
|
-
if not os.path.isdir(output_directory):
|
|
2059
|
-
# Make sure it exists
|
|
2060
|
-
os.mkdir(output_directory)
|
|
2061
|
-
|
|
2643
|
+
output_directory: str = options.output_directory if options.output_directory else mkdtemp(prefix='wdl-out-', dir=os.getcwd())
|
|
2062
2644
|
|
|
2063
2645
|
with Toil(options) as toil:
|
|
2064
2646
|
if options.restart:
|
|
@@ -2068,8 +2650,10 @@ def main() -> None:
|
|
|
2068
2650
|
document: WDL.Tree.Document = WDL.load(options.wdl_uri, read_source=toil_read_source)
|
|
2069
2651
|
|
|
2070
2652
|
if document.workflow is None:
|
|
2071
|
-
|
|
2072
|
-
|
|
2653
|
+
# Complain that we need a workflow.
|
|
2654
|
+
# We need the absolute path or URL to raise the error
|
|
2655
|
+
wdl_abspath = options.wdl_uri if not os.path.exists(options.wdl_uri) else os.path.abspath(options.wdl_uri)
|
|
2656
|
+
raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.wdl_uri, wdl_abspath, 0, 0, 0, 1), "No workflow found in document")
|
|
2073
2657
|
|
|
2074
2658
|
if options.inputs_uri:
|
|
2075
2659
|
# Load the inputs. Use the same loading mechanism, which means we
|
|
@@ -2078,10 +2662,13 @@ def main() -> None:
|
|
|
2078
2662
|
try:
|
|
2079
2663
|
inputs = json.loads(downloaded.source_text)
|
|
2080
2664
|
except json.JSONDecodeError as e:
|
|
2081
|
-
|
|
2082
|
-
|
|
2665
|
+
# Complain about the JSON document.
|
|
2666
|
+
# We need the absolute path or URL to raise the error
|
|
2667
|
+
inputs_abspath = options.inputs_uri if not os.path.exists(options.inputs_uri) else os.path.abspath(options.inputs_uri)
|
|
2668
|
+
raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.inputs_uri, inputs_abspath, e.lineno, e.colno, e.lineno, e.colno + 1), "Cannot parse input JSON: " + e.msg) from e
|
|
2083
2669
|
else:
|
|
2084
2670
|
inputs = {}
|
|
2671
|
+
|
|
2085
2672
|
# Parse out the available and required inputs. Each key in the
|
|
2086
2673
|
# JSON ought to start with the workflow's name and then a .
|
|
2087
2674
|
# TODO: WDL's Bindings[] isn't variant in the right way, so we
|
|
@@ -2109,14 +2696,18 @@ def main() -> None:
|
|
|
2109
2696
|
inputs_search_path.append(match.group(0))
|
|
2110
2697
|
|
|
2111
2698
|
# Import any files in the bindings
|
|
2112
|
-
input_bindings = import_files(input_bindings, toil, inputs_search_path)
|
|
2699
|
+
input_bindings = import_files(input_bindings, toil, inputs_search_path, skip_remote=options.reference_inputs)
|
|
2113
2700
|
|
|
2114
2701
|
# TODO: Automatically set a good MINIWDL__SINGULARITY__IMAGE_CACHE ?
|
|
2115
2702
|
|
|
2703
|
+
# Get the execution directory
|
|
2704
|
+
execution_dir = os.getcwd()
|
|
2705
|
+
|
|
2116
2706
|
# Run the workflow and get its outputs namespaced with the workflow name.
|
|
2117
|
-
root_job = WDLRootJob(document.workflow, input_bindings)
|
|
2707
|
+
root_job = WDLRootJob(document.workflow, input_bindings, execution_dir)
|
|
2118
2708
|
output_bindings = toil.start(root_job)
|
|
2119
|
-
|
|
2709
|
+
if not isinstance(output_bindings, WDL.Env.Bindings):
|
|
2710
|
+
raise RuntimeError("The output of the WDL job is not a binding.")
|
|
2120
2711
|
|
|
2121
2712
|
# Fetch all the output files
|
|
2122
2713
|
# TODO: deduplicate with _devirtualize_filename
|
|
@@ -2125,31 +2716,37 @@ def main() -> None:
|
|
|
2125
2716
|
'devirtualize' a file using the "toil" object instead of a filestore.
|
|
2126
2717
|
Returns its local path.
|
|
2127
2718
|
"""
|
|
2128
|
-
if filename
|
|
2129
|
-
|
|
2130
|
-
|
|
2131
|
-
|
|
2719
|
+
if is_url(filename):
|
|
2720
|
+
if filename.startswith(TOIL_URI_SCHEME):
|
|
2721
|
+
# This is a reference to the Toil filestore.
|
|
2722
|
+
# Deserialize the FileID and required basename
|
|
2723
|
+
file_id, parent_id, file_basename = unpack_toil_uri(filename)
|
|
2724
|
+
else:
|
|
2725
|
+
# Parse the URL and extract the basename
|
|
2726
|
+
file_basename = os.path.basename(urlsplit(filename).path)
|
|
2727
|
+
|
|
2132
2728
|
# Figure out where it should go.
|
|
2133
|
-
#
|
|
2729
|
+
# If a UUID is included, it will be omitted
|
|
2730
|
+
# TODO: Deal with name collisions in the export directory
|
|
2134
2731
|
dest_name = os.path.join(output_directory, file_basename)
|
|
2135
|
-
|
|
2136
|
-
|
|
2732
|
+
|
|
2733
|
+
if filename.startswith(TOIL_URI_SCHEME):
|
|
2734
|
+
# Export the file
|
|
2735
|
+
toil.export_file(file_id, dest_name)
|
|
2736
|
+
else:
|
|
2737
|
+
# Download to a local file with the right name and execute bit.
|
|
2738
|
+
# Open it exclusively
|
|
2739
|
+
with open(dest_name, 'xb') as dest_file:
|
|
2740
|
+
# And save to it
|
|
2741
|
+
size, executable = AbstractJobStore.read_from_url(filename, dest_file)
|
|
2742
|
+
if executable:
|
|
2743
|
+
# Set the execute bit in the file's permissions
|
|
2744
|
+
os.chmod(dest_name, os.stat(dest_name).st_mode | stat.S_IXUSR)
|
|
2745
|
+
|
|
2137
2746
|
# And return where we put it
|
|
2138
2747
|
return dest_name
|
|
2139
|
-
elif filename.startswith('http:') or filename.startswith('https:') or filename.startswith('s3:') or filename.startswith('gs:'):
|
|
2140
|
-
# This is a URL that we think Toil knows how to read.
|
|
2141
|
-
imported = toil.import_file(filename)
|
|
2142
|
-
if imported is None:
|
|
2143
|
-
raise FileNotFoundError(f"Could not import URL {filename}")
|
|
2144
|
-
# Get a basename from the URL.
|
|
2145
|
-
# TODO: Deal with name collisions
|
|
2146
|
-
file_basename = os.path.basename(urlsplit(filename).path)
|
|
2147
|
-
# Do the same as we do for files we actually made.
|
|
2148
|
-
dest_name = os.path.join(output_directory, file_basename)
|
|
2149
|
-
toil.exportFile(imported, dest_name)
|
|
2150
|
-
return dest_name
|
|
2151
2748
|
else:
|
|
2152
|
-
#
|
|
2749
|
+
# We already had a path
|
|
2153
2750
|
return filename
|
|
2154
2751
|
|
|
2155
2752
|
# Make all the files local files
|
|
@@ -2159,8 +2756,24 @@ def main() -> None:
|
|
|
2159
2756
|
outputs = WDL.values_to_json(output_bindings)
|
|
2160
2757
|
if options.output_dialect == 'miniwdl':
|
|
2161
2758
|
outputs = {'dir': output_directory, 'outputs': outputs}
|
|
2162
|
-
options.output_file
|
|
2163
|
-
|
|
2759
|
+
if options.output_file is None:
|
|
2760
|
+
# Send outputs to standard out
|
|
2761
|
+
print(json.dumps(outputs))
|
|
2762
|
+
else:
|
|
2763
|
+
# Export output to path or URL.
|
|
2764
|
+
# So we need to import and then export.
|
|
2765
|
+
fd, filename = mkstemp()
|
|
2766
|
+
with open(fd, 'w') as handle:
|
|
2767
|
+
# Populate the file
|
|
2768
|
+
handle.write(json.dumps(outputs))
|
|
2769
|
+
handle.write('\n')
|
|
2770
|
+
# Import it. Don't link because the temp file will go away.
|
|
2771
|
+
file_id = toil.import_file(filename, symlink=False)
|
|
2772
|
+
# Delete the temp file
|
|
2773
|
+
os.remove(filename)
|
|
2774
|
+
# Export it into place
|
|
2775
|
+
toil.export_file(file_id, options.output_file)
|
|
2776
|
+
|
|
2164
2777
|
|
|
2165
2778
|
|
|
2166
2779
|
if __name__ == "__main__":
|