toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +39 -13
- toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/lsf.py +7 -7
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +87 -16
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +44 -8
- toil/common.py +544 -753
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +595 -574
- toil/cwl/utils.py +55 -10
- toil/exceptions.py +1 -1
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +88 -14
- toil/fileStores/cachingFileStore.py +610 -549
- toil/fileStores/nonCachingFileStore.py +46 -22
- toil/job.py +182 -101
- toil/jobStores/abstractJobStore.py +161 -95
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +6 -6
- toil/jobStores/fileJobStore.py +116 -18
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +87 -56
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +26 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +49 -2
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +9 -2
- toil/lib/threading.py +101 -38
- toil/options/common.py +736 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +37 -0
- toil/provisioners/abstractProvisioner.py +9 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +41 -9
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +24 -8
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +438 -223
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +2 -3
- toil/test/jobStores/jobStoreTest.py +34 -21
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +12 -17
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
- toil/test/options/options.py +37 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +23 -11
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +23 -3
- toil/test/wdl/wdltoil_test.py +223 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +445 -305
- toil/utils/toilStatus.py +2 -5
- toil/version.py +10 -10
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +1257 -492
- toil/worker.py +55 -46
- toil-6.1.0.dist-info/METADATA +124 -0
- toil-6.1.0.dist-info/RECORD +241 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/METADATA +0 -118
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/wdl/wdltoil.py
CHANGED
|
@@ -12,47 +12,123 @@
|
|
|
12
12
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
|
-
import argparse
|
|
16
15
|
import asyncio
|
|
17
|
-
import collections
|
|
18
|
-
import copy
|
|
19
16
|
import errno
|
|
20
|
-
import glob
|
|
21
17
|
import io
|
|
22
|
-
import itertools
|
|
23
18
|
import json
|
|
24
19
|
import logging
|
|
25
20
|
import os
|
|
26
21
|
import re
|
|
27
22
|
import shlex
|
|
28
23
|
import shutil
|
|
24
|
+
import stat
|
|
29
25
|
import subprocess
|
|
30
26
|
import sys
|
|
31
|
-
import tempfile
|
|
32
27
|
import uuid
|
|
33
|
-
|
|
34
|
-
from
|
|
35
|
-
from
|
|
36
|
-
from
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
28
|
+
from contextlib import ExitStack, contextmanager
|
|
29
|
+
from graphlib import TopologicalSorter
|
|
30
|
+
from tempfile import mkstemp
|
|
31
|
+
from typing import (Any,
|
|
32
|
+
Callable,
|
|
33
|
+
Dict,
|
|
34
|
+
Generator,
|
|
35
|
+
Iterable,
|
|
36
|
+
Iterator,
|
|
37
|
+
List,
|
|
38
|
+
Optional,
|
|
39
|
+
Sequence,
|
|
40
|
+
Set,
|
|
41
|
+
Tuple,
|
|
42
|
+
Type,
|
|
43
|
+
TypeVar,
|
|
44
|
+
Union,
|
|
45
|
+
cast)
|
|
46
|
+
from urllib.parse import quote, unquote, urljoin, urlsplit
|
|
47
|
+
|
|
48
|
+
import WDL.Error
|
|
43
49
|
import WDL.runtime.config
|
|
50
|
+
from configargparse import ArgParser, SUPPRESS
|
|
51
|
+
from WDL._util import byte_size_units, strip_leading_whitespace
|
|
52
|
+
from WDL.CLI import print_error
|
|
53
|
+
from WDL.runtime.backend.docker_swarm import SwarmContainer
|
|
54
|
+
from WDL.runtime.backend.singularity import SingularityContainer
|
|
55
|
+
from WDL.runtime.task_container import TaskContainer
|
|
44
56
|
|
|
45
|
-
from toil.common import
|
|
46
|
-
from toil.job import AcceleratorRequirement, Job, JobFunctionWrappingJob, Promise, Promised, accelerators_fully_satisfy, parse_accelerator, unwrap, unwrap_all
|
|
57
|
+
from toil.common import Toil, addOptions, check_and_create_default_config_file
|
|
47
58
|
from toil.fileStores import FileID
|
|
48
59
|
from toil.fileStores.abstractFileStore import AbstractFileStore
|
|
49
|
-
from toil.
|
|
60
|
+
from toil.job import (AcceleratorRequirement,
|
|
61
|
+
Job,
|
|
62
|
+
Promise,
|
|
63
|
+
Promised,
|
|
64
|
+
TemporaryID,
|
|
65
|
+
accelerators_fully_satisfy,
|
|
66
|
+
parse_accelerator,
|
|
67
|
+
unwrap,
|
|
68
|
+
unwrap_all)
|
|
69
|
+
from toil.jobStores.abstractJobStore import (AbstractJobStore,
|
|
70
|
+
UnimplementedURLException)
|
|
50
71
|
from toil.lib.conversions import convert_units, human2bytes
|
|
72
|
+
from toil.lib.io import mkdtemp
|
|
73
|
+
from toil.lib.memoize import memoize
|
|
51
74
|
from toil.lib.misc import get_user_name
|
|
52
75
|
from toil.lib.threading import global_mutex
|
|
53
76
|
|
|
54
77
|
logger = logging.getLogger(__name__)
|
|
55
78
|
|
|
79
|
+
|
|
80
|
+
@contextmanager
|
|
81
|
+
def wdl_error_reporter(task: str, exit: bool = False, log: Callable[[str], None] = logger.critical) -> Generator[None, None, None]:
|
|
82
|
+
"""
|
|
83
|
+
Run code in a context where WDL errors will be reported with pretty formatting.
|
|
84
|
+
"""
|
|
85
|
+
|
|
86
|
+
try:
|
|
87
|
+
yield
|
|
88
|
+
except (
|
|
89
|
+
WDL.Error.SyntaxError,
|
|
90
|
+
WDL.Error.ImportError,
|
|
91
|
+
WDL.Error.ValidationError,
|
|
92
|
+
WDL.Error.MultipleValidationErrors,
|
|
93
|
+
FileNotFoundError
|
|
94
|
+
) as e:
|
|
95
|
+
log("Could not " + task)
|
|
96
|
+
# These are the errors that MiniWDL's parser can raise and its reporter
|
|
97
|
+
# can report. See
|
|
98
|
+
# https://github.com/chanzuckerberg/miniwdl/blob/a780b1bf2db61f18de37616068968b2bb4c2d21c/WDL/CLI.py#L91-L97.
|
|
99
|
+
#
|
|
100
|
+
# We are going to use MiniWDL's pretty printer to print them.
|
|
101
|
+
print_error(e)
|
|
102
|
+
if exit:
|
|
103
|
+
# Stop right now
|
|
104
|
+
sys.exit(1)
|
|
105
|
+
else:
|
|
106
|
+
# Reraise the exception to stop
|
|
107
|
+
raise
|
|
108
|
+
|
|
109
|
+
F = TypeVar('F', bound=Callable[..., Any])
|
|
110
|
+
def report_wdl_errors(task: str, exit: bool = False, log: Callable[[str], None] = logger.critical) -> Callable[[F], F]:
|
|
111
|
+
"""
|
|
112
|
+
Create a decorator to report WDL errors with the given task message.
|
|
113
|
+
|
|
114
|
+
Decorator can then be applied to a function, and if a WDL error happens it
|
|
115
|
+
will say that it could not {task}.
|
|
116
|
+
"""
|
|
117
|
+
def decorator(decoratee: F) -> F:
|
|
118
|
+
"""
|
|
119
|
+
Decorate a function with WDL error reporting.
|
|
120
|
+
"""
|
|
121
|
+
def decorated(*args: Any, **kwargs: Any) -> Any:
|
|
122
|
+
"""
|
|
123
|
+
Run the decoratee and handle WDL errors.
|
|
124
|
+
"""
|
|
125
|
+
with wdl_error_reporter(task, exit=exit, log=log):
|
|
126
|
+
return decoratee(*args, **kwargs)
|
|
127
|
+
return cast(F, decorated)
|
|
128
|
+
return decorator
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
|
|
56
132
|
def potential_absolute_uris(uri: str, path: List[str], importer: Optional[WDL.Tree.Document] = None) -> Iterator[str]:
|
|
57
133
|
"""
|
|
58
134
|
Get potential absolute URIs to check for an imported file.
|
|
@@ -250,7 +326,8 @@ def get_supertype(types: Sequence[Optional[WDL.Type.Base]]) -> WDL.Type.Base:
|
|
|
250
326
|
if len(types) == 1:
|
|
251
327
|
# Only one type. It isn't None.
|
|
252
328
|
the_type = types[0]
|
|
253
|
-
|
|
329
|
+
if the_type is None:
|
|
330
|
+
raise RuntimeError("The supertype cannot be None.")
|
|
254
331
|
return the_type
|
|
255
332
|
else:
|
|
256
333
|
# Multiple types (or none). Assume Any
|
|
@@ -263,7 +340,6 @@ def for_each_node(root: WDL.Tree.WorkflowNode) -> Iterator[WDL.Tree.WorkflowNode
|
|
|
263
340
|
internal nodes of conditionals and scatters, and gather nodes.
|
|
264
341
|
"""
|
|
265
342
|
|
|
266
|
-
logger.debug('WorkflowNode: %s: %s %s', type(root), root, root.workflow_node_id)
|
|
267
343
|
yield root
|
|
268
344
|
for child_node in root.children:
|
|
269
345
|
if isinstance(child_node, WDL.Tree.WorkflowNode):
|
|
@@ -302,7 +378,7 @@ def recursive_dependencies(root: WDL.Tree.WorkflowNode) -> Set[str]:
|
|
|
302
378
|
|
|
303
379
|
TOIL_URI_SCHEME = 'toilfile:'
|
|
304
380
|
|
|
305
|
-
def pack_toil_uri(file_id: FileID, file_basename: str) -> str:
|
|
381
|
+
def pack_toil_uri(file_id: FileID, dir_id: uuid.UUID, file_basename: str) -> str:
|
|
306
382
|
"""
|
|
307
383
|
Encode a Toil file ID and its source path in a URI that starts with the scheme in TOIL_URI_SCHEME.
|
|
308
384
|
"""
|
|
@@ -310,9 +386,9 @@ def pack_toil_uri(file_id: FileID, file_basename: str) -> str:
|
|
|
310
386
|
# We urlencode everything, including any slashes. We need to use a slash to
|
|
311
387
|
# set off the actual filename, so the WDL standard library basename
|
|
312
388
|
# function works correctly.
|
|
313
|
-
return f"{TOIL_URI_SCHEME}{quote(file_id.pack(), safe='')}/{quote(file_basename, safe='')}"
|
|
389
|
+
return f"{TOIL_URI_SCHEME}{quote(file_id.pack(), safe='')}/{quote(str(dir_id))}/{quote(file_basename, safe='')}"
|
|
314
390
|
|
|
315
|
-
def unpack_toil_uri(toil_uri: str) -> Tuple[FileID, str]:
|
|
391
|
+
def unpack_toil_uri(toil_uri: str) -> Tuple[FileID, str, str]:
|
|
316
392
|
"""
|
|
317
393
|
Unpack a URI made by make_toil_uri to retrieve the FileID and the basename
|
|
318
394
|
(no path prefix) that the file is supposed to have.
|
|
@@ -326,12 +402,32 @@ def unpack_toil_uri(toil_uri: str) -> Tuple[FileID, str]:
|
|
|
326
402
|
raise ValueError(f"URI doesn't start with {TOIL_URI_SCHEME} and should: {toil_uri}")
|
|
327
403
|
# Split encoded file ID from filename
|
|
328
404
|
parts = parts[1].split('/')
|
|
329
|
-
if len(parts) !=
|
|
405
|
+
if len(parts) != 3:
|
|
330
406
|
raise ValueError(f"Wrong number of path segments in URI: {toil_uri}")
|
|
331
407
|
file_id = FileID.unpack(unquote(parts[0]))
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
408
|
+
parent_id = unquote(parts[1])
|
|
409
|
+
file_basename = unquote(parts[2])
|
|
410
|
+
|
|
411
|
+
return file_id, parent_id, file_basename
|
|
412
|
+
|
|
413
|
+
def evaluate_output_decls(output_decls: List[WDL.Tree.Decl], all_bindings: WDL.Env.Bindings[WDL.Value.Base], standard_library: WDL.StdLib.Base) -> WDL.Env.Bindings[WDL.Value.Base]:
|
|
414
|
+
"""
|
|
415
|
+
Evaluate output decls with a given bindings environment and standard library.
|
|
416
|
+
Creates a new bindings object that only contains the bindings from the given decls.
|
|
417
|
+
Guarantees that each decl in `output_decls` can access the variables defined by the previous ones.
|
|
418
|
+
:param all_bindings: Environment to use when evaluating decls
|
|
419
|
+
:param output_decls: Decls to evaluate
|
|
420
|
+
:param standard_library: Standard library
|
|
421
|
+
:return: New bindings object with only the output_decls
|
|
422
|
+
"""
|
|
423
|
+
# all_bindings contains output + previous bindings so that the output can reference its own declarations
|
|
424
|
+
# output_bindings only contains the output bindings themselves so that bindings from sections such as the input aren't included
|
|
425
|
+
output_bindings: WDL.Env.Bindings[WDL.Value.Base] = WDL.Env.Bindings()
|
|
426
|
+
for output_decl in output_decls:
|
|
427
|
+
output_value = evaluate_decl(output_decl, all_bindings, standard_library)
|
|
428
|
+
all_bindings = all_bindings.bind(output_decl.name, output_value)
|
|
429
|
+
output_bindings = output_bindings.bind(output_decl.name, output_value)
|
|
430
|
+
return output_bindings
|
|
335
431
|
|
|
336
432
|
class NonDownloadingSize(WDL.StdLib._Size):
|
|
337
433
|
"""
|
|
@@ -355,15 +451,25 @@ class NonDownloadingSize(WDL.StdLib._Size):
|
|
|
355
451
|
total_size = 0.0
|
|
356
452
|
for uri in file_uris:
|
|
357
453
|
# Sum up the sizes of all the files, if any.
|
|
358
|
-
if uri
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
454
|
+
if is_url(uri):
|
|
455
|
+
if uri.startswith(TOIL_URI_SCHEME):
|
|
456
|
+
# This is a Toil File ID we encoded; we have the size
|
|
457
|
+
# available.
|
|
458
|
+
file_id, _, _ = unpack_toil_uri(uri)
|
|
459
|
+
# Use the encoded size
|
|
460
|
+
total_size += file_id.size
|
|
461
|
+
else:
|
|
462
|
+
# This is some other kind of remote file.
|
|
463
|
+
# We need to get its size from the URI.
|
|
464
|
+
item_size = AbstractJobStore.get_size(uri)
|
|
465
|
+
if item_size is None:
|
|
466
|
+
# User asked for the size and we can't figure it out efficiently, so bail out.
|
|
467
|
+
raise RuntimeError(f"Attempt to check the size of {uri} failed")
|
|
468
|
+
total_size += item_size
|
|
364
469
|
else:
|
|
365
|
-
#
|
|
366
|
-
|
|
470
|
+
# This is actually a file we can use locally.
|
|
471
|
+
local_path = self.stdlib._devirtualize_filename(uri)
|
|
472
|
+
total_size += os.path.getsize(local_path)
|
|
367
473
|
|
|
368
474
|
if len(arguments) > 1:
|
|
369
475
|
# Need to convert units. See
|
|
@@ -377,6 +483,14 @@ class NonDownloadingSize(WDL.StdLib._Size):
|
|
|
377
483
|
# Return the result as a WDL float value
|
|
378
484
|
return WDL.Value.Float(total_size)
|
|
379
485
|
|
|
486
|
+
def is_url(filename: str, schemes: List[str] = ['http:', 'https:', 's3:', 'gs:', TOIL_URI_SCHEME]) -> bool:
|
|
487
|
+
"""
|
|
488
|
+
Decide if a filename is a known kind of URL
|
|
489
|
+
"""
|
|
490
|
+
for scheme in schemes:
|
|
491
|
+
if filename.startswith(scheme):
|
|
492
|
+
return True
|
|
493
|
+
return False
|
|
380
494
|
|
|
381
495
|
# Both the WDL code itself **and** the commands that it runs will deal in
|
|
382
496
|
# "virtualized" filenames.
|
|
@@ -407,8 +521,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
407
521
|
"""
|
|
408
522
|
Standard library implementation for WDL as run on Toil.
|
|
409
523
|
"""
|
|
410
|
-
|
|
411
|
-
def __init__(self, file_store: AbstractFileStore):
|
|
524
|
+
def __init__(self, file_store: AbstractFileStore, execution_dir: Optional[str] = None):
|
|
412
525
|
"""
|
|
413
526
|
Set up the standard library.
|
|
414
527
|
"""
|
|
@@ -424,51 +537,95 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
424
537
|
self.size = NonDownloadingSize(self)
|
|
425
538
|
|
|
426
539
|
# Keep the file store around so we can access files.
|
|
427
|
-
self._file_store = file_store
|
|
540
|
+
self._file_store = file_store
|
|
428
541
|
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
Decide if a filename is a known kind of URL
|
|
432
|
-
"""
|
|
433
|
-
for scheme in schemes:
|
|
434
|
-
if filename.startswith(scheme):
|
|
435
|
-
return True
|
|
436
|
-
return False
|
|
542
|
+
# UUID to differentiate which node files are virtualized from
|
|
543
|
+
self._parent_dir_to_ids: Dict[str, uuid.UUID] = dict()
|
|
437
544
|
|
|
545
|
+
self._execution_dir = execution_dir
|
|
546
|
+
|
|
547
|
+
@memoize
|
|
438
548
|
def _devirtualize_filename(self, filename: str) -> str:
|
|
439
549
|
"""
|
|
440
550
|
'devirtualize' filename passed to a read_* function: return a filename that can be open()ed
|
|
441
551
|
on the local host.
|
|
442
552
|
"""
|
|
443
553
|
|
|
554
|
+
return self.devirtualze_to(filename, self._file_store.localTempDir, self._file_store, self._execution_dir)
|
|
555
|
+
|
|
556
|
+
@staticmethod
|
|
557
|
+
def devirtualze_to(filename: str, dest_dir: str, file_source: Union[AbstractFileStore, Toil], execution_dir: Optional[str]) -> str:
|
|
558
|
+
"""
|
|
559
|
+
Download or export a WDL virtualized filename/URL to the given directory.
|
|
560
|
+
|
|
561
|
+
Makes sure sibling files stay siblings and files with the same name don't clobber each other. Called from within this class for tasks, and statically at the end of the workflow for outputs.
|
|
562
|
+
|
|
563
|
+
Returns the local path to the file.
|
|
564
|
+
"""
|
|
565
|
+
|
|
444
566
|
# TODO: Support people doing path operations (join, split, get parent directory) on the virtualized filenames.
|
|
445
567
|
# TODO: For task inputs, we are supposed to make sure to put things in the same directory if they came from the same directory. See <https://github.com/openwdl/wdl/blob/main/versions/1.0/SPEC.md#task-input-localization>
|
|
446
|
-
if filename
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
568
|
+
if is_url(filename):
|
|
569
|
+
if filename.startswith(TOIL_URI_SCHEME):
|
|
570
|
+
# This is a reference to the Toil filestore.
|
|
571
|
+
# Deserialize the FileID
|
|
572
|
+
file_id, parent_id, file_basename = unpack_toil_uri(filename)
|
|
573
|
+
|
|
574
|
+
# Decide where it should be put.
|
|
575
|
+
# This is a URI with the "parent" UUID attached to the filename.
|
|
576
|
+
# Use UUID as folder name rather than a new temp folder to reduce internal clutter.
|
|
577
|
+
# Put the UUID in the destination path in order for tasks to
|
|
578
|
+
# see where to put files depending on their parents.
|
|
579
|
+
dir_path = os.path.join(dest_dir, parent_id)
|
|
580
|
+
|
|
581
|
+
else:
|
|
582
|
+
# Parse the URL and extract the basename
|
|
583
|
+
file_basename = os.path.basename(urlsplit(filename).path)
|
|
584
|
+
# Get the URL to the directory this thing came from. Remember
|
|
585
|
+
# URLs are interpreted relative to the directory the thing is
|
|
586
|
+
# in, not relative to the thing.
|
|
587
|
+
parent_url = urljoin(filename, ".")
|
|
588
|
+
# Turn it into a string we can make a directory for
|
|
589
|
+
dir_path = os.path.join(dest_dir, quote(parent_url, safe=''))
|
|
590
|
+
|
|
591
|
+
if not os.path.exists(dir_path):
|
|
592
|
+
# Make sure the chosen directory exists
|
|
593
|
+
os.mkdir(dir_path)
|
|
594
|
+
# And decide the file goes in it.
|
|
595
|
+
dest_path = os.path.join(dir_path, file_basename)
|
|
596
|
+
|
|
597
|
+
if filename.startswith(TOIL_URI_SCHEME):
|
|
598
|
+
# Get a local path to the file
|
|
599
|
+
if isinstance(file_source, AbstractFileStore):
|
|
600
|
+
# Read from the file store
|
|
601
|
+
result = file_source.readGlobalFile(file_id, dest_path)
|
|
602
|
+
elif isinstance(file_source, Toil):
|
|
603
|
+
# Read from the Toil context
|
|
604
|
+
file_source.export_file(file_id, dest_path)
|
|
605
|
+
result = dest_path
|
|
606
|
+
else:
|
|
607
|
+
# Download to a local file with the right name and execute bit.
|
|
608
|
+
# Open it exclusively
|
|
609
|
+
with open(dest_path, 'xb') as dest_file:
|
|
610
|
+
# And save to it
|
|
611
|
+
size, executable = AbstractJobStore.read_from_url(filename, dest_file)
|
|
612
|
+
if executable:
|
|
613
|
+
# Set the execute bit in the file's permissions
|
|
614
|
+
os.chmod(dest_path, os.stat(dest_path).st_mode | stat.S_IXUSR)
|
|
615
|
+
|
|
616
|
+
result = dest_path
|
|
466
617
|
else:
|
|
467
618
|
# This is a local file
|
|
468
|
-
|
|
619
|
+
# To support relative paths, join the execution dir and filename
|
|
620
|
+
# if filename is already an abs path, join() will do nothing
|
|
621
|
+
if execution_dir is not None:
|
|
622
|
+
result = os.path.join(execution_dir, filename)
|
|
623
|
+
else:
|
|
624
|
+
result = filename
|
|
469
625
|
|
|
470
626
|
logger.debug('Devirtualized %s as openable file %s', filename, result)
|
|
471
|
-
|
|
627
|
+
if not os.path.exists(result):
|
|
628
|
+
raise RuntimeError(f"Virtualized file {filename} looks like a local file but isn't!")
|
|
472
629
|
return result
|
|
473
630
|
|
|
474
631
|
def _virtualize_filename(self, filename: str) -> str:
|
|
@@ -477,15 +634,22 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
477
634
|
File value
|
|
478
635
|
"""
|
|
479
636
|
|
|
480
|
-
|
|
481
|
-
if self._is_url(filename):
|
|
637
|
+
if is_url(filename):
|
|
482
638
|
# Already virtual
|
|
483
|
-
logger.debug('
|
|
639
|
+
logger.debug('Already virtualized %s as WDL file %s', filename, filename)
|
|
484
640
|
return filename
|
|
485
641
|
|
|
486
642
|
# Otherwise this is a local file and we want to fake it as a Toil file store file
|
|
487
|
-
|
|
488
|
-
|
|
643
|
+
|
|
644
|
+
# To support relative paths from execution directory, join the execution dir and filename
|
|
645
|
+
# If filename is already an abs path, join() will not do anything
|
|
646
|
+
if self._execution_dir is not None:
|
|
647
|
+
file_id = self._file_store.writeGlobalFile(os.path.join(self._execution_dir, filename))
|
|
648
|
+
else:
|
|
649
|
+
file_id = self._file_store.writeGlobalFile(filename)
|
|
650
|
+
dir = os.path.dirname(os.path.abspath(filename)) # is filename always an abspath?
|
|
651
|
+
parent_id = self._parent_dir_to_ids.setdefault(dir, uuid.uuid4())
|
|
652
|
+
result = pack_toil_uri(file_id, parent_id, os.path.basename(filename))
|
|
489
653
|
logger.debug('Virtualized %s as WDL file %s', filename, result)
|
|
490
654
|
return result
|
|
491
655
|
|
|
@@ -507,18 +671,19 @@ class ToilWDLStdLibTaskCommand(ToilWDLStdLibBase):
|
|
|
507
671
|
super().__init__(file_store)
|
|
508
672
|
self.container = container
|
|
509
673
|
|
|
674
|
+
@memoize
|
|
510
675
|
def _devirtualize_filename(self, filename: str) -> str:
|
|
511
676
|
"""
|
|
512
677
|
Go from a virtualized WDL-side filename to a local disk filename.
|
|
513
678
|
|
|
514
|
-
Any WDL-side filenames which are paths will be paths in the container.
|
|
679
|
+
Any WDL-side filenames which are paths will be paths in the container.
|
|
515
680
|
"""
|
|
516
|
-
if
|
|
681
|
+
if is_url(filename):
|
|
517
682
|
# We shouldn't have to deal with URLs here; we want to have exactly
|
|
518
683
|
# two nicely stacked/back-to-back layers of virtualization, joined
|
|
519
684
|
# on the out-of-container paths.
|
|
520
685
|
raise RuntimeError(f"File {filename} is a URL but should already be an in-container-virtualized filename")
|
|
521
|
-
|
|
686
|
+
|
|
522
687
|
# If this is a local path it will be in the container. Make sure we
|
|
523
688
|
# use the out-of-container equivalent.
|
|
524
689
|
result = self.container.host_path(filename)
|
|
@@ -542,7 +707,7 @@ class ToilWDLStdLibTaskCommand(ToilWDLStdLibBase):
|
|
|
542
707
|
self.container.add_paths([filename])
|
|
543
708
|
|
|
544
709
|
result = self.container.input_path_map[filename]
|
|
545
|
-
|
|
710
|
+
|
|
546
711
|
logger.debug('Virtualized %s as WDL file %s', filename, result)
|
|
547
712
|
return result
|
|
548
713
|
|
|
@@ -565,10 +730,14 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
565
730
|
# WDL.StdLib.TaskOutputs next.
|
|
566
731
|
super().__init__(file_store)
|
|
567
732
|
|
|
568
|
-
# Remember task
|
|
733
|
+
# Remember task output files
|
|
569
734
|
self._stdout_path = stdout_path
|
|
570
735
|
self._stderr_path = stderr_path
|
|
571
736
|
|
|
737
|
+
# Remember that the WDL code has not referenced them yet.
|
|
738
|
+
self._stdout_used = False
|
|
739
|
+
self._stderr_used = False
|
|
740
|
+
|
|
572
741
|
# Remember current directory
|
|
573
742
|
self._current_directory_override = current_directory_override
|
|
574
743
|
|
|
@@ -594,14 +763,28 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
594
763
|
"""
|
|
595
764
|
Get the standard output of the command that ran, as a WDL File, outside the container.
|
|
596
765
|
"""
|
|
766
|
+
self._stdout_used = True
|
|
597
767
|
return WDL.Value.File(self._stdout_path)
|
|
598
768
|
|
|
769
|
+
def stdout_used(self) -> bool:
|
|
770
|
+
"""
|
|
771
|
+
Return True if the standard output was read by the WDL.
|
|
772
|
+
"""
|
|
773
|
+
return self._stdout_used
|
|
774
|
+
|
|
599
775
|
def _stderr(self) -> WDL.Value.File:
|
|
600
776
|
"""
|
|
601
777
|
Get the standard error of the command that ran, as a WDL File, outside the container.
|
|
602
778
|
"""
|
|
779
|
+
self._stderr_used = True
|
|
603
780
|
return WDL.Value.File(self._stderr_path)
|
|
604
781
|
|
|
782
|
+
def stderr_used(self) -> bool:
|
|
783
|
+
"""
|
|
784
|
+
Return True if the standard error was read by the WDL.
|
|
785
|
+
"""
|
|
786
|
+
return self._stderr_used
|
|
787
|
+
|
|
605
788
|
def _glob(self, pattern: WDL.Value.String) -> WDL.Value.Array:
|
|
606
789
|
"""
|
|
607
790
|
Get a WDL Array of WDL Files left behind by the job that ran, matching the given glob pattern, outside the container.
|
|
@@ -645,6 +828,7 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
645
828
|
# Just turn them all into WDL File objects with local disk out-of-container names.
|
|
646
829
|
return WDL.Value.Array(WDL.Type.File(), [WDL.Value.File(x) for x in results])
|
|
647
830
|
|
|
831
|
+
@memoize
|
|
648
832
|
def _devirtualize_filename(self, filename: str) -> str:
|
|
649
833
|
"""
|
|
650
834
|
Go from a virtualized WDL-side filename to a local disk filename.
|
|
@@ -652,7 +836,7 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
652
836
|
Any WDL-side filenames which are relative will be relative to the
|
|
653
837
|
current directory override, if set.
|
|
654
838
|
"""
|
|
655
|
-
if not
|
|
839
|
+
if not is_url(filename) and not filename.startswith('/'):
|
|
656
840
|
# We are getting a bare relative path from the WDL side.
|
|
657
841
|
# Find a real path to it relative to the current directory override.
|
|
658
842
|
work_dir = '.' if not self._current_directory_override else self._current_directory_override
|
|
@@ -669,7 +853,7 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
669
853
|
filenames.
|
|
670
854
|
"""
|
|
671
855
|
|
|
672
|
-
if not
|
|
856
|
+
if not is_url(filename) and not filename.startswith('/'):
|
|
673
857
|
# We are getting a bare relative path the supposedly devirtualized side.
|
|
674
858
|
# Find a real path to it relative to the current directory override.
|
|
675
859
|
work_dir = '.' if not self._current_directory_override else self._current_directory_override
|
|
@@ -697,10 +881,11 @@ def evaluate_named_expression(context: Union[WDL.Error.SourceNode, WDL.Error.Sou
|
|
|
697
881
|
|
|
698
882
|
# Do the actual evaluation
|
|
699
883
|
value = expression.eval(environment, stdlib)
|
|
884
|
+
logger.debug("Got value %s of type %s", value, value.type)
|
|
700
885
|
except Exception:
|
|
701
886
|
# If something goes wrong, dump.
|
|
702
887
|
logger.exception("Expression evaluation failed for %s: %s", name, expression)
|
|
703
|
-
log_bindings(logger.
|
|
888
|
+
log_bindings(logger.error, "Expression was evaluated in:", [environment])
|
|
704
889
|
raise
|
|
705
890
|
|
|
706
891
|
if expected_type:
|
|
@@ -716,15 +901,24 @@ def evaluate_decl(node: WDL.Tree.Decl, environment: WDLBindings, stdlib: WDL.Std
|
|
|
716
901
|
|
|
717
902
|
return evaluate_named_expression(node, node.name, node.type, node.expr, environment, stdlib)
|
|
718
903
|
|
|
719
|
-
def evaluate_call_inputs(context: Union[WDL.Error.SourceNode, WDL.Error.SourcePosition], expressions: Dict[str, WDL.Expr.Base], environment: WDLBindings, stdlib: WDL.StdLib.Base) -> WDLBindings:
|
|
904
|
+
def evaluate_call_inputs(context: Union[WDL.Error.SourceNode, WDL.Error.SourcePosition], expressions: Dict[str, WDL.Expr.Base], environment: WDLBindings, stdlib: WDL.StdLib.Base, inputs_dict: Optional[Dict[str, WDL.Type.Base]] = None) -> WDLBindings:
    """
    Evaluate a bunch of expressions with names, and make them into a fresh set of bindings.

    :param context: Source node or position that evaluation errors are
           reported against.
    :param expressions: Mapping from binding name to the expression to
           evaluate for it.
    :param environment: Bindings the expressions are evaluated in.
    :param stdlib: Standard library to evaluate the expressions with.
    :param inputs_dict: Optional mapping of variable names to their expected
           type for the input decls in a task.
    :returns: New bindings holding one evaluated value per expression.
    :raises WDL.Error.EvalError: If evaluating an expression raises
            FileNotFoundError; the original error is chained as the cause.
    """
    new_bindings: WDLBindings = WDL.Env.Bindings()
    for k, v in expressions.items():
        # Add each binding in turn.
        # By default pass no expected type, so that the lhs and rhs are not
        # type checked against each other (miniwdl would otherwise return a
        # StaticTypeMismatch error).
        expected_type = None
        if not v.type.optional and inputs_dict is not None:
            # The expression's own type is not optional and we know the
            # declared input types, so look up the declared type. This is
            # done to enable passing in a string into a task input of file
            # type.
            expected_type = inputs_dict.get(k, None)
        try:
            new_bindings = new_bindings.bind(k, evaluate_named_expression(context, k, expected_type, v, environment, stdlib))
        except FileNotFoundError as e:
            # MiniWDL's type coercion will raise this when trying to make a File out of Null.
            # Chain the original error so the real cause stays in the traceback.
            raise WDL.Error.EvalError(context, f"Cannot evaluate expression for {k} with value {v}") from e
    return new_bindings
|
|
729
923
|
|
|
730
924
|
def evaluate_defaultable_decl(node: WDL.Tree.Decl, environment: WDLBindings, stdlib: WDL.StdLib.Base) -> WDL.Value.Base:
|
|
@@ -735,7 +929,10 @@ def evaluate_defaultable_decl(node: WDL.Tree.Decl, environment: WDLBindings, std
|
|
|
735
929
|
try:
|
|
736
930
|
if node.name in environment and not isinstance(environment[node.name], WDL.Value.Null):
|
|
737
931
|
logger.debug('Name %s is already defined with a non-null value, not using default', node.name)
|
|
738
|
-
|
|
932
|
+
if not isinstance(environment[node.name], type(node.type)):
|
|
933
|
+
return environment[node.name].coerce(node.type)
|
|
934
|
+
else:
|
|
935
|
+
return environment[node.name]
|
|
739
936
|
else:
|
|
740
937
|
if node.type is not None and not node.type.optional and node.expr is None:
|
|
741
938
|
# We need a value for this but there isn't one.
|
|
@@ -745,7 +942,7 @@ def evaluate_defaultable_decl(node: WDL.Tree.Decl, environment: WDLBindings, std
|
|
|
745
942
|
except Exception:
|
|
746
943
|
# If something goes wrong, dump.
|
|
747
944
|
logger.exception("Evaluation failed for %s", node)
|
|
748
|
-
log_bindings(logger.
|
|
945
|
+
log_bindings(logger.error, "Statement was evaluated in:", [environment])
|
|
749
946
|
raise
|
|
750
947
|
|
|
751
948
|
# TODO: make these stdlib methods???
|
|
@@ -753,8 +950,8 @@ def devirtualize_files(environment: WDLBindings, stdlib: WDL.StdLib.Base) -> WDL
|
|
|
753
950
|
"""
|
|
754
951
|
Make sure all the File values embedded in the given bindings point to files
|
|
755
952
|
that are actually available to command line commands.
|
|
953
|
+
The same virtual file always maps to the same devirtualized filename even with duplicates
|
|
756
954
|
"""
|
|
757
|
-
|
|
758
955
|
return map_over_files_in_bindings(environment, stdlib._devirtualize_filename)
|
|
759
956
|
|
|
760
957
|
def virtualize_files(environment: WDLBindings, stdlib: WDL.StdLib.Base) -> WDLBindings:
|
|
@@ -765,15 +962,52 @@ def virtualize_files(environment: WDLBindings, stdlib: WDL.StdLib.Base) -> WDLBi
|
|
|
765
962
|
|
|
766
963
|
return map_over_files_in_bindings(environment, stdlib._virtualize_filename)
|
|
767
964
|
|
|
768
|
-
def
|
|
965
|
+
def add_paths(task_container: "TaskContainer", host_paths: Iterable[str]) -> None:
    """
    Assign an in-container path to every host path that does not have one yet.

    Based off of WDL.runtime.task_container.add_paths from miniwdl.

    Host paths whose parent directories have the same name are placed
    together under one numbered subdirectory of the container's
    "work/_miniwdl_inputs" directory. The mapping is recorded in both
    task_container.input_path_map (host path -> container path) and
    task_container.input_path_map_rev (container path -> host path).

    :param task_container: Container whose path maps should be extended.
    :param host_paths: Host paths to map. A trailing "/" on a host path is
           preserved on its container path.
    :raises WDL.Error.InputError: If a host path does not exist.
    """
    # Partition the not-yet-mapped paths by host directory.
    host_paths_by_dir: Dict[str, Set[str]] = {}
    for host_path in host_paths:
        host_path_strip = host_path.rstrip("/")
        if host_path in task_container.input_path_map or host_path_strip in task_container.input_path_map:
            # Already mapped, with or without the trailing slash.
            continue
        if not os.path.exists(host_path_strip):
            raise WDL.Error.InputError("input path not found: " + host_path)
        host_paths_by_dir.setdefault(os.path.dirname(host_path_strip), set()).add(host_path)

    # Every mapped path lives under this directory in the container.
    based = os.path.join(task_container.container_dir, "work/_miniwdl_inputs")

    # Give each distinct parent directory name its own numbered subdirectory,
    # allocated in the order the names are first seen.
    subd = 0
    id_to_subd: Dict[str, str] = {}
    for paths in host_paths_by_dir.values():
        for host_path in paths:
            # Strip any trailing slash *before* looking for the parent, so a
            # directory path is keyed on its parent's name and not on its own
            # name. Otherwise same-named directories from different parents
            # would be forced into one subdirectory and collide.
            host_path_strip = host_path.rstrip("/")
            parent_id = os.path.basename(os.path.dirname(host_path_strip))
            if parent_id not in id_to_subd:
                id_to_subd[parent_id] = str(subd)
                subd += 1
            container_path = os.path.join(based, id_to_subd[parent_id], os.path.basename(host_path_strip))
            if host_path.endswith("/"):
                container_path += "/"
            assert container_path not in task_container.input_path_map_rev, f"{container_path}, {task_container.input_path_map_rev}"
            task_container.input_path_map[host_path] = container_path
            task_container.input_path_map_rev[container_path] = host_path
|
|
997
|
+
|
|
998
|
+
def import_files(environment: WDLBindings, toil: Toil, path: Optional[List[str]] = None, skip_remote: bool = False) -> WDLBindings:
|
|
769
999
|
"""
|
|
770
1000
|
Make sure all File values embedded in the given bindings are imported,
|
|
771
1001
|
using the given Toil object.
|
|
772
1002
|
|
|
773
1003
|
:param path: If set, try resolving input location relative to the URLs or
|
|
774
|
-
|
|
775
|
-
"""
|
|
1004
|
+
directories in this list.
|
|
776
1005
|
|
|
1006
|
+
:param skip_remote: If set, don't try to import files from remote
|
|
1007
|
+
locations. Leave them as URIs.
|
|
1008
|
+
"""
|
|
1009
|
+
path_to_id: Dict[str, uuid.UUID] = {}
|
|
1010
|
+
@memoize
|
|
777
1011
|
def import_file_from_uri(uri: str) -> str:
|
|
778
1012
|
"""
|
|
779
1013
|
Import a file from a URI and return a virtualized filename for it.
|
|
@@ -784,9 +1018,23 @@ def import_files(environment: WDLBindings, toil: Toil, path: Optional[List[str]]
|
|
|
784
1018
|
# Try each place it could be according to WDL finding logic.
|
|
785
1019
|
tried.append(candidate_uri)
|
|
786
1020
|
try:
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
1021
|
+
if skip_remote and is_url(candidate_uri):
|
|
1022
|
+
# Use remote URIs in place. But we need to find the one that exists.
|
|
1023
|
+
if not AbstractJobStore.url_exists(candidate_uri):
|
|
1024
|
+
# Wasn't found there
|
|
1025
|
+
continue
|
|
1026
|
+
# Now we know this exists, so pass it through
|
|
1027
|
+
return candidate_uri
|
|
1028
|
+
else:
|
|
1029
|
+
# Actually import
|
|
1030
|
+
# Try to import the file. Don't raise if we can't find it, just
|
|
1031
|
+
# return None!
|
|
1032
|
+
imported = toil.import_file(candidate_uri, check_existence=False)
|
|
1033
|
+
if imported is None:
|
|
1034
|
+
# Wasn't found there
|
|
1035
|
+
continue
|
|
1036
|
+
logger.info('Imported %s', candidate_uri)
|
|
1037
|
+
|
|
790
1038
|
except UnimplementedURLException as e:
|
|
791
1039
|
# We can't find anything that can even support this URL scheme.
|
|
792
1040
|
# Report to the user, they are probably missing an extra.
|
|
@@ -797,6 +1045,7 @@ def import_files(environment: WDLBindings, toil: Toil, path: Optional[List[str]]
|
|
|
797
1045
|
# we have no auth.
|
|
798
1046
|
logger.error("Something went wrong importing %s", candidate_uri)
|
|
799
1047
|
raise
|
|
1048
|
+
|
|
800
1049
|
if imported is None:
|
|
801
1050
|
# Wasn't found there
|
|
802
1051
|
continue
|
|
@@ -811,7 +1060,25 @@ def import_files(environment: WDLBindings, toil: Toil, path: Optional[List[str]]
|
|
|
811
1060
|
raise RuntimeError(f"File {candidate_uri} has no basename and so cannot be a WDL File")
|
|
812
1061
|
|
|
813
1062
|
# Was actually found
|
|
814
|
-
|
|
1063
|
+
if is_url(candidate_uri):
|
|
1064
|
+
# Might be a file URI or other URI.
|
|
1065
|
+
# We need to make sure file URIs and local paths that point to
|
|
1066
|
+
# the same place are treated the same.
|
|
1067
|
+
parsed = urlsplit(candidate_uri)
|
|
1068
|
+
if parsed.scheme == "file:":
|
|
1069
|
+
# This is a local file URI. Convert to a path for source directory tracking.
|
|
1070
|
+
parent_dir = os.path.dirname(unquote(parsed.path))
|
|
1071
|
+
else:
|
|
1072
|
+
# This is some other URL. Get the URL to the parent directory and use that.
|
|
1073
|
+
parent_dir = urljoin(candidate_uri, ".")
|
|
1074
|
+
else:
|
|
1075
|
+
# Must be a local path
|
|
1076
|
+
parent_dir = os.path.dirname(candidate_uri)
|
|
1077
|
+
|
|
1078
|
+
# Pack a UUID of the parent directory
|
|
1079
|
+
dir_id = path_to_id.setdefault(parent_dir, uuid.uuid4())
|
|
1080
|
+
|
|
1081
|
+
return pack_toil_uri(imported, dir_id, file_basename)
|
|
815
1082
|
|
|
816
1083
|
# If we get here we tried all the candidates
|
|
817
1084
|
raise RuntimeError(f"Could not find {uri} at any of: {tried}")
|
|
@@ -833,12 +1100,22 @@ def drop_missing_files(environment: WDLBindings, current_directory_override: Opt
|
|
|
833
1100
|
"""
|
|
834
1101
|
Return None if a file doesn't exist, or its path if it does.
|
|
835
1102
|
"""
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
1103
|
+
logger.debug("Consider file %s", filename)
|
|
1104
|
+
|
|
1105
|
+
if is_url(filename):
|
|
1106
|
+
if filename.startswith(TOIL_URI_SCHEME) or AbstractJobStore.url_exists(filename):
|
|
1107
|
+
# We assume anything in the filestore actually exists.
|
|
1108
|
+
return filename
|
|
1109
|
+
else:
|
|
1110
|
+
logger.warning('File %s with type %s does not actually exist at its URI', filename, value_type)
|
|
1111
|
+
return None
|
|
839
1112
|
else:
|
|
840
|
-
|
|
841
|
-
|
|
1113
|
+
effective_path = os.path.abspath(os.path.join(work_dir, filename))
|
|
1114
|
+
if os.path.exists(effective_path):
|
|
1115
|
+
return filename
|
|
1116
|
+
else:
|
|
1117
|
+
logger.warning('File %s with type %s does not actually exist at %s', filename, value_type, effective_path)
|
|
1118
|
+
return None
|
|
842
1119
|
|
|
843
1120
|
return map_over_typed_files_in_bindings(environment, drop_if_missing)
|
|
844
1121
|
|
|
@@ -912,6 +1189,7 @@ def map_over_typed_files_in_value(value: WDL.Value.Base, transform: Callable[[WD
|
|
|
912
1189
|
if new_path is None:
|
|
913
1190
|
# Assume the transform checked types if we actually care about the
|
|
914
1191
|
# result.
|
|
1192
|
+
logger.warning("File %s became Null", value)
|
|
915
1193
|
return WDL.Value.Null()
|
|
916
1194
|
else:
|
|
917
1195
|
# Make whatever the value is around the new path.
|
|
@@ -937,9 +1215,16 @@ def map_over_typed_files_in_value(value: WDL.Value.Base, transform: Callable[[WD
|
|
|
937
1215
|
class WDLBaseJob(Job):
|
|
938
1216
|
"""
|
|
939
1217
|
Base job class for all WDL-related jobs.
|
|
1218
|
+
|
|
1219
|
+
Responsible for post-processing returned bindings, to do things like add in
|
|
1220
|
+
null values for things not defined in a section. Post-processing operations
|
|
1221
|
+
can be added onto any job before it is saved, and will be applied as long
|
|
1222
|
+
as the job's run method calls postprocess().
|
|
1223
|
+
|
|
1224
|
+
Also responsible for remembering the Toil WDL configuration keys and values.
|
|
940
1225
|
"""
|
|
941
1226
|
|
|
942
|
-
def __init__(self, **kwargs: Any) -> None:
|
|
1227
|
+
def __init__(self, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
|
|
943
1228
|
"""
|
|
944
1229
|
Make a WDL-related job.
|
|
945
1230
|
|
|
@@ -961,95 +1246,168 @@ class WDLBaseJob(Job):
|
|
|
961
1246
|
# TODO: Make sure C-level stack size is also big enough for this.
|
|
962
1247
|
sys.setrecursionlimit(10000)
|
|
963
1248
|
|
|
1249
|
+
# We need an ordered list of postprocessing steps to apply, because we
|
|
1250
|
+
# may have coalesced postprocessing steps deferred by several levels of
|
|
1251
|
+
# jobs returning other jobs' promised RVs.
|
|
1252
|
+
self._postprocessing_steps: List[Tuple[str, Union[str, Promised[WDLBindings]]]] = []
|
|
1253
|
+
|
|
1254
|
+
self._wdl_options = wdl_options if wdl_options is not None else {}
|
|
1255
|
+
|
|
1256
|
+
assert self._wdl_options.get("container") is not None
|
|
1257
|
+
|
|
964
1258
|
# TODO: We're not allowed by MyPy to override a method and widen the return
|
|
965
1259
|
# type, so this has to be Any.
|
|
966
1260
|
def run(self, file_store: AbstractFileStore) -> Any:
    """
    Run a WDL-related job.

    This base implementation only raises the interpreter's recursion limit
    and returns nothing.

    Remember to decorate non-trivial overrides with :func:`report_wdl_errors`.
    """
    # Pickle may need a lot of recursive calls to save our return values, so
    # give it plenty of headroom. TODO: This might be because bindings are
    # actually linked lists or something?
    recursion_limit = 10000
    sys.setrecursionlimit(recursion_limit)
|
|
974
1270
|
|
|
975
|
-
|
|
1271
|
+
def then_underlay(self, underlay: Promised[WDLBindings]) -> None:
    """
    Queue a step that applies an underlay of backup bindings to the result.

    :param underlay: Bindings (or a promise for them) to queue as the
           argument of an "underlay" postprocessing step.
    """
    logger.debug("Underlay %s after %s", underlay, self)
    step = ("underlay", underlay)
    self._postprocessing_steps.append(step)
|
|
1277
|
+
|
|
1278
|
+
def then_remove(self, remove: Promised[WDLBindings]) -> None:
    """
    Queue a step that removes the given bindings from the result.

    :param remove: Bindings (or a promise for them) to queue as the argument
           of a "remove" postprocessing step.
    """
    logger.debug("Remove %s after %s", remove, self)
    step = ("remove", remove)
    self._postprocessing_steps.append(step)
|
|
1284
|
+
|
|
1285
|
+
def then_namespace(self, namespace: str) -> None:
    """
    Queue a step that puts the result bindings into a namespace.

    :param namespace: Name to queue as the argument of a "namespace"
           postprocessing step.
    """
    logger.debug("Namespace %s after %s", namespace, self)
    step = ("namespace", namespace)
    self._postprocessing_steps.append(step)
|
|
1291
|
+
|
|
1292
|
+
def then_overlay(self, overlay: Promised[WDLBindings]) -> None:
    """
    Queue a step that overlays the given bindings on top of the (possibly
    namespaced) result.

    :param overlay: Bindings (or a promise for them) to queue as the argument
           of an "overlay" postprocessing step.
    """
    logger.debug("Overlay %s after %s", overlay, self)
    step = ("overlay", overlay)
    self._postprocessing_steps.append(step)
|
|
1298
|
+
|
|
1299
|
+
def postprocess(self, bindings: WDLBindings) -> WDLBindings:
    """
    Apply queued changes to bindings.

    Should be applied by subclasses' run() implementations to their return
    values.

    The queued (action, argument) steps are applied one after another, in
    the order in which they were queued.

    :param bindings: The bindings to postprocess.
    :returns: The bindings after every queued step has been applied.
    :raises RuntimeError: If a step's argument has the wrong type for its
            action, or if the action is not recognized.
    """

    for action, argument in self._postprocessing_steps:

        logger.debug("Apply postprocessing step: (%s, %s)", action, argument)

        # Interpret the mini language of postprocessing steps.
        # These are too small to justify being their own separate jobs.
        if action == "underlay":
            if not isinstance(argument, WDL.Env.Bindings):
                raise RuntimeError("Wrong postprocessing argument type")
            # We want to apply values from the underlay if not set in the bindings
            bindings = combine_bindings([bindings, argument.subtract(bindings)])
        elif action == "remove":
            if not isinstance(argument, WDL.Env.Bindings):
                raise RuntimeError("Wrong postprocessing argument type")
            # We need to take stuff out of scope
            bindings = bindings.subtract(argument)
        elif action == "namespace":
            if not isinstance(argument, str):
                raise RuntimeError("Wrong postprocessing argument type")
            # We are supposed to put all our results in a namespace
            bindings = bindings.wrap_namespace(argument)
        elif action == "overlay":
            if not isinstance(argument, WDL.Env.Bindings):
                raise RuntimeError("Wrong postprocessing argument type")
            # We want to apply values from the overlay over the bindings
            bindings = combine_bindings([bindings.subtract(argument), argument])
        else:
            raise RuntimeError(f"Unknown postprocessing action {action}")

    return bindings
|
|
1337
|
+
|
|
1338
|
+
def defer_postprocessing(self, other: "WDLBaseJob") -> None:
    """
    Hand all of our queued postprocessing steps over to another job.

    Use this when you are returning a promise for bindings, on the job that issues the promise.

    :param other: The job that receives our steps; they are appended after
           any steps it already has queued.
    """
    # Append our steps to the end of the other job's queue, in place.
    handed_off = self._postprocessing_steps
    other._postprocessing_steps.extend(handed_off)

    # We no longer own any steps, so start over with a fresh empty queue.
    self._postprocessing_steps = []

    logger.debug("Assigned postprocessing steps from %s to %s", self, other)
|
|
1349
|
+
|
|
1350
|
+
class WDLTaskWrapperJob(WDLBaseJob):
|
|
976
1351
|
"""
|
|
977
|
-
Job that
|
|
1352
|
+
Job that determines the resources needed to run a WDL job.
|
|
978
1353
|
|
|
979
1354
|
Responsible for evaluating the input declarations for unspecified inputs,
|
|
980
|
-
evaluating the runtime section,
|
|
981
|
-
|
|
1355
|
+
evaluating the runtime section, and scheduling or chaining to the real WDL
|
|
1356
|
+
job.
|
|
982
1357
|
|
|
983
1358
|
All bindings are in terms of task-internal names.
|
|
984
1359
|
"""
|
|
985
1360
|
|
|
986
|
-
def __init__(self, task: WDL.Tree.Task, prev_node_results: Sequence[Promised[WDLBindings]], task_id: List[str], namespace: str, **kwargs: Any) -> None:
|
|
1361
|
+
def __init__(self, task: WDL.Tree.Task, prev_node_results: Sequence[Promised[WDLBindings]], task_id: List[str], namespace: str, task_path: str, **kwargs: Any) -> None:
    """
    Make a new job to determine resources and run a task.

    :param task: The WDL task to prepare.
    :param prev_node_results: Bindings (or promises for them) from previous
           jobs.
    :param task_id: Stored as-is on the job.
    :param namespace: The namespace that the task's *contents* exist in.
           The caller has already added the task's own name.
    :param task_path: Like the namespace, but including subscript numbers
           for scatters.
    :param kwargs: Passed through to the parent class constructor.
    """
    unit_name = task_path + ".inputs"
    display_name = namespace + ".inputs"
    # This job is always created as a local job.
    super().__init__(unitName=unit_name, displayName=display_name, local=True, **kwargs)

    logger.info("Preparing to run task code for %s as %s", task.name, namespace)

    # Remember everything we were given for when the job actually runs.
    self._task_path = task_path
    self._namespace = namespace
    self._task_id = task_id
    self._prev_node_results = prev_node_results
    self._task = task
|
|
1005
1380
|
|
|
1006
|
-
|
|
1007
|
-
"""
|
|
1008
|
-
Determie if --fakeroot is likely to work for Singularity.
|
|
1009
|
-
"""
|
|
1010
|
-
|
|
1011
|
-
# We need to have an entry for our user in /etc/subuid to grant us a range of UIDs to use, for fakeroot to work.
|
|
1012
|
-
try:
|
|
1013
|
-
subuid_file = open('/etc/subuid')
|
|
1014
|
-
except OSError as e:
|
|
1015
|
-
logger.warning('Cannot open /etc/subuid due to %s; assuming no subuids available', e)
|
|
1016
|
-
return False
|
|
1017
|
-
username = get_user_name()
|
|
1018
|
-
for line in subuid_file:
|
|
1019
|
-
if line.split(':')[0].strip() == username:
|
|
1020
|
-
# We have a line assigning subuids
|
|
1021
|
-
return True
|
|
1022
|
-
# If there is no line, we have no subuids
|
|
1023
|
-
logger.warning('No subuids are assigned to %s; cannot fake root.', username)
|
|
1024
|
-
return False
|
|
1025
|
-
|
|
1381
|
+
@report_wdl_errors("evaluate task code")
|
|
1026
1382
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1027
1383
|
"""
|
|
1028
|
-
|
|
1384
|
+
Evaluate inputs and runtime and schedule the task.
|
|
1029
1385
|
"""
|
|
1030
1386
|
super().run(file_store)
|
|
1031
|
-
logger.info("
|
|
1387
|
+
logger.info("Evaluating inputs and runtime for task %s (%s) called as %s", self._task.name, self._task_id, self._namespace)
|
|
1032
1388
|
|
|
1033
1389
|
# Combine the bindings we get from previous jobs.
|
|
1034
1390
|
# For a task we are only passed the inside-the-task namespace.
|
|
1035
1391
|
bindings = combine_bindings(unwrap_all(self._prev_node_results))
|
|
1036
1392
|
# Set up the WDL standard library
|
|
1393
|
+
# UUID to use for virtualizing files
|
|
1037
1394
|
standard_library = ToilWDLStdLibBase(file_store)
|
|
1038
1395
|
|
|
1039
1396
|
if self._task.inputs:
|
|
1040
|
-
logger.debug("Evaluating task
|
|
1397
|
+
logger.debug("Evaluating task code")
|
|
1041
1398
|
for input_decl in self._task.inputs:
|
|
1042
1399
|
# Evaluate all the inputs that aren't pre-set
|
|
1043
1400
|
bindings = bindings.bind(input_decl.name, evaluate_defaultable_decl(input_decl, bindings, standard_library))
|
|
1044
1401
|
for postinput_decl in self._task.postinputs:
|
|
1045
|
-
# Evaluate all the postinput decls
|
|
1402
|
+
# Evaluate all the postinput decls.
|
|
1403
|
+
# We need these in order to evaluate the runtime.
|
|
1404
|
+
# TODO: What if they wanted resources from the runtime?
|
|
1046
1405
|
bindings = bindings.bind(postinput_decl.name, evaluate_defaultable_decl(postinput_decl, bindings, standard_library))
|
|
1047
1406
|
|
|
1048
1407
|
# Evaluate the runtime section
|
|
1049
1408
|
runtime_bindings = evaluate_call_inputs(self._task, self._task.runtime, bindings, standard_library)
|
|
1050
1409
|
|
|
1051
|
-
# Fill these in with not-None if
|
|
1052
|
-
# TODO: Can this break out into a function somehow?
|
|
1410
|
+
# Fill these in with not-None if the workflow asks for each resource.
|
|
1053
1411
|
runtime_memory: Optional[int] = None
|
|
1054
1412
|
runtime_cores: Optional[float] = None
|
|
1055
1413
|
runtime_disk: Optional[int] = None
|
|
@@ -1057,21 +1415,14 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1057
1415
|
|
|
1058
1416
|
if runtime_bindings.has_binding('cpu'):
|
|
1059
1417
|
cpu_spec: int = runtime_bindings.resolve('cpu').value
|
|
1060
|
-
|
|
1061
|
-
# We need to get more cores
|
|
1062
|
-
runtime_cores = float(cpu_spec)
|
|
1063
|
-
logger.info('Need to reschedule to get %s cores; have %s', runtime_cores, self.cores)
|
|
1418
|
+
runtime_cores = float(cpu_spec)
|
|
1064
1419
|
|
|
1065
1420
|
if runtime_bindings.has_binding('memory'):
|
|
1066
1421
|
# Get the memory requirement and convert to bytes
|
|
1067
1422
|
memory_spec: Union[int, str] = runtime_bindings.resolve('memory').value
|
|
1068
1423
|
if isinstance(memory_spec, str):
|
|
1069
1424
|
memory_spec = human2bytes(memory_spec)
|
|
1070
|
-
|
|
1071
|
-
if memory_spec > self.memory:
|
|
1072
|
-
# We need to go get more memory
|
|
1073
|
-
runtime_memory = memory_spec
|
|
1074
|
-
logger.info('Need to reschedule to get %s memory; have %s', runtime_memory, self.memory)
|
|
1425
|
+
runtime_memory = memory_spec
|
|
1075
1426
|
|
|
1076
1427
|
if runtime_bindings.has_binding('disks'):
|
|
1077
1428
|
# Miniwdl doesn't have this, but we need to be able to parse things like:
|
|
@@ -1107,9 +1458,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1107
1458
|
if spec_parts[2] == 'LOCAL':
|
|
1108
1459
|
logger.warning('Not rounding LOCAL disk to the nearest 375 GB; workflow execution will differ from Cromwell!')
|
|
1109
1460
|
total_bytes: float = convert_units(total_gb, 'GB')
|
|
1110
|
-
|
|
1111
|
-
runtime_disk = int(total_bytes)
|
|
1112
|
-
logger.info('Need to reschedule to get %s disk, have %s', runtime_disk, self.disk)
|
|
1461
|
+
runtime_disk = int(total_bytes)
|
|
1113
1462
|
|
|
1114
1463
|
if runtime_bindings.has_binding('gpuType') or runtime_bindings.has_binding('gpuCount') or runtime_bindings.has_binding('nvidiaDriverVersion'):
|
|
1115
1464
|
# We want to have GPUs
|
|
@@ -1129,65 +1478,145 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1129
1478
|
accelerator_spec['brand'] = gpu_brand
|
|
1130
1479
|
|
|
1131
1480
|
accelerator_requirement = parse_accelerator(accelerator_spec)
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1481
|
+
runtime_accelerators = [accelerator_requirement]
|
|
1482
|
+
|
|
1483
|
+
# Schedule to get resources. Pass along the bindings from evaluating all the inputs and decls, and the runtime, with files virtualized.
|
|
1484
|
+
run_job = WDLTaskJob(self._task, virtualize_files(bindings, standard_library), virtualize_files(runtime_bindings, standard_library), self._task_id, self._namespace, self._task_path, cores=runtime_cores or self.cores, memory=runtime_memory or self.memory, disk=runtime_disk or self.disk, accelerators=runtime_accelerators or self.accelerators, wdl_options=self._wdl_options)
|
|
1485
|
+
# Run that as a child
|
|
1486
|
+
self.addChild(run_job)
|
|
1487
|
+
|
|
1488
|
+
# Give it our postprocessing steps
|
|
1489
|
+
self.defer_postprocessing(run_job)
|
|
1490
|
+
|
|
1491
|
+
# And return its result.
|
|
1492
|
+
return run_job.rv()
|
|
1493
|
+
|
|
1494
|
+
|
|
1495
|
+
|
|
1496
|
+
class WDLTaskJob(WDLBaseJob):
|
|
1497
|
+
"""
|
|
1498
|
+
Job that runs a WDL task.
|
|
1499
|
+
|
|
1500
|
+
Responsible for re-evaluating input declarations for unspecified inputs,
|
|
1501
|
+
evaluating the runtime section, re-scheduling if resources are not
|
|
1502
|
+
available, running any command, and evaluating the outputs.
|
|
1503
|
+
|
|
1504
|
+
All bindings are in terms of task-internal names.
|
|
1505
|
+
"""
|
|
1506
|
+
|
|
1507
|
+
def __init__(self, task: WDL.Tree.Task, task_internal_bindings: Promised[WDLBindings], runtime_bindings: Promised[WDLBindings], task_id: List[str], namespace: str, task_path: str, **kwargs: Any) -> None:
    """
    Make a new job to run a task.

    :param task: The WDL task to run.
    :param task_internal_bindings: Bindings (or a promise for them) from
           after the task's input section.
    :param runtime_bindings: Bindings (or a promise for them) from
           evaluating the task's runtime section.
    :param task_id: Stored as-is on the job.
    :param namespace: The namespace that the task's *contents* exist in.
           The caller has already added the task's own name.
    :param task_path: Like the namespace, but including subscript numbers
           for scatters.
    :param kwargs: Passed through to the parent class constructor; this is
           how resource requirements arrive.
    """

    # This job should not be local because it represents a real workflow task.
    unit_name = task_path + ".command"
    display_name = namespace + ".command"
    super().__init__(unitName=unit_name, displayName=display_name, local=False, **kwargs)

    logger.info("Preparing to run task %s as %s", task.name, namespace)

    # Remember everything we were given for when the job actually runs.
    self._task_path = task_path
    self._namespace = namespace
    self._task_id = task_id
    self._runtime_bindings = runtime_bindings
    self._task_internal_bindings = task_internal_bindings
    self._task = task
|
|
1531
|
+
|
|
1532
|
+
def can_fake_root(self) -> bool:
    """
    Determine if --fakeroot is likely to work for Singularity.

    :returns: True if /etc/subuid can be opened and has a line for the
              current user, and False otherwise.
    """

    # We need to have an entry for our user in /etc/subuid to grant us a range of UIDs to use, for fakeroot to work.
    try:
        subuid_file = open('/etc/subuid')
    except OSError as e:
        logger.warning('Cannot open /etc/subuid due to %s; assuming no subuids available', e)
        return False
    # Make sure the file gets closed again however we leave, including the
    # early return and any error while looking up the user or reading.
    with subuid_file:
        username = get_user_name()
        for line in subuid_file:
            if line.split(':')[0].strip() == username:
                # We have a line assigning subuids
                return True
    # If there is no line, we have no subuids
    logger.warning('No subuids are assigned to %s; cannot fake root.', username)
    return False
|
|
1551
|
+
|
|
1552
|
+
def can_mount_proc(self) -> bool:
    """
    Determine if --containall will work for Singularity.

    On Kubernetes, using it will result in "operation not permitted".
    See: https://github.com/apptainer/singularity/issues/5857

    Kubernetes is detected by the KUBERNETES_SERVICE_HOST environment
    variable being set.

    :return: False if Kubernetes is detected, and True otherwise.
    """
    on_kubernetes = "KUBERNETES_SERVICE_HOST" in os.environ
    return not on_kubernetes
|
|
1561
|
+
|
|
1562
|
+
@report_wdl_errors("run task command")
|
|
1563
|
+
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1564
|
+
"""
|
|
1565
|
+
Actually run the task.
|
|
1566
|
+
"""
|
|
1567
|
+
super().run(file_store)
|
|
1568
|
+
logger.info("Running task command for %s (%s) called as %s", self._task.name, self._task_id, self._namespace)
|
|
1569
|
+
|
|
1570
|
+
# Set up the WDL standard library
|
|
1571
|
+
# UUID to use for virtualizing files
|
|
1572
|
+
standard_library = ToilWDLStdLibBase(file_store)
|
|
1573
|
+
|
|
1574
|
+
# Get the bindings from after the input section
|
|
1575
|
+
bindings = unwrap(self._task_internal_bindings)
|
|
1576
|
+
# And the bindings from evaluating the runtime section
|
|
1577
|
+
runtime_bindings = unwrap(self._runtime_bindings)
|
|
1578
|
+
|
|
1579
|
+
# We have all the resources we need, so run the task
|
|
1580
|
+
|
|
1581
|
+
if shutil.which('singularity') and self._wdl_options.get("container") in ["singularity", "auto"]:
|
|
1164
1582
|
# Prepare to use Singularity. We will need plenty of space to
|
|
1165
1583
|
# download images.
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1584
|
+
# Default the Singularity and MiniWDL cache directories. This sets the cache to the same place as
|
|
1585
|
+
# Singularity/MiniWDL's default cache directory
|
|
1586
|
+
# With launch-cluster, the singularity and miniwdl cache is set to /var/lib/toil in abstractProvisioner.py
|
|
1587
|
+
# A current limitation with the singularity/miniwdl cache is it cannot check for image updates if the
|
|
1588
|
+
# filename is the same
|
|
1589
|
+
singularity_cache = os.path.join(os.path.expanduser("~"), ".singularity")
|
|
1590
|
+
miniwdl_cache = os.path.join(os.path.expanduser("~"), ".cache/miniwdl")
|
|
1591
|
+
|
|
1592
|
+
# Cache Singularity's layers somewhere known to have space
|
|
1593
|
+
os.environ['SINGULARITY_CACHEDIR'] = os.environ.get("SINGULARITY_CACHEDIR", singularity_cache)
|
|
1594
|
+
|
|
1169
1595
|
# Make sure it exists.
|
|
1170
1596
|
os.makedirs(os.environ['SINGULARITY_CACHEDIR'], exist_ok=True)
|
|
1171
1597
|
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1598
|
+
# Cache Singularity images for the workflow on this machine.
|
|
1599
|
+
# Since MiniWDL does only within-process synchronization for pulls,
|
|
1600
|
+
# we also will need to pre-pull one image into here at a time.
|
|
1601
|
+
os.environ['MINIWDL__SINGULARITY__IMAGE_CACHE'] = os.environ.get("MINIWDL__SINGULARITY__IMAGE_CACHE", miniwdl_cache)
|
|
1602
|
+
|
|
1177
1603
|
# Make sure it exists.
|
|
1178
1604
|
os.makedirs(os.environ['MINIWDL__SINGULARITY__IMAGE_CACHE'], exist_ok=True)
|
|
1179
1605
|
|
|
1180
1606
|
# Run containers with Singularity
|
|
1181
1607
|
TaskContainerImplementation: Type[TaskContainer] = SingularityContainer
|
|
1182
|
-
|
|
1608
|
+
elif self._wdl_options.get("container") in ["docker", "auto"]:
|
|
1183
1609
|
# Run containers with Docker
|
|
1610
|
+
# TODO: Poll if it is available and don't just try and fail.
|
|
1184
1611
|
TaskContainerImplementation = SwarmContainer
|
|
1185
|
-
if
|
|
1612
|
+
if runtime_bindings.has_binding('gpuType') or runtime_bindings.has_binding('gpuCount') or runtime_bindings.has_binding('nvidiaDriverVersion'):
|
|
1186
1613
|
# Complain to the user that this is unlikely to work.
|
|
1187
|
-
logger.warning("Running job that
|
|
1188
|
-
"
|
|
1614
|
+
logger.warning("Running job that might need accelerators with Docker. "
|
|
1615
|
+
"Accelerator and GPU support "
|
|
1189
1616
|
"is not yet implemented in the MiniWDL Docker "
|
|
1190
1617
|
"containerization implementation.")
|
|
1618
|
+
else:
|
|
1619
|
+
raise RuntimeError(f"Could not find a working container engine to use; told to use {self._wdl_options.get('container')}")
|
|
1191
1620
|
|
|
1192
1621
|
# Set up the MiniWDL container running stuff
|
|
1193
1622
|
miniwdl_logger = logging.getLogger("MiniWDLContainers")
|
|
@@ -1255,6 +1684,10 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1255
1684
|
# We can't fake root so don't try.
|
|
1256
1685
|
command_line.remove('--fakeroot')
|
|
1257
1686
|
|
|
1687
|
+
# If on Kubernetes and proc cannot be mounted, get rid of --containall
|
|
1688
|
+
if '--containall' in command_line and not self.can_mount_proc():
|
|
1689
|
+
command_line.remove('--containall')
|
|
1690
|
+
|
|
1258
1691
|
extra_flags: Set[str] = set()
|
|
1259
1692
|
accelerators_needed: Optional[List[AcceleratorRequirement]] = self.accelerators
|
|
1260
1693
|
if accelerators_needed is not None:
|
|
@@ -1282,12 +1715,12 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1282
1715
|
task_container._run_invocation = patched_run_invocation # type: ignore
|
|
1283
1716
|
|
|
1284
1717
|
# Show the runtime info to the container
|
|
1285
|
-
task_container.process_runtime(miniwdl_logger, {binding.name: binding.value for binding in runtime_bindings})
|
|
1718
|
+
task_container.process_runtime(miniwdl_logger, {binding.name: binding.value for binding in devirtualize_files(runtime_bindings, standard_library)})
|
|
1286
1719
|
|
|
1287
1720
|
# Tell the container to take up all these files. It will assign
|
|
1288
1721
|
# them all new paths in task_container.input_path_map which we can
|
|
1289
1722
|
# read. We also get a task_container.host_path() to go the other way.
|
|
1290
|
-
|
|
1723
|
+
add_paths(task_container, get_file_paths_in_bindings(bindings))
|
|
1291
1724
|
logger.debug("Using container path map: %s", task_container.input_path_map)
|
|
1292
1725
|
|
|
1293
1726
|
# Replace everything with in-container paths for the command.
|
|
@@ -1297,8 +1730,42 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1297
1730
|
# Make a new standard library for evaluating the command specifically, which only deals with in-container paths and out-of-container paths.
|
|
1298
1731
|
command_library = ToilWDLStdLibTaskCommand(file_store, task_container)
|
|
1299
1732
|
|
|
1733
|
+
def hacky_dedent(text: str) -> str:
    """
    Approximate dedenting the command *before* placeholder substitution.

    We only have the command text after its placeholder expressions were
    substituted, so this guesses what dedenting first would have
    produced. It exists to mimic MiniWDL, which means we also suffer from
    <https://github.com/chanzuckerberg/miniwdl/issues/674>.

    :param text: The command text after placeholder substitution.
    :returns: The command text with the guessed common indent removed.
    """

    # MiniWDL's dedent goes first. Its type annotations are wrong (see
    # <https://github.com/chanzuckerberg/miniwdl/issues/665>), so cast it
    # to the signature we actually use: the text is the second item.
    miniwdl_dedent = cast(Callable[[str], Tuple[int, str]], strip_leading_whitespace)
    dedented = miniwdl_dedent(text)[1]
    lines = dedented.split("\n")

    # That can still leave indentation behind. Use the leading whitespace
    # of the first line that is not all whitespace as the indent to cut.
    first_content = next((candidate for candidate in lines if candidate.strip()), None)
    if first_content is None:
        # Nothing but whitespace; nothing to cut.
        return dedented
    indent = first_content[:len(first_content) - len(first_content.lstrip())]
    if not indent:
        # The first real line is already flush left.
        return dedented

    # Cut the indent off every line that starts with it; leave others alone.
    return "\n".join(
        candidate[len(indent):] if candidate.startswith(indent) else candidate
        for candidate in lines
    )
|
|
1765
|
+
|
|
1766
|
+
|
|
1300
1767
|
# Work out the command string, and unwrap it
|
|
1301
|
-
command_string: str = evaluate_named_expression(self._task, "command", WDL.Type.String(), self._task.command, contained_bindings, command_library).coerce(WDL.Type.String()).value
|
|
1768
|
+
command_string: str = hacky_dedent(evaluate_named_expression(self._task, "command", WDL.Type.String(), self._task.command, contained_bindings, command_library).coerce(WDL.Type.String()).value)
|
|
1302
1769
|
|
|
1303
1770
|
# Grab the standard out and error paths. MyPy complains if we call
|
|
1304
1771
|
# them because in the current MiniWDL version they are untyped.
|
|
@@ -1323,12 +1790,37 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1323
1790
|
logger.info('Executing command in %s: %s', task_container, command_string)
|
|
1324
1791
|
try:
|
|
1325
1792
|
task_container.run(miniwdl_logger, command_string)
|
|
1326
|
-
|
|
1793
|
+
except Exception:
|
|
1327
1794
|
if os.path.exists(host_stderr_txt):
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1795
|
+
size = os.path.getsize(host_stderr_txt)
|
|
1796
|
+
logger.error('Failed task left standard error at %s of %d bytes', host_stderr_txt, size)
|
|
1797
|
+
if size > 0:
|
|
1798
|
+
# Send the whole error stream.
|
|
1799
|
+
file_store.log_user_stream(self._task_path + '.stderr', open(host_stderr_txt, 'rb'))
|
|
1800
|
+
if logger.isEnabledFor(logging.DEBUG):
|
|
1801
|
+
logger.debug("MiniWDL already logged standard error")
|
|
1802
|
+
else:
|
|
1803
|
+
# At debug level, MiniWDL itself logs command error lines.
|
|
1804
|
+
# But otherwise we just dump into StatsAndLogging;
|
|
1805
|
+
# we also want the messages in the job log that
|
|
1806
|
+
# gets printed at the end of the workflow. So log
|
|
1807
|
+
# the error log ourselves.
|
|
1808
|
+
logger.error("====TASK ERROR LOG====")
|
|
1809
|
+
for line in open(host_stderr_txt, 'r', errors="replace"):
|
|
1810
|
+
logger.error("> %s", line.rstrip('\n'))
|
|
1811
|
+
logger.error("====TASK ERROR LOG====")
|
|
1331
1812
|
|
|
1813
|
+
if os.path.exists(host_stdout_txt):
|
|
1814
|
+
size = os.path.getsize(host_stdout_txt)
|
|
1815
|
+
logger.info('Failed task left standard output at %s of %d bytes', host_stdout_txt, size)
|
|
1816
|
+
if size > 0:
|
|
1817
|
+
# Save the whole output stream.
|
|
1818
|
+
# TODO: We can't tell if this was supposed to be
|
|
1819
|
+
# captured. It might really be huge binary data.
|
|
1820
|
+
file_store.log_user_stream(self._task_path + '.stdout', open(host_stdout_txt, 'rb'))
|
|
1821
|
+
|
|
1822
|
+
# Keep crashing
|
|
1823
|
+
raise
|
|
1332
1824
|
else:
|
|
1333
1825
|
# We need to fake stdout and stderr, since nothing ran but the
|
|
1334
1826
|
# standard library lets you grab them. TODO: Can these be None?
|
|
@@ -1343,13 +1835,28 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1343
1835
|
# objects, and like MiniWDL we can say we only support
|
|
1344
1836
|
# working-directory-based relative paths for globs.
|
|
1345
1837
|
outputs_library = ToilWDLStdLibTaskOutputs(file_store, host_stdout_txt, host_stderr_txt, current_directory_override=workdir_in_container)
|
|
1346
|
-
output_bindings
|
|
1347
|
-
|
|
1348
|
-
|
|
1838
|
+
output_bindings = evaluate_output_decls(self._task.outputs, bindings, outputs_library)
|
|
1839
|
+
|
|
1840
|
+
# Now we know if the standard output and error were sent somewhere by
|
|
1841
|
+
# the workflow. If not, we should report them to the leader.
|
|
1349
1842
|
|
|
1350
1843
|
# Drop any files from the output which don't actually exist
|
|
1351
1844
|
output_bindings = drop_missing_files(output_bindings, current_directory_override=workdir_in_container)
|
|
1352
1845
|
|
|
1846
|
+
if not outputs_library.stderr_used() and os.path.exists(host_stderr_txt):
|
|
1847
|
+
size = os.path.getsize(host_stderr_txt)
|
|
1848
|
+
logger.info('Unused standard error at %s of %d bytes', host_stderr_txt, size)
|
|
1849
|
+
if size > 0:
|
|
1850
|
+
# Save the whole error stream because the workflow didn't capture it.
|
|
1851
|
+
file_store.log_user_stream(self._task_path + '.stderr', open(host_stderr_txt, 'rb'))
|
|
1852
|
+
|
|
1853
|
+
if not outputs_library.stdout_used() and os.path.exists(host_stdout_txt):
|
|
1854
|
+
size = os.path.getsize(host_stdout_txt)
|
|
1855
|
+
logger.info('Unused standard output at %s of %d bytes', host_stdout_txt, size)
|
|
1856
|
+
if size > 0:
|
|
1857
|
+
# Save the whole output stream because the workflow didn't capture it.
|
|
1858
|
+
file_store.log_user_stream(self._task_path + '.stdout', open(host_stdout_txt, 'rb'))
|
|
1859
|
+
|
|
1353
1860
|
# TODO: Check the output bindings against the types of the decls so we
|
|
1354
1861
|
# can tell if we have a null in a value that is supposed to not be
|
|
1355
1862
|
# nullable. We can't just look at the types on the values themselves
|
|
@@ -1358,6 +1865,9 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1358
1865
|
# Upload any files in the outputs if not uploaded already. Accounts for how relative paths may still need to be container-relative.
|
|
1359
1866
|
output_bindings = virtualize_files(output_bindings, outputs_library)
|
|
1360
1867
|
|
|
1868
|
+
# Do postprocessing steps to e.g. apply namespaces.
|
|
1869
|
+
output_bindings = self.postprocess(output_bindings)
|
|
1870
|
+
|
|
1361
1871
|
return output_bindings
|
|
1362
1872
|
|
|
1363
1873
|
class WDLWorkflowNodeJob(WDLBaseJob):
|
|
@@ -1365,19 +1875,21 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
1365
1875
|
Job that evaluates a WDL workflow node.
|
|
1366
1876
|
"""
|
|
1367
1877
|
|
|
1368
|
-
def __init__(self, node: WDL.Tree.WorkflowNode, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, **kwargs: Any) -> None:
|
|
1878
|
+
def __init__(self, node: WDL.Tree.WorkflowNode, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
    """
    Make a new job to run a workflow node to completion.

    :param node: The WDL workflow node this job evaluates. Its
           workflow_node_id is used as both the unit name and the
           display name of the job.
    :param prev_node_results: Bindings (or promises of bindings) from
           the jobs this node follows.
    :param namespace: Namespace string stored for use when the node runs.
    :param task_path: Task path string stored for use when the node runs.
    :param wdl_options: WDL options to hand to the base job; None is
           replaced with an empty dict.
    """
    node_id = node.workflow_node_id
    options = wdl_options or {}
    super().__init__(unitName=node_id, displayName=node_id, wdl_options=options, **kwargs)

    self._node = node
    self._prev_node_results = prev_node_results
    self._namespace = namespace
    self._task_path = task_path

    if isinstance(node, WDL.Tree.Call):
        # Calls get an extra debug line when their job is constructed.
        logger.debug("Preparing job for call node %s", node_id)
|
|
1380
1891
|
|
|
1892
|
+
@report_wdl_errors("run workflow node")
|
|
1381
1893
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1382
1894
|
"""
|
|
1383
1895
|
Actually execute the workflow node.
|
|
@@ -1388,62 +1900,110 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
1388
1900
|
# Combine the bindings we get from previous jobs
|
|
1389
1901
|
incoming_bindings = combine_bindings(unwrap_all(self._prev_node_results))
|
|
1390
1902
|
# Set up the WDL standard library
|
|
1391
|
-
standard_library = ToilWDLStdLibBase(file_store)
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
#
|
|
1414
|
-
|
|
1903
|
+
standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))
|
|
1904
|
+
with monkeypatch_coerce(standard_library):
|
|
1905
|
+
if isinstance(self._node, WDL.Tree.Decl):
|
|
1906
|
+
# This is a variable assignment
|
|
1907
|
+
logger.info('Setting %s to %s', self._node.name, self._node.expr)
|
|
1908
|
+
value = evaluate_decl(self._node, incoming_bindings, standard_library)
|
|
1909
|
+
return self.postprocess(incoming_bindings.bind(self._node.name, value))
|
|
1910
|
+
elif isinstance(self._node, WDL.Tree.Call):
|
|
1911
|
+
# This is a call of a task or workflow
|
|
1912
|
+
|
|
1913
|
+
# Fetch all the inputs we are passing and bind them.
|
|
1914
|
+
# The call is only allowed to use these.
|
|
1915
|
+
logger.debug("Evaluating step inputs")
|
|
1916
|
+
if self._node.callee is None:
|
|
1917
|
+
# This should never be None, but mypy gets unhappy and this is better than an assert
|
|
1918
|
+
inputs_mapping = None
|
|
1919
|
+
else:
|
|
1920
|
+
inputs_mapping = {e.name: e.type for e in self._node.callee.inputs or []}
|
|
1921
|
+
input_bindings = evaluate_call_inputs(self._node, self._node.inputs, incoming_bindings, standard_library, inputs_mapping)
|
|
1922
|
+
|
|
1923
|
+
# Bindings may also be added in from the enclosing workflow inputs
|
|
1924
|
+
# TODO: this is letting us also inject them from the workflow body.
|
|
1925
|
+
# TODO: Can this result in picking up non-namespaced values that
|
|
1926
|
+
# aren't meant to be inputs, by not changing their names?
|
|
1927
|
+
passed_down_bindings = incoming_bindings.enter_namespace(self._node.name)
|
|
1928
|
+
|
|
1929
|
+
if isinstance(self._node.callee, WDL.Tree.Workflow):
|
|
1930
|
+
# This is a call of a workflow
|
|
1931
|
+
subjob: WDLBaseJob = WDLWorkflowJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}', f'{self._task_path}.{self._node.name}', wdl_options=self._wdl_options)
|
|
1932
|
+
self.addChild(subjob)
|
|
1933
|
+
elif isinstance(self._node.callee, WDL.Tree.Task):
|
|
1934
|
+
# This is a call of a task
|
|
1935
|
+
subjob = WDLTaskWrapperJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}', f'{self._task_path}.{self._node.name}', wdl_options=self._wdl_options)
|
|
1936
|
+
self.addChild(subjob)
|
|
1937
|
+
else:
|
|
1938
|
+
raise WDL.Error.InvalidType(self._node, "Cannot call a " + str(type(self._node.callee)))
|
|
1939
|
+
|
|
1940
|
+
# We need to agregate outputs namespaced with our node name, and existing bindings
|
|
1941
|
+
subjob.then_namespace(self._node.name)
|
|
1942
|
+
subjob.then_overlay(incoming_bindings)
|
|
1943
|
+
self.defer_postprocessing(subjob)
|
|
1944
|
+
return subjob.rv()
|
|
1945
|
+
elif isinstance(self._node, WDL.Tree.Scatter):
|
|
1946
|
+
subjob = WDLScatterJob(self._node, [incoming_bindings], self._namespace, self._task_path, wdl_options=self._wdl_options)
|
|
1415
1947
|
self.addChild(subjob)
|
|
1416
|
-
|
|
1417
|
-
#
|
|
1418
|
-
|
|
1948
|
+
# Scatters don't really make a namespace, just kind of a scope?
|
|
1949
|
+
# TODO: Let stuff leave scope!
|
|
1950
|
+
self.defer_postprocessing(subjob)
|
|
1951
|
+
return subjob.rv()
|
|
1952
|
+
elif isinstance(self._node, WDL.Tree.Conditional):
|
|
1953
|
+
subjob = WDLConditionalJob(self._node, [incoming_bindings], self._namespace, self._task_path, wdl_options=self._wdl_options)
|
|
1419
1954
|
self.addChild(subjob)
|
|
1955
|
+
# Conditionals don't really make a namespace, just kind of a scope?
|
|
1956
|
+
# TODO: Let stuff leave scope!
|
|
1957
|
+
self.defer_postprocessing(subjob)
|
|
1958
|
+
return subjob.rv()
|
|
1420
1959
|
else:
|
|
1421
|
-
raise WDL.Error.InvalidType(self._node, "
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1960
|
+
raise WDL.Error.InvalidType(self._node, "Unimplemented WorkflowNode: " + str(type(self._node)))
|
|
1961
|
+
|
|
1962
|
+
class WDLWorkflowNodeListJob(WDLBaseJob):
    """
    Job that evaluates several WDL workflow nodes one after another.

    The nodes must share a scope, be in a topological dependency order,
    and must not call out to any other workflows, tasks, or sections.
    """

    def __init__(self, nodes: List[WDL.Tree.WorkflowNode], prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
        """
        Make a new job to run a list of workflow nodes to completion.

        :param nodes: The nodes to evaluate, in order. The job is named
               after the first node, with a '+' appended.
        :param prev_node_results: Bindings (or promises of bindings) from
               the jobs these nodes follow.
        :param namespace: Namespace string stored on the job.
        :param wdl_options: WDL options passed through to the base job.
        :raises RuntimeError: If any node is a call, scatter, or
                conditional, which cannot share a job with other nodes.
        """
        job_name = nodes[0].workflow_node_id + '+'
        super().__init__(unitName=job_name, displayName=job_name, wdl_options=wdl_options, **kwargs)

        self._nodes = nodes
        self._prev_node_results = prev_node_results
        self._namespace = namespace

        # Only plain nodes may be batched; anything needing its own job is
        # rejected here.
        needs_own_job = (WDL.Tree.Call, WDL.Tree.Scatter, WDL.Tree.Conditional)
        for candidate in self._nodes:
            if isinstance(candidate, needs_own_job):
                raise RuntimeError("Node cannot be evaluated with other nodes: " + str(candidate))

    @report_wdl_errors("run workflow node list")
    def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
        """
        Actually execute the workflow nodes.

        Evaluates each declaration against the bindings accumulated so
        far, binds its value, and returns the postprocessed result.

        :raises WDL.Error.InvalidType: If a node is not a declaration.
        """
        super().run(file_store)

        # Start from everything the previous jobs produced.
        bindings = combine_bindings(unwrap_all(self._prev_node_results))
        # Set up the WDL standard library
        stdlib = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))

        with monkeypatch_coerce(stdlib):
            for node in self._nodes:
                if not isinstance(node, WDL.Tree.Decl):
                    raise WDL.Error.InvalidType(node, "Unimplemented WorkflowNode: " + str(type(node)))
                # This is a variable assignment
                logger.info('Setting %s to %s', node.name, node.expr)
                bindings = bindings.bind(node.name, evaluate_decl(node, bindings, stdlib))

        return self.postprocess(bindings)
|
|
2006
|
+
|
|
1447
2007
|
|
|
1448
2008
|
class WDLCombineBindingsJob(WDLBaseJob):
|
|
1449
2009
|
"""
|
|
@@ -1451,7 +2011,7 @@ class WDLCombineBindingsJob(WDLBaseJob):
|
|
|
1451
2011
|
environment changes.
|
|
1452
2012
|
"""
|
|
1453
2013
|
|
|
1454
|
-
def __init__(self, prev_node_results: Sequence[Promised[WDLBindings]],
|
|
2014
|
+
def __init__(self, prev_node_results: Sequence[Promised[WDLBindings]], **kwargs: Any) -> None:
|
|
1455
2015
|
"""
|
|
1456
2016
|
Make a new job to combine the results of previous jobs.
|
|
1457
2017
|
|
|
@@ -1462,58 +2022,230 @@ class WDLCombineBindingsJob(WDLBaseJob):
|
|
|
1462
2022
|
super().__init__(**kwargs)
|
|
1463
2023
|
|
|
1464
2024
|
self._prev_node_results = prev_node_results
|
|
1465
|
-
self._underlay = underlay
|
|
1466
|
-
self._remove = remove
|
|
1467
2025
|
|
|
2026
|
+
@report_wdl_errors("combine bindings")
def run(self, file_store: AbstractFileStore) -> WDLBindings:
    """
    Aggregate incoming results.

    Combines the bindings from all previous jobs and passes the result
    through the job's postprocessing steps.
    """
    super().run(file_store)
    incoming = unwrap_all(self._prev_node_results)
    # The universal postprocessing steps must still run on the merged result.
    return self.postprocess(combine_bindings(incoming))
|
|
1481
2035
|
|
|
1482
|
-
class
|
|
2036
|
+
class WDLWorkflowGraph:
    """
    Represents a graph of WDL WorkflowNodes.

    Operates at a certain level of instantiation (i.e. sub-sections are
    represented by single nodes).

    Assumes all relevant nodes are provided; dependencies outside the provided
    nodes are assumed to be satisfied already.

    Node lists returned by this class do not depend on set iteration
    order, so they are the same from one interpreter run to the next.
    """

    def __init__(self, nodes: Sequence[WDL.Tree.WorkflowNode]) -> None:
        """
        Make a graph for analyzing a set of workflow nodes.

        :param nodes: The workflow nodes at this level. Gather nodes are
               not stored; they are resolved to the section that owns them.
        """

        # For Gather nodes, the Toil interpreter handles them as part of their
        # associated section. So make a map from gather ID to the section node
        # ID.
        self._gather_to_section: Dict[str, str] = {}
        for node in nodes:
            if isinstance(node, WDL.Tree.WorkflowSection):
                for gather_node in node.gathers.values():
                    self._gather_to_section[gather_node.workflow_node_id] = node.workflow_node_id

        # Store all the nodes by ID, except the gathers which we elide.
        self._nodes: Dict[str, WDL.Tree.WorkflowNode] = {node.workflow_node_id: node for node in nodes if not isinstance(node, WDL.Tree.Gather)}

    def real_id(self, node_id: str) -> str:
        """
        Map multiple IDs for what we consider the same node to one ID.

        This elides/resolves gathers: a gather's ID maps to the ID of the
        section it belongs to, and any other ID maps to itself.
        """
        return self._gather_to_section.get(node_id, node_id)

    def is_decl(self, node_id: str) -> bool:
        """
        Return True if a node represents a WDL declaration, and False
        otherwise.
        """
        return isinstance(self.get(node_id), WDL.Tree.Decl)

    def get(self, node_id: str) -> WDL.Tree.WorkflowNode:
        """
        Get a node by ID.

        Gather IDs are resolved to their section first.

        :raises KeyError: If the resolved ID is not a node in this graph.
        """
        return self._nodes[self.real_id(node_id)]

    def get_dependencies(self, node_id: str) -> Set[str]:
        """
        Get all the nodes that a node depends on, recursively (into the node if
        it has a body) but not transitively.

        Produces dependencies after resolving gathers and internal-to-section
        dependencies, on nodes that are also in this graph.
        """

        # We need to make sure to bubble up dependencies from inside sections.
        # A conditional might only appear to depend on the variables in the
        # conditional expression, but its body can depend on other stuff, and
        # we need to make sure that that stuff has finished and updated the
        # environment before the conditional body runs. TODO: This is because
        # Toil can't go and get and add successors to the relevant jobs later,
        # while MiniWDL's engine apparently can. This ends up reducing
        # parallelism more than would strictly be necessary; nothing in the
        # conditional can start until the dependencies of everything in the
        # conditional are ready.

        dependencies: Set[str] = set()

        node = self.get(node_id)
        for dependency in recursive_dependencies(node):
            real_dependency = self.real_id(dependency)
            if real_dependency in self._nodes:
                # Only dependencies on nodes in this graph count.
                dependencies.add(real_dependency)

        return dependencies

    def get_transitive_dependencies(self, node_id: str) -> Set[str]:
        """
        Get all the nodes that a node depends on, transitively.
        """

        dependencies: Set[str] = set()
        visited: Set[str] = set()
        # Nodes whose dependencies still need to be explored.
        stack = [node_id]

        while stack:
            # Grab the next thing to explore
            here = stack.pop()
            if here in visited:
                # Skip if we got it already
                continue
            # Mark it got
            visited.add(here)
            # Get all its dependencies
            here_deps = self.get_dependencies(here)
            dependencies |= here_deps
            # And queue all the ones we haven't visited.
            stack.extend(dep for dep in here_deps if dep not in visited)

        return dependencies

    def topological_order(self) -> List[str]:
        """
        Get a topological order of the nodes, based on their dependencies.

        Nodes are added in the order they were provided and each node's
        dependencies are added in sorted order, so the sorter is not fed
        in set iteration order, which can differ between interpreter runs.
        """

        sorter : TopologicalSorter[str] = TopologicalSorter()
        for node_id in self._nodes:
            # Add all the edges
            sorter.add(node_id, *sorted(self.get_dependencies(node_id)))
        return list(sorter.static_order())

    def leaves(self) -> List[str]:
        """
        Get all the workflow node IDs that have no dependents in the graph.

        The IDs are returned in the order the nodes were provided.
        """

        # Everything that some node in the graph depends on is not a leaf.
        depended_on: Set[str] = set()
        for node_id in self._nodes:
            depended_on |= self.get_dependencies(node_id)
        return [node_id for node_id in self._nodes if node_id not in depended_on]
|
|
2166
|
+
|
|
1502
2167
|
|
|
1503
2168
|
class WDLSectionJob(WDLBaseJob):
|
|
1504
2169
|
"""
|
|
1505
2170
|
Job that can create more graph for a section of the wrokflow.
|
|
1506
2171
|
"""
|
|
1507
2172
|
|
|
1508
|
-
def __init__(self, namespace: str, **kwargs: Any) -> None:
|
|
2173
|
+
def __init__(self, namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
    """
    Make a WDLSectionJob where the interior runs in the given namespace,
    starting with the root workflow.

    :param namespace: Namespace the interior of the section runs in.
    :param task_path: Task path string stored on the job.
    :param wdl_options: WDL options passed through to the base job.
    """
    super().__init__(wdl_options=wdl_options, **kwargs)
    self._namespace, self._task_path = namespace, task_path
|
|
2181
|
+
|
|
2182
|
+
@staticmethod
|
|
2183
|
+
def coalesce_nodes(order: List[str], section_graph: WDLWorkflowGraph) -> List[List[str]]:
|
|
2184
|
+
"""
|
|
2185
|
+
Given a topological order of WDL workflow node IDs, produce a list of
|
|
2186
|
+
lists of IDs, still in topological order, where each list of IDs can be
|
|
2187
|
+
run under a single Toil job.
|
|
2188
|
+
"""
|
|
2189
|
+
|
|
2190
|
+
# All the buckets of merged nodes
|
|
2191
|
+
to_return: List[List[str]] = []
|
|
2192
|
+
# The nodes we are currently merging, in topological order
|
|
2193
|
+
current_bucket: List[str] = []
|
|
2194
|
+
# All the non-decl transitive dependencies of nodes in the bucket
|
|
2195
|
+
current_bucket_dependencies: Set[str] = set()
|
|
2196
|
+
|
|
2197
|
+
for next_id in order:
|
|
2198
|
+
# Consider adding each node to the bucket
|
|
2199
|
+
# Get all the dependencies on things that aren't decls.
|
|
2200
|
+
next_dependencies = {dep for dep in section_graph.get_transitive_dependencies(next_id) if not section_graph.is_decl(dep)}
|
|
2201
|
+
if len(current_bucket) == 0:
|
|
2202
|
+
# This is the first thing for the bucket
|
|
2203
|
+
current_bucket.append(next_id)
|
|
2204
|
+
current_bucket_dependencies |= next_dependencies
|
|
2205
|
+
else:
|
|
2206
|
+
# Get a node already in the bucket
|
|
2207
|
+
current_id = current_bucket[0]
|
|
2208
|
+
|
|
2209
|
+
if not section_graph.is_decl(current_id) or not section_graph.is_decl(next_id):
|
|
2210
|
+
# We can only combine decls with decls, so we can't go in
|
|
2211
|
+
# the bucket.
|
|
2212
|
+
|
|
2213
|
+
# Finish the bucket.
|
|
2214
|
+
to_return.append(current_bucket)
|
|
2215
|
+
# Start a new one with this next node
|
|
2216
|
+
current_bucket = [next_id]
|
|
2217
|
+
current_bucket_dependencies = next_dependencies
|
|
2218
|
+
else:
|
|
2219
|
+
# We have a decl in the bucket and a decl we could maybe
|
|
2220
|
+
# add. We know they are part of the same section, so we
|
|
2221
|
+
# aren't jumping in and out of conditionals or scatters.
|
|
2222
|
+
|
|
2223
|
+
# We are going in a topological order, so we know the
|
|
2224
|
+
# bucket can't depend on the new node.
|
|
2225
|
+
|
|
2226
|
+
if next_dependencies == current_bucket_dependencies:
|
|
2227
|
+
# We can add this node without adding more dependencies on non-decls on either side.
|
|
2228
|
+
# Nothing in the bucket can be in the dependency set because the bucket is only decls.
|
|
2229
|
+
# Put it in
|
|
2230
|
+
current_bucket.append(next_id)
|
|
2231
|
+
# TODO: With this condition, this is redundant.
|
|
2232
|
+
current_bucket_dependencies |= next_dependencies
|
|
2233
|
+
else:
|
|
2234
|
+
# Finish the bucket.
|
|
2235
|
+
to_return.append(current_bucket)
|
|
2236
|
+
# Start a new one with this next node
|
|
2237
|
+
current_bucket = [next_id]
|
|
2238
|
+
current_bucket_dependencies = next_dependencies
|
|
2239
|
+
|
|
2240
|
+
if len(current_bucket) > 0:
|
|
2241
|
+
# Now finish the last bucket
|
|
2242
|
+
to_return.append(current_bucket)
|
|
2243
|
+
|
|
2244
|
+
return to_return
|
|
1515
2245
|
|
|
1516
|
-
|
|
2246
|
+
|
|
2247
|
+
|
|
2248
|
+
def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: Sequence[WDL.Tree.Gather], environment: WDLBindings, local_environment: Optional[WDLBindings] = None, subscript: Optional[int] = None) -> WDLBaseJob:
|
|
1517
2249
|
"""
|
|
1518
2250
|
Make a Toil job to evaluate a subgraph inside a workflow or workflow
|
|
1519
2251
|
section.
|
|
@@ -1529,97 +2261,79 @@ class WDLSectionJob(WDLBaseJob):
|
|
|
1529
2261
|
:param local_environment: Bindings in this environment will be
|
|
1530
2262
|
used to evaluate the subgraph but will go out of scope
|
|
1531
2263
|
at the end of the section.
|
|
2264
|
+
:param subscript: If the subgraph is being evaluated multiple times,
|
|
2265
|
+
this should be a disambiguating integer for logging.
|
|
1532
2266
|
"""
|
|
1533
2267
|
|
|
1534
|
-
#
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
# care about resolving, instead.
|
|
1540
|
-
dependabes: Set[str] = set()
|
|
2268
|
+
# Work out what to call what we are working on
|
|
2269
|
+
task_path = self._task_path
|
|
2270
|
+
if subscript is not None:
|
|
2271
|
+
# We need to include a scatter loop number.
|
|
2272
|
+
task_path += f'.{subscript}'
|
|
1541
2273
|
|
|
1542
2274
|
if local_environment is not None:
|
|
1543
2275
|
# Bring local environment into scope
|
|
1544
2276
|
environment = combine_bindings([environment, local_environment])
|
|
1545
2277
|
|
|
1546
|
-
#
|
|
1547
|
-
|
|
1548
|
-
dependabes |= set(wdl_id_to_wdl_node.keys())
|
|
1549
|
-
|
|
1550
|
-
# That doesn't include gather nodes, which in the Toil interpreter we
|
|
1551
|
-
# handle as part of their enclosing section, without individual Toil
|
|
1552
|
-
# jobs for each. So make a map from gather ID to the section node ID.
|
|
1553
|
-
gather_to_section: Dict[str, str] = {}
|
|
1554
|
-
for node in nodes:
|
|
1555
|
-
if isinstance(node, WDL.Tree.WorkflowSection):
|
|
1556
|
-
for gather_node in node.gathers.values():
|
|
1557
|
-
gather_to_section[gather_node.workflow_node_id] = node.workflow_node_id
|
|
1558
|
-
dependabes |= set(gather_to_section.keys())
|
|
2278
|
+
# Make a graph of all the nodes at this level
|
|
2279
|
+
section_graph = WDLWorkflowGraph(nodes)
|
|
1559
2280
|
|
|
1560
2281
|
# To make Toil jobs, we need all the jobs they depend on made so we can
|
|
1561
2282
|
# call .rv(). So we need to solve the workflow DAG ourselves to set it up
|
|
1562
2283
|
# properly.
|
|
1563
2284
|
|
|
1564
|
-
#
|
|
1565
|
-
|
|
1566
|
-
#
|
|
1567
|
-
#
|
|
1568
|
-
|
|
1569
|
-
# is because Toil can't go and get and add successors to the relevant
|
|
1570
|
-
# jobs later, while MiniWDL's engine apparently can. This ends up
|
|
1571
|
-
# reducing parallelism more than would strictly be necessary; nothing
|
|
1572
|
-
# in the conditional can start until the dependencies of everything in
|
|
1573
|
-
# the conditional are ready.
|
|
1574
|
-
|
|
1575
|
-
# What are the dependencies of all the body nodes on other body nodes?
|
|
1576
|
-
# Nodes can depend on other nodes actually in the tree, or on gathers
|
|
1577
|
-
# that belong to other nodes, but we rewrite the gather dependencies
|
|
1578
|
-
# through to the enclosing section node. Skip any dependencies on
|
|
1579
|
-
# anything not provided by another body node (such as on an input, or
|
|
1580
|
-
# something outside of the current section). TODO: This will need to
|
|
1581
|
-
# change if we let parallelism transcend sections.
|
|
1582
|
-
wdl_id_to_dependency_ids = {node_id: list({gather_to_section[dep] if dep in gather_to_section else dep for dep in recursive_dependencies(node) if dep in dependabes}) for node_id, node in wdl_id_to_wdl_node.items()}
|
|
1583
|
-
|
|
1584
|
-
# Which of those are outstanding?
|
|
1585
|
-
wdl_id_to_outstanding_dependency_ids = copy.deepcopy(wdl_id_to_dependency_ids)
|
|
1586
|
-
|
|
1587
|
-
# What nodes depend on each node?
|
|
1588
|
-
wdl_id_to_dependent_ids: Dict[str, Set[str]] = collections.defaultdict(set)
|
|
1589
|
-
for node_id, dependencies in wdl_id_to_dependency_ids.items():
|
|
1590
|
-
for dependency_id in dependencies:
|
|
1591
|
-
# Invert the dependency edges
|
|
1592
|
-
wdl_id_to_dependent_ids[dependency_id].add(node_id)
|
|
1593
|
-
|
|
1594
|
-
# This will hold all the Toil jobs by WDL node ID
|
|
1595
|
-
wdl_id_to_toil_job: Dict[str, Job] = {}
|
|
1596
|
-
|
|
1597
|
-
# And collect IDs of jobs with no successors to add a final sink job
|
|
1598
|
-
leaf_ids: Set[str] = set()
|
|
1599
|
-
|
|
1600
|
-
# What nodes are ready?
|
|
1601
|
-
ready_node_ids = {node_id for node_id, dependencies in wdl_id_to_outstanding_dependency_ids.items() if len(dependencies) == 0}
|
|
1602
|
-
|
|
1603
|
-
while len(wdl_id_to_outstanding_dependency_ids) > 0:
|
|
1604
|
-
logger.debug('Ready nodes: %s', ready_node_ids)
|
|
1605
|
-
logger.debug('Waiting nodes: %s', wdl_id_to_outstanding_dependency_ids)
|
|
1606
|
-
|
|
1607
|
-
# Find a node that we can do now
|
|
1608
|
-
node_id = next(iter(ready_node_ids))
|
|
1609
|
-
|
|
1610
|
-
# Say we are doing it
|
|
1611
|
-
ready_node_ids.remove(node_id)
|
|
1612
|
-
del wdl_id_to_outstanding_dependency_ids[node_id]
|
|
1613
|
-
logger.debug('Make Toil job for %s', node_id)
|
|
2285
|
+
# When a WDL node depends on another, we need to be able to find the Toil job we need an rv from.
|
|
2286
|
+
wdl_id_to_toil_job: Dict[str, WDLBaseJob] = {}
|
|
2287
|
+
# We need the set of Toil jobs not depended on so we can wire them up to the sink.
|
|
2288
|
+
# This maps from Toil job store ID to job.
|
|
2289
|
+
toil_leaves: Dict[Union[str, TemporaryID], WDLBaseJob] = {}
|
|
1614
2290
|
|
|
2291
|
+
def get_job_set_any(wdl_ids: Set[str]) -> List[WDLBaseJob]:
|
|
2292
|
+
"""
|
|
2293
|
+
Get the distinct Toil jobs executing any of the given WDL nodes.
|
|
2294
|
+
"""
|
|
2295
|
+
job_ids = set()
|
|
2296
|
+
jobs = []
|
|
2297
|
+
for job in (wdl_id_to_toil_job[wdl_id] for wdl_id in wdl_ids):
|
|
2298
|
+
# For each job that is registered under any of these WDL IDs
|
|
2299
|
+
if job.jobStoreID not in job_ids:
|
|
2300
|
+
# If we haven't taken it already, take it
|
|
2301
|
+
job_ids.add(job.jobStoreID)
|
|
2302
|
+
jobs.append(job)
|
|
2303
|
+
return jobs
|
|
2304
|
+
|
|
2305
|
+
creation_order = section_graph.topological_order()
|
|
2306
|
+
logger.debug('Creation order: %s', creation_order)
|
|
2307
|
+
|
|
2308
|
+
# Now we want to organize the linear list of nodes into collections of nodes that can be in the same Toil job.
|
|
2309
|
+
creation_jobs = self.coalesce_nodes(creation_order, section_graph)
|
|
2310
|
+
logger.debug('Creation jobs: %s', creation_jobs)
|
|
2311
|
+
|
|
2312
|
+
for node_ids in creation_jobs:
|
|
2313
|
+
logger.debug('Make Toil job for %s', node_ids)
|
|
1615
2314
|
# Collect the return values from previous jobs. Some nodes may have been inputs, without jobs.
|
|
1616
|
-
|
|
2315
|
+
# Don't inlude stuff in the current batch.
|
|
2316
|
+
prev_node_ids = {prev_node_id for node_id in node_ids for prev_node_id in section_graph.get_dependencies(node_id) if prev_node_id not in node_ids}
|
|
2317
|
+
|
|
2318
|
+
|
|
2319
|
+
# Get the Toil jobs we depend on
|
|
2320
|
+
prev_jobs = get_job_set_any(prev_node_ids)
|
|
2321
|
+
for prev_job in prev_jobs:
|
|
2322
|
+
if prev_job.jobStoreID in toil_leaves:
|
|
2323
|
+
# Mark them all as depended on
|
|
2324
|
+
del toil_leaves[prev_job.jobStoreID]
|
|
2325
|
+
|
|
2326
|
+
# Get their return values to feed into the new job
|
|
1617
2327
|
rvs: List[Union[WDLBindings, Promise]] = [prev_job.rv() for prev_job in prev_jobs]
|
|
1618
2328
|
# We also need access to section-level bindings like inputs
|
|
1619
2329
|
rvs.append(environment)
|
|
1620
2330
|
|
|
1621
|
-
|
|
1622
|
-
|
|
2331
|
+
if len(node_ids) == 1:
|
|
2332
|
+
# Make a one-node job
|
|
2333
|
+
job: WDLBaseJob = WDLWorkflowNodeJob(section_graph.get(node_ids[0]), rvs, self._namespace, task_path, wdl_options=self._wdl_options)
|
|
2334
|
+
else:
|
|
2335
|
+
# Make a multi-node job
|
|
2336
|
+
job = WDLWorkflowNodeListJob([section_graph.get(node_id) for node_id in node_ids], rvs, self._namespace, wdl_options=self._wdl_options)
|
|
1623
2337
|
for prev_job in prev_jobs:
|
|
1624
2338
|
# Connect up the happens-after relationships to make sure the
|
|
1625
2339
|
# return values are available.
|
|
@@ -1631,38 +2345,38 @@ class WDLSectionJob(WDLBaseJob):
|
|
|
1631
2345
|
# Nothing came before this job, so connect it to the workflow.
|
|
1632
2346
|
self.addChild(job)
|
|
1633
2347
|
|
|
1634
|
-
|
|
1635
|
-
|
|
2348
|
+
for node_id in node_ids:
|
|
2349
|
+
# Save the job for everything it executes
|
|
2350
|
+
wdl_id_to_toil_job[node_id] = job
|
|
1636
2351
|
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
#
|
|
1662
|
-
self.
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
wdl_id_to_toil_job[node_id].addFollowOn(sink)
|
|
2352
|
+
# It isn't depended on yet
|
|
2353
|
+
toil_leaves[job.jobStoreID] = job
|
|
2354
|
+
|
|
2355
|
+
if len(toil_leaves) == 1:
|
|
2356
|
+
# There's one final node so we can just tack postprocessing onto that.
|
|
2357
|
+
sink: WDLBaseJob = next(iter(toil_leaves.values()))
|
|
2358
|
+
else:
|
|
2359
|
+
# We need to bring together with a new sink
|
|
2360
|
+
# Make the sink job to collect all their results.
|
|
2361
|
+
leaf_rvs: List[Union[WDLBindings, Promise]] = [leaf_job.rv() for leaf_job in toil_leaves.values()]
|
|
2362
|
+
# Make sure to also send the section-level bindings
|
|
2363
|
+
leaf_rvs.append(environment)
|
|
2364
|
+
# And to fill in bindings from code not executed in this instantiation
|
|
2365
|
+
# with Null, and filter out stuff that should leave scope.
|
|
2366
|
+
sink = WDLCombineBindingsJob(leaf_rvs, wdl_options=self._wdl_options)
|
|
2367
|
+
# It runs inside us
|
|
2368
|
+
self.addChild(sink)
|
|
2369
|
+
for leaf_job in toil_leaves.values():
|
|
2370
|
+
# And after all the leaf jobs.
|
|
2371
|
+
leaf_job.addFollowOn(sink)
|
|
2372
|
+
|
|
2373
|
+
logger.debug("Sink job is: %s", sink)
|
|
2374
|
+
|
|
2375
|
+
|
|
2376
|
+
# Apply the final postprocessing for leaving the section.
|
|
2377
|
+
sink.then_underlay(self.make_gather_bindings(gather_nodes, WDL.Value.Null()))
|
|
2378
|
+
if local_environment is not None:
|
|
2379
|
+
sink.then_remove(local_environment)
|
|
1666
2380
|
|
|
1667
2381
|
return sink
|
|
1668
2382
|
|
|
@@ -1716,11 +2430,11 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
1716
2430
|
instance of the body. If an instance of the body doesn't create a binding,
|
|
1717
2431
|
it gets a null value in the corresponding array.
|
|
1718
2432
|
"""
|
|
1719
|
-
def __init__(self, scatter: WDL.Tree.Scatter, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, **kwargs: Any) -> None:
|
|
2433
|
+
def __init__(self, scatter: WDL.Tree.Scatter, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
|
|
1720
2434
|
"""
|
|
1721
2435
|
Create a subtree that will run a WDL scatter. The scatter itself and the contents live in the given namespace.
|
|
1722
2436
|
"""
|
|
1723
|
-
super().__init__(namespace, **kwargs, unitName=scatter.workflow_node_id, displayName=scatter.workflow_node_id)
|
|
2437
|
+
super().__init__(namespace, task_path, **kwargs, unitName=scatter.workflow_node_id, displayName=scatter.workflow_node_id, wdl_options=wdl_options)
|
|
1724
2438
|
|
|
1725
2439
|
# Because we need to return the return value of the workflow, we need
|
|
1726
2440
|
# to return a Toil promise for the last/sink job in the workflow's
|
|
@@ -1734,6 +2448,7 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
1734
2448
|
self._scatter = scatter
|
|
1735
2449
|
self._prev_node_results = prev_node_results
|
|
1736
2450
|
|
|
2451
|
+
@report_wdl_errors("run scatter")
|
|
1737
2452
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1738
2453
|
"""
|
|
1739
2454
|
Run the scatter.
|
|
@@ -1749,12 +2464,14 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
1749
2464
|
standard_library = ToilWDLStdLibBase(file_store)
|
|
1750
2465
|
|
|
1751
2466
|
# Get what to scatter over
|
|
1752
|
-
|
|
2467
|
+
with monkeypatch_coerce(standard_library):
|
|
2468
|
+
scatter_value = evaluate_named_expression(self._scatter, self._scatter.variable, None, self._scatter.expr, bindings, standard_library)
|
|
1753
2469
|
|
|
1754
|
-
|
|
2470
|
+
if not isinstance(scatter_value, WDL.Value.Array):
|
|
2471
|
+
raise RuntimeError("The returned value from a scatter is not an Array type.")
|
|
1755
2472
|
|
|
1756
2473
|
scatter_jobs = []
|
|
1757
|
-
for item in scatter_value.value:
|
|
2474
|
+
for subscript, item in enumerate(scatter_value.value):
|
|
1758
2475
|
# Make an instantiation of our subgraph for each possible value of
|
|
1759
2476
|
# the variable. Make sure the variable is bound only for the
|
|
1760
2477
|
# duration of the body.
|
|
@@ -1763,7 +2480,7 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
1763
2480
|
# TODO: We need to turn values() into a list because MyPy seems to
|
|
1764
2481
|
# think a dict_values isn't a Sequence. This is a waste of time to
|
|
1765
2482
|
# appease MyPy but probably better than a cast?
|
|
1766
|
-
scatter_jobs.append(self.create_subgraph(self._scatter.body, list(self._scatter.gathers.values()), bindings, local_bindings))
|
|
2483
|
+
scatter_jobs.append(self.create_subgraph(self._scatter.body, list(self._scatter.gathers.values()), bindings, local_bindings, subscript=subscript))
|
|
1767
2484
|
|
|
1768
2485
|
if len(scatter_jobs) == 0:
|
|
1769
2486
|
# No scattering is needed. We just need to bind all the names.
|
|
@@ -1783,10 +2500,11 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
1783
2500
|
# of maybe-optional values. Each body execution will define names it
|
|
1784
2501
|
# doesn't make as nulls, so we don't have to worry about
|
|
1785
2502
|
# totally-missing names.
|
|
1786
|
-
gather_job = WDLArrayBindingsJob([j.rv() for j in scatter_jobs], bindings)
|
|
2503
|
+
gather_job = WDLArrayBindingsJob([j.rv() for j in scatter_jobs], bindings, wdl_options=self._wdl_options)
|
|
1787
2504
|
self.addChild(gather_job)
|
|
1788
2505
|
for j in scatter_jobs:
|
|
1789
2506
|
j.addFollowOn(gather_job)
|
|
2507
|
+
self.defer_postprocessing(gather_job)
|
|
1790
2508
|
return gather_job.rv()
|
|
1791
2509
|
|
|
1792
2510
|
class WDLArrayBindingsJob(WDLBaseJob):
|
|
@@ -1813,6 +2531,7 @@ class WDLArrayBindingsJob(WDLBaseJob):
|
|
|
1813
2531
|
self._input_bindings = input_bindings
|
|
1814
2532
|
self._base_bindings = base_bindings
|
|
1815
2533
|
|
|
2534
|
+
@report_wdl_errors("create array bindings")
|
|
1816
2535
|
def run(self, file_store: AbstractFileStore) -> WDLBindings:
|
|
1817
2536
|
"""
|
|
1818
2537
|
Actually produce the array-ified bindings now that promised values are available.
|
|
@@ -1844,17 +2563,17 @@ class WDLArrayBindingsJob(WDLBaseJob):
|
|
|
1844
2563
|
result = result.bind(name, WDL.Value.Array(supertype, [env.resolve(name) if env.has_binding(name) else WDL.Value.Null() for env in new_bindings]))
|
|
1845
2564
|
|
|
1846
2565
|
# Base bindings are already included so return the result
|
|
1847
|
-
return result
|
|
2566
|
+
return self.postprocess(result)
|
|
1848
2567
|
|
|
1849
2568
|
class WDLConditionalJob(WDLSectionJob):
|
|
1850
2569
|
"""
|
|
1851
2570
|
Job that evaluates a conditional in a WDL workflow.
|
|
1852
2571
|
"""
|
|
1853
|
-
def __init__(self, conditional: WDL.Tree.Conditional, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, **kwargs: Any) -> None:
|
|
2572
|
+
def __init__(self, conditional: WDL.Tree.Conditional, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
|
|
1854
2573
|
"""
|
|
1855
2574
|
Create a subtree that will run a WDL conditional. The conditional itself and its contents live in the given namespace.
|
|
1856
2575
|
"""
|
|
1857
|
-
super().__init__(namespace, **kwargs, unitName=conditional.workflow_node_id, displayName=conditional.workflow_node_id)
|
|
2576
|
+
super().__init__(namespace, task_path, **kwargs, unitName=conditional.workflow_node_id, displayName=conditional.workflow_node_id, wdl_options=wdl_options)
|
|
1858
2577
|
|
|
1859
2578
|
# Once again we need to ship the whole body template to be instantiated
|
|
1860
2579
|
# into Toil jobs only if it will actually run.
|
|
@@ -1864,6 +2583,7 @@ class WDLConditionalJob(WDLSectionJob):
|
|
|
1864
2583
|
self._conditional = conditional
|
|
1865
2584
|
self._prev_node_results = prev_node_results
|
|
1866
2585
|
|
|
2586
|
+
@report_wdl_errors("run conditional")
|
|
1867
2587
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1868
2588
|
"""
|
|
1869
2589
|
Run the conditional.
|
|
@@ -1879,27 +2599,29 @@ class WDLConditionalJob(WDLSectionJob):
|
|
|
1879
2599
|
standard_library = ToilWDLStdLibBase(file_store)
|
|
1880
2600
|
|
|
1881
2601
|
# Get the expression value. Fake a name.
|
|
1882
|
-
|
|
2602
|
+
with monkeypatch_coerce(standard_library):
|
|
2603
|
+
expr_value = evaluate_named_expression(self._conditional, "<conditional expression>", WDL.Type.Boolean(), self._conditional.expr, bindings, standard_library)
|
|
1883
2604
|
|
|
1884
2605
|
if expr_value.value:
|
|
1885
2606
|
# Evaluated to true!
|
|
1886
2607
|
logger.info('Condition is true')
|
|
1887
2608
|
# Run the body and return its effects
|
|
1888
2609
|
body_job = self.create_subgraph(self._conditional.body, list(self._conditional.gathers.values()), bindings)
|
|
2610
|
+
self.defer_postprocessing(body_job)
|
|
1889
2611
|
return body_job.rv()
|
|
1890
2612
|
else:
|
|
1891
2613
|
logger.info('Condition is false')
|
|
1892
2614
|
# Return the input bindings and null bindings for all our gathers.
|
|
1893
2615
|
# Should not collide at all.
|
|
1894
2616
|
gather_bindings = self.make_gather_bindings(list(self._conditional.gathers.values()), WDL.Value.Null())
|
|
1895
|
-
return combine_bindings([bindings, gather_bindings])
|
|
2617
|
+
return self.postprocess(combine_bindings([bindings, gather_bindings]))
|
|
1896
2618
|
|
|
1897
2619
|
class WDLWorkflowJob(WDLSectionJob):
|
|
1898
2620
|
"""
|
|
1899
2621
|
Job that evaluates an entire WDL workflow.
|
|
1900
2622
|
"""
|
|
1901
2623
|
|
|
1902
|
-
def __init__(self, workflow: WDL.Tree.Workflow, prev_node_results: Sequence[Promised[WDLBindings]], workflow_id: List[str], namespace: str, **kwargs: Any) -> None:
|
|
2624
|
+
def __init__(self, workflow: WDL.Tree.Workflow, prev_node_results: Sequence[Promised[WDLBindings]], workflow_id: List[str], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
|
|
1903
2625
|
"""
|
|
1904
2626
|
Create a subtree that will run a WDL workflow. The job returns the
|
|
1905
2627
|
return value of the workflow.
|
|
@@ -1907,7 +2629,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
1907
2629
|
:param namespace: the namespace that the workflow's *contents* will be
|
|
1908
2630
|
in. Caller has already added the workflow's own name.
|
|
1909
2631
|
"""
|
|
1910
|
-
super().__init__(namespace, **kwargs)
|
|
2632
|
+
super().__init__(namespace, task_path, wdl_options=wdl_options, **kwargs)
|
|
1911
2633
|
|
|
1912
2634
|
# Because we need to return the return value of the workflow, we need
|
|
1913
2635
|
# to return a Toil promise for the last/sink job in the workflow's
|
|
@@ -1924,6 +2646,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
1924
2646
|
self._workflow_id = workflow_id
|
|
1925
2647
|
self._namespace = namespace
|
|
1926
2648
|
|
|
2649
|
+
@report_wdl_errors("run workflow")
|
|
1927
2650
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1928
2651
|
"""
|
|
1929
2652
|
Run the workflow. Return the result of the workflow.
|
|
@@ -1936,25 +2659,28 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
1936
2659
|
# For a task we only see the insode-the-task namespace.
|
|
1937
2660
|
bindings = combine_bindings(unwrap_all(self._prev_node_results))
|
|
1938
2661
|
# Set up the WDL standard library
|
|
1939
|
-
standard_library = ToilWDLStdLibBase(file_store)
|
|
2662
|
+
standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))
|
|
1940
2663
|
|
|
1941
2664
|
if self._workflow.inputs:
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
2665
|
+
with monkeypatch_coerce(standard_library):
|
|
2666
|
+
for input_decl in self._workflow.inputs:
|
|
2667
|
+
# Evaluate all the inputs that aren't pre-set
|
|
2668
|
+
bindings = bindings.bind(input_decl.name, evaluate_defaultable_decl(input_decl, bindings, standard_library))
|
|
1945
2669
|
|
|
1946
2670
|
# Make jobs to run all the parts of the workflow
|
|
1947
2671
|
sink = self.create_subgraph(self._workflow.body, [], bindings)
|
|
1948
2672
|
|
|
1949
|
-
if self._workflow.outputs:
|
|
2673
|
+
if self._workflow.outputs != []: # Compare against empty list as None means there should be outputs
|
|
2674
|
+
# Either the output section is declared and nonempty or it is not declared
|
|
1950
2675
|
# Add evaluating the outputs after the sink
|
|
1951
|
-
outputs_job = WDLOutputsJob(self._workflow
|
|
2676
|
+
outputs_job = WDLOutputsJob(self._workflow, sink.rv(), wdl_options=self._wdl_options)
|
|
1952
2677
|
sink.addFollowOn(outputs_job)
|
|
1953
|
-
# Caller
|
|
2678
|
+
# Caller is responsible for making sure namespaces are applied
|
|
2679
|
+
self.defer_postprocessing(outputs_job)
|
|
1954
2680
|
return outputs_job.rv()
|
|
1955
2681
|
else:
|
|
1956
2682
|
# No outputs from this workflow.
|
|
1957
|
-
return WDL.Env.Bindings()
|
|
2683
|
+
return self.postprocess(WDL.Env.Bindings())
|
|
1958
2684
|
|
|
1959
2685
|
class WDLOutputsJob(WDLBaseJob):
|
|
1960
2686
|
"""
|
|
@@ -1962,29 +2688,44 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
1962
2688
|
|
|
1963
2689
|
Returns an environment with just the outputs bound, in no namespace.
|
|
1964
2690
|
"""
|
|
1965
|
-
|
|
1966
|
-
def __init__(self, outputs: List[WDL.Tree.Decl], bindings: Promised[WDLBindings], **kwargs: Any):
|
|
2691
|
+
def __init__(self, workflow: WDL.Tree.Workflow, bindings: Promised[WDLBindings], wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any):
|
|
1967
2692
|
"""
|
|
1968
2693
|
Make a new WDLWorkflowOutputsJob for the given workflow, with the given set of bindings after its body runs.
|
|
1969
2694
|
"""
|
|
1970
|
-
super().__init__(**kwargs)
|
|
2695
|
+
super().__init__(wdl_options=wdl_options, **kwargs)
|
|
1971
2696
|
|
|
1972
|
-
self._outputs = outputs
|
|
1973
2697
|
self._bindings = bindings
|
|
2698
|
+
self._workflow = workflow
|
|
1974
2699
|
|
|
2700
|
+
@report_wdl_errors("evaluate outputs")
|
|
1975
2701
|
def run(self, file_store: AbstractFileStore) -> WDLBindings:
|
|
1976
2702
|
"""
|
|
1977
2703
|
Make bindings for the outputs.
|
|
1978
2704
|
"""
|
|
1979
2705
|
super().run(file_store)
|
|
1980
2706
|
|
|
1981
|
-
|
|
1982
|
-
|
|
1983
|
-
|
|
1984
|
-
|
|
1985
|
-
|
|
1986
|
-
|
|
1987
|
-
|
|
2707
|
+
if self._workflow.outputs is None:
|
|
2708
|
+
# The output section is not declared
|
|
2709
|
+
# So get all task outputs and return that
|
|
2710
|
+
# First get all task output names
|
|
2711
|
+
output_set = set()
|
|
2712
|
+
for call in self._workflow.body:
|
|
2713
|
+
if isinstance(call, WDL.Tree.Call):
|
|
2714
|
+
for type_binding in call.effective_outputs:
|
|
2715
|
+
output_set.add(type_binding.name)
|
|
2716
|
+
# Collect all bindings that are task outputs
|
|
2717
|
+
output_bindings: WDL.Env.Bindings[WDL.Value.Base] = WDL.Env.Bindings()
|
|
2718
|
+
for binding in unwrap(self._bindings):
|
|
2719
|
+
if binding.name in output_set:
|
|
2720
|
+
# The bindings will already be namespaced with the task namespaces
|
|
2721
|
+
output_bindings = output_bindings.bind(binding.name, binding.value)
|
|
2722
|
+
else:
|
|
2723
|
+
# Output section is declared and is nonempty, so evaluate normally
|
|
2724
|
+
# Evaluate all the outputs in the normal, non-task-outputs library context
|
|
2725
|
+
standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))
|
|
2726
|
+
# Combine the bindings from the previous job
|
|
2727
|
+
output_bindings = evaluate_output_decls(self._workflow.outputs, unwrap(self._bindings), standard_library)
|
|
2728
|
+
return self.postprocess(output_bindings)
|
|
1988
2729
|
|
|
1989
2730
|
class WDLRootJob(WDLSectionJob):
|
|
1990
2731
|
"""
|
|
@@ -1993,17 +2734,18 @@ class WDLRootJob(WDLSectionJob):
|
|
|
1993
2734
|
the workflow name; both forms are accepted.
|
|
1994
2735
|
"""
|
|
1995
2736
|
|
|
1996
|
-
def __init__(self, workflow: WDL.Tree.Workflow, inputs: WDLBindings, **kwargs: Any) -> None:
|
|
2737
|
+
def __init__(self, workflow: WDL.Tree.Workflow, inputs: WDLBindings, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
|
|
1997
2738
|
"""
|
|
1998
2739
|
Create a subtree to run the workflow and namespace the outputs.
|
|
1999
2740
|
"""
|
|
2000
2741
|
|
|
2001
|
-
# The root workflow names the root namespace
|
|
2002
|
-
super().__init__(workflow.name, **kwargs)
|
|
2742
|
+
# The root workflow names the root namespace and task path.
|
|
2743
|
+
super().__init__(workflow.name, workflow.name, wdl_options=wdl_options, **kwargs)
|
|
2003
2744
|
|
|
2004
2745
|
self._workflow = workflow
|
|
2005
2746
|
self._inputs = inputs
|
|
2006
2747
|
|
|
2748
|
+
@report_wdl_errors("run root job")
|
|
2007
2749
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
2008
2750
|
"""
|
|
2009
2751
|
Actually build the subgraph.
|
|
@@ -2012,53 +2754,70 @@ class WDLRootJob(WDLSectionJob):
|
|
|
2012
2754
|
|
|
2013
2755
|
# Run the workflow. We rely in this to handle entering the input
|
|
2014
2756
|
# namespace if needed, or handling free-floating inputs.
|
|
2015
|
-
workflow_job = WDLWorkflowJob(self._workflow, [self._inputs], [self._workflow.name], self._namespace)
|
|
2757
|
+
workflow_job = WDLWorkflowJob(self._workflow, [self._inputs], [self._workflow.name], self._namespace, self._task_path, wdl_options=self._wdl_options)
|
|
2758
|
+
workflow_job.then_namespace(self._namespace)
|
|
2016
2759
|
self.addChild(workflow_job)
|
|
2017
|
-
|
|
2018
|
-
|
|
2019
|
-
|
|
2020
|
-
|
|
2021
|
-
|
|
2022
|
-
|
|
2023
|
-
|
|
2760
|
+
self.defer_postprocessing(workflow_job)
|
|
2761
|
+
return workflow_job.rv()
|
|
2762
|
+
|
|
2763
|
+
@contextmanager
|
|
2764
|
+
def monkeypatch_coerce(standard_library: ToilWDLStdLibBase) -> Generator[None, None, None]:
|
|
2765
|
+
"""
|
|
2766
|
+
Monkeypatch miniwdl's WDL.Value.Base.coerce() function to virtualize files when they are represented as Strings.
|
|
2767
|
+
Calls _virtualize_filename from a given standard library object.
|
|
2768
|
+
:param standard_library: a standard library object
|
|
2769
|
+
:return
|
|
2770
|
+
"""
|
|
2771
|
+
# We're doing this because while miniwdl recognizes when a string needs to be converted into a file, it's method of
|
|
2772
|
+
# conversion is to just store the local filepath. Toil needs to virtualize the file into the jobstore so until
|
|
2773
|
+
# there is an internal entrypoint, monkeypatch it.
|
|
2774
|
+
def base_coerce(self: WDL.Value.Base, desired_type: Optional[WDL.Type.Base] = None) -> WDL.Value.Base:
|
|
2775
|
+
if isinstance(desired_type, WDL.Type.File):
|
|
2776
|
+
self.value = standard_library._virtualize_filename(self.value)
|
|
2777
|
+
return self
|
|
2778
|
+
return old_base_coerce(self, desired_type) # old_coerce will recurse back into this monkey patched coerce
|
|
2779
|
+
def string_coerce(self: WDL.Value.String, desired_type: Optional[WDL.Type.Base] = None) -> WDL.Value.Base:
|
|
2780
|
+
# Sometimes string coerce is called instead, so monkeypatch this one as well
|
|
2781
|
+
if isinstance(desired_type, WDL.Type.File) and not isinstance(self, WDL.Type.File):
|
|
2782
|
+
return WDL.Value.File(standard_library._virtualize_filename(self.value), self.expr)
|
|
2783
|
+
return old_str_coerce(self, desired_type)
|
|
2784
|
+
|
|
2785
|
+
old_base_coerce = WDL.Value.Base.coerce
|
|
2786
|
+
old_str_coerce = WDL.Value.String.coerce
|
|
2787
|
+
try:
|
|
2788
|
+
# Mypy does not like monkeypatching:
|
|
2789
|
+
# https://github.com/python/mypy/issues/2427#issuecomment-1419206807
|
|
2790
|
+
WDL.Value.Base.coerce = base_coerce # type: ignore[method-assign]
|
|
2791
|
+
WDL.Value.String.coerce = string_coerce # type: ignore[method-assign]
|
|
2792
|
+
yield
|
|
2793
|
+
finally:
|
|
2794
|
+
WDL.Value.Base.coerce = old_base_coerce # type: ignore[method-assign]
|
|
2795
|
+
WDL.Value.String.coerce = old_str_coerce # type: ignore[method-assign]
|
|
2796
|
+
|
|
2797
|
+
@report_wdl_errors("run workflow", exit=True)
|
|
2024
2798
|
def main() -> None:
|
|
2025
2799
|
"""
|
|
2026
2800
|
A Toil workflow to interpret WDL input files.
|
|
2027
2801
|
"""
|
|
2802
|
+
args = sys.argv[1:]
|
|
2028
2803
|
|
|
2029
|
-
parser =
|
|
2030
|
-
addOptions(parser, jobstore_as_flag=True)
|
|
2031
|
-
|
|
2032
|
-
parser.add_argument("wdl_uri", type=str,
|
|
2033
|
-
help="WDL document URI")
|
|
2034
|
-
parser.add_argument("inputs_uri", type=str, nargs='?',
|
|
2035
|
-
help="WDL input JSON URI")
|
|
2036
|
-
parser.add_argument("--input", "-i", dest="inputs_uri", type=str,
|
|
2037
|
-
help="WDL input JSON URI")
|
|
2038
|
-
parser.add_argument("--outputDialect", dest="output_dialect", type=str, default='cromwell', choices=['cromwell', 'miniwdl'],
|
|
2039
|
-
help=("JSON output format dialect. 'cromwell' just returns the workflow's output"
|
|
2040
|
-
"values as JSON, while 'miniwdl' nests that under an 'outputs' key, and "
|
|
2041
|
-
"includes a 'dir' key where files are written."))
|
|
2042
|
-
parser.add_argument("--outputDirectory", "-o", dest="output_directory", type=str, default=None,
|
|
2043
|
-
help=("Directory in which to save output files. By default a new directory is created in the current directory."))
|
|
2044
|
-
parser.add_argument("--outputFile", "-m", dest="output_file", type=argparse.FileType('w'), default=sys.stdout,
|
|
2045
|
-
help="File to save output JSON to.")
|
|
2804
|
+
parser = ArgParser(description='Runs WDL files with toil.')
|
|
2805
|
+
addOptions(parser, jobstore_as_flag=True, wdl=True)
|
|
2046
2806
|
|
|
2047
|
-
options = parser.parse_args(
|
|
2807
|
+
options = parser.parse_args(args)
|
|
2048
2808
|
|
|
2049
2809
|
# Make sure we have a jobStore
|
|
2050
2810
|
if options.jobStore is None:
|
|
2051
2811
|
# TODO: Move cwltoil's generate_default_job_store where we can use it
|
|
2052
|
-
options.jobStore = os.path.join(
|
|
2812
|
+
options.jobStore = os.path.join(mkdtemp(), 'tree')
|
|
2053
2813
|
|
|
2054
|
-
# Make sure we have an output directory and we don't need
|
|
2055
|
-
# about a None, and MyPy knows it.
|
|
2814
|
+
# Make sure we have an output directory (or URL prefix) and we don't need
|
|
2815
|
+
# to ever worry about a None, and MyPy knows it.
|
|
2056
2816
|
# If we don't have a directory assigned, make one in the current directory.
|
|
2057
|
-
output_directory: str = options.output_directory if options.output_directory else
|
|
2058
|
-
if not os.path.isdir(output_directory):
|
|
2059
|
-
# Make sure it exists
|
|
2060
|
-
os.mkdir(output_directory)
|
|
2817
|
+
output_directory: str = options.output_directory if options.output_directory else mkdtemp(prefix='wdl-out-', dir=os.getcwd())
|
|
2061
2818
|
|
|
2819
|
+
# Get the execution directory
|
|
2820
|
+
execution_dir = os.getcwd()
|
|
2062
2821
|
|
|
2063
2822
|
with Toil(options) as toil:
|
|
2064
2823
|
if options.restart:
|
|
@@ -2068,8 +2827,10 @@ def main() -> None:
|
|
|
2068
2827
|
document: WDL.Tree.Document = WDL.load(options.wdl_uri, read_source=toil_read_source)
|
|
2069
2828
|
|
|
2070
2829
|
if document.workflow is None:
|
|
2071
|
-
|
|
2072
|
-
|
|
2830
|
+
# Complain that we need a workflow.
|
|
2831
|
+
# We need the absolute path or URL to raise the error
|
|
2832
|
+
wdl_abspath = options.wdl_uri if not os.path.exists(options.wdl_uri) else os.path.abspath(options.wdl_uri)
|
|
2833
|
+
raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.wdl_uri, wdl_abspath, 0, 0, 0, 1), "No workflow found in document")
|
|
2073
2834
|
|
|
2074
2835
|
if options.inputs_uri:
|
|
2075
2836
|
# Load the inputs. Use the same loading mechanism, which means we
|
|
@@ -2078,10 +2839,13 @@ def main() -> None:
|
|
|
2078
2839
|
try:
|
|
2079
2840
|
inputs = json.loads(downloaded.source_text)
|
|
2080
2841
|
except json.JSONDecodeError as e:
|
|
2081
|
-
|
|
2082
|
-
|
|
2842
|
+
# Complain about the JSON document.
|
|
2843
|
+
# We need the absolute path or URL to raise the error
|
|
2844
|
+
inputs_abspath = options.inputs_uri if not os.path.exists(options.inputs_uri) else os.path.abspath(options.inputs_uri)
|
|
2845
|
+
raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.inputs_uri, inputs_abspath, e.lineno, e.colno, e.lineno, e.colno + 1), "Cannot parse input JSON: " + e.msg) from e
|
|
2083
2846
|
else:
|
|
2084
2847
|
inputs = {}
|
|
2848
|
+
|
|
2085
2849
|
# Parse out the available and required inputs. Each key in the
|
|
2086
2850
|
# JSON ought to start with the workflow's name and then a .
|
|
2087
2851
|
# TODO: WDL's Bindings[] isn't variant in the right way, so we
|
|
@@ -2109,14 +2873,24 @@ def main() -> None:
|
|
|
2109
2873
|
inputs_search_path.append(match.group(0))
|
|
2110
2874
|
|
|
2111
2875
|
# Import any files in the bindings
|
|
2112
|
-
input_bindings = import_files(input_bindings, toil, inputs_search_path)
|
|
2876
|
+
input_bindings = import_files(input_bindings, toil, inputs_search_path, skip_remote=options.reference_inputs)
|
|
2113
2877
|
|
|
2114
2878
|
# TODO: Automatically set a good MINIWDL__SINGULARITY__IMAGE_CACHE ?
|
|
2115
2879
|
|
|
2880
|
+
# Get the execution directory
|
|
2881
|
+
execution_dir = os.getcwd()
|
|
2882
|
+
|
|
2883
|
+
# Configure workflow interpreter options
|
|
2884
|
+
wdl_options: Dict[str, str] = {}
|
|
2885
|
+
wdl_options["execution_dir"] = execution_dir
|
|
2886
|
+
wdl_options["container"] = options.container
|
|
2887
|
+
assert wdl_options.get("container") is not None
|
|
2888
|
+
|
|
2116
2889
|
# Run the workflow and get its outputs namespaced with the workflow name.
|
|
2117
|
-
root_job = WDLRootJob(document.workflow, input_bindings)
|
|
2890
|
+
root_job = WDLRootJob(document.workflow, input_bindings, wdl_options=wdl_options)
|
|
2118
2891
|
output_bindings = toil.start(root_job)
|
|
2119
|
-
|
|
2892
|
+
if not isinstance(output_bindings, WDL.Env.Bindings):
|
|
2893
|
+
raise RuntimeError("The output of the WDL job is not a binding.")
|
|
2120
2894
|
|
|
2121
2895
|
# Fetch all the output files
|
|
2122
2896
|
# TODO: deduplicate with _devirtualize_filename
|
|
@@ -2125,32 +2899,7 @@ def main() -> None:
|
|
|
2125
2899
|
'devirtualize' a file using the "toil" object instead of a filestore.
|
|
2126
2900
|
Returns its local path.
|
|
2127
2901
|
"""
|
|
2128
|
-
|
|
2129
|
-
# This is a reference to the Toil filestore.
|
|
2130
|
-
# Deserialize the FileID and required basename
|
|
2131
|
-
file_id, file_basename = unpack_toil_uri(filename)
|
|
2132
|
-
# Figure out where it should go.
|
|
2133
|
-
# TODO: Deal with name collisions
|
|
2134
|
-
dest_name = os.path.join(output_directory, file_basename)
|
|
2135
|
-
# Export the file
|
|
2136
|
-
toil.exportFile(file_id, dest_name)
|
|
2137
|
-
# And return where we put it
|
|
2138
|
-
return dest_name
|
|
2139
|
-
elif filename.startswith('http:') or filename.startswith('https:') or filename.startswith('s3:') or filename.startswith('gs:'):
|
|
2140
|
-
# This is a URL that we think Toil knows how to read.
|
|
2141
|
-
imported = toil.import_file(filename)
|
|
2142
|
-
if imported is None:
|
|
2143
|
-
raise FileNotFoundError(f"Could not import URL {filename}")
|
|
2144
|
-
# Get a basename from the URL.
|
|
2145
|
-
# TODO: Deal with name collisions
|
|
2146
|
-
file_basename = os.path.basename(urlsplit(filename).path)
|
|
2147
|
-
# Do the same as we do for files we actually made.
|
|
2148
|
-
dest_name = os.path.join(output_directory, file_basename)
|
|
2149
|
-
toil.exportFile(imported, dest_name)
|
|
2150
|
-
return dest_name
|
|
2151
|
-
else:
|
|
2152
|
-
# Not a fancy file
|
|
2153
|
-
return filename
|
|
2902
|
+
return ToilWDLStdLibBase.devirtualze_to(filename, output_directory, toil, execution_dir)
|
|
2154
2903
|
|
|
2155
2904
|
# Make all the files local files
|
|
2156
2905
|
output_bindings = map_over_files_in_bindings(output_bindings, devirtualize_output)
|
|
@@ -2159,8 +2908,24 @@ def main() -> None:
|
|
|
2159
2908
|
outputs = WDL.values_to_json(output_bindings)
|
|
2160
2909
|
if options.output_dialect == 'miniwdl':
|
|
2161
2910
|
outputs = {'dir': output_directory, 'outputs': outputs}
|
|
2162
|
-
options.output_file
|
|
2163
|
-
|
|
2911
|
+
if options.output_file is None:
|
|
2912
|
+
# Send outputs to standard out
|
|
2913
|
+
print(json.dumps(outputs))
|
|
2914
|
+
else:
|
|
2915
|
+
# Export output to path or URL.
|
|
2916
|
+
# So we need to import and then export.
|
|
2917
|
+
fd, filename = mkstemp()
|
|
2918
|
+
with open(fd, 'w') as handle:
|
|
2919
|
+
# Populate the file
|
|
2920
|
+
handle.write(json.dumps(outputs))
|
|
2921
|
+
handle.write('\n')
|
|
2922
|
+
# Import it. Don't link because the temp file will go away.
|
|
2923
|
+
file_id = toil.import_file(filename, symlink=False)
|
|
2924
|
+
# Delete the temp file
|
|
2925
|
+
os.remove(filename)
|
|
2926
|
+
# Export it into place
|
|
2927
|
+
toil.export_file(file_id, options.output_file)
|
|
2928
|
+
|
|
2164
2929
|
|
|
2165
2930
|
|
|
2166
2931
|
if __name__ == "__main__":
|