toil 6.1.0a1__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +1 -232
- toil/batchSystems/abstractBatchSystem.py +41 -17
- toil/batchSystems/abstractGridEngineBatchSystem.py +79 -65
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/contained_executor.py +4 -5
- toil/batchSystems/gridengine.py +1 -1
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +25 -11
- toil/batchSystems/local_support.py +3 -3
- toil/batchSystems/lsf.py +9 -9
- toil/batchSystems/mesos/batchSystem.py +4 -4
- toil/batchSystems/mesos/executor.py +3 -2
- toil/batchSystems/options.py +9 -0
- toil/batchSystems/singleMachine.py +11 -10
- toil/batchSystems/slurm.py +129 -16
- toil/batchSystems/torque.py +1 -1
- toil/bus.py +45 -3
- toil/common.py +56 -31
- toil/cwl/cwltoil.py +442 -371
- toil/deferred.py +1 -1
- toil/exceptions.py +1 -1
- toil/fileStores/abstractFileStore.py +69 -20
- toil/fileStores/cachingFileStore.py +6 -22
- toil/fileStores/nonCachingFileStore.py +6 -15
- toil/job.py +270 -86
- toil/jobStores/abstractJobStore.py +37 -31
- toil/jobStores/aws/jobStore.py +280 -218
- toil/jobStores/aws/utils.py +60 -31
- toil/jobStores/conftest.py +2 -2
- toil/jobStores/fileJobStore.py +3 -3
- toil/jobStores/googleJobStore.py +3 -4
- toil/leader.py +89 -38
- toil/lib/aws/__init__.py +26 -10
- toil/lib/aws/iam.py +2 -2
- toil/lib/aws/session.py +62 -22
- toil/lib/aws/utils.py +73 -37
- toil/lib/conversions.py +24 -1
- toil/lib/ec2.py +118 -69
- toil/lib/expando.py +1 -1
- toil/lib/generatedEC2Lists.py +8 -8
- toil/lib/io.py +42 -4
- toil/lib/misc.py +1 -3
- toil/lib/resources.py +57 -16
- toil/lib/retry.py +12 -5
- toil/lib/threading.py +29 -14
- toil/lib/throttle.py +1 -1
- toil/options/common.py +31 -30
- toil/options/wdl.py +5 -0
- toil/provisioners/__init__.py +9 -3
- toil/provisioners/abstractProvisioner.py +12 -2
- toil/provisioners/aws/__init__.py +20 -15
- toil/provisioners/aws/awsProvisioner.py +406 -329
- toil/provisioners/gceProvisioner.py +2 -2
- toil/provisioners/node.py +13 -5
- toil/server/app.py +1 -1
- toil/statsAndLogging.py +93 -23
- toil/test/__init__.py +27 -12
- toil/test/batchSystems/batchSystemTest.py +40 -33
- toil/test/batchSystems/batch_system_plugin_test.py +79 -0
- toil/test/batchSystems/test_slurm.py +22 -7
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +245 -236
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +11 -14
- toil/test/jobStores/jobStoreTest.py +40 -54
- toil/test/lib/aws/test_iam.py +2 -2
- toil/test/lib/test_ec2.py +1 -1
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +37 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
- toil/test/provisioners/clusterTest.py +99 -16
- toil/test/server/serverTest.py +2 -2
- toil/test/src/autoDeploymentTest.py +1 -1
- toil/test/src/dockerCheckTest.py +2 -1
- toil/test/src/environmentTest.py +125 -0
- toil/test/src/fileStoreTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +18 -8
- toil/test/src/jobTest.py +1 -1
- toil/test/src/realtimeLoggerTest.py +4 -0
- toil/test/src/workerTest.py +52 -19
- toil/test/utils/toilDebugTest.py +62 -4
- toil/test/utils/utilsTest.py +23 -21
- toil/test/wdl/wdltoil_test.py +49 -21
- toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
- toil/toilState.py +68 -9
- toil/utils/toilDebugFile.py +1 -1
- toil/utils/toilDebugJob.py +153 -26
- toil/utils/toilLaunchCluster.py +12 -2
- toil/utils/toilRsyncCluster.py +7 -2
- toil/utils/toilSshCluster.py +7 -3
- toil/utils/toilStats.py +310 -266
- toil/utils/toilStatus.py +98 -52
- toil/version.py +11 -11
- toil/wdl/wdltoil.py +644 -225
- toil/worker.py +125 -83
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
- toil-7.0.0.dist-info/METADATA +158 -0
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/RECORD +103 -96
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py
CHANGED
@@ -29,32 +29,29 @@ import logging
 import os
 import pprint
 import shutil
-import socket
 import stat
 import sys
 import textwrap
 import uuid
-from tempfile import NamedTemporaryFile, gettempdir
+from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
 from threading import Thread
-from typing import (
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    Sequence,
-)
+from typing import (IO,
+                    Any,
+                    Callable,
+                    Dict,
+                    Iterator,
+                    List,
+                    Mapping,
+                    MutableMapping,
+                    MutableSequence,
+                    Optional,
+                    Sequence,
+                    TextIO,
+                    Tuple,
+                    Type,
+                    TypeVar,
+                    Union,
+                    cast)
 from urllib.parse import quote, unquote, urlparse, urlsplit

 import cwl_utils.errors
@@ -68,36 +65,30 @@ import cwltool.load_tool
 import cwltool.main
 import cwltool.resolver
 import schema_salad.ref_resolver
-from configargparse import ArgParser,
+from configargparse import ArgParser, Namespace
 from cwltool.loghandler import _logger as cwllogger
 from cwltool.loghandler import defaultStreamHandler
 from cwltool.mpi import MpiConfig
 from cwltool.mutation import MutationManager
 from cwltool.pathmapper import MapperEnt, PathMapper
-from cwltool.process import (
-
-
-
-
-    shortname,
-)
+from cwltool.process import (Process,
+                             add_sizes,
+                             compute_checksums,
+                             fill_in_defaults,
+                             shortname)
 from cwltool.secrets import SecretStore
-from cwltool.software_requirements import (
-
-    get_container_from_software_requirements,
-)
+from cwltool.software_requirements import (DependenciesConfiguration,
+                                           get_container_from_software_requirements)
 from cwltool.stdfsaccess import StdFsAccess, abspath
-from cwltool.utils import (
-
-
-
-
-
-
-
-
-    visit_class,
-)
+from cwltool.utils import (CWLObjectType,
+                           CWLOutputType,
+                           DirectoryType,
+                           adjustDirObjs,
+                           aslist,
+                           downloadHttpFile,
+                           get_listing,
+                           normalizeFilesDirs,
+                           visit_class)
 from ruamel.yaml.comments import CommentedMap, CommentedSeq
 from schema_salad.avro.schema import Names
 from schema_salad.exceptions import ValidationException
@@ -105,23 +96,24 @@ from schema_salad.ref_resolver import file_uri, uri_file_path
 from schema_salad.sourceline import SourceLine
 from typing_extensions import Literal

+from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
 from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
 from toil.common import Toil, addOptions
 from toil.cwl import check_cwltool_version
+from toil.provisioners.clusterScaler import JobTooBigError

 check_cwltool_version()
-from toil.cwl.utils import (
-
-
-
-
-    visit_cwl_class_and_reduce,
-)
+from toil.cwl.utils import (CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
+                            CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
+                            download_structure,
+                            get_from_structure,
+                            visit_cwl_class_and_reduce)
 from toil.exceptions import FailedJobsException
 from toil.fileStores import FileID
 from toil.fileStores.abstractFileStore import AbstractFileStore
 from toil.job import AcceleratorRequirement, Job, Promise, Promised, unwrap
-from toil.jobStores.abstractJobStore import AbstractJobStore, NoSuchFileException
+from toil.jobStores.abstractJobStore import (AbstractJobStore, NoSuchFileException, LocatorException,
+                                             InvalidImportExportUrlException, UnimplementedURLException)
 from toil.jobStores.fileJobStore import FileJobStore
 from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
 from toil.lib.io import mkdtemp
@@ -1019,6 +1011,24 @@ class ToilSingleJobExecutor(cwltool.executors.SingleJobExecutor):
 class ToilTool:
     """Mixin to hook Toil into a cwltool tool type."""

+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        """
+        Init hook to set up member variables.
+        """
+        super().__init__(*args, **kwargs)
+        # Reserve a spot for the Toil job that ends up executing this tool.
+        self._toil_job: Optional[Job] = None
+        # Remember path mappers we have used so we can interrogate them later to find out what the job mapped.
+        self._path_mappers: List[cwltool.pathmapper.PathMapper] = []
+
+    def connect_toil_job(self, job: Job) -> None:
+        """
+        Attach the Toil tool to the Toil job that is executing it. This allows
+        it to use the Toil job to stop at certain points if debugging flags are
+        set.
+        """
+        self._toil_job = job
+
     def make_path_mapper(
         self,
         reffiles: List[Any],
@@ -1029,12 +1039,12 @@ class ToilTool:
         """Create the appropriate PathMapper for the situation."""
         if getattr(runtimeContext, "bypass_file_store", False):
             # We only need to understand cwltool's supported URIs
-
+            mapper = PathMapper(
                 reffiles, runtimeContext.basedir, stagedir, separateDirs=separateDirs
             )
         else:
             # We need to be able to read from Toil-provided URIs
-
+            mapper = ToilPathMapper(
                 reffiles,
                 runtimeContext.basedir,
                 stagedir,
@@ -1043,6 +1053,10 @@ class ToilTool:
                 streaming_allowed=runtimeContext.streaming_allowed,
             )

+        # Remember the path mappers
+        self._path_mappers.append(mapper)
+        return mapper
+
     def __str__(self) -> str:
         """Return string representation of this tool type."""
         return f'{self.__class__.__name__}({repr(getattr(self, "tool", {}).get("id", "???"))})'
@@ -1059,17 +1073,34 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
         name conflicts at the top level of the work directory.
         """

+        # Set up the initial work dir with all its files
         super()._initialworkdir(j, builder)

         # The initial work dir listing is now in j.generatefiles["listing"]
-        # Also j.
+        # Also j.generatefiles is a CWL Directory.
         # So check the initial working directory.
-        logger.
+        logger.debug("Initial work dir: %s", j.generatefiles)
         ensure_no_collisions(
             j.generatefiles,
             "the job's working directory as specified by the InitialWorkDirRequirement",
         )

+        if self._toil_job is not None:
+            # Make a table of all the places we mapped files to when downloading the inputs.
+
+            # We want to hint which host paths and container (if any) paths correspond
+            host_and_job_paths: List[Tuple[str, str]] = []
+
+            for pm in self._path_mappers:
+                for _, mapper_entry in pm.items_exclude_children():
+                    # We know that mapper_entry.target as seen by the task is
+                    # mapper_entry.resolved on the host.
+                    host_and_job_paths.append((mapper_entry.resolved, mapper_entry.target))
+
+            # Notice that we have downloaded our inputs. Explain which files
+            # those are here and what the task will expect to call them.
+            self._toil_job.files_downloaded_hook(host_and_job_paths)
+

 class ToilExpressionTool(ToilTool, cwltool.command_line_tool.ExpressionTool):
     """Subclass the cwltool expression tool to provide the custom ToilPathMapper."""
@@ -1092,6 +1123,10 @@ def toil_make_tool(
     return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)


+# When a file we want to have is missing, we can give it this sentinal location
+# URI instead of raising an error right away, in case it is optional.
+MISSING_FILE = "missing://"
+
 DirectoryContents = Dict[str, Union[str, "DirectoryContents"]]


@@ -1716,7 +1751,7 @@ def import_files(
     fileindex: Dict[str, str],
     existing: Dict[str, str],
     cwl_object: Optional[CWLObjectType],
-
+    mark_broken: bool = False,
     skip_remote: bool = False,
     bypass_file_store: bool = False,
     log_level: int = logging.DEBUG
@@ -1735,10 +1770,10 @@
     Preserves any listing fields.

     If a file cannot be found (like if it is an optional secondary file that
-    doesn't exist), fails, unless
-
+    doesn't exist), fails, unless mark_broken is set, in which case it applies
+    a sentinel location.

-    Also does some
+    Also does some miscellaneous normalization.

     :param import_function: The function used to upload a URI and get a
         Toil FileID for it.
@@ -1754,8 +1789,9 @@

     :param cwl_object: CWL tool (or workflow order) we are importing files for

-    :param
-    don't exist,
+    :param mark_broken: If True, when files can't be imported because they e.g.
+        don't exist, set their locations to MISSING_FILE rather than failing
+        with an error.

     :param skp_remote: If True, leave remote URIs in place instead of importing
         files.
@@ -1875,7 +1911,7 @@

         # Upload the file itself, which will adjust its location.
         upload_file(
-            import_and_log, fileindex, existing, rec,
+            import_and_log, fileindex, existing, rec, mark_broken=mark_broken, skip_remote=skip_remote
         )

         # Make a record for this file under its name
@@ -1904,7 +1940,7 @@
             contents.update(child_result)

         # Upload the directory itself, which will adjust its location.
-        upload_directory(rec, contents,
+        upload_directory(rec, contents, mark_broken=mark_broken)

         # Show those contents as being under our name in our parent.
         return {cast(str, rec["basename"]): contents}
@@ -1924,7 +1960,7 @@
 def upload_directory(
     directory_metadata: CWLObjectType,
     directory_contents: DirectoryContents,
-
+    mark_broken: bool = False,
 ) -> None:
     """
     Upload a Directory object.
@@ -1936,6 +1972,9 @@ def upload_directory(
     Makes sure the directory actually exists, and rewrites its location to be
     something we can use on another machine.

+    If mark_broken is set, ignores missing directories and replaces them with
+    directories containing the given (possibly empty) contents.
+
     We can't rely on the directory's listing as visible to the next tool as a
     complete recursive description of the files we will need to present to the
     tool, since some tools require it to be cleared or single-level but still
@@ -1956,8 +1995,8 @@
     if location.startswith("file://") and not os.path.isdir(
         schema_salad.ref_resolver.uri_file_path(location)
     ):
-        if
-
+        if mark_broken:
+            logger.debug("Directory %s is missing as a whole", directory_metadata)
         else:
             raise cwl_utils.errors.WorkflowException(
                 "Directory is missing: %s" % directory_metadata["location"]
@@ -1979,7 +2018,7 @@ def upload_file(
     fileindex: Dict[str, str],
     existing: Dict[str, str],
     file_metadata: CWLObjectType,
-
+    mark_broken: bool = False,
     skip_remote: bool = False
 ) -> None:
     """
@@ -1987,7 +2026,10 @@

     Uploads local files to the Toil file store, and sets their location to a
     reference to the toil file store.
-
+
+    If a file doesn't exist, fails with an error, unless mark_broken is set, in
+    which case the missing file is given a special sentinel location.
+
     Unless skip_remote is set, downloads remote files into the file store and
     sets their locations to references into the file store as well.
     """
@@ -2008,10 +2050,11 @@
     if location.startswith("file://") and not os.path.isfile(
         schema_salad.ref_resolver.uri_file_path(location)
     ):
-        if
-
+        if mark_broken:
+            logger.debug("File %s is missing", file_metadata)
+            file_metadata["location"] = location = MISSING_FILE
         else:
-            raise cwl_utils.errors.WorkflowException("File is missing: %s" %
+            raise cwl_utils.errors.WorkflowException("File is missing: %s" % file_metadata)

     if location.startswith("file://") or not skip_remote:
         # This is a local file, or we also need to download and re-upload remote files
@@ -2134,7 +2177,7 @@ def toilStageFiles(
     :param destBucket: If set, export to this base URL instead of to the local
         filesystem.

-    :param log_level: Log each file
+    :param log_level: Log each file transferred at the given level.
     """

     def _collectDirEntries(
@@ -2614,6 +2657,13 @@ class CWLJob(CWLNamedJob):
             streaming_allowed=runtime_context.streaming_allowed,
         )

+        # Collect standard output and standard error somewhere if they don't go to files.
+        # We need to keep two FDs to these because cwltool will close what we give it.
+        default_stdout = TemporaryFile()
+        runtime_context.default_stdout = os.fdopen(os.dup(default_stdout.fileno()), 'wb')
+        default_stderr = TemporaryFile()
+        runtime_context.default_stderr = os.fdopen(os.dup(default_stderr.fileno()), 'wb')
+
         process_uuid = uuid.uuid4()  # noqa F841
         started_at = datetime.datetime.now()  # noqa F841

@@ -2622,13 +2672,39 @@ class CWLJob(CWLNamedJob):
         logger.debug("Running tool %s with order: %s", self.cwltool, self.cwljob)

         runtime_context.name = self.description.unitName
-
-
-
-
-
-
-
+
+        if isinstance(self.cwltool, ToilTool):
+            # Connect the CWL tool to us so it can call into the Toil job when
+            # it reaches points where we might need to debug it.
+            self.cwltool.connect_toil_job(self)
+
+        status = "did_not_run"
+        try:
+            output, status = ToilSingleJobExecutor().execute(
+                process=self.cwltool,
+                job_order_object=cwljob,
+                runtime_context=runtime_context,
+                logger=cwllogger,
+            )
+        finally:
+            ended_at = datetime.datetime.now()  # noqa F841
+
+            # Log any output/error data
+            default_stdout.seek(0, os.SEEK_END)
+            if default_stdout.tell() > 0:
+                default_stdout.seek(0)
+                file_store.log_user_stream(self.description.unitName + '.stdout', default_stdout)
+                if status != "success":
+                    default_stdout.seek(0)
+                    logger.error("Failed command standard output:\n%s", default_stdout.read().decode("utf-8", errors="replace"))
+            default_stderr.seek(0, os.SEEK_END)
+            if default_stderr.tell():
+                default_stderr.seek(0)
+                file_store.log_user_stream(self.description.unitName + '.stderr', default_stderr)
+                if status != "success":
+                    default_stderr.seek(0)
+                    logger.error("Failed command standard error:\n%s", default_stderr.read().decode("utf-8", errors="replace"))
+
         if status != "success":
             raise cwl_utils.errors.WorkflowException(status)

@@ -3263,9 +3339,8 @@ def filtered_secondary_files(
     but add the resolved fields to the list of unresolved fields so we remove
     them here after the fact.

-    We keep secondary files
-
-    exist. The 'required' logic seems to be handled deeper in
+    We keep secondary files with anything other than MISSING_FILE as their
+    location. The 'required' logic seems to be handled deeper in
     cwltool.builder.Builder(), and correctly determines which files should be
     imported. Therefore we remove the files here and if this file is SUPPOSED
     to exist, it will still give the appropriate file does not exist error, but
@@ -3280,24 +3355,22 @@ def filtered_secondary_files(
         if ("$(" not in sf_bn) and ("${" not in sf_bn):
             if ("$(" not in sf_loc) and ("${" not in sf_loc):
                 intermediate_secondary_files.append(sf)
+            else:
+                logger.debug("Secondary file %s is dropped because it has an uninterpolated location", sf)
+        else:
+            logger.debug("Secondary file %s is dropped because it has an uninterpolated basename", sf)
     # remove secondary files that are not present in the filestore or pointing
     # to existant things on disk
     for sf in intermediate_secondary_files:
         sf_loc = cast(str, sf.get("location", ""))
         if (
-            sf_loc
-            or sf_loc.startswith("toildir:")
-            or sf_loc.startswith("_:")
+            sf_loc != MISSING_FILE
             or sf.get("class", "") == "Directory"
         ):
             # Pass imported files, and all Directories
             final_secondary_files.append(sf)
-
-
-        ):
-            # Pass things that exist on disk (which we presumably declined to
-            # import because we aren't using the file store)
-            final_secondary_files.append(sf)
+        else:
+            logger.debug("Secondary file %s is dropped because it is known to be missing", sf)
     return final_secondary_files


@@ -3352,12 +3425,12 @@ def determine_load_listing(

     1. no_listing: DIRECTORY_NAME.listing will be undefined.
         e.g.
-
+
         inputs.DIRECTORY_NAME.listing == unspecified

     2. shallow_listing: DIRECTORY_NAME.listing will return a list one level
         deep of DIRECTORY_NAME's contents.
-        e.g.
+        e.g.

         inputs.DIRECTORY_NAME.listing == [items in directory]
         inputs.DIRECTORY_NAME.listing[0].listing == undefined
@@ -3546,6 +3619,10 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
         #
         # If set, workDir needs to exist, so we directly use the prefix
         options.workDir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
+    if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.coordination_dir is None:
+        # override coordination_dir as default Toil will pick somewhere else
+        # ignoring --tmpdir_prefix
+        options.coordination_dir = cwltool.utils.create_tmp_dir(tmpdir_prefix)

     if options.batchSystem == "kubernetes":
         # Containers under Kubernetes can only run in Singularity
@@ -3576,7 +3653,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
     dependencies_configuration = DependenciesConfiguration(options)
     job_script_provider = dependencies_configuration

-    options.default_container = None
     runtime_context = cwltool.context.RuntimeContext(vars(options))
     runtime_context.toplevel = True  # enable discovery of secondaryFiles
     runtime_context.find_default_container = functools.partial(
@@ -3617,314 +3693,309 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
     )
     runtime_context.research_obj = research_obj

-
-
-
+    try:
+        with Toil(options) as toil:
+            if options.restart:
                outobj = toil.restart()
-
-
-
-
-
-                    "
+            else:
+                loading_context.hints = [
+                    {
+                        "class": "ResourceRequirement",
+                        "coresMin": toil.config.defaultCores,
+                        "ramMin": toil.config.defaultMemory / (2**20),
+                        "outdirMin": toil.config.defaultDisk / (2**20),
+                        "tmpdirMin": 0,
+                    }
+                ]
+                loading_context.construct_tool_object = toil_make_tool
+                loading_context.strict = not options.not_strict
+                options.workflow = options.cwltool
+                options.job_order = options.cwljob
+
+                try:
+                    uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
+                        options.cwltool,
+                        loading_context.resolver,
+                        loading_context.fetcher_constructor,
+                    )
+                except ValidationException:
+                    print(
+                        "\nYou may be getting this error because your arguments are incorrect or out of order."
+                        + usage_message,
+                        file=sys.stderr,
                    )
-                return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
-            else:
                    raise
-            else:
-                loading_context.hints = [
-                    {
-                        "class": "ResourceRequirement",
-                        "coresMin": toil.config.defaultCores,
-                        "ramMin": toil.config.defaultMemory / (2**20),
-                        "outdirMin": toil.config.defaultDisk / (2**20),
-                        "tmpdirMin": 0,
-                    }
-                ]
-                loading_context.construct_tool_object = toil_make_tool
-                loading_context.strict = not options.not_strict
-                options.workflow = options.cwltool
-                options.job_order = options.cwljob

-
-
-
-
+                options.tool_help = None
+                options.debug = options.logLevel == "DEBUG"
+                job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
+                    options,
+                    sys.stdin,
                    loading_context.fetcher_constructor,
+                    loading_context.overrides_list,
+                    tool_file_uri,
                )
-
-
-
-
-
-
-
-
-                options.tool_help = None
-                options.debug = options.logLevel == "DEBUG"
-                job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
-                    options,
-                    sys.stdin,
-                    loading_context.fetcher_constructor,
-                    loading_context.overrides_list,
-                    tool_file_uri,
-                )
-                if options.overrides:
-                    loading_context.overrides_list.extend(
-                        cwltool.load_tool.load_overrides(
-                            schema_salad.ref_resolver.file_uri(
-                                os.path.abspath(options.overrides)
-                            ),
-                            tool_file_uri,
+                if options.overrides:
+                    loading_context.overrides_list.extend(
+                        cwltool.load_tool.load_overrides(
+                            schema_salad.ref_resolver.file_uri(
+                                os.path.abspath(options.overrides)
+                            ),
+                            tool_file_uri,
+                        )
                    )
+
+                loading_context, workflowobj, uri = cwltool.load_tool.fetch_document(
+                    uri, loading_context
+                )
+                loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
+                    loading_context, workflowobj, uri
                )
+                if not loading_context.loader:
+                    raise RuntimeError("cwltool loader is not set.")
+                processobj, metadata = loading_context.loader.resolve_ref(uri)
+                processobj = cast(Union[CommentedMap, CommentedSeq], processobj)

-
-                    uri, loading_context
-                )
-                loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
-                    loading_context, workflowobj, uri
-                )
-                if not loading_context.loader:
-                    raise RuntimeError("cwltool loader is not set.")
-                processobj, metadata = loading_context.loader.resolve_ref(uri)
-                processobj = cast(Union[CommentedMap, CommentedSeq], processobj)
+                document_loader = loading_context.loader

-
+                if options.provenance and runtime_context.research_obj:
+                    cwltool.cwlprov.writablebagfile.packed_workflow(
+                        runtime_context.research_obj,
+                        cwltool.main.print_pack(loading_context, uri),
+                    )

-
-
-
-
-
+                try:
+                    tool = cwltool.load_tool.make_tool(uri, loading_context)
+                    scan_for_unsupported_requirements(
+                        tool, bypass_file_store=options.bypass_file_store
+                    )
+                except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
+                    logging.error(err)
+                    return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
+                runtime_context.secret_store = SecretStore()
+
+                try:
+                    # Get the "order" for the execution of the root job. CWLTool
+                    # doesn't document this much, but this is an "order" in the
+                    # sense of a "specification" for running a single job. It
+                    # describes the inputs to the workflow.
+                    initialized_job_order = cwltool.main.init_job_order(
+                        job_order_object,
+                        options,
+                        tool,
+                        jobloader,
+                        sys.stdout,
+                        make_fs_access=runtime_context.make_fs_access,
+                        input_basedir=options.basedir,
+                        secret_store=runtime_context.secret_store,
+                        input_required=True,
+                    )
+                except SystemExit as err:
+                    if err.code == 2:  # raised by argparse's parse_args() function
+                        print(
+                            "\nIf both a CWL file and an input object (YAML/JSON) file were "
+                            "provided, this may be the argument order." + usage_message,
+                            file=sys.stderr,
+                        )
+                    raise

-
-
-
-
+                # Leave the defaults un-filled in the top-level order. The tool or
+                # workflow will fill them when it runs
+
+                for inp in tool.tool["inputs"]:
+                    if (
+                        shortname(inp["id"]) in initialized_job_order
+                        and inp["type"] == "File"
+                    ):
+                        cast(CWLObjectType, initialized_job_order[shortname(inp["id"])])[
+                            "streamable"
+                        ] = inp.get("streamable", False)
+                    # TODO also for nested types that contain streamable Files
+
+                runtime_context.use_container = not options.no_container
+                runtime_context.tmp_outdir_prefix = os.path.realpath(tmp_outdir_prefix)
+                runtime_context.job_script_provider = job_script_provider
+                runtime_context.force_docker_pull = options.force_docker_pull
+                runtime_context.no_match_user = options.no_match_user
+                runtime_context.no_read_only = options.no_read_only
+                runtime_context.basedir = options.basedir
+                if not options.bypass_file_store:
+                    # If we're using the file store we need to start moving output
+                    # files now.
+                    runtime_context.move_outputs = "move"
+
+                # We instantiate an early builder object here to populate indirect
+                # secondaryFile references using cwltool's library because we need
+                # to resolve them before toil imports them into the filestore.
+                # A second builder will be built in the job's run method when toil
+                # actually starts the cwl job.
+                # Note that this accesses input files for tools, so the
+                # ToilFsAccess needs to be set up if we want to be able to use
+                # URLs.
+                builder = tool._init_job(initialized_job_order, runtime_context)
+
+                # make sure this doesn't add listing items; if shallow_listing is
+                # selected, it will discover dirs one deep and then again later on
+                # (probably when the cwltool builder gets ahold of the job in the
+                # CWL job's run()), producing 2+ deep listings instead of only 1.
+                builder.loadListing = "no_listing"
+
+                builder.bind_input(
+                    tool.inputs_record_schema,
+                    initialized_job_order,
+                    discover_secondaryFiles=True,
                )
-                except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
-                    logging.error(err)
-                    return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
-                runtime_context.secret_store = SecretStore()

-
-                    #
-                    #
-                    #
-                    #
-
-
-
-                        tool,
-                        jobloader,
-                        sys.stdout,
-                        make_fs_access=runtime_context.make_fs_access,
-                        input_basedir=options.basedir,
-                        secret_store=runtime_context.secret_store,
-                        input_required=True,
+                # Define something we can call to import a file and get its file
+                # ID.
+                # We cast this because import_file is overloaded depending on if we
+                # pass a shared file name or not, and we know the way we call it we
+                # always get a FileID out.
+                file_import_function = cast(
+                    Callable[[str], FileID],
+                    functools.partial(toil.import_file, symlink=True),
                )
-                except SystemExit as e:
-                    if e.code == 2:  # raised by argparse's parse_args() function
-                        print(
-                            "\nIf both a CWL file and an input object (YAML/JSON) file were "
-                            "provided, this may be the argument order." + usage_message,
-                            file=sys.stderr,
-                        )
-                    raise
-
-                # Leave the defaults un-filled in the top-level order. The tool or
-                # workflow will fill them when it runs
-
-                for inp in tool.tool["inputs"]:
-                    if (
-                        shortname(inp["id"]) in initialized_job_order
-                        and inp["type"] == "File"
-                    ):
-                        cast(CWLObjectType, initialized_job_order[shortname(inp["id"])])[
-                            "streamable"
-                        ] = inp.get("streamable", False)
-                    # TODO also for nested types that contain streamable Files
-
-                runtime_context.use_container = not options.no_container
-                runtime_context.tmp_outdir_prefix = os.path.realpath(tmp_outdir_prefix)
-                runtime_context.job_script_provider = job_script_provider
-                runtime_context.force_docker_pull = options.force_docker_pull
-                runtime_context.no_match_user = options.no_match_user
-                runtime_context.no_read_only = options.no_read_only
-                runtime_context.basedir = options.basedir
-                if not options.bypass_file_store:
-                    # If we're using the file store we need to start moving output
-                    # files now.
-                    runtime_context.move_outputs = "move"
-
-                # We instantiate an early builder object here to populate indirect
-                # secondaryFile references using cwltool's library because we need
-                # to resolve them before toil imports them into the filestore.
-                # A second builder will be built in the job's run method when toil
-                # actually starts the cwl job.
-                # Note that this accesses input files for tools, so the
-                # ToilFsAccess needs to be set up if we want to be able to use
-                # URLs.
-                builder = tool._init_job(initialized_job_order, runtime_context)
-
-                # make sure this doesn't add listing items; if shallow_listing is
-                # selected, it will discover dirs one deep and then again later on
-                # (probably when the cwltool builder gets ahold of the job in the
-                # CWL job's run()), producing 2+ deep listings instead of only 1.
-                builder.loadListing = "no_listing"
-
-                builder.bind_input(
-                    tool.inputs_record_schema,
-                    initialized_job_order,
-                    discover_secondaryFiles=True,
-                )

-
-
-
-
-
-                file_import_function = cast(
-                    Callable[[str], FileID],
-                    functools.partial(toil.import_file, symlink=True),
-                )
-
-                # Import all the input files, some of which may be missing optional
-                # files.
-                logger.info("Importing input files...")
-                fs_access = ToilFsAccess(options.basedir)
-                import_files(
-                    file_import_function,
-                    fs_access,
-                    fileindex,
-                    existing,
-                    initialized_job_order,
-                    skip_broken=True,
-                    skip_remote=options.reference_inputs,
-                    bypass_file_store=options.bypass_file_store,
-                    log_level=logging.INFO,
-                )
-                # Import all the files associated with tools (binaries, etc.).
-                # Not sure why you would have an optional secondary file here, but
-                # the spec probably needs us to support them.
-                logger.info("Importing tool-associated files...")
-                visitSteps(
-                    tool,
-                    functools.partial(
-                        import_files,
+                # Import all the input files, some of which may be missing optional
+                # files.
+                logger.info("Importing input files...")
+                fs_access = ToilFsAccess(options.basedir)
+                import_files(
                    file_import_function,
                    fs_access,
                    fileindex,
                    existing,
-
+                    initialized_job_order,
+                    mark_broken=True,
                    skip_remote=options.reference_inputs,
                    bypass_file_store=options.bypass_file_store,
                    log_level=logging.INFO,
-                    ),
-                )
-
-                # We always expect to have processed all files that exist
-                for param_name, param_value in initialized_job_order.items():
-                    # Loop through all the parameters for the workflow overall.
-                    # Drop any files that aren't either imported (for when we use
-                    # the file store) or available on disk (for when we don't).
-                    # This will properly make them cause an error later if they
-                    # were required.
-                    rm_unprocessed_secondary_files(param_value)
-
-                logger.info("Creating root job")
-                logger.debug("Root tool: %s", tool)
-                try:
-                    wf1, _ = makeJob(
-                        tool=tool,
-                        jobobj={},
-                        runtime_context=runtime_context,
-                        parent_name=None,  # toplevel, no name needed
-                        conditional=None,
                )
-
-
-
-
-
-
-
-
-
-
-
-
-
+                # Import all the files associated with tools (binaries, etc.).
+                # Not sure why you would have an optional secondary file here, but
+                # the spec probably needs us to support them.
+                logger.info("Importing tool-associated files...")
+                visitSteps(
+                    tool,
+                    functools.partial(
+                        import_files,
+                        file_import_function,
+                        fs_access,
+                        fileindex,
+                        existing,
+                        mark_broken=True,
+                        skip_remote=options.reference_inputs,
+                        bypass_file_store=options.bypass_file_store,
+                        log_level=logging.INFO,
+                    ),
+                )
+
+                # We always expect to have processed all files that exist
+                for param_name, param_value in initialized_job_order.items():
+                    # Loop through all the parameters for the workflow overall.
+                    # Drop any files that aren't either imported (for when we use
+                    # the file store) or available on disk (for when we don't).
+                    # This will properly make them cause an error later if they
+                    # were required.
+                    rm_unprocessed_secondary_files(param_value)
+
+                logger.info("Creating root job")
+                logger.debug("Root tool: %s", tool)
+                try:
+                    wf1, _ = makeJob(
+                        tool=tool,
+                        jobobj={},
+                        runtime_context=runtime_context,
+                        parent_name=None,  # toplevel, no name needed
+                        conditional=None,
                    )
+                except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
+                    logging.error(err)
                    return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
-
-
-
-            # Now the workflow has completed. We need to make sure the outputs (and
-            # inputs) end up where the user wants them to be.
-            logger.info("Collecting workflow outputs...")
-            outobj = resolve_dict_w_promises(outobj)
-
-            # Stage files. Specify destination bucket if specified in CLI
-            # options. If destination bucket not passed in,
-            # options.destBucket's value will be None.
-            toilStageFiles(
-                toil,
-                outobj,
-                outdir,
-                destBucket=options.destBucket,
-                log_level=logging.INFO
-            )
-            logger.info("Stored workflow outputs")
+                wf1.cwljob = initialized_job_order
+                logger.info("Starting workflow")
+                outobj = toil.start(wf1)

-
-
-
-            )
+            # Now the workflow has completed. We need to make sure the outputs (and
+            # inputs) end up where the user wants them to be.
+            logger.info("Collecting workflow outputs...")
+            outobj = resolve_dict_w_promises(outobj)

-
-
-
-
-
-                    else:
-                        value = doc[key]
-                        if isinstance(value, MutableMapping):
-                            remove_at_id(value)
-                        if isinstance(value, MutableSequence):
-                            for entry in value:
-                                if isinstance(value, MutableMapping):
-                                    remove_at_id(entry)
-
-            remove_at_id(outobj)
-            visit_class(
+            # Stage files. Specify destination bucket if specified in CLI
+            # options. If destination bucket not passed in,
+            # options.destBucket's value will be None.
+            toilStageFiles(
+                toil,
                outobj,
-
-
-
-            if not document_loader:
-                raise RuntimeError("cwltool loader is not set.")
-            prov_dependencies = cwltool.main.prov_deps(
-                workflowobj, document_loader, uri
-            )
-            runtime_context.research_obj.generate_snapshot(prov_dependencies)
-            cwltool.cwlprov.writablebagfile.close_ro(
-                runtime_context.research_obj, options.provenance
+                outdir,
+                destBucket=options.destBucket,
+                log_level=logging.INFO
            )
+            logger.info("Stored workflow outputs")

-
-
-
-
-
-
-
+            if runtime_context.research_obj is not None:
+                cwltool.cwlprov.writablebagfile.create_job(
+                    runtime_context.research_obj, outobj, True
+                )
+
+                def remove_at_id(doc: Any) -> None:
+                    if isinstance(doc, MutableMapping):
+                        for key in list(doc.keys()):
+                            if key == "@id":
+                                del doc[key]
+                            else:
+                                value = doc[key]
+                                if isinstance(value, MutableMapping):
+                                    remove_at_id(value)
+                                if isinstance(value, MutableSequence):
+                                    for entry in value:
+                                        if isinstance(value, MutableMapping):
+                                            remove_at_id(entry)
+
+                remove_at_id(outobj)
+                visit_class(
+                    outobj,
+                    ("File",),
+                    functools.partial(add_sizes, runtime_context.make_fs_access("")),
+                )
+                if not document_loader:
+                    raise RuntimeError("cwltool loader is not set.")
+                prov_dependencies = cwltool.main.prov_deps(
+                    workflowobj, document_loader, uri
+                )
+                runtime_context.research_obj.generate_snapshot(prov_dependencies)
+                cwltool.cwlprov.writablebagfile.close_ro(
+                    runtime_context.research_obj, options.provenance
+                )
+
+            if not options.destBucket and options.compute_checksum:
+                logger.info("Computing output file checksums...")
+                visit_class(
+                    outobj,
+                    ("File",),
+                    functools.partial(compute_checksums, StdFsAccess("")),
+                )

-
-
-
-
+            visit_class(outobj, ("File",), MutationManager().unset_generation)
+            stdout.write(json.dumps(outobj, indent=4, default=str))
+            stdout.write("\n")
+            logger.info("CWL run complete!")
+    # Don't expose tracebacks to the user for exceptions that may be expected
+    except FailedJobsException as err:
+        if err.exit_code == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
+            # We figured out that we can't support this workflow.
+            logging.error(err)
+            logging.error(
+                "Your workflow uses a CWL requirement that Toil does not support!"
+            )
+            return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
+        else:
+            logging.error(err)
+            return 1
+    except (InsufficientSystemResources, LocatorException, InvalidImportExportUrlException, UnimplementedURLException,
+            JobTooBigError) as err:
+        logging.error(err)
+        return 1

     return 0
