toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +124 -86
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +39 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +651 -155
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +784 -397
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1137 -534
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +1031 -349
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +772 -412
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +204 -58
- toil/lib/aws/utils.py +290 -213
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/dockstore.py +379 -0
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -105
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/history.py +1271 -0
- toil/lib/history_submission.py +681 -0
- toil/lib/humanize.py +6 -2
- toil/lib/io.py +121 -12
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +83 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +125 -87
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/lib/trs.py +390 -0
- toil/lib/web.py +38 -0
- toil/options/common.py +850 -402
- toil/options/cwl.py +185 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +283 -180
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +84 -55
- toil/server/utils.py +56 -31
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +183 -65
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +265 -49
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +375 -72
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_history.py +212 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/lib/test_trs.py +161 -0
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +6 -6
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3528 -1053
- toil/worker.py +370 -149
- toil-8.1.0b1.dist-info/METADATA +178 -0
- toil-8.1.0b1.dist-info/RECORD +259 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Implemented support for Common Workflow Language (CWL) for Toil."""
|
|
2
|
+
|
|
2
3
|
# Copyright (C) 2015 Curoverse, Inc
|
|
3
4
|
# Copyright (C) 2015-2021 Regents of the University of California
|
|
4
5
|
# Copyright (C) 2019-2020 Seven Bridges
|
|
@@ -33,25 +34,26 @@ import stat
|
|
|
33
34
|
import sys
|
|
34
35
|
import textwrap
|
|
35
36
|
import uuid
|
|
37
|
+
from collections.abc import Iterator, Mapping, MutableMapping, MutableSequence
|
|
36
38
|
from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
|
|
37
39
|
from threading import Thread
|
|
38
|
-
from typing import (
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
40
|
+
from typing import (
|
|
41
|
+
IO,
|
|
42
|
+
Any,
|
|
43
|
+
Callable,
|
|
44
|
+
Iterator,
|
|
45
|
+
Mapping,
|
|
46
|
+
MutableMapping,
|
|
47
|
+
MutableSequence,
|
|
48
|
+
Optional,
|
|
49
|
+
TextIO,
|
|
50
|
+
Tuple,
|
|
51
|
+
TypeVar,
|
|
52
|
+
Union,
|
|
53
|
+
cast,
|
|
54
|
+
Literal,
|
|
55
|
+
Protocol,
|
|
56
|
+
)
|
|
55
57
|
from urllib.parse import quote, unquote, urlparse, urlsplit
|
|
56
58
|
|
|
57
59
|
import cwl_utils.errors
|
|
@@ -65,59 +67,87 @@ import cwltool.load_tool
|
|
|
65
67
|
import cwltool.main
|
|
66
68
|
import cwltool.resolver
|
|
67
69
|
import schema_salad.ref_resolver
|
|
70
|
+
|
|
71
|
+
# This is also in configargparse but MyPy doesn't know it
|
|
72
|
+
from argparse import RawDescriptionHelpFormatter
|
|
68
73
|
from configargparse import ArgParser, Namespace
|
|
69
74
|
from cwltool.loghandler import _logger as cwllogger
|
|
70
75
|
from cwltool.loghandler import defaultStreamHandler
|
|
71
76
|
from cwltool.mpi import MpiConfig
|
|
72
77
|
from cwltool.mutation import MutationManager
|
|
73
78
|
from cwltool.pathmapper import MapperEnt, PathMapper
|
|
74
|
-
from cwltool.process import (
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
+
from cwltool.process import (
|
|
80
|
+
Process,
|
|
81
|
+
add_sizes,
|
|
82
|
+
compute_checksums,
|
|
83
|
+
fill_in_defaults,
|
|
84
|
+
shortname,
|
|
85
|
+
)
|
|
79
86
|
from cwltool.secrets import SecretStore
|
|
80
|
-
from cwltool.
|
|
81
|
-
|
|
87
|
+
from cwltool.singularity import SingularityCommandLineJob
|
|
88
|
+
from cwltool.software_requirements import (
|
|
89
|
+
DependenciesConfiguration,
|
|
90
|
+
get_container_from_software_requirements,
|
|
91
|
+
)
|
|
82
92
|
from cwltool.stdfsaccess import StdFsAccess, abspath
|
|
83
|
-
from cwltool.utils import (
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
93
|
+
from cwltool.utils import (
|
|
94
|
+
CWLObjectType,
|
|
95
|
+
CWLOutputType,
|
|
96
|
+
DirectoryType,
|
|
97
|
+
adjustDirObjs,
|
|
98
|
+
aslist,
|
|
99
|
+
downloadHttpFile,
|
|
100
|
+
get_listing,
|
|
101
|
+
normalizeFilesDirs,
|
|
102
|
+
visit_class,
|
|
103
|
+
)
|
|
92
104
|
from ruamel.yaml.comments import CommentedMap, CommentedSeq
|
|
93
105
|
from schema_salad.avro.schema import Names
|
|
94
106
|
from schema_salad.exceptions import ValidationException
|
|
95
107
|
from schema_salad.ref_resolver import file_uri, uri_file_path
|
|
96
108
|
from schema_salad.sourceline import SourceLine
|
|
97
|
-
from typing_extensions import Literal
|
|
98
109
|
|
|
99
110
|
from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
|
|
100
111
|
from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
|
|
101
|
-
from toil.common import Toil, addOptions
|
|
112
|
+
from toil.common import Config, Toil, addOptions
|
|
102
113
|
from toil.cwl import check_cwltool_version
|
|
114
|
+
from toil.lib.trs import resolve_workflow
|
|
115
|
+
from toil.lib.misc import call_command
|
|
103
116
|
from toil.provisioners.clusterScaler import JobTooBigError
|
|
104
117
|
|
|
105
118
|
check_cwltool_version()
|
|
106
|
-
from toil.cwl.utils import (
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
119
|
+
from toil.cwl.utils import (
|
|
120
|
+
CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
|
|
121
|
+
CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
|
|
122
|
+
download_structure,
|
|
123
|
+
get_from_structure,
|
|
124
|
+
visit_cwl_class_and_reduce,
|
|
125
|
+
)
|
|
111
126
|
from toil.exceptions import FailedJobsException
|
|
112
127
|
from toil.fileStores import FileID
|
|
113
128
|
from toil.fileStores.abstractFileStore import AbstractFileStore
|
|
114
|
-
from toil.job import
|
|
115
|
-
|
|
116
|
-
|
|
129
|
+
from toil.job import (
|
|
130
|
+
AcceleratorRequirement,
|
|
131
|
+
Job,
|
|
132
|
+
Promise,
|
|
133
|
+
Promised,
|
|
134
|
+
unwrap,
|
|
135
|
+
ImportsJob,
|
|
136
|
+
get_file_sizes,
|
|
137
|
+
FileMetadata,
|
|
138
|
+
WorkerImportJob,
|
|
139
|
+
)
|
|
140
|
+
from toil.jobStores.abstractJobStore import (
|
|
141
|
+
AbstractJobStore,
|
|
142
|
+
NoSuchFileException,
|
|
143
|
+
InvalidImportExportUrlException,
|
|
144
|
+
LocatorException,
|
|
145
|
+
)
|
|
146
|
+
from toil.lib.exceptions import UnimplementedURLException
|
|
117
147
|
from toil.jobStores.fileJobStore import FileJobStore
|
|
118
148
|
from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
|
|
119
149
|
from toil.lib.io import mkdtemp
|
|
120
|
-
from toil.lib.threading import ExceptionalThread
|
|
150
|
+
from toil.lib.threading import ExceptionalThread, global_mutex
|
|
121
151
|
from toil.statsAndLogging import DEFAULT_LOGLEVEL
|
|
122
152
|
|
|
123
153
|
logger = logging.getLogger(__name__)
|
|
@@ -149,7 +179,7 @@ def cwltoil_was_removed() -> None:
|
|
|
149
179
|
# output object to the correct key of the input object.
|
|
150
180
|
|
|
151
181
|
|
|
152
|
-
class UnresolvedDict(
|
|
182
|
+
class UnresolvedDict(dict[Any, Any]):
|
|
153
183
|
"""Tag to indicate a dict contains promises that must be resolved."""
|
|
154
184
|
|
|
155
185
|
|
|
@@ -184,7 +214,7 @@ def filter_skip_null(name: str, value: Any) -> Any:
|
|
|
184
214
|
return value
|
|
185
215
|
|
|
186
216
|
|
|
187
|
-
def _filter_skip_null(value: Any, err_flag:
|
|
217
|
+
def _filter_skip_null(value: Any, err_flag: list[bool]) -> Any:
|
|
188
218
|
"""
|
|
189
219
|
Private implementation for recursively filtering out SkipNull objects from 'value'.
|
|
190
220
|
|
|
@@ -233,18 +263,50 @@ def ensure_no_collisions(
|
|
|
233
263
|
seen_names.add(wanted_name)
|
|
234
264
|
|
|
235
265
|
|
|
266
|
+
def try_prepull(
|
|
267
|
+
cwl_tool_uri: str, runtime_context: cwltool.context.RuntimeContext, batchsystem: str
|
|
268
|
+
) -> None:
|
|
269
|
+
"""
|
|
270
|
+
Try to prepull all containers in a CWL workflow with Singularity or Docker.
|
|
271
|
+
This will not prepull the default container specified on the command line.
|
|
272
|
+
:param cwl_tool_uri: CWL workflow URL. Fragments are accepted as well
|
|
273
|
+
:param runtime_context: runtime context of cwltool
|
|
274
|
+
:param batchsystem: type of Toil batchsystem
|
|
275
|
+
:return:
|
|
276
|
+
"""
|
|
277
|
+
if runtime_context.singularity:
|
|
278
|
+
if "CWL_SINGULARITY_CACHE" in os.environ:
|
|
279
|
+
logger.info("Prepulling the workflow's containers with Singularity...")
|
|
280
|
+
call_command(
|
|
281
|
+
[
|
|
282
|
+
"cwl-docker-extract",
|
|
283
|
+
"--singularity",
|
|
284
|
+
"--dir",
|
|
285
|
+
os.environ["CWL_SINGULARITY_CACHE"],
|
|
286
|
+
cwl_tool_uri,
|
|
287
|
+
]
|
|
288
|
+
)
|
|
289
|
+
elif not runtime_context.user_space_docker_cmd and not runtime_context.podman:
|
|
290
|
+
# For udocker and podman prefetching is unimplemented
|
|
291
|
+
# This is docker
|
|
292
|
+
if batchsystem == "single_machine":
|
|
293
|
+
# Only on single machine will the docker daemon be accessible by all workers and the leader
|
|
294
|
+
logger.info("Prepulling the workflow's containers with Docker...")
|
|
295
|
+
call_command(["cwl-docker-extract", cwl_tool_uri])
|
|
296
|
+
|
|
297
|
+
|
|
236
298
|
class Conditional:
|
|
237
299
|
"""
|
|
238
300
|
Object holding conditional expression until we are ready to evaluate it.
|
|
239
301
|
|
|
240
|
-
Evaluation occurs
|
|
302
|
+
Evaluation occurs before the enclosing step's inputs are type-checked.
|
|
241
303
|
"""
|
|
242
304
|
|
|
243
305
|
def __init__(
|
|
244
306
|
self,
|
|
245
307
|
expression: Optional[str] = None,
|
|
246
|
-
outputs: Union[
|
|
247
|
-
requirements: Optional[
|
|
308
|
+
outputs: Union[dict[str, CWLOutputType], None] = None,
|
|
309
|
+
requirements: Optional[list[CWLObjectType]] = None,
|
|
248
310
|
container_engine: str = "docker",
|
|
249
311
|
):
|
|
250
312
|
"""
|
|
@@ -289,7 +351,7 @@ class Conditional:
|
|
|
289
351
|
"'%s' evaluated to a non-boolean value" % self.expression
|
|
290
352
|
)
|
|
291
353
|
|
|
292
|
-
def skipped_outputs(self) ->
|
|
354
|
+
def skipped_outputs(self) -> dict[str, SkipNull]:
|
|
293
355
|
"""Generate a dict of SkipNull objects corresponding to the output structure."""
|
|
294
356
|
outobj = {}
|
|
295
357
|
|
|
@@ -309,14 +371,14 @@ class Conditional:
|
|
|
309
371
|
class ResolveSource:
|
|
310
372
|
"""Apply linkMerge and pickValue operators to values coming into a port."""
|
|
311
373
|
|
|
312
|
-
promise_tuples: Union[
|
|
374
|
+
promise_tuples: Union[list[tuple[str, Promise]], tuple[str, Promise]]
|
|
313
375
|
|
|
314
376
|
def __init__(
|
|
315
377
|
self,
|
|
316
378
|
name: str,
|
|
317
|
-
input:
|
|
379
|
+
input: dict[str, CWLObjectType],
|
|
318
380
|
source_key: str,
|
|
319
|
-
promises:
|
|
381
|
+
promises: dict[str, Job],
|
|
320
382
|
):
|
|
321
383
|
"""
|
|
322
384
|
Construct a container object.
|
|
@@ -375,7 +437,7 @@ class ResolveSource:
|
|
|
375
437
|
)
|
|
376
438
|
else:
|
|
377
439
|
name, rv = self.promise_tuples
|
|
378
|
-
result = cast(
|
|
440
|
+
result = cast(dict[str, Any], rv).get(name)
|
|
379
441
|
|
|
380
442
|
result = self.pick_value(result)
|
|
381
443
|
result = filter_skip_null(self.name, result)
|
|
@@ -383,7 +445,7 @@ class ResolveSource:
|
|
|
383
445
|
|
|
384
446
|
def link_merge(
|
|
385
447
|
self, values: CWLObjectType
|
|
386
|
-
) -> Union[
|
|
448
|
+
) -> Union[list[CWLOutputType], CWLOutputType]:
|
|
387
449
|
"""
|
|
388
450
|
Apply linkMerge operator to `values` object.
|
|
389
451
|
|
|
@@ -396,7 +458,7 @@ class ResolveSource:
|
|
|
396
458
|
return values
|
|
397
459
|
|
|
398
460
|
elif link_merge_type == "merge_flattened":
|
|
399
|
-
result:
|
|
461
|
+
result: list[CWLOutputType] = []
|
|
400
462
|
for v in values:
|
|
401
463
|
if isinstance(v, MutableSequence):
|
|
402
464
|
result.extend(v)
|
|
@@ -409,7 +471,7 @@ class ResolveSource:
|
|
|
409
471
|
f"Unsupported linkMerge '{link_merge_type}' on {self.name}."
|
|
410
472
|
)
|
|
411
473
|
|
|
412
|
-
def pick_value(self, values: Union[
|
|
474
|
+
def pick_value(self, values: Union[list[Union[str, SkipNull]], Any]) -> Any:
|
|
413
475
|
"""
|
|
414
476
|
Apply pickValue operator to `values` object.
|
|
415
477
|
|
|
@@ -477,7 +539,7 @@ class StepValueFrom:
|
|
|
477
539
|
"""
|
|
478
540
|
|
|
479
541
|
def __init__(
|
|
480
|
-
self, expr: str, source: Any, req:
|
|
542
|
+
self, expr: str, source: Any, req: list[CWLObjectType], container_engine: str
|
|
481
543
|
):
|
|
482
544
|
"""
|
|
483
545
|
Instantiate an object to carry all know about this valueFrom expression.
|
|
@@ -609,7 +671,7 @@ class JustAValue:
|
|
|
609
671
|
|
|
610
672
|
def resolve_dict_w_promises(
|
|
611
673
|
dict_w_promises: Union[
|
|
612
|
-
UnresolvedDict, CWLObjectType,
|
|
674
|
+
UnresolvedDict, CWLObjectType, dict[str, Union[str, StepValueFrom]]
|
|
613
675
|
],
|
|
614
676
|
file_store: Optional[AbstractFileStore] = None,
|
|
615
677
|
) -> CWLObjectType:
|
|
@@ -664,7 +726,7 @@ class ToilPathMapper(PathMapper):
|
|
|
664
726
|
|
|
665
727
|
def __init__(
|
|
666
728
|
self,
|
|
667
|
-
referenced_files:
|
|
729
|
+
referenced_files: list[CWLObjectType],
|
|
668
730
|
basedir: str,
|
|
669
731
|
stagedir: str,
|
|
670
732
|
separateDirs: bool = True,
|
|
@@ -779,19 +841,44 @@ class ToilPathMapper(PathMapper):
|
|
|
779
841
|
# TODO: why would we do that?
|
|
780
842
|
stagedir = cast(Optional[str], obj.get("dirname")) or stagedir
|
|
781
843
|
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
)
|
|
844
|
+
if obj["class"] not in ("File", "Directory"):
|
|
845
|
+
# We only handle files and directories; only they have locations.
|
|
846
|
+
return
|
|
847
|
+
|
|
848
|
+
location = cast(str, obj["location"])
|
|
849
|
+
if location in self:
|
|
850
|
+
# If we've already mapped this, map it consistently.
|
|
851
|
+
tgt = self._pathmap[location].target
|
|
852
|
+
logger.debug(
|
|
853
|
+
"ToilPathMapper re-using target %s for path %s",
|
|
854
|
+
tgt,
|
|
855
|
+
location,
|
|
856
|
+
)
|
|
857
|
+
else:
|
|
858
|
+
# Decide where to put the file or directory, as an absolute path.
|
|
859
|
+
tgt = os.path.join(
|
|
860
|
+
stagedir,
|
|
861
|
+
cast(str, obj["basename"]),
|
|
862
|
+
)
|
|
863
|
+
if self.reversemap(tgt) is not None:
|
|
864
|
+
# If the target already exists in the pathmap, but we haven't yet
|
|
865
|
+
# mapped this, it means we have a conflict.
|
|
866
|
+
i = 2
|
|
867
|
+
new_tgt = f"{tgt}_{i}"
|
|
868
|
+
while self.reversemap(new_tgt) is not None:
|
|
869
|
+
i += 1
|
|
870
|
+
new_tgt = f"{tgt}_{i}"
|
|
871
|
+
logger.debug(
|
|
872
|
+
"ToilPathMapper resolving mapping conflict: %s is now %s",
|
|
873
|
+
tgt,
|
|
874
|
+
new_tgt,
|
|
875
|
+
)
|
|
876
|
+
tgt = new_tgt
|
|
787
877
|
|
|
788
878
|
if obj["class"] == "Directory":
|
|
789
879
|
# Whether or not we've already mapped this path, we need to map all
|
|
790
880
|
# children recursively.
|
|
791
881
|
|
|
792
|
-
# Grab its location
|
|
793
|
-
location = cast(str, obj["location"])
|
|
794
|
-
|
|
795
882
|
logger.debug("ToilPathMapper visiting directory %s", location)
|
|
796
883
|
|
|
797
884
|
# We want to check the directory to make sure it is not
|
|
@@ -877,7 +964,7 @@ class ToilPathMapper(PathMapper):
|
|
|
877
964
|
|
|
878
965
|
# Keep recursing
|
|
879
966
|
self.visitlisting(
|
|
880
|
-
cast(
|
|
967
|
+
cast(list[CWLObjectType], obj.get("listing", [])),
|
|
881
968
|
tgt,
|
|
882
969
|
basedir,
|
|
883
970
|
copy=copy,
|
|
@@ -885,23 +972,21 @@ class ToilPathMapper(PathMapper):
|
|
|
885
972
|
)
|
|
886
973
|
|
|
887
974
|
elif obj["class"] == "File":
|
|
888
|
-
|
|
975
|
+
logger.debug("ToilPathMapper visiting file %s", location)
|
|
889
976
|
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
if path in self._pathmap:
|
|
977
|
+
if location in self._pathmap:
|
|
893
978
|
# Don't map the same file twice
|
|
894
979
|
logger.debug(
|
|
895
980
|
"ToilPathMapper stopping recursion because we have already "
|
|
896
981
|
"mapped file: %s",
|
|
897
|
-
|
|
982
|
+
location,
|
|
898
983
|
)
|
|
899
984
|
return
|
|
900
985
|
|
|
901
|
-
ab = abspath(
|
|
902
|
-
if "contents" in obj and
|
|
986
|
+
ab = abspath(location, basedir)
|
|
987
|
+
if "contents" in obj and location.startswith("_:"):
|
|
903
988
|
# We are supposed to create this file
|
|
904
|
-
self._pathmap[
|
|
989
|
+
self._pathmap[location] = MapperEnt(
|
|
905
990
|
cast(str, obj["contents"]),
|
|
906
991
|
tgt,
|
|
907
992
|
"CreateWritableFile" if copy else "CreateFile",
|
|
@@ -919,14 +1004,16 @@ class ToilPathMapper(PathMapper):
|
|
|
919
1004
|
# URI for a local file it downloaded.
|
|
920
1005
|
if self.get_file:
|
|
921
1006
|
deref = self.get_file(
|
|
922
|
-
|
|
1007
|
+
location,
|
|
1008
|
+
obj.get("streamable", False),
|
|
1009
|
+
self.streaming_allowed,
|
|
923
1010
|
)
|
|
924
1011
|
else:
|
|
925
1012
|
deref = ab
|
|
926
1013
|
if deref.startswith("file:"):
|
|
927
1014
|
deref = schema_salad.ref_resolver.uri_file_path(deref)
|
|
928
1015
|
if urlsplit(deref).scheme in ["http", "https"]:
|
|
929
|
-
deref = downloadHttpFile(
|
|
1016
|
+
deref = downloadHttpFile(location)
|
|
930
1017
|
elif urlsplit(deref).scheme != "toilfile":
|
|
931
1018
|
# Dereference symbolic links
|
|
932
1019
|
st = os.lstat(deref)
|
|
@@ -944,42 +1031,18 @@ class ToilPathMapper(PathMapper):
|
|
|
944
1031
|
# reference, we just pass that along.
|
|
945
1032
|
|
|
946
1033
|
"""Link or copy files to their targets. Create them as needed."""
|
|
947
|
-
targets: Dict[str, str] = {}
|
|
948
|
-
for _, value in self._pathmap.items():
|
|
949
|
-
# If the target already exists in the pathmap, it means we have a conflict. But we didn't change tgt to reflect new name.
|
|
950
|
-
if value.target == tgt: # Conflict detected in the pathmap
|
|
951
|
-
i = 2
|
|
952
|
-
new_tgt = f"{tgt}_{i}"
|
|
953
|
-
while new_tgt in targets:
|
|
954
|
-
i += 1
|
|
955
|
-
new_tgt = f"{tgt}_{i}"
|
|
956
|
-
targets[new_tgt] = new_tgt
|
|
957
|
-
|
|
958
|
-
for _, value_conflict in targets.items():
|
|
959
|
-
logger.debug(
|
|
960
|
-
"ToilPathMapper adding file mapping for conflict %s -> %s",
|
|
961
|
-
deref,
|
|
962
|
-
value_conflict,
|
|
963
|
-
)
|
|
964
|
-
self._pathmap[path] = MapperEnt(
|
|
965
|
-
deref,
|
|
966
|
-
value_conflict,
|
|
967
|
-
"WritableFile" if copy else "File",
|
|
968
|
-
staged,
|
|
969
|
-
)
|
|
970
|
-
# No conflicts detected so we can write out the original name.
|
|
971
|
-
if not targets:
|
|
972
|
-
logger.debug(
|
|
973
|
-
"ToilPathMapper adding file mapping %s -> %s", deref, tgt
|
|
974
|
-
)
|
|
975
1034
|
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
1035
|
+
logger.debug(
|
|
1036
|
+
"ToilPathMapper adding file mapping %s -> %s", deref, tgt
|
|
1037
|
+
)
|
|
1038
|
+
|
|
1039
|
+
self._pathmap[location] = MapperEnt(
|
|
1040
|
+
deref, tgt, "WritableFile" if copy else "File", staged
|
|
1041
|
+
)
|
|
979
1042
|
|
|
980
1043
|
# Handle all secondary files that need to be next to this one.
|
|
981
1044
|
self.visitlisting(
|
|
982
|
-
cast(
|
|
1045
|
+
cast(list[CWLObjectType], obj.get("secondaryFiles", [])),
|
|
983
1046
|
stagedir,
|
|
984
1047
|
basedir,
|
|
985
1048
|
copy=copy,
|
|
@@ -1005,6 +1068,32 @@ class ToilSingleJobExecutor(cwltool.executors.SingleJobExecutor):
|
|
|
1005
1068
|
) -> None:
|
|
1006
1069
|
"""run_jobs from SingleJobExecutor, but not in a top level runtime context."""
|
|
1007
1070
|
runtime_context.toplevel = False
|
|
1071
|
+
if isinstance(
|
|
1072
|
+
process, cwltool.command_line_tool.CommandLineTool
|
|
1073
|
+
) and isinstance(
|
|
1074
|
+
process.make_job_runner(runtime_context), SingularityCommandLineJob
|
|
1075
|
+
):
|
|
1076
|
+
# Set defaults for singularity cache environment variables, similar to what we do in wdltoil
|
|
1077
|
+
# Use the same place as the default singularity cache directory
|
|
1078
|
+
singularity_cache = os.path.join(os.path.expanduser("~"), ".singularity")
|
|
1079
|
+
os.environ["SINGULARITY_CACHEDIR"] = os.environ.get(
|
|
1080
|
+
"SINGULARITY_CACHEDIR", singularity_cache
|
|
1081
|
+
)
|
|
1082
|
+
|
|
1083
|
+
# If singularity is detected, prepull the image to ensure locking
|
|
1084
|
+
(docker_req, docker_is_req) = process.get_requirement(
|
|
1085
|
+
feature="DockerRequirement"
|
|
1086
|
+
)
|
|
1087
|
+
with global_mutex(
|
|
1088
|
+
os.environ["SINGULARITY_CACHEDIR"], "toil_singularity_cache_mutex"
|
|
1089
|
+
):
|
|
1090
|
+
SingularityCommandLineJob.get_image(
|
|
1091
|
+
dockerRequirement=cast(dict[str, str], docker_req),
|
|
1092
|
+
pull_image=runtime_context.pull_image,
|
|
1093
|
+
force_pull=runtime_context.force_docker_pull,
|
|
1094
|
+
tmp_outdir_prefix=runtime_context.tmp_outdir_prefix,
|
|
1095
|
+
)
|
|
1096
|
+
|
|
1008
1097
|
return super().run_jobs(process, job_order_object, logger, runtime_context)
|
|
1009
1098
|
|
|
1010
1099
|
|
|
@@ -1019,7 +1108,7 @@ class ToilTool:
|
|
|
1019
1108
|
# Reserve a spot for the Toil job that ends up executing this tool.
|
|
1020
1109
|
self._toil_job: Optional[Job] = None
|
|
1021
1110
|
# Remember path mappers we have used so we can interrogate them later to find out what the job mapped.
|
|
1022
|
-
self._path_mappers:
|
|
1111
|
+
self._path_mappers: list[cwltool.pathmapper.PathMapper] = []
|
|
1023
1112
|
|
|
1024
1113
|
def connect_toil_job(self, job: Job) -> None:
|
|
1025
1114
|
"""
|
|
@@ -1031,7 +1120,7 @@ class ToilTool:
|
|
|
1031
1120
|
|
|
1032
1121
|
def make_path_mapper(
|
|
1033
1122
|
self,
|
|
1034
|
-
reffiles:
|
|
1123
|
+
reffiles: list[Any],
|
|
1035
1124
|
stagedir: str,
|
|
1036
1125
|
runtimeContext: cwltool.context.RuntimeContext,
|
|
1037
1126
|
separateDirs: bool,
|
|
@@ -1089,13 +1178,15 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
|
|
|
1089
1178
|
# Make a table of all the places we mapped files to when downloading the inputs.
|
|
1090
1179
|
|
|
1091
1180
|
# We want to hint which host paths and container (if any) paths correspond
|
|
1092
|
-
host_and_job_paths:
|
|
1181
|
+
host_and_job_paths: list[tuple[str, str]] = []
|
|
1093
1182
|
|
|
1094
1183
|
for pm in self._path_mappers:
|
|
1095
1184
|
for _, mapper_entry in pm.items_exclude_children():
|
|
1096
1185
|
# We know that mapper_entry.target as seen by the task is
|
|
1097
1186
|
# mapper_entry.resolved on the host.
|
|
1098
|
-
host_and_job_paths.append(
|
|
1187
|
+
host_and_job_paths.append(
|
|
1188
|
+
(mapper_entry.resolved, mapper_entry.target)
|
|
1189
|
+
)
|
|
1099
1190
|
|
|
1100
1191
|
# Notice that we have downloaded our inputs. Explain which files
|
|
1101
1192
|
# those are here and what the task will expect to call them.
|
|
@@ -1123,11 +1214,11 @@ def toil_make_tool(
|
|
|
1123
1214
|
return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)
|
|
1124
1215
|
|
|
1125
1216
|
|
|
1126
|
-
# When a file we want to have is missing, we can give it this
|
|
1217
|
+
# When a file we want to have is missing, we can give it this sentinel location
|
|
1127
1218
|
# URI instead of raising an error right away, in case it is optional.
|
|
1128
1219
|
MISSING_FILE = "missing://"
|
|
1129
1220
|
|
|
1130
|
-
DirectoryContents =
|
|
1221
|
+
DirectoryContents = dict[str, Union[str, "DirectoryContents"]]
|
|
1131
1222
|
|
|
1132
1223
|
|
|
1133
1224
|
def check_directory_dict_invariants(contents: DirectoryContents) -> None:
|
|
@@ -1149,7 +1240,7 @@ def check_directory_dict_invariants(contents: DirectoryContents) -> None:
|
|
|
1149
1240
|
|
|
1150
1241
|
def decode_directory(
|
|
1151
1242
|
dir_path: str,
|
|
1152
|
-
) ->
|
|
1243
|
+
) -> tuple[DirectoryContents, Optional[str], str]:
|
|
1153
1244
|
"""
|
|
1154
1245
|
Decode a directory from a "toildir:" path to a directory (or a file in it).
|
|
1155
1246
|
|
|
@@ -1224,7 +1315,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1224
1315
|
# they know what will happen.
|
|
1225
1316
|
# Also maps files and directories from external URLs to downloaded
|
|
1226
1317
|
# locations.
|
|
1227
|
-
self.dir_to_download:
|
|
1318
|
+
self.dir_to_download: dict[str, str] = {}
|
|
1228
1319
|
|
|
1229
1320
|
super().__init__(basedir)
|
|
1230
1321
|
|
|
@@ -1347,14 +1438,16 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1347
1438
|
destination = super()._abs(destination)
|
|
1348
1439
|
return destination
|
|
1349
1440
|
|
|
1350
|
-
def glob(self, pattern: str) ->
|
|
1441
|
+
def glob(self, pattern: str) -> list[str]:
|
|
1351
1442
|
parse = urlparse(pattern)
|
|
1352
1443
|
if parse.scheme == "file":
|
|
1353
1444
|
pattern = os.path.abspath(unquote(parse.path))
|
|
1354
1445
|
elif parse.scheme == "":
|
|
1355
1446
|
pattern = os.path.abspath(pattern)
|
|
1356
1447
|
else:
|
|
1357
|
-
raise RuntimeError(
|
|
1448
|
+
raise RuntimeError(
|
|
1449
|
+
f"Cannot efficiently support globbing on {parse.scheme} URIs"
|
|
1450
|
+
)
|
|
1358
1451
|
|
|
1359
1452
|
# Actually do the glob
|
|
1360
1453
|
return [schema_salad.ref_resolver.file_uri(f) for f in glob.glob(pattern)]
|
|
@@ -1391,12 +1484,12 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1391
1484
|
else:
|
|
1392
1485
|
# This should be supported by a job store.
|
|
1393
1486
|
byte_stream = AbstractJobStore.open_url(fn)
|
|
1394
|
-
if
|
|
1487
|
+
if "b" in mode:
|
|
1395
1488
|
# Pass stream along in binary
|
|
1396
1489
|
return byte_stream
|
|
1397
1490
|
else:
|
|
1398
1491
|
# Wrap it in a text decoder
|
|
1399
|
-
return io.TextIOWrapper(byte_stream, encoding=
|
|
1492
|
+
return io.TextIOWrapper(byte_stream, encoding="utf-8")
|
|
1400
1493
|
|
|
1401
1494
|
def exists(self, path: str) -> bool:
|
|
1402
1495
|
"""Test for file existence."""
|
|
@@ -1503,7 +1596,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1503
1596
|
logger.debug("AbstractJobStore said: %s", status)
|
|
1504
1597
|
return status
|
|
1505
1598
|
|
|
1506
|
-
def listdir(self, fn: str) ->
|
|
1599
|
+
def listdir(self, fn: str) -> list[str]:
|
|
1507
1600
|
# This needs to return full URLs for everything in the directory.
|
|
1508
1601
|
# URLs are not allowed to end in '/', even for subdirectories.
|
|
1509
1602
|
logger.debug("ToilFsAccess listing %s", fn)
|
|
@@ -1524,7 +1617,9 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1524
1617
|
if got is None:
|
|
1525
1618
|
raise RuntimeError(f"Cannot list nonexistent directory: {fn}")
|
|
1526
1619
|
if isinstance(got, str):
|
|
1527
|
-
raise RuntimeError(
|
|
1620
|
+
raise RuntimeError(
|
|
1621
|
+
f"Cannot list file or dubdirectory of a file: {fn}"
|
|
1622
|
+
)
|
|
1528
1623
|
here = got
|
|
1529
1624
|
# List all the things in here and make full URIs to them
|
|
1530
1625
|
return [os.path.join(fn, k) for k in here.keys()]
|
|
@@ -1534,7 +1629,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1534
1629
|
for entry in AbstractJobStore.list_url(fn)
|
|
1535
1630
|
]
|
|
1536
1631
|
|
|
1537
|
-
def join(self, path, *paths
|
|
1632
|
+
def join(self, path: str, *paths: str) -> str:
|
|
1538
1633
|
# This falls back on os.path.join
|
|
1539
1634
|
return super().join(path, *paths)
|
|
1540
1635
|
|
|
@@ -1547,12 +1642,12 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1547
1642
|
|
|
1548
1643
|
def toil_get_file(
|
|
1549
1644
|
file_store: AbstractFileStore,
|
|
1550
|
-
index:
|
|
1551
|
-
existing:
|
|
1645
|
+
index: dict[str, str],
|
|
1646
|
+
existing: dict[str, str],
|
|
1552
1647
|
uri: str,
|
|
1553
1648
|
streamable: bool = False,
|
|
1554
1649
|
streaming_allowed: bool = True,
|
|
1555
|
-
pipe_threads: Optional[
|
|
1650
|
+
pipe_threads: Optional[list[tuple[Thread, int]]] = None,
|
|
1556
1651
|
) -> str:
|
|
1557
1652
|
"""
|
|
1558
1653
|
Set up the given file or directory from the Toil jobstore at a file URI
|
|
@@ -1653,9 +1748,7 @@ def toil_get_file(
|
|
|
1653
1748
|
and streamable
|
|
1654
1749
|
and not isinstance(file_store.jobStore, FileJobStore)
|
|
1655
1750
|
):
|
|
1656
|
-
logger.debug(
|
|
1657
|
-
"Streaming file %s", uri
|
|
1658
|
-
)
|
|
1751
|
+
logger.debug("Streaming file %s", uri)
|
|
1659
1752
|
src_path = file_store.getLocalTempFileName()
|
|
1660
1753
|
os.mkfifo(src_path)
|
|
1661
1754
|
th = ExceptionalThread(
|
|
@@ -1677,34 +1770,35 @@ def toil_get_file(
|
|
|
1677
1770
|
if uri.startswith("toilfile:"):
|
|
1678
1771
|
# Download from the file store
|
|
1679
1772
|
file_store_id = FileID.unpack(uri[len("toilfile:") :])
|
|
1680
|
-
src_path = file_store.readGlobalFile(
|
|
1681
|
-
file_store_id, symlink=True
|
|
1682
|
-
)
|
|
1773
|
+
src_path = file_store.readGlobalFile(file_store_id, symlink=True)
|
|
1683
1774
|
else:
|
|
1684
1775
|
# Download from the URI via the job store.
|
|
1685
1776
|
|
|
1686
1777
|
# Figure out where it goes.
|
|
1687
1778
|
src_path = file_store.getLocalTempFileName()
|
|
1688
1779
|
# Open that path exclusively to make sure we created it
|
|
1689
|
-
with open(src_path,
|
|
1780
|
+
with open(src_path, "xb") as fh:
|
|
1690
1781
|
# Download into the file
|
|
1691
|
-
|
|
1692
|
-
|
|
1693
|
-
|
|
1694
|
-
|
|
1782
|
+
size, executable = AbstractJobStore.read_from_url(uri, fh)
|
|
1783
|
+
if executable:
|
|
1784
|
+
# Set the execute bit in the file's permissions
|
|
1785
|
+
os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
|
|
1695
1786
|
|
|
1696
1787
|
index[src_path] = uri
|
|
1697
1788
|
existing[uri] = src_path
|
|
1698
1789
|
return schema_salad.ref_resolver.file_uri(src_path)
|
|
1699
1790
|
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1791
|
+
|
|
1792
|
+
def convert_file_uri_to_toil_uri(
|
|
1793
|
+
applyFunc: Callable[[str], FileID],
|
|
1794
|
+
index: dict[str, str],
|
|
1795
|
+
existing: dict[str, str],
|
|
1704
1796
|
file_uri: str,
|
|
1705
1797
|
) -> str:
|
|
1706
1798
|
"""
|
|
1707
|
-
|
|
1799
|
+
Given a file URI, convert it to a toil file URI. Uses applyFunc to handle the conversion.
|
|
1800
|
+
|
|
1801
|
+
Runs once on every unique file URI.
|
|
1708
1802
|
|
|
1709
1803
|
'existing' is a set of files retrieved as inputs from toil_get_file. This
|
|
1710
1804
|
ensures they are mapped back as the same name if passed through.
|
|
@@ -1718,15 +1812,14 @@ def write_file(
|
|
|
1718
1812
|
# with unsupportedRequirement when retrieving later with getFile
|
|
1719
1813
|
elif file_uri.startswith("_:"):
|
|
1720
1814
|
return file_uri
|
|
1815
|
+
elif file_uri.startswith(MISSING_FILE):
|
|
1816
|
+
# We cannot import a missing file
|
|
1817
|
+
raise FileNotFoundError(f"Could not find {file_uri[len(MISSING_FILE):]}")
|
|
1721
1818
|
else:
|
|
1722
1819
|
file_uri = existing.get(file_uri, file_uri)
|
|
1723
1820
|
if file_uri not in index:
|
|
1724
|
-
if not urlparse(file_uri).scheme:
|
|
1725
|
-
rp = os.path.realpath(file_uri)
|
|
1726
|
-
else:
|
|
1727
|
-
rp = file_uri
|
|
1728
1821
|
try:
|
|
1729
|
-
index[file_uri] = "toilfile:" +
|
|
1822
|
+
index[file_uri] = "toilfile:" + applyFunc(file_uri).pack()
|
|
1730
1823
|
existing[index[file_uri]] = file_uri
|
|
1731
1824
|
except Exception as e:
|
|
1732
1825
|
logger.error("Got exception '%s' while copying '%s'", e, file_uri)
|
|
@@ -1745,17 +1838,93 @@ def path_to_loc(obj: CWLObjectType) -> None:
|
|
|
1745
1838
|
del obj["path"]
|
|
1746
1839
|
|
|
1747
1840
|
|
|
1748
|
-
def
|
|
1749
|
-
|
|
1841
|
+
def extract_file_uri_once(
|
|
1842
|
+
fileindex: dict[str, str],
|
|
1843
|
+
existing: dict[str, str],
|
|
1844
|
+
file_metadata: CWLObjectType,
|
|
1845
|
+
mark_broken: bool = False,
|
|
1846
|
+
skip_remote: bool = False,
|
|
1847
|
+
) -> Optional[str]:
|
|
1848
|
+
"""
|
|
1849
|
+
Extract the filename from a CWL file record.
|
|
1850
|
+
|
|
1851
|
+
This function matches the predefined function signature in visit_files, which ensures
|
|
1852
|
+
that this function is called on all files inside a CWL object.
|
|
1853
|
+
|
|
1854
|
+
Ensures no duplicate files are returned according to fileindex. If a file has not been resolved already (and had file:// prepended)
|
|
1855
|
+
then resolve symlinks.
|
|
1856
|
+
:param fileindex: Forward mapping of filename
|
|
1857
|
+
:param existing: Reverse mapping of filename. This function does not use this
|
|
1858
|
+
:param file_metadata: CWL file record
|
|
1859
|
+
:param mark_broken: Whether files should be marked as missing
|
|
1860
|
+
:param skip_remote: Whether to skip remote files
|
|
1861
|
+
:return:
|
|
1862
|
+
"""
|
|
1863
|
+
location = cast(str, file_metadata["location"])
|
|
1864
|
+
if (
|
|
1865
|
+
location.startswith("toilfile:")
|
|
1866
|
+
or location.startswith("toildir:")
|
|
1867
|
+
or location.startswith("_:")
|
|
1868
|
+
):
|
|
1869
|
+
return None
|
|
1870
|
+
if location in fileindex:
|
|
1871
|
+
file_metadata["location"] = fileindex[location]
|
|
1872
|
+
return None
|
|
1873
|
+
if not location and file_metadata["path"]:
|
|
1874
|
+
file_metadata["location"] = location = schema_salad.ref_resolver.file_uri(
|
|
1875
|
+
cast(str, file_metadata["path"])
|
|
1876
|
+
)
|
|
1877
|
+
if location.startswith("file://") and not os.path.isfile(
|
|
1878
|
+
schema_salad.ref_resolver.uri_file_path(location)
|
|
1879
|
+
):
|
|
1880
|
+
if mark_broken:
|
|
1881
|
+
logger.debug("File %s is missing", file_metadata)
|
|
1882
|
+
file_metadata["location"] = location = MISSING_FILE + location
|
|
1883
|
+
else:
|
|
1884
|
+
raise cwl_utils.errors.WorkflowException(
|
|
1885
|
+
"File is missing: %s" % file_metadata
|
|
1886
|
+
)
|
|
1887
|
+
if location.startswith("file://") or not skip_remote:
|
|
1888
|
+
# This is a local file or a remote file
|
|
1889
|
+
if location not in fileindex:
|
|
1890
|
+
# These dictionaries are meant to keep track of what we're going to import
|
|
1891
|
+
# In the actual import, this is used as a bidirectional mapping from unvirtualized to virtualized
|
|
1892
|
+
# For this case, keep track of the files to prevent returning duplicate files
|
|
1893
|
+
# see write_file
|
|
1894
|
+
|
|
1895
|
+
# If there is not a scheme, this file has not been resolved yet or is a URL.
|
|
1896
|
+
if not urlparse(location).scheme:
|
|
1897
|
+
rp = os.path.realpath(location)
|
|
1898
|
+
else:
|
|
1899
|
+
rp = location
|
|
1900
|
+
return rp
|
|
1901
|
+
return None
|
|
1902
|
+
|
|
1903
|
+
|
|
1904
|
+
V = TypeVar("V", covariant=True)
|
|
1905
|
+
|
|
1906
|
+
|
|
1907
|
+
class VisitFunc(Protocol[V]):
|
|
1908
|
+
def __call__(
|
|
1909
|
+
self,
|
|
1910
|
+
fileindex: dict[str, str],
|
|
1911
|
+
existing: dict[str, str],
|
|
1912
|
+
file_metadata: CWLObjectType,
|
|
1913
|
+
mark_broken: bool,
|
|
1914
|
+
skip_remote: bool,
|
|
1915
|
+
) -> V: ...
|
|
1916
|
+
|
|
1917
|
+
|
|
1918
|
+
def visit_files(
|
|
1919
|
+
func: VisitFunc[V],
|
|
1750
1920
|
fs_access: StdFsAccess,
|
|
1751
|
-
fileindex:
|
|
1752
|
-
existing:
|
|
1921
|
+
fileindex: dict[str, str],
|
|
1922
|
+
existing: dict[str, str],
|
|
1753
1923
|
cwl_object: Optional[CWLObjectType],
|
|
1754
1924
|
mark_broken: bool = False,
|
|
1755
1925
|
skip_remote: bool = False,
|
|
1756
1926
|
bypass_file_store: bool = False,
|
|
1757
|
-
|
|
1758
|
-
) -> None:
|
|
1927
|
+
) -> list[V]:
|
|
1759
1928
|
"""
|
|
1760
1929
|
Prepare all files and directories.
|
|
1761
1930
|
|
|
@@ -1801,18 +1970,12 @@ def import_files(
|
|
|
1801
1970
|
|
|
1802
1971
|
:param log_level: Log imported files at the given level.
|
|
1803
1972
|
"""
|
|
1973
|
+
func_return: list[Any] = list()
|
|
1804
1974
|
tool_id = cwl_object.get("id", str(cwl_object)) if cwl_object else ""
|
|
1805
1975
|
|
|
1806
1976
|
logger.debug("Importing files for %s", tool_id)
|
|
1807
1977
|
logger.debug("Importing files in %s", cwl_object)
|
|
1808
1978
|
|
|
1809
|
-
def import_and_log(url: str) -> FileID:
|
|
1810
|
-
"""
|
|
1811
|
-
Upload a file and log that we are doing so.
|
|
1812
|
-
"""
|
|
1813
|
-
logger.log(log_level, "Loading %s...", url)
|
|
1814
|
-
return import_function(url)
|
|
1815
|
-
|
|
1816
1979
|
# We need to upload all files to the Toil filestore, and encode structure
|
|
1817
1980
|
# recursively into all Directories' locations. But we cannot safely alter
|
|
1818
1981
|
# the listing fields of Directory objects, because the handling required by
|
|
@@ -1830,13 +1993,13 @@ def import_files(
|
|
|
1830
1993
|
if bypass_file_store:
|
|
1831
1994
|
# Don't go on to actually import files or encode contents for
|
|
1832
1995
|
# directories.
|
|
1833
|
-
return
|
|
1996
|
+
return func_return
|
|
1834
1997
|
|
|
1835
1998
|
# Otherwise we actually want to put the things in the file store.
|
|
1836
1999
|
|
|
1837
2000
|
def visit_file_or_directory_down(
|
|
1838
2001
|
rec: CWLObjectType,
|
|
1839
|
-
) -> Optional[
|
|
2002
|
+
) -> Optional[list[CWLObjectType]]:
|
|
1840
2003
|
"""
|
|
1841
2004
|
Visit each CWL File or Directory on the way down.
|
|
1842
2005
|
|
|
@@ -1863,7 +2026,7 @@ def import_files(
|
|
|
1863
2026
|
ensure_no_collisions(cast(DirectoryType, rec))
|
|
1864
2027
|
|
|
1865
2028
|
# Pull out the old listing, if any
|
|
1866
|
-
old_listing = cast(Optional[
|
|
2029
|
+
old_listing = cast(Optional[list[CWLObjectType]], rec.get("listing", None))
|
|
1867
2030
|
|
|
1868
2031
|
if not cast(str, rec["location"]).startswith("_:"):
|
|
1869
2032
|
# This is a thing we can list and not just a literal, so we
|
|
@@ -1885,8 +2048,8 @@ def import_files(
|
|
|
1885
2048
|
|
|
1886
2049
|
def visit_file_or_directory_up(
|
|
1887
2050
|
rec: CWLObjectType,
|
|
1888
|
-
down_result: Optional[
|
|
1889
|
-
child_results:
|
|
2051
|
+
down_result: Optional[list[CWLObjectType]],
|
|
2052
|
+
child_results: list[DirectoryContents],
|
|
1890
2053
|
) -> DirectoryContents:
|
|
1891
2054
|
"""
|
|
1892
2055
|
For a CWL File or Directory, make sure it is uploaded and it has a
|
|
@@ -1908,10 +2071,15 @@ def import_files(
|
|
|
1908
2071
|
# This is a CWL File
|
|
1909
2072
|
|
|
1910
2073
|
result: DirectoryContents = {}
|
|
1911
|
-
|
|
1912
|
-
|
|
1913
|
-
|
|
1914
|
-
|
|
2074
|
+
# Run a function on the file and store the return
|
|
2075
|
+
func_return.append(
|
|
2076
|
+
func(
|
|
2077
|
+
fileindex,
|
|
2078
|
+
existing,
|
|
2079
|
+
rec,
|
|
2080
|
+
mark_broken=mark_broken,
|
|
2081
|
+
skip_remote=skip_remote,
|
|
2082
|
+
)
|
|
1915
2083
|
)
|
|
1916
2084
|
|
|
1917
2085
|
# Make a record for this file under its name
|
|
@@ -1955,6 +2123,7 @@ def import_files(
|
|
|
1955
2123
|
visit_file_or_directory_down,
|
|
1956
2124
|
visit_file_or_directory_up,
|
|
1957
2125
|
)
|
|
2126
|
+
return func_return
|
|
1958
2127
|
|
|
1959
2128
|
|
|
1960
2129
|
def upload_directory(
|
|
@@ -2013,52 +2182,34 @@ def upload_directory(
|
|
|
2013
2182
|
directory_metadata["location"] = encode_directory(directory_contents)
|
|
2014
2183
|
|
|
2015
2184
|
|
|
2016
|
-
def
|
|
2017
|
-
|
|
2018
|
-
fileindex:
|
|
2019
|
-
existing:
|
|
2185
|
+
def extract_and_convert_file_to_toil_uri(
|
|
2186
|
+
convertfunc: Callable[[str], FileID],
|
|
2187
|
+
fileindex: dict[str, str],
|
|
2188
|
+
existing: dict[str, str],
|
|
2020
2189
|
file_metadata: CWLObjectType,
|
|
2021
2190
|
mark_broken: bool = False,
|
|
2022
|
-
skip_remote: bool = False
|
|
2191
|
+
skip_remote: bool = False,
|
|
2023
2192
|
) -> None:
|
|
2024
2193
|
"""
|
|
2025
|
-
|
|
2194
|
+
Extract the file URI out of a file object and convert it to a Toil URI.
|
|
2195
|
+
|
|
2196
|
+
Runs convertfunc on the file URI to handle conversion.
|
|
2026
2197
|
|
|
2027
|
-
|
|
2028
|
-
reference to the toil file store.
|
|
2198
|
+
Is used to handle importing files into the jobstore.
|
|
2029
2199
|
|
|
2030
2200
|
If a file doesn't exist, fails with an error, unless mark_broken is set, in
|
|
2031
2201
|
which case the missing file is given a special sentinel location.
|
|
2032
2202
|
|
|
2033
|
-
Unless skip_remote is set,
|
|
2034
|
-
|
|
2203
|
+
Unless skip_remote is set, also run on remote files and sets their locations
|
|
2204
|
+
to toil URIs as well.
|
|
2035
2205
|
"""
|
|
2036
|
-
location =
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
return
|
|
2043
|
-
if location in fileindex:
|
|
2044
|
-
file_metadata["location"] = fileindex[location]
|
|
2045
|
-
return
|
|
2046
|
-
if not location and file_metadata["path"]:
|
|
2047
|
-
file_metadata["location"] = location = schema_salad.ref_resolver.file_uri(
|
|
2048
|
-
cast(str, file_metadata["path"])
|
|
2206
|
+
location = extract_file_uri_once(
|
|
2207
|
+
fileindex, existing, file_metadata, mark_broken, skip_remote
|
|
2208
|
+
)
|
|
2209
|
+
if location is not None:
|
|
2210
|
+
file_metadata["location"] = convert_file_uri_to_toil_uri(
|
|
2211
|
+
convertfunc, fileindex, existing, location
|
|
2049
2212
|
)
|
|
2050
|
-
if location.startswith("file://") and not os.path.isfile(
|
|
2051
|
-
schema_salad.ref_resolver.uri_file_path(location)
|
|
2052
|
-
):
|
|
2053
|
-
if mark_broken:
|
|
2054
|
-
logger.debug("File %s is missing", file_metadata)
|
|
2055
|
-
file_metadata["location"] = location = MISSING_FILE
|
|
2056
|
-
else:
|
|
2057
|
-
raise cwl_utils.errors.WorkflowException("File is missing: %s" % file_metadata)
|
|
2058
|
-
|
|
2059
|
-
if location.startswith("file://") or not skip_remote:
|
|
2060
|
-
# This is a local file, or we also need to download and re-upload remote files
|
|
2061
|
-
file_metadata["location"] = write_file(uploadfunc, fileindex, existing, location)
|
|
2062
2213
|
|
|
2063
2214
|
logger.debug("Sending file at: %s", file_metadata["location"])
|
|
2064
2215
|
|
|
@@ -2071,7 +2222,7 @@ def writeGlobalFileWrapper(file_store: AbstractFileStore, fileuri: str) -> FileI
|
|
|
2071
2222
|
|
|
2072
2223
|
def remove_empty_listings(rec: CWLObjectType) -> None:
|
|
2073
2224
|
if rec.get("class") != "Directory":
|
|
2074
|
-
finddirs
|
|
2225
|
+
finddirs: list[CWLObjectType] = []
|
|
2075
2226
|
visit_class(rec, ("Directory",), finddirs.append)
|
|
2076
2227
|
for f in finddirs:
|
|
2077
2228
|
remove_empty_listings(f)
|
|
@@ -2091,7 +2242,7 @@ class CWLNamedJob(Job):
|
|
|
2091
2242
|
cores: Union[float, None] = 1,
|
|
2092
2243
|
memory: Union[int, str, None] = "1GiB",
|
|
2093
2244
|
disk: Union[int, str, None] = "1MiB",
|
|
2094
|
-
accelerators: Optional[
|
|
2245
|
+
accelerators: Optional[list[AcceleratorRequirement]] = None,
|
|
2095
2246
|
preemptible: Optional[bool] = None,
|
|
2096
2247
|
tool_id: Optional[str] = None,
|
|
2097
2248
|
parent_name: Optional[str] = None,
|
|
@@ -2166,10 +2317,10 @@ class ResolveIndirect(CWLNamedJob):
|
|
|
2166
2317
|
|
|
2167
2318
|
def toilStageFiles(
|
|
2168
2319
|
toil: Toil,
|
|
2169
|
-
cwljob: Union[CWLObjectType,
|
|
2320
|
+
cwljob: Union[CWLObjectType, list[CWLObjectType]],
|
|
2170
2321
|
outdir: str,
|
|
2171
2322
|
destBucket: Union[str, None] = None,
|
|
2172
|
-
log_level: int = logging.DEBUG
|
|
2323
|
+
log_level: int = logging.DEBUG,
|
|
2173
2324
|
) -> None:
|
|
2174
2325
|
"""
|
|
2175
2326
|
Copy input files out of the global file store and update location and path.
|
|
@@ -2181,7 +2332,7 @@ def toilStageFiles(
|
|
|
2181
2332
|
"""
|
|
2182
2333
|
|
|
2183
2334
|
def _collectDirEntries(
|
|
2184
|
-
obj: Union[CWLObjectType,
|
|
2335
|
+
obj: Union[CWLObjectType, list[CWLObjectType]]
|
|
2185
2336
|
) -> Iterator[CWLObjectType]:
|
|
2186
2337
|
if isinstance(obj, dict):
|
|
2187
2338
|
if obj.get("class") in ("File", "Directory"):
|
|
@@ -2263,13 +2414,17 @@ def toilStageFiles(
|
|
|
2263
2414
|
# TODO: Use direct S3 to S3 copy on exports as well
|
|
2264
2415
|
file_id_or_contents = (
|
|
2265
2416
|
"toilfile:"
|
|
2266
|
-
+ toil.import_file(
|
|
2417
|
+
+ toil.import_file(
|
|
2418
|
+
file_id_or_contents, symlink=False
|
|
2419
|
+
).pack()
|
|
2267
2420
|
)
|
|
2268
2421
|
|
|
2269
2422
|
if file_id_or_contents.startswith("toilfile:"):
|
|
2270
2423
|
# This is something we can export
|
|
2271
2424
|
# TODO: Do we need to urlencode the parts before sending them to S3?
|
|
2272
|
-
dest_url = "/".join(
|
|
2425
|
+
dest_url = "/".join(
|
|
2426
|
+
s.strip("/") for s in [destBucket, baseName]
|
|
2427
|
+
)
|
|
2273
2428
|
logger.log(log_level, "Saving %s...", dest_url)
|
|
2274
2429
|
toil.export_file(
|
|
2275
2430
|
FileID.unpack(file_id_or_contents[len("toilfile:") :]),
|
|
@@ -2291,7 +2446,12 @@ def toilStageFiles(
|
|
|
2291
2446
|
# Probably staging and bypassing file store. Just copy.
|
|
2292
2447
|
logger.log(log_level, "Saving %s...", dest_url)
|
|
2293
2448
|
os.makedirs(os.path.dirname(p.target), exist_ok=True)
|
|
2294
|
-
|
|
2449
|
+
try:
|
|
2450
|
+
shutil.copyfile(p.resolved, p.target)
|
|
2451
|
+
except shutil.SameFileError:
|
|
2452
|
+
# If outdir isn't set and we're passing through an input file/directory as the output,
|
|
2453
|
+
# the file doesn't need to be copied because it is already there
|
|
2454
|
+
pass
|
|
2295
2455
|
else:
|
|
2296
2456
|
uri = p.resolved
|
|
2297
2457
|
if not uri.startswith("toilfile:"):
|
|
@@ -2364,26 +2524,31 @@ class CWLJobWrapper(CWLNamedJob):
|
|
|
2364
2524
|
subjob_name="_wrapper",
|
|
2365
2525
|
local=True,
|
|
2366
2526
|
)
|
|
2367
|
-
self.cwltool =
|
|
2527
|
+
self.cwltool = tool
|
|
2368
2528
|
self.cwljob = cwljob
|
|
2369
2529
|
self.runtime_context = runtime_context
|
|
2370
|
-
self.conditional = conditional
|
|
2530
|
+
self.conditional = conditional or Conditional()
|
|
2371
2531
|
self.parent_name = parent_name
|
|
2372
2532
|
|
|
2373
2533
|
def run(self, file_store: AbstractFileStore) -> Any:
|
|
2374
2534
|
"""Create a child job with the correct resource requirements set."""
|
|
2375
2535
|
cwljob = resolve_dict_w_promises(self.cwljob, file_store)
|
|
2536
|
+
|
|
2537
|
+
# Check confitional to license full evaluation of job inputs.
|
|
2538
|
+
if self.conditional.is_false(cwljob):
|
|
2539
|
+
return self.conditional.skipped_outputs()
|
|
2540
|
+
|
|
2376
2541
|
fill_in_defaults(
|
|
2377
2542
|
self.cwltool.tool["inputs"],
|
|
2378
2543
|
cwljob,
|
|
2379
2544
|
self.runtime_context.make_fs_access(self.runtime_context.basedir or ""),
|
|
2380
2545
|
)
|
|
2546
|
+
# Don't forward the conditional. We checked it already.
|
|
2381
2547
|
realjob = CWLJob(
|
|
2382
2548
|
tool=self.cwltool,
|
|
2383
2549
|
cwljob=cwljob,
|
|
2384
2550
|
runtime_context=self.runtime_context,
|
|
2385
2551
|
parent_name=self.parent_name,
|
|
2386
|
-
conditional=self.conditional,
|
|
2387
2552
|
)
|
|
2388
2553
|
self.addChild(realjob)
|
|
2389
2554
|
return realjob.rv()
|
|
@@ -2401,7 +2566,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2401
2566
|
conditional: Union[Conditional, None] = None,
|
|
2402
2567
|
):
|
|
2403
2568
|
"""Store the context for later execution."""
|
|
2404
|
-
self.cwltool =
|
|
2569
|
+
self.cwltool = tool
|
|
2405
2570
|
self.conditional = conditional or Conditional()
|
|
2406
2571
|
|
|
2407
2572
|
if runtime_context.builder:
|
|
@@ -2418,7 +2583,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2418
2583
|
resources={},
|
|
2419
2584
|
mutation_manager=runtime_context.mutation_manager,
|
|
2420
2585
|
formatgraph=tool.formatgraph,
|
|
2421
|
-
make_fs_access=cast(
|
|
2586
|
+
make_fs_access=cast(type[StdFsAccess], runtime_context.make_fs_access),
|
|
2422
2587
|
fs_access=runtime_context.make_fs_access(""),
|
|
2423
2588
|
job_script_provider=runtime_context.job_script_provider,
|
|
2424
2589
|
timeout=runtime_context.eval_timeout,
|
|
@@ -2435,7 +2600,21 @@ class CWLJob(CWLNamedJob):
|
|
|
2435
2600
|
|
|
2436
2601
|
req = tool.evalResources(self.builder, runtime_context)
|
|
2437
2602
|
|
|
2438
|
-
|
|
2603
|
+
tool_own_resources = tool.get_requirement("ResourceRequirement")[0] or {}
|
|
2604
|
+
if "ramMin" in tool_own_resources or "ramMax" in tool_own_resources:
|
|
2605
|
+
# The tool is actually asking for memory.
|
|
2606
|
+
memory = int(req["ram"] * (2**20))
|
|
2607
|
+
else:
|
|
2608
|
+
# The tool is getting a default ram allocation.
|
|
2609
|
+
if getattr(runtime_context, "cwl_default_ram"):
|
|
2610
|
+
# We will respect the CWL spec and apply the default cwltool
|
|
2611
|
+
# computed, which might be different than Toil's default.
|
|
2612
|
+
memory = int(req["ram"] * (2**20))
|
|
2613
|
+
else:
|
|
2614
|
+
# We use a None requirement and the Toil default applies.
|
|
2615
|
+
memory = None
|
|
2616
|
+
|
|
2617
|
+
accelerators: Optional[list[AcceleratorRequirement]] = None
|
|
2439
2618
|
if req.get("cudaDeviceCount", 0) > 0:
|
|
2440
2619
|
# There's a CUDARequirement, which cwltool processed for us
|
|
2441
2620
|
# TODO: How is cwltool deciding what value to use between min and max?
|
|
@@ -2499,7 +2678,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2499
2678
|
|
|
2500
2679
|
super().__init__(
|
|
2501
2680
|
cores=req["cores"],
|
|
2502
|
-
memory=
|
|
2681
|
+
memory=memory,
|
|
2503
2682
|
disk=int(total_disk),
|
|
2504
2683
|
accelerators=accelerators,
|
|
2505
2684
|
preemptible=preemptible,
|
|
@@ -2513,7 +2692,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2513
2692
|
self.step_inputs = self.cwltool.tool["inputs"]
|
|
2514
2693
|
self.workdir: str = runtime_context.workdir # type: ignore[attr-defined]
|
|
2515
2694
|
|
|
2516
|
-
def required_env_vars(self, cwljob: Any) -> Iterator[
|
|
2695
|
+
def required_env_vars(self, cwljob: Any) -> Iterator[tuple[str, str]]:
|
|
2517
2696
|
"""Yield environment variables from EnvVarRequirement."""
|
|
2518
2697
|
if isinstance(cwljob, dict):
|
|
2519
2698
|
if cwljob.get("class") == "EnvVarRequirement":
|
|
@@ -2525,7 +2704,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2525
2704
|
for env_var in cwljob:
|
|
2526
2705
|
yield from self.required_env_vars(env_var)
|
|
2527
2706
|
|
|
2528
|
-
def populate_env_vars(self, cwljob: CWLObjectType) ->
|
|
2707
|
+
def populate_env_vars(self, cwljob: CWLObjectType) -> dict[str, str]:
|
|
2529
2708
|
"""
|
|
2530
2709
|
Prepare environment variables necessary at runtime for the job.
|
|
2531
2710
|
|
|
@@ -2541,9 +2720,9 @@ class CWLJob(CWLNamedJob):
|
|
|
2541
2720
|
required_env_vars = {}
|
|
2542
2721
|
# iterate over EnvVarRequirement env vars, if any
|
|
2543
2722
|
for k, v in self.required_env_vars(cwljob):
|
|
2544
|
-
required_env_vars[
|
|
2545
|
-
|
|
2546
|
-
|
|
2723
|
+
required_env_vars[k] = (
|
|
2724
|
+
v # will tell cwltool which env vars to take from the environment
|
|
2725
|
+
)
|
|
2547
2726
|
os.environ[k] = v
|
|
2548
2727
|
# needs to actually be populated in the environment as well or
|
|
2549
2728
|
# they're not used
|
|
@@ -2553,7 +2732,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2553
2732
|
# env var with the same name is found
|
|
2554
2733
|
for req in self.cwltool.requirements:
|
|
2555
2734
|
if req["class"] == "EnvVarRequirement":
|
|
2556
|
-
envDefs = cast(
|
|
2735
|
+
envDefs = cast(list[dict[str, str]], req["envDef"])
|
|
2557
2736
|
for env_def in envDefs:
|
|
2558
2737
|
env_name = env_def["envName"]
|
|
2559
2738
|
if env_name in required_env_vars:
|
|
@@ -2585,7 +2764,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2585
2764
|
for inp_id in immobile_cwljob_dict.keys():
|
|
2586
2765
|
found = False
|
|
2587
2766
|
for field in cast(
|
|
2588
|
-
|
|
2767
|
+
list[dict[str, str]], self.cwltool.inputs_record_schema["fields"]
|
|
2589
2768
|
):
|
|
2590
2769
|
if field["name"] == inp_id:
|
|
2591
2770
|
found = True
|
|
@@ -2600,8 +2779,8 @@ class CWLJob(CWLNamedJob):
|
|
|
2600
2779
|
functools.partial(remove_empty_listings),
|
|
2601
2780
|
)
|
|
2602
2781
|
|
|
2603
|
-
index:
|
|
2604
|
-
existing:
|
|
2782
|
+
index: dict[str, str] = {}
|
|
2783
|
+
existing: dict[str, str] = {}
|
|
2605
2784
|
|
|
2606
2785
|
# Prepare the run instructions for cwltool
|
|
2607
2786
|
runtime_context = self.runtime_context.copy()
|
|
@@ -2613,7 +2792,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2613
2792
|
# will come and grab this function for fetching files from the Toil
|
|
2614
2793
|
# file store. pipe_threads is used for keeping track of separate
|
|
2615
2794
|
# threads launched to stream files around.
|
|
2616
|
-
pipe_threads:
|
|
2795
|
+
pipe_threads: list[tuple[Thread, int]] = []
|
|
2617
2796
|
setattr(
|
|
2618
2797
|
runtime_context,
|
|
2619
2798
|
"toil_get_file",
|
|
@@ -2647,7 +2826,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2647
2826
|
# function and a path_mapper type or factory function.
|
|
2648
2827
|
|
|
2649
2828
|
runtime_context.make_fs_access = cast(
|
|
2650
|
-
|
|
2829
|
+
type[StdFsAccess],
|
|
2651
2830
|
functools.partial(ToilFsAccess, file_store=file_store),
|
|
2652
2831
|
)
|
|
2653
2832
|
|
|
@@ -2660,9 +2839,13 @@ class CWLJob(CWLNamedJob):
|
|
|
2660
2839
|
# Collect standard output and standard error somewhere if they don't go to files.
|
|
2661
2840
|
# We need to keep two FDs to these because cwltool will close what we give it.
|
|
2662
2841
|
default_stdout = TemporaryFile()
|
|
2663
|
-
runtime_context.default_stdout = os.fdopen(
|
|
2842
|
+
runtime_context.default_stdout = os.fdopen(
|
|
2843
|
+
os.dup(default_stdout.fileno()), "wb"
|
|
2844
|
+
)
|
|
2664
2845
|
default_stderr = TemporaryFile()
|
|
2665
|
-
runtime_context.default_stderr = os.fdopen(
|
|
2846
|
+
runtime_context.default_stderr = os.fdopen(
|
|
2847
|
+
os.dup(default_stderr.fileno()), "wb"
|
|
2848
|
+
)
|
|
2666
2849
|
|
|
2667
2850
|
process_uuid = uuid.uuid4() # noqa F841
|
|
2668
2851
|
started_at = datetime.datetime.now() # noqa F841
|
|
@@ -2693,17 +2876,27 @@ class CWLJob(CWLNamedJob):
|
|
|
2693
2876
|
default_stdout.seek(0, os.SEEK_END)
|
|
2694
2877
|
if default_stdout.tell() > 0:
|
|
2695
2878
|
default_stdout.seek(0)
|
|
2696
|
-
file_store.log_user_stream(
|
|
2879
|
+
file_store.log_user_stream(
|
|
2880
|
+
self.description.unitName + ".stdout", default_stdout
|
|
2881
|
+
)
|
|
2697
2882
|
if status != "success":
|
|
2698
2883
|
default_stdout.seek(0)
|
|
2699
|
-
logger.error(
|
|
2884
|
+
logger.error(
|
|
2885
|
+
"Failed command standard output:\n%s",
|
|
2886
|
+
default_stdout.read().decode("utf-8", errors="replace"),
|
|
2887
|
+
)
|
|
2700
2888
|
default_stderr.seek(0, os.SEEK_END)
|
|
2701
2889
|
if default_stderr.tell():
|
|
2702
2890
|
default_stderr.seek(0)
|
|
2703
|
-
file_store.log_user_stream(
|
|
2891
|
+
file_store.log_user_stream(
|
|
2892
|
+
self.description.unitName + ".stderr", default_stderr
|
|
2893
|
+
)
|
|
2704
2894
|
if status != "success":
|
|
2705
2895
|
default_stderr.seek(0)
|
|
2706
|
-
logger.error(
|
|
2896
|
+
logger.error(
|
|
2897
|
+
"Failed command standard error:\n%s",
|
|
2898
|
+
default_stderr.read().decode("utf-8", errors="replace"),
|
|
2899
|
+
)
|
|
2707
2900
|
|
|
2708
2901
|
if status != "success":
|
|
2709
2902
|
raise cwl_utils.errors.WorkflowException(status)
|
|
@@ -2716,12 +2909,18 @@ class CWLJob(CWLNamedJob):
|
|
|
2716
2909
|
fs_access = runtime_context.make_fs_access(runtime_context.basedir)
|
|
2717
2910
|
|
|
2718
2911
|
# And a file importer that can go from a file:// URI to a Toil FileID
|
|
2719
|
-
file_import_function =
|
|
2912
|
+
def file_import_function(url: str, log_level: int = logging.DEBUG) -> FileID:
|
|
2913
|
+
logger.log(log_level, "Loading %s...", url)
|
|
2914
|
+
return writeGlobalFileWrapper(file_store, url)
|
|
2915
|
+
|
|
2916
|
+
file_upload_function = functools.partial(
|
|
2917
|
+
extract_and_convert_file_to_toil_uri, file_import_function
|
|
2918
|
+
)
|
|
2720
2919
|
|
|
2721
2920
|
# Upload all the Files and set their and the Directories' locations, if
|
|
2722
2921
|
# needed.
|
|
2723
|
-
|
|
2724
|
-
|
|
2922
|
+
visit_files(
|
|
2923
|
+
file_upload_function,
|
|
2725
2924
|
fs_access,
|
|
2726
2925
|
index,
|
|
2727
2926
|
existing,
|
|
@@ -2751,6 +2950,74 @@ def get_container_engine(runtime_context: cwltool.context.RuntimeContext) -> str
|
|
|
2751
2950
|
return "docker"
|
|
2752
2951
|
|
|
2753
2952
|
|
|
2953
|
+
def makeRootJob(
|
|
2954
|
+
tool: Process,
|
|
2955
|
+
jobobj: CWLObjectType,
|
|
2956
|
+
runtime_context: cwltool.context.RuntimeContext,
|
|
2957
|
+
initialized_job_order: CWLObjectType,
|
|
2958
|
+
options: Namespace,
|
|
2959
|
+
toil: Toil,
|
|
2960
|
+
) -> CWLNamedJob:
|
|
2961
|
+
"""
|
|
2962
|
+
Create the Toil root Job object for the CWL tool. Is the same as makeJob() except this also handles import logic.
|
|
2963
|
+
|
|
2964
|
+
Actually creates what might be a subgraph of two jobs. The second of which may be the follow on of the first.
|
|
2965
|
+
If only one job is created, it is returned twice.
|
|
2966
|
+
|
|
2967
|
+
:return:
|
|
2968
|
+
"""
|
|
2969
|
+
if options.run_imports_on_workers:
|
|
2970
|
+
filenames = extract_workflow_inputs(options, initialized_job_order, tool)
|
|
2971
|
+
metadata = get_file_sizes(
|
|
2972
|
+
filenames, toil._jobStore, include_remote_files=options.reference_inputs
|
|
2973
|
+
)
|
|
2974
|
+
|
|
2975
|
+
# Mapping of files to metadata for files that will be imported on the worker
|
|
2976
|
+
# This will consist of files that we were able to get a file size for
|
|
2977
|
+
worker_metadata: dict[str, FileMetadata] = dict()
|
|
2978
|
+
# Mapping of files to metadata for files that will be imported on the leader
|
|
2979
|
+
# This will consist of files that we were not able to get a file size for
|
|
2980
|
+
leader_metadata = dict()
|
|
2981
|
+
for filename, file_data in metadata.items():
|
|
2982
|
+
if file_data.size is None:
|
|
2983
|
+
leader_metadata[filename] = file_data
|
|
2984
|
+
else:
|
|
2985
|
+
worker_metadata[filename] = file_data
|
|
2986
|
+
|
|
2987
|
+
# import the files for the leader first
|
|
2988
|
+
path_to_fileid = WorkerImportJob.import_files(
|
|
2989
|
+
list(leader_metadata.keys()), toil._jobStore
|
|
2990
|
+
)
|
|
2991
|
+
|
|
2992
|
+
# then install the imported files before importing the other files
|
|
2993
|
+
# this way the control flow can fall from the leader to workers
|
|
2994
|
+
tool, initialized_job_order = CWLInstallImportsJob.fill_in_files(
|
|
2995
|
+
initialized_job_order,
|
|
2996
|
+
tool,
|
|
2997
|
+
path_to_fileid,
|
|
2998
|
+
options.basedir,
|
|
2999
|
+
options.reference_inputs,
|
|
3000
|
+
options.bypass_file_store,
|
|
3001
|
+
)
|
|
3002
|
+
|
|
3003
|
+
import_job = CWLImportWrapper(
|
|
3004
|
+
initialized_job_order, tool, runtime_context, worker_metadata, options
|
|
3005
|
+
)
|
|
3006
|
+
return import_job
|
|
3007
|
+
else:
|
|
3008
|
+
import_workflow_inputs(
|
|
3009
|
+
toil._jobStore,
|
|
3010
|
+
options,
|
|
3011
|
+
initialized_job_order=initialized_job_order,
|
|
3012
|
+
tool=tool,
|
|
3013
|
+
)
|
|
3014
|
+
root_job, followOn = makeJob(
|
|
3015
|
+
tool, jobobj, runtime_context, None, None
|
|
3016
|
+
) # toplevel, no name needed
|
|
3017
|
+
root_job.cwljob = initialized_job_order
|
|
3018
|
+
return root_job
|
|
3019
|
+
|
|
3020
|
+
|
|
2754
3021
|
def makeJob(
|
|
2755
3022
|
tool: Process,
|
|
2756
3023
|
jobobj: CWLObjectType,
|
|
@@ -2758,13 +3025,16 @@ def makeJob(
|
|
|
2758
3025
|
parent_name: Optional[str],
|
|
2759
3026
|
conditional: Union[Conditional, None],
|
|
2760
3027
|
) -> Union[
|
|
2761
|
-
|
|
2762
|
-
|
|
2763
|
-
|
|
3028
|
+
tuple["CWLWorkflow", ResolveIndirect],
|
|
3029
|
+
tuple[CWLJob, CWLJob],
|
|
3030
|
+
tuple[CWLJobWrapper, CWLJobWrapper],
|
|
2764
3031
|
]:
|
|
2765
3032
|
"""
|
|
2766
3033
|
Create the correct Toil Job object for the CWL tool.
|
|
2767
3034
|
|
|
3035
|
+
Actually creates what might be a subgraph of two jobs. The second of which may be the follow on of the first.
|
|
3036
|
+
If only one job is created, it is returned twice.
|
|
3037
|
+
|
|
2768
3038
|
Types: workflow, job, or job wrapper for dynamic resource requirements.
|
|
2769
3039
|
|
|
2770
3040
|
:return: "wfjob, followOn" if the input tool is a workflow, and "job, job" otherwise
|
|
@@ -2844,16 +3114,16 @@ class CWLScatter(Job):
|
|
|
2844
3114
|
def flat_crossproduct_scatter(
|
|
2845
3115
|
self,
|
|
2846
3116
|
joborder: CWLObjectType,
|
|
2847
|
-
scatter_keys:
|
|
2848
|
-
outputs:
|
|
3117
|
+
scatter_keys: list[str],
|
|
3118
|
+
outputs: list[Promised[CWLObjectType]],
|
|
2849
3119
|
postScatterEval: Callable[[CWLObjectType], CWLObjectType],
|
|
2850
3120
|
) -> None:
|
|
2851
3121
|
"""Cartesian product of the inputs, then flattened."""
|
|
2852
3122
|
scatter_key = shortname(scatter_keys[0])
|
|
2853
|
-
for n in range(0, len(cast(
|
|
3123
|
+
for n in range(0, len(cast(list[CWLObjectType], joborder[scatter_key]))):
|
|
2854
3124
|
updated_joborder = copy.copy(joborder)
|
|
2855
3125
|
updated_joborder[scatter_key] = cast(
|
|
2856
|
-
|
|
3126
|
+
list[CWLObjectType], joborder[scatter_key]
|
|
2857
3127
|
)[n]
|
|
2858
3128
|
if len(scatter_keys) == 1:
|
|
2859
3129
|
updated_joborder = postScatterEval(updated_joborder)
|
|
@@ -2874,16 +3144,16 @@ class CWLScatter(Job):
|
|
|
2874
3144
|
def nested_crossproduct_scatter(
|
|
2875
3145
|
self,
|
|
2876
3146
|
joborder: CWLObjectType,
|
|
2877
|
-
scatter_keys:
|
|
3147
|
+
scatter_keys: list[str],
|
|
2878
3148
|
postScatterEval: Callable[[CWLObjectType], CWLObjectType],
|
|
2879
|
-
) ->
|
|
3149
|
+
) -> list[Promised[CWLObjectType]]:
|
|
2880
3150
|
"""Cartesian product of the inputs."""
|
|
2881
3151
|
scatter_key = shortname(scatter_keys[0])
|
|
2882
|
-
outputs:
|
|
2883
|
-
for n in range(0, len(cast(
|
|
3152
|
+
outputs: list[Promised[CWLObjectType]] = []
|
|
3153
|
+
for n in range(0, len(cast(list[CWLObjectType], joborder[scatter_key]))):
|
|
2884
3154
|
updated_joborder = copy.copy(joborder)
|
|
2885
3155
|
updated_joborder[scatter_key] = cast(
|
|
2886
|
-
|
|
3156
|
+
list[CWLObjectType], joborder[scatter_key]
|
|
2887
3157
|
)[n]
|
|
2888
3158
|
if len(scatter_keys) == 1:
|
|
2889
3159
|
updated_joborder = postScatterEval(updated_joborder)
|
|
@@ -2904,7 +3174,7 @@ class CWLScatter(Job):
|
|
|
2904
3174
|
)
|
|
2905
3175
|
return outputs
|
|
2906
3176
|
|
|
2907
|
-
def run(self, file_store: AbstractFileStore) ->
|
|
3177
|
+
def run(self, file_store: AbstractFileStore) -> list[Promised[CWLObjectType]]:
|
|
2908
3178
|
"""Generate the follow on scatter jobs."""
|
|
2909
3179
|
cwljob = resolve_dict_w_promises(self.cwljob, file_store)
|
|
2910
3180
|
|
|
@@ -2916,7 +3186,7 @@ class CWLScatter(Job):
|
|
|
2916
3186
|
scatterMethod = self.step.tool.get("scatterMethod", None)
|
|
2917
3187
|
if len(scatter) == 1:
|
|
2918
3188
|
scatterMethod = "dotproduct"
|
|
2919
|
-
outputs:
|
|
3189
|
+
outputs: list[Promised[CWLObjectType]] = []
|
|
2920
3190
|
|
|
2921
3191
|
valueFrom = {
|
|
2922
3192
|
shortname(i["id"]): i["valueFrom"]
|
|
@@ -2948,11 +3218,11 @@ class CWLScatter(Job):
|
|
|
2948
3218
|
|
|
2949
3219
|
if scatterMethod == "dotproduct":
|
|
2950
3220
|
for i in range(
|
|
2951
|
-
0, len(cast(
|
|
3221
|
+
0, len(cast(list[CWLObjectType], cwljob[shortname(scatter[0])]))
|
|
2952
3222
|
):
|
|
2953
3223
|
copyjob = copy.copy(cwljob)
|
|
2954
3224
|
for sc in [shortname(x) for x in scatter]:
|
|
2955
|
-
copyjob[sc] = cast(
|
|
3225
|
+
copyjob[sc] = cast(list[CWLObjectType], cwljob[sc])[i]
|
|
2956
3226
|
copyjob = postScatterEval(copyjob)
|
|
2957
3227
|
subjob, follow_on = makeJob(
|
|
2958
3228
|
tool=self.step.embedded_tool,
|
|
@@ -2991,7 +3261,7 @@ class CWLGather(Job):
|
|
|
2991
3261
|
def __init__(
|
|
2992
3262
|
self,
|
|
2993
3263
|
step: cwltool.workflow.WorkflowStep,
|
|
2994
|
-
outputs: Promised[Union[CWLObjectType,
|
|
3264
|
+
outputs: Promised[Union[CWLObjectType, list[CWLObjectType]]],
|
|
2995
3265
|
):
|
|
2996
3266
|
"""Collect our context for later gathering."""
|
|
2997
3267
|
super().__init__(cores=1, memory="1GiB", disk="1MiB", local=True)
|
|
@@ -3000,24 +3270,24 @@ class CWLGather(Job):
|
|
|
3000
3270
|
|
|
3001
3271
|
@staticmethod
|
|
3002
3272
|
def extract(
|
|
3003
|
-
obj: Union[CWLObjectType,
|
|
3004
|
-
) -> Union[CWLOutputType,
|
|
3273
|
+
obj: Union[CWLObjectType, list[CWLObjectType]], k: str
|
|
3274
|
+
) -> Union[CWLOutputType, list[CWLObjectType]]:
|
|
3005
3275
|
"""
|
|
3006
3276
|
Extract the given key from the obj.
|
|
3007
3277
|
|
|
3008
3278
|
If the object is a list, extract it from all members of the list.
|
|
3009
3279
|
"""
|
|
3010
3280
|
if isinstance(obj, Mapping):
|
|
3011
|
-
return cast(Union[CWLOutputType,
|
|
3281
|
+
return cast(Union[CWLOutputType, list[CWLObjectType]], obj.get(k))
|
|
3012
3282
|
elif isinstance(obj, MutableSequence):
|
|
3013
|
-
cp:
|
|
3283
|
+
cp: list[CWLObjectType] = []
|
|
3014
3284
|
for item in obj:
|
|
3015
3285
|
cp.append(cast(CWLObjectType, CWLGather.extract(item, k)))
|
|
3016
3286
|
return cp
|
|
3017
3287
|
else:
|
|
3018
|
-
return cast(
|
|
3288
|
+
return cast(list[CWLObjectType], [])
|
|
3019
3289
|
|
|
3020
|
-
def run(self, file_store: AbstractFileStore) ->
|
|
3290
|
+
def run(self, file_store: AbstractFileStore) -> dict[str, Any]:
|
|
3021
3291
|
"""Gather all the outputs of the scatter."""
|
|
3022
3292
|
outobj = {}
|
|
3023
3293
|
|
|
@@ -3028,8 +3298,8 @@ class CWLGather(Job):
|
|
|
3028
3298
|
return shortname(n)
|
|
3029
3299
|
|
|
3030
3300
|
# TODO: MyPy can't understand that this is the type we should get by unwrapping the promise
|
|
3031
|
-
outputs: Union[CWLObjectType,
|
|
3032
|
-
Union[CWLObjectType,
|
|
3301
|
+
outputs: Union[CWLObjectType, list[CWLObjectType]] = cast(
|
|
3302
|
+
Union[CWLObjectType, list[CWLObjectType]], unwrap(self.outputs)
|
|
3033
3303
|
)
|
|
3034
3304
|
for k in [sn(i) for i in self.step.tool["out"]]:
|
|
3035
3305
|
outobj[k] = self.extract(outputs, k)
|
|
@@ -3071,7 +3341,11 @@ ProcessType = TypeVar(
|
|
|
3071
3341
|
|
|
3072
3342
|
|
|
3073
3343
|
def remove_pickle_problems(obj: ProcessType) -> ProcessType:
|
|
3074
|
-
"""
|
|
3344
|
+
"""
|
|
3345
|
+
Doc_loader does not pickle correctly, causing Toil errors, remove from objects.
|
|
3346
|
+
|
|
3347
|
+
See github issue: https://github.com/mypyc/mypyc/issues/804
|
|
3348
|
+
"""
|
|
3075
3349
|
if hasattr(obj, "doc_loader"):
|
|
3076
3350
|
obj.doc_loader = None
|
|
3077
3351
|
if isinstance(obj, cwltool.workflow.WorkflowStep):
|
|
@@ -3103,12 +3377,11 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
3103
3377
|
self.cwlwf = cwlwf
|
|
3104
3378
|
self.cwljob = cwljob
|
|
3105
3379
|
self.runtime_context = runtime_context
|
|
3106
|
-
self.cwlwf = remove_pickle_problems(self.cwlwf)
|
|
3107
3380
|
self.conditional = conditional or Conditional()
|
|
3108
3381
|
|
|
3109
3382
|
def run(
|
|
3110
3383
|
self, file_store: AbstractFileStore
|
|
3111
|
-
) -> Union[UnresolvedDict,
|
|
3384
|
+
) -> Union[UnresolvedDict, dict[str, SkipNull]]:
|
|
3112
3385
|
"""
|
|
3113
3386
|
Convert a CWL Workflow graph into a Toil job graph.
|
|
3114
3387
|
|
|
@@ -3129,7 +3402,7 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
3129
3402
|
# that may be used as a "source" for a step input workflow output
|
|
3130
3403
|
# parameter
|
|
3131
3404
|
# to: the job that will produce that value.
|
|
3132
|
-
promises:
|
|
3405
|
+
promises: dict[str, Job] = {}
|
|
3133
3406
|
|
|
3134
3407
|
parent_name = shortname(self.cwlwf.tool["id"])
|
|
3135
3408
|
|
|
@@ -3158,7 +3431,7 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
3158
3431
|
stepinputs_fufilled = False
|
|
3159
3432
|
if stepinputs_fufilled:
|
|
3160
3433
|
logger.debug("Ready to make job for workflow step %s", step_id)
|
|
3161
|
-
jobobj:
|
|
3434
|
+
jobobj: dict[
|
|
3162
3435
|
str, Union[ResolveSource, DefaultWithSource, StepValueFrom]
|
|
3163
3436
|
] = {}
|
|
3164
3437
|
|
|
@@ -3292,30 +3565,349 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
3292
3565
|
return UnresolvedDict(outobj)
|
|
3293
3566
|
|
|
3294
3567
|
|
|
3568
|
+
class CWLInstallImportsJob(Job):
|
|
3569
|
+
def __init__(
|
|
3570
|
+
self,
|
|
3571
|
+
initialized_job_order: Promised[CWLObjectType],
|
|
3572
|
+
tool: Promised[Process],
|
|
3573
|
+
basedir: str,
|
|
3574
|
+
skip_remote: bool,
|
|
3575
|
+
bypass_file_store: bool,
|
|
3576
|
+
import_data: Promised[dict[str, FileID]],
|
|
3577
|
+
**kwargs: Any,
|
|
3578
|
+
) -> None:
|
|
3579
|
+
"""
|
|
3580
|
+
Job to take the entire CWL object and a mapping of filenames to the imported URIs
|
|
3581
|
+
to convert all file locations to URIs.
|
|
3582
|
+
|
|
3583
|
+
This class is only used when runImportsOnWorkers is enabled.
|
|
3584
|
+
"""
|
|
3585
|
+
super().__init__(local=True, **kwargs)
|
|
3586
|
+
self.initialized_job_order = initialized_job_order
|
|
3587
|
+
self.tool = tool
|
|
3588
|
+
self.basedir = basedir
|
|
3589
|
+
self.skip_remote = skip_remote
|
|
3590
|
+
self.bypass_file_store = bypass_file_store
|
|
3591
|
+
self.import_data = import_data
|
|
3592
|
+
|
|
3593
|
+
@staticmethod
|
|
3594
|
+
def fill_in_files(
|
|
3595
|
+
initialized_job_order: CWLObjectType,
|
|
3596
|
+
tool: Process,
|
|
3597
|
+
candidate_to_fileid: dict[str, FileID],
|
|
3598
|
+
basedir: str,
|
|
3599
|
+
skip_remote: bool,
|
|
3600
|
+
bypass_file_store: bool,
|
|
3601
|
+
) -> tuple[Process, CWLObjectType]:
|
|
3602
|
+
"""
|
|
3603
|
+
Given a mapping of filenames to Toil file IDs, replace the filename with the file IDs throughout the CWL object.
|
|
3604
|
+
"""
|
|
3605
|
+
|
|
3606
|
+
def fill_in_file(filename: str) -> FileID:
|
|
3607
|
+
"""
|
|
3608
|
+
Return the file name's associated Toil file ID
|
|
3609
|
+
"""
|
|
3610
|
+
return candidate_to_fileid[filename]
|
|
3611
|
+
|
|
3612
|
+
file_convert_function = functools.partial(
|
|
3613
|
+
extract_and_convert_file_to_toil_uri, fill_in_file
|
|
3614
|
+
)
|
|
3615
|
+
fs_access = ToilFsAccess(basedir)
|
|
3616
|
+
fileindex: dict[str, str] = {}
|
|
3617
|
+
existing: dict[str, str] = {}
|
|
3618
|
+
visit_files(
|
|
3619
|
+
file_convert_function,
|
|
3620
|
+
fs_access,
|
|
3621
|
+
fileindex,
|
|
3622
|
+
existing,
|
|
3623
|
+
initialized_job_order,
|
|
3624
|
+
mark_broken=True,
|
|
3625
|
+
skip_remote=skip_remote,
|
|
3626
|
+
bypass_file_store=bypass_file_store,
|
|
3627
|
+
)
|
|
3628
|
+
visitSteps(
|
|
3629
|
+
tool,
|
|
3630
|
+
functools.partial(
|
|
3631
|
+
visit_files,
|
|
3632
|
+
file_convert_function,
|
|
3633
|
+
fs_access,
|
|
3634
|
+
fileindex,
|
|
3635
|
+
existing,
|
|
3636
|
+
mark_broken=True,
|
|
3637
|
+
skip_remote=skip_remote,
|
|
3638
|
+
bypass_file_store=bypass_file_store,
|
|
3639
|
+
),
|
|
3640
|
+
)
|
|
3641
|
+
|
|
3642
|
+
# We always expect to have processed all files that exist
|
|
3643
|
+
for param_name, param_value in initialized_job_order.items():
|
|
3644
|
+
# Loop through all the parameters for the workflow overall.
|
|
3645
|
+
# Drop any files that aren't either imported (for when we use
|
|
3646
|
+
# the file store) or available on disk (for when we don't).
|
|
3647
|
+
# This will properly make them cause an error later if they
|
|
3648
|
+
# were required.
|
|
3649
|
+
rm_unprocessed_secondary_files(param_value)
|
|
3650
|
+
return tool, initialized_job_order
|
|
3651
|
+
|
|
3652
|
+
def run(self, file_store: AbstractFileStore) -> Tuple[Process, CWLObjectType]:
|
|
3653
|
+
"""
|
|
3654
|
+
Convert the filenames in the workflow inputs into the URIs
|
|
3655
|
+
:return: Promise of transformed workflow inputs. A tuple of the job order and process
|
|
3656
|
+
"""
|
|
3657
|
+
candidate_to_fileid: dict[str, FileID] = unwrap(self.import_data)
|
|
3658
|
+
|
|
3659
|
+
initialized_job_order = unwrap(self.initialized_job_order)
|
|
3660
|
+
tool = unwrap(self.tool)
|
|
3661
|
+
return CWLInstallImportsJob.fill_in_files(
|
|
3662
|
+
initialized_job_order,
|
|
3663
|
+
tool,
|
|
3664
|
+
candidate_to_fileid,
|
|
3665
|
+
self.basedir,
|
|
3666
|
+
self.skip_remote,
|
|
3667
|
+
self.bypass_file_store,
|
|
3668
|
+
)
|
|
3669
|
+
|
|
3670
|
+
|
|
3671
|
+
class CWLImportWrapper(CWLNamedJob):
|
|
3672
|
+
"""
|
|
3673
|
+
Job to organize importing files on workers instead of the leader. Responsible for extracting filenames and metadata,
|
|
3674
|
+
calling ImportsJob, applying imports to the job objects, and scheduling the start workflow job
|
|
3675
|
+
|
|
3676
|
+
This class is only used when runImportsOnWorkers is enabled.
|
|
3677
|
+
"""
|
|
3678
|
+
|
|
3679
|
+
def __init__(
|
|
3680
|
+
self,
|
|
3681
|
+
initialized_job_order: CWLObjectType,
|
|
3682
|
+
tool: Process,
|
|
3683
|
+
runtime_context: cwltool.context.RuntimeContext,
|
|
3684
|
+
file_to_data: dict[str, FileMetadata],
|
|
3685
|
+
options: Namespace,
|
|
3686
|
+
):
|
|
3687
|
+
super().__init__(local=False, disk=options.import_workers_threshold)
|
|
3688
|
+
self.initialized_job_order = initialized_job_order
|
|
3689
|
+
self.tool = tool
|
|
3690
|
+
self.options = options
|
|
3691
|
+
self.runtime_context = runtime_context
|
|
3692
|
+
self.file_to_data = file_to_data
|
|
3693
|
+
|
|
3694
|
+
def run(self, file_store: AbstractFileStore) -> Any:
|
|
3695
|
+
imports_job = ImportsJob(
|
|
3696
|
+
self.file_to_data,
|
|
3697
|
+
self.options.import_workers_threshold,
|
|
3698
|
+
self.options.import_workers_disk,
|
|
3699
|
+
)
|
|
3700
|
+
self.addChild(imports_job)
|
|
3701
|
+
install_imports_job = CWLInstallImportsJob(
|
|
3702
|
+
initialized_job_order=self.initialized_job_order,
|
|
3703
|
+
tool=self.tool,
|
|
3704
|
+
basedir=self.options.basedir,
|
|
3705
|
+
skip_remote=self.options.reference_inputs,
|
|
3706
|
+
bypass_file_store=self.options.bypass_file_store,
|
|
3707
|
+
import_data=imports_job.rv(0),
|
|
3708
|
+
)
|
|
3709
|
+
self.addChild(install_imports_job)
|
|
3710
|
+
imports_job.addFollowOn(install_imports_job)
|
|
3711
|
+
|
|
3712
|
+
start_job = CWLStartJob(
|
|
3713
|
+
install_imports_job.rv(0),
|
|
3714
|
+
install_imports_job.rv(1),
|
|
3715
|
+
runtime_context=self.runtime_context,
|
|
3716
|
+
)
|
|
3717
|
+
self.addChild(start_job)
|
|
3718
|
+
install_imports_job.addFollowOn(start_job)
|
|
3719
|
+
|
|
3720
|
+
return start_job.rv()
|
|
3721
|
+
|
|
3722
|
+
|
|
3723
|
+
class CWLStartJob(CWLNamedJob):
|
|
3724
|
+
"""
|
|
3725
|
+
Job responsible for starting the CWL workflow.
|
|
3726
|
+
|
|
3727
|
+
Takes in the workflow/tool and inputs after all files are imported
|
|
3728
|
+
and creates jobs to run those workflows.
|
|
3729
|
+
"""
|
|
3730
|
+
|
|
3731
|
+
def __init__(
|
|
3732
|
+
self,
|
|
3733
|
+
tool: Promised[Process],
|
|
3734
|
+
initialized_job_order: Promised[CWLObjectType],
|
|
3735
|
+
runtime_context: cwltool.context.RuntimeContext,
|
|
3736
|
+
**kwargs: Any,
|
|
3737
|
+
) -> None:
|
|
3738
|
+
super().__init__(**kwargs)
|
|
3739
|
+
self.tool = tool
|
|
3740
|
+
self.initialized_job_order = initialized_job_order
|
|
3741
|
+
self.runtime_context = runtime_context
|
|
3742
|
+
|
|
3743
|
+
def run(self, file_store: AbstractFileStore) -> Any:
|
|
3744
|
+
initialized_job_order = unwrap(self.initialized_job_order)
|
|
3745
|
+
tool = unwrap(self.tool)
|
|
3746
|
+
cwljob, _ = makeJob(
|
|
3747
|
+
tool, initialized_job_order, self.runtime_context, None, None
|
|
3748
|
+
) # toplevel, no name needed
|
|
3749
|
+
cwljob.cwljob = initialized_job_order
|
|
3750
|
+
self.addChild(cwljob)
|
|
3751
|
+
return cwljob.rv()
|
|
3752
|
+
|
|
3753
|
+
|
|
3754
|
+
def extract_workflow_inputs(
|
|
3755
|
+
options: Namespace, initialized_job_order: CWLObjectType, tool: Process
|
|
3756
|
+
) -> list[str]:
|
|
3757
|
+
"""
|
|
3758
|
+
Collect all the workflow input files to import later.
|
|
3759
|
+
:param options: namespace
|
|
3760
|
+
:param initialized_job_order: cwl object
|
|
3761
|
+
:param tool: tool object
|
|
3762
|
+
:return:
|
|
3763
|
+
"""
|
|
3764
|
+
fileindex: dict[str, str] = {}
|
|
3765
|
+
existing: dict[str, str] = {}
|
|
3766
|
+
|
|
3767
|
+
# Extract out all the input files' filenames
|
|
3768
|
+
logger.info("Collecting input files...")
|
|
3769
|
+
fs_access = ToilFsAccess(options.basedir)
|
|
3770
|
+
filenames = visit_files(
|
|
3771
|
+
extract_file_uri_once,
|
|
3772
|
+
fs_access,
|
|
3773
|
+
fileindex,
|
|
3774
|
+
existing,
|
|
3775
|
+
initialized_job_order,
|
|
3776
|
+
mark_broken=True,
|
|
3777
|
+
skip_remote=options.reference_inputs,
|
|
3778
|
+
bypass_file_store=options.bypass_file_store,
|
|
3779
|
+
)
|
|
3780
|
+
# Extract filenames of all the files associated with tools (binaries, etc.).
|
|
3781
|
+
logger.info("Collecting tool-associated files...")
|
|
3782
|
+
tool_filenames = visitSteps(
|
|
3783
|
+
tool,
|
|
3784
|
+
functools.partial(
|
|
3785
|
+
visit_files,
|
|
3786
|
+
extract_file_uri_once,
|
|
3787
|
+
fs_access,
|
|
3788
|
+
fileindex,
|
|
3789
|
+
existing,
|
|
3790
|
+
mark_broken=True,
|
|
3791
|
+
skip_remote=options.reference_inputs,
|
|
3792
|
+
bypass_file_store=options.bypass_file_store,
|
|
3793
|
+
),
|
|
3794
|
+
)
|
|
3795
|
+
filenames.extend(tool_filenames)
|
|
3796
|
+
return [file for file in filenames if file is not None]
|
|
3797
|
+
|
|
3798
|
+
|
|
3799
|
+
def import_workflow_inputs(
|
|
3800
|
+
jobstore: AbstractJobStore,
|
|
3801
|
+
options: Namespace,
|
|
3802
|
+
initialized_job_order: CWLObjectType,
|
|
3803
|
+
tool: Process,
|
|
3804
|
+
log_level: int = logging.DEBUG,
|
|
3805
|
+
) -> None:
|
|
3806
|
+
"""
|
|
3807
|
+
Import all workflow inputs on the leader.
|
|
3808
|
+
|
|
3809
|
+
Ran when not importing on workers.
|
|
3810
|
+
:param jobstore: Toil jobstore
|
|
3811
|
+
:param options: Namespace
|
|
3812
|
+
:param initialized_job_order: CWL object
|
|
3813
|
+
:param tool: CWL tool
|
|
3814
|
+
:param log_level: log level
|
|
3815
|
+
:return:
|
|
3816
|
+
"""
|
|
3817
|
+
fileindex: dict[str, str] = {}
|
|
3818
|
+
existing: dict[str, str] = {}
|
|
3819
|
+
|
|
3820
|
+
# Define something we can call to import a file and get its file
|
|
3821
|
+
# ID.
|
|
3822
|
+
def file_import_function(url: str) -> FileID:
|
|
3823
|
+
logger.log(log_level, "Loading %s...", url)
|
|
3824
|
+
return jobstore.import_file(url, symlink=True)
|
|
3825
|
+
|
|
3826
|
+
import_function = functools.partial(
|
|
3827
|
+
extract_and_convert_file_to_toil_uri, file_import_function
|
|
3828
|
+
)
|
|
3829
|
+
# Import all the input files, some of which may be missing optional
|
|
3830
|
+
# files.
|
|
3831
|
+
logger.info("Importing input files...")
|
|
3832
|
+
fs_access = ToilFsAccess(options.basedir)
|
|
3833
|
+
visit_files(
|
|
3834
|
+
import_function,
|
|
3835
|
+
fs_access,
|
|
3836
|
+
fileindex,
|
|
3837
|
+
existing,
|
|
3838
|
+
initialized_job_order,
|
|
3839
|
+
mark_broken=True,
|
|
3840
|
+
skip_remote=options.reference_inputs,
|
|
3841
|
+
bypass_file_store=options.bypass_file_store,
|
|
3842
|
+
)
|
|
3843
|
+
|
|
3844
|
+
# Make another function for importing tool files. This one doesn't allow
|
|
3845
|
+
# symlinking, since the tools might be coming from storage not accessible
|
|
3846
|
+
# to all nodes.
|
|
3847
|
+
tool_import_function = functools.partial(
|
|
3848
|
+
extract_and_convert_file_to_toil_uri,
|
|
3849
|
+
cast(
|
|
3850
|
+
Callable[[str], FileID],
|
|
3851
|
+
functools.partial(jobstore.import_file, symlink=False),
|
|
3852
|
+
),
|
|
3853
|
+
)
|
|
3854
|
+
|
|
3855
|
+
# Import all the files associated with tools (binaries, etc.).
|
|
3856
|
+
# Not sure why you would have an optional secondary file here, but
|
|
3857
|
+
# the spec probably needs us to support them.
|
|
3858
|
+
logger.info("Importing tool-associated files...")
|
|
3859
|
+
visitSteps(
|
|
3860
|
+
tool,
|
|
3861
|
+
functools.partial(
|
|
3862
|
+
visit_files,
|
|
3863
|
+
tool_import_function,
|
|
3864
|
+
fs_access,
|
|
3865
|
+
fileindex,
|
|
3866
|
+
existing,
|
|
3867
|
+
mark_broken=True,
|
|
3868
|
+
skip_remote=options.reference_inputs,
|
|
3869
|
+
bypass_file_store=options.bypass_file_store,
|
|
3870
|
+
),
|
|
3871
|
+
)
|
|
3872
|
+
|
|
3873
|
+
# We always expect to have processed all files that exist
|
|
3874
|
+
for param_name, param_value in initialized_job_order.items():
|
|
3875
|
+
# Loop through all the parameters for the workflow overall.
|
|
3876
|
+
# Drop any files that aren't either imported (for when we use
|
|
3877
|
+
# the file store) or available on disk (for when we don't).
|
|
3878
|
+
# This will properly make them cause an error later if they
|
|
3879
|
+
# were required.
|
|
3880
|
+
rm_unprocessed_secondary_files(param_value)
|
|
3881
|
+
|
|
3882
|
+
|
|
3883
|
+
T = TypeVar("T")
|
|
3884
|
+
|
|
3885
|
+
|
|
3295
3886
|
def visitSteps(
|
|
3296
3887
|
cmdline_tool: Process,
|
|
3297
|
-
op: Callable[[CommentedMap],
|
|
3298
|
-
) ->
|
|
3888
|
+
op: Callable[[CommentedMap], list[T]],
|
|
3889
|
+
) -> list[T]:
|
|
3299
3890
|
"""
|
|
3300
3891
|
Iterate over a CWL Process object, running the op on each tool description
|
|
3301
3892
|
CWL object.
|
|
3302
3893
|
"""
|
|
3303
3894
|
if isinstance(cmdline_tool, cwltool.workflow.Workflow):
|
|
3304
3895
|
# For workflows we need to dispatch on steps
|
|
3896
|
+
ret = []
|
|
3305
3897
|
for step in cmdline_tool.steps:
|
|
3306
3898
|
# Handle the step's tool
|
|
3307
|
-
op(step.tool)
|
|
3899
|
+
ret.extend(op(step.tool))
|
|
3308
3900
|
# Recures on the embedded tool; maybe it's a workflow.
|
|
3309
|
-
visitSteps(step.embedded_tool, op)
|
|
3901
|
+
recurse_ret = visitSteps(step.embedded_tool, op)
|
|
3902
|
+
ret.extend(recurse_ret)
|
|
3903
|
+
return ret
|
|
3310
3904
|
elif isinstance(cmdline_tool, cwltool.process.Process):
|
|
3311
3905
|
# All CWL Process objects (including CommandLineTool) will have tools
|
|
3312
3906
|
# if they bothered to run the Process __init__.
|
|
3313
|
-
op(cmdline_tool.tool)
|
|
3314
|
-
|
|
3315
|
-
|
|
3316
|
-
|
|
3317
|
-
f"traversal: {type(cmdline_tool)}"
|
|
3318
|
-
)
|
|
3907
|
+
return op(cmdline_tool.tool)
|
|
3908
|
+
raise RuntimeError(
|
|
3909
|
+
f"Unsupported type encountered in workflow " f"traversal: {type(cmdline_tool)}"
|
|
3910
|
+
)
|
|
3319
3911
|
|
|
3320
3912
|
|
|
3321
3913
|
def rm_unprocessed_secondary_files(job_params: Any) -> None:
|
|
@@ -3328,7 +3920,7 @@ def rm_unprocessed_secondary_files(job_params: Any) -> None:
|
|
|
3328
3920
|
|
|
3329
3921
|
def filtered_secondary_files(
|
|
3330
3922
|
unfiltered_secondary_files: CWLObjectType,
|
|
3331
|
-
) ->
|
|
3923
|
+
) -> list[CWLObjectType]:
|
|
3332
3924
|
"""
|
|
3333
3925
|
Remove unprocessed secondary files.
|
|
3334
3926
|
|
|
@@ -3349,28 +3941,33 @@ def filtered_secondary_files(
|
|
|
3349
3941
|
intermediate_secondary_files = []
|
|
3350
3942
|
final_secondary_files = []
|
|
3351
3943
|
# remove secondary files still containing interpolated strings
|
|
3352
|
-
for sf in cast(
|
|
3944
|
+
for sf in cast(list[CWLObjectType], unfiltered_secondary_files["secondaryFiles"]):
|
|
3353
3945
|
sf_bn = cast(str, sf.get("basename", ""))
|
|
3354
3946
|
sf_loc = cast(str, sf.get("location", ""))
|
|
3355
3947
|
if ("$(" not in sf_bn) and ("${" not in sf_bn):
|
|
3356
3948
|
if ("$(" not in sf_loc) and ("${" not in sf_loc):
|
|
3357
3949
|
intermediate_secondary_files.append(sf)
|
|
3358
3950
|
else:
|
|
3359
|
-
logger.debug(
|
|
3951
|
+
logger.debug(
|
|
3952
|
+
"Secondary file %s is dropped because it has an uninterpolated location",
|
|
3953
|
+
sf,
|
|
3954
|
+
)
|
|
3360
3955
|
else:
|
|
3361
|
-
logger.debug(
|
|
3956
|
+
logger.debug(
|
|
3957
|
+
"Secondary file %s is dropped because it has an uninterpolated basename",
|
|
3958
|
+
sf,
|
|
3959
|
+
)
|
|
3362
3960
|
# remove secondary files that are not present in the filestore or pointing
|
|
3363
|
-
# to
|
|
3961
|
+
# to existent things on disk
|
|
3364
3962
|
for sf in intermediate_secondary_files:
|
|
3365
3963
|
sf_loc = cast(str, sf.get("location", ""))
|
|
3366
|
-
if (
|
|
3367
|
-
sf_loc != MISSING_FILE
|
|
3368
|
-
or sf.get("class", "") == "Directory"
|
|
3369
|
-
):
|
|
3964
|
+
if not sf_loc.startswith(MISSING_FILE) or sf.get("class", "") == "Directory":
|
|
3370
3965
|
# Pass imported files, and all Directories
|
|
3371
3966
|
final_secondary_files.append(sf)
|
|
3372
3967
|
else:
|
|
3373
|
-
logger.debug(
|
|
3968
|
+
logger.debug(
|
|
3969
|
+
"Secondary file %s is dropped because it is known to be missing", sf
|
|
3970
|
+
)
|
|
3374
3971
|
return final_secondary_files
|
|
3375
3972
|
|
|
3376
3973
|
|
|
@@ -3475,8 +4072,6 @@ def determine_load_listing(
|
|
|
3475
4072
|
class NoAvailableJobStoreException(Exception):
|
|
3476
4073
|
"""Indicates that no job store name is available."""
|
|
3477
4074
|
|
|
3478
|
-
pass
|
|
3479
|
-
|
|
3480
4075
|
|
|
3481
4076
|
def generate_default_job_store(
|
|
3482
4077
|
batch_system_name: Optional[str],
|
|
@@ -3544,37 +4139,64 @@ def generate_default_job_store(
|
|
|
3544
4139
|
|
|
3545
4140
|
usage_message = "\n\n" + textwrap.dedent(
|
|
3546
4141
|
"""
|
|
3547
|
-
|
|
3548
|
-
|
|
3549
|
-
|
|
3550
|
-
|
|
3551
|
-
|
|
3552
|
-
|
|
3553
|
-
|
|
3554
|
-
|
|
3555
|
-
|
|
3556
|
-
|
|
3557
|
-
|
|
4142
|
+
NOTE: If you're trying to specify a jobstore, you must use --jobStore, not a positional argument.
|
|
4143
|
+
|
|
4144
|
+
Usage: toil-cwl-runner [options] <workflow> [<input file>] [workflow options]
|
|
4145
|
+
|
|
4146
|
+
Example: toil-cwl-runner \\
|
|
4147
|
+
--jobStore aws:us-west-2:jobstore \\
|
|
4148
|
+
--realTimeLogging \\
|
|
4149
|
+
--logInfo \\
|
|
4150
|
+
example.cwl \\
|
|
4151
|
+
example-job.yaml \\
|
|
4152
|
+
--wf_input="hello world"
|
|
4153
|
+
"""[
|
|
3558
4154
|
1:
|
|
3559
4155
|
]
|
|
3560
4156
|
)
|
|
3561
4157
|
|
|
3562
|
-
|
|
4158
|
+
|
|
4159
|
+
def get_options(args: list[str]) -> Namespace:
|
|
3563
4160
|
"""
|
|
3564
4161
|
Parse given args and properly add non-Toil arguments into the cwljob of the Namespace.
|
|
3565
4162
|
:param args: List of args from command line
|
|
3566
4163
|
:return: options namespace
|
|
3567
4164
|
"""
|
|
3568
|
-
|
|
4165
|
+
# We can't allow abbreviations in case the workflow defines an option that
|
|
4166
|
+
# is a prefix of a Toil option.
|
|
4167
|
+
parser = ArgParser(
|
|
4168
|
+
allow_abbrev=False,
|
|
4169
|
+
usage="%(prog)s [options] WORKFLOW [INFILE] [WF_OPTIONS...]",
|
|
4170
|
+
description=textwrap.dedent(
|
|
4171
|
+
"""
|
|
4172
|
+
positional arguments:
|
|
4173
|
+
|
|
4174
|
+
WORKFLOW CWL file to run.
|
|
4175
|
+
|
|
4176
|
+
INFILE YAML or JSON file of workflow inputs.
|
|
4177
|
+
|
|
4178
|
+
WF_OPTIONS Additional inputs to the workflow as command-line
|
|
4179
|
+
flags. If CWL workflow takes an input, the name of the
|
|
4180
|
+
input can be used as an option. For example:
|
|
4181
|
+
|
|
4182
|
+
%(prog)s workflow.cwl --file1 file
|
|
4183
|
+
|
|
4184
|
+
If an input has the same name as a Toil option, pass
|
|
4185
|
+
'--' before it.
|
|
4186
|
+
"""
|
|
4187
|
+
),
|
|
4188
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
4189
|
+
)
|
|
4190
|
+
|
|
3569
4191
|
addOptions(parser, jobstore_as_flag=True, cwl=True)
|
|
3570
4192
|
options: Namespace
|
|
3571
|
-
options,
|
|
3572
|
-
options.cwljob
|
|
4193
|
+
options, extra = parser.parse_known_args(args)
|
|
4194
|
+
options.cwljob = extra
|
|
3573
4195
|
|
|
3574
4196
|
return options
|
|
3575
4197
|
|
|
3576
4198
|
|
|
3577
|
-
def main(args: Optional[
|
|
4199
|
+
def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
3578
4200
|
"""Run the main loop for toil-cwl-runner."""
|
|
3579
4201
|
# Remove cwltool logger's stream handler so it uses Toil's
|
|
3580
4202
|
cwllogger.removeHandler(defaultStreamHandler)
|
|
@@ -3586,25 +4208,21 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3586
4208
|
|
|
3587
4209
|
# Do cwltool setup
|
|
3588
4210
|
cwltool.main.setup_schema(args=options, custom_schema_callback=None)
|
|
3589
|
-
tmpdir_prefix = options.tmpdir_prefix =
|
|
3590
|
-
|
|
3591
|
-
|
|
3592
|
-
|
|
3593
|
-
|
|
3594
|
-
# workdir and the default job store under it
|
|
3595
|
-
workdir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
|
|
3596
|
-
else:
|
|
3597
|
-
# Use a directory in the default tmpdir
|
|
3598
|
-
workdir = mkdtemp()
|
|
3599
|
-
# Make sure workdir doesn't exist so it can be a job store
|
|
3600
|
-
os.rmdir(workdir)
|
|
4211
|
+
tmpdir_prefix = options.tmpdir_prefix = (
|
|
4212
|
+
options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
|
|
4213
|
+
)
|
|
4214
|
+
tmp_outdir_prefix = options.tmp_outdir_prefix or tmpdir_prefix
|
|
4215
|
+
workdir = options.workDir or tmp_outdir_prefix
|
|
3601
4216
|
|
|
3602
4217
|
if options.jobStore is None:
|
|
4218
|
+
jobstore = cwltool.utils.create_tmp_dir(tmp_outdir_prefix)
|
|
4219
|
+
# Make sure directory doesn't exist so it can be a job store
|
|
4220
|
+
os.rmdir(jobstore)
|
|
3603
4221
|
# Pick a default job store specifier appropriate to our choice of batch
|
|
3604
4222
|
# system and provisioner and installed modules, given this available
|
|
3605
4223
|
# local directory name. Fail if no good default can be used.
|
|
3606
4224
|
options.jobStore = generate_default_job_store(
|
|
3607
|
-
options.batchSystem, options.provisioner,
|
|
4225
|
+
options.batchSystem, options.provisioner, jobstore
|
|
3608
4226
|
)
|
|
3609
4227
|
|
|
3610
4228
|
options.doc_cache = True
|
|
@@ -3612,17 +4230,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3612
4230
|
options.do_validate = True
|
|
3613
4231
|
options.pack = False
|
|
3614
4232
|
options.print_subgraph = False
|
|
3615
|
-
if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.workDir is None:
|
|
3616
|
-
# We need to override workDir because by default Toil will pick
|
|
3617
|
-
# somewhere under the system temp directory if unset, ignoring
|
|
3618
|
-
# --tmpdir-prefix.
|
|
3619
|
-
#
|
|
3620
|
-
# If set, workDir needs to exist, so we directly use the prefix
|
|
3621
|
-
options.workDir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
|
|
3622
|
-
if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.coordination_dir is None:
|
|
3623
|
-
# override coordination_dir as default Toil will pick somewhere else
|
|
3624
|
-
# ignoring --tmpdir_prefix
|
|
3625
|
-
options.coordination_dir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
|
|
3626
4233
|
|
|
3627
4234
|
if options.batchSystem == "kubernetes":
|
|
3628
4235
|
# Containers under Kubernetes can only run in Singularity
|
|
@@ -3640,12 +4247,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3640
4247
|
logger.debug(f"Final job store {options.jobStore} and workDir {options.workDir}")
|
|
3641
4248
|
|
|
3642
4249
|
outdir = os.path.abspath(options.outdir or os.getcwd())
|
|
3643
|
-
tmp_outdir_prefix = os.path.abspath(
|
|
3644
|
-
options.tmp_outdir_prefix or DEFAULT_TMPDIR_PREFIX
|
|
3645
|
-
)
|
|
3646
|
-
|
|
3647
|
-
fileindex: Dict[str, str] = {}
|
|
3648
|
-
existing: Dict[str, str] = {}
|
|
3649
4250
|
conf_file = getattr(options, "beta_dependency_resolvers_configuration", None)
|
|
3650
4251
|
use_conda_dependencies = getattr(options, "beta_conda_dependencies", None)
|
|
3651
4252
|
job_script_provider = None
|
|
@@ -3660,11 +4261,22 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3660
4261
|
)
|
|
3661
4262
|
runtime_context.workdir = workdir # type: ignore[attr-defined]
|
|
3662
4263
|
runtime_context.outdir = outdir
|
|
4264
|
+
setattr(runtime_context, "cwl_default_ram", options.cwl_default_ram)
|
|
3663
4265
|
runtime_context.move_outputs = "leave"
|
|
3664
4266
|
runtime_context.rm_tmpdir = False
|
|
3665
4267
|
runtime_context.streaming_allowed = not options.disable_streaming
|
|
4268
|
+
if options.cachedir is not None:
|
|
4269
|
+
runtime_context.cachedir = os.path.abspath(options.cachedir)
|
|
4270
|
+
# Automatically bypass the file store to be compatible with cwltool caching
|
|
4271
|
+
# Otherwise, the CWL caching code makes links to temporary local copies
|
|
4272
|
+
# of filestore files and caches those.
|
|
4273
|
+
logger.debug("CWL task caching is turned on. Bypassing file store.")
|
|
4274
|
+
options.bypass_file_store = True
|
|
3666
4275
|
if options.mpi_config_file is not None:
|
|
3667
4276
|
runtime_context.mpi_config = MpiConfig.load(options.mpi_config_file)
|
|
4277
|
+
if cwltool.main.check_working_directories(runtime_context) is not None:
|
|
4278
|
+
logger.error("Failed to create directory. If using tmpdir_prefix, tmpdir_outdir_prefix, or cachedir, consider changing directory locations.")
|
|
4279
|
+
return 1
|
|
3668
4280
|
setattr(runtime_context, "bypass_file_store", options.bypass_file_store)
|
|
3669
4281
|
if options.bypass_file_store and options.destBucket:
|
|
3670
4282
|
# We use the file store to write to buckets, so we can't do this (yet?)
|
|
@@ -3694,225 +4306,210 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3694
4306
|
runtime_context.research_obj = research_obj
|
|
3695
4307
|
|
|
3696
4308
|
try:
|
|
3697
|
-
with Toil(options) as toil:
|
|
3698
|
-
if options.restart:
|
|
3699
|
-
outobj = toil.restart()
|
|
3700
|
-
else:
|
|
3701
|
-
loading_context.hints = [
|
|
3702
|
-
{
|
|
3703
|
-
"class": "ResourceRequirement",
|
|
3704
|
-
"coresMin": toil.config.defaultCores,
|
|
3705
|
-
"ramMin": toil.config.defaultMemory / (2**20),
|
|
3706
|
-
"outdirMin": toil.config.defaultDisk / (2**20),
|
|
3707
|
-
"tmpdirMin": 0,
|
|
3708
|
-
}
|
|
3709
|
-
]
|
|
3710
|
-
loading_context.construct_tool_object = toil_make_tool
|
|
3711
|
-
loading_context.strict = not options.not_strict
|
|
3712
|
-
options.workflow = options.cwltool
|
|
3713
|
-
options.job_order = options.cwljob
|
|
3714
4309
|
|
|
3715
|
-
|
|
3716
|
-
|
|
3717
|
-
|
|
3718
|
-
|
|
3719
|
-
|
|
3720
|
-
|
|
3721
|
-
|
|
3722
|
-
|
|
3723
|
-
|
|
3724
|
-
|
|
3725
|
-
|
|
3726
|
-
|
|
3727
|
-
|
|
4310
|
+
# We might have workflow metadata to pass to Toil
|
|
4311
|
+
workflow_name=None
|
|
4312
|
+
trs_spec = None
|
|
4313
|
+
|
|
4314
|
+
if not options.restart:
|
|
4315
|
+
# Make a version of the config based on the initial options, for
|
|
4316
|
+
# setting up CWL option stuff
|
|
4317
|
+
expected_config = Config()
|
|
4318
|
+
expected_config.setOptions(options)
|
|
4319
|
+
|
|
4320
|
+
# Before showing the options to any cwltool stuff that wants to
|
|
4321
|
+
# load the workflow, transform options.cwltool, where our
|
|
4322
|
+
# argument for what to run is, to handle Dockstore workflows.
|
|
4323
|
+
options.cwltool, trs_spec = resolve_workflow(options.cwltool)
|
|
4324
|
+
# Figure out what to call the workflow
|
|
4325
|
+
workflow_name = trs_spec or options.cwltool
|
|
4326
|
+
|
|
4327
|
+
# TODO: why are we doing this? Does this get applied to all
|
|
4328
|
+
# tools as a default or something?
|
|
4329
|
+
loading_context.hints = [
|
|
4330
|
+
{
|
|
4331
|
+
"class": "ResourceRequirement",
|
|
4332
|
+
"coresMin": expected_config.defaultCores,
|
|
4333
|
+
# Don't include any RAM requirement because we want to
|
|
4334
|
+
# know when tools don't manually ask for RAM.
|
|
4335
|
+
"outdirMin": expected_config.defaultDisk / (2**20),
|
|
4336
|
+
"tmpdirMin": 0,
|
|
4337
|
+
}
|
|
4338
|
+
]
|
|
4339
|
+
loading_context.construct_tool_object = toil_make_tool
|
|
4340
|
+
loading_context.strict = not options.not_strict
|
|
4341
|
+
options.workflow = options.cwltool
|
|
4342
|
+
options.job_order = options.cwljob
|
|
3728
4343
|
|
|
3729
|
-
|
|
3730
|
-
|
|
3731
|
-
|
|
3732
|
-
|
|
3733
|
-
sys.stdin,
|
|
4344
|
+
try:
|
|
4345
|
+
uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
|
|
4346
|
+
options.cwltool,
|
|
4347
|
+
loading_context.resolver,
|
|
3734
4348
|
loading_context.fetcher_constructor,
|
|
3735
|
-
loading_context.overrides_list,
|
|
3736
|
-
tool_file_uri,
|
|
3737
4349
|
)
|
|
3738
|
-
|
|
3739
|
-
|
|
3740
|
-
|
|
3741
|
-
|
|
3742
|
-
|
|
3743
|
-
),
|
|
3744
|
-
tool_file_uri,
|
|
3745
|
-
)
|
|
3746
|
-
)
|
|
3747
|
-
|
|
3748
|
-
loading_context, workflowobj, uri = cwltool.load_tool.fetch_document(
|
|
3749
|
-
uri, loading_context
|
|
4350
|
+
except ValidationException:
|
|
4351
|
+
print(
|
|
4352
|
+
"\nYou may be getting this error because your arguments are incorrect or out of order."
|
|
4353
|
+
+ usage_message,
|
|
4354
|
+
file=sys.stderr,
|
|
3750
4355
|
)
|
|
3751
|
-
|
|
3752
|
-
|
|
4356
|
+
raise
|
|
4357
|
+
|
|
4358
|
+
# Attempt to prepull the containers
|
|
4359
|
+
if not options.no_prepull and not options.no_container:
|
|
4360
|
+
try_prepull(uri, runtime_context, expected_config.batchSystem)
|
|
4361
|
+
|
|
4362
|
+
options.tool_help = None
|
|
4363
|
+
options.debug = options.logLevel == "DEBUG"
|
|
4364
|
+
job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
|
|
4365
|
+
options,
|
|
4366
|
+
sys.stdin,
|
|
4367
|
+
loading_context.fetcher_constructor,
|
|
4368
|
+
loading_context.overrides_list,
|
|
4369
|
+
tool_file_uri,
|
|
4370
|
+
)
|
|
4371
|
+
if options.overrides:
|
|
4372
|
+
loading_context.overrides_list.extend(
|
|
4373
|
+
cwltool.load_tool.load_overrides(
|
|
4374
|
+
schema_salad.ref_resolver.file_uri(
|
|
4375
|
+
os.path.abspath(options.overrides)
|
|
4376
|
+
),
|
|
4377
|
+
tool_file_uri,
|
|
4378
|
+
)
|
|
3753
4379
|
)
|
|
3754
|
-
if not loading_context.loader:
|
|
3755
|
-
raise RuntimeError("cwltool loader is not set.")
|
|
3756
|
-
processobj, metadata = loading_context.loader.resolve_ref(uri)
|
|
3757
|
-
processobj = cast(Union[CommentedMap, CommentedSeq], processobj)
|
|
3758
4380
|
|
|
3759
|
-
|
|
4381
|
+
loading_context, workflowobj, uri = cwltool.load_tool.fetch_document(
|
|
4382
|
+
uri, loading_context
|
|
4383
|
+
)
|
|
4384
|
+
loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
|
|
4385
|
+
loading_context, workflowobj, uri
|
|
4386
|
+
)
|
|
4387
|
+
if not loading_context.loader:
|
|
4388
|
+
raise RuntimeError("cwltool loader is not set.")
|
|
4389
|
+
processobj, metadata = loading_context.loader.resolve_ref(uri)
|
|
4390
|
+
processobj = cast(Union[CommentedMap, CommentedSeq], processobj)
|
|
3760
4391
|
|
|
3761
|
-
|
|
3762
|
-
cwltool.cwlprov.writablebagfile.packed_workflow(
|
|
3763
|
-
runtime_context.research_obj,
|
|
3764
|
-
cwltool.main.print_pack(loading_context, uri),
|
|
3765
|
-
)
|
|
4392
|
+
document_loader = loading_context.loader
|
|
3766
4393
|
|
|
3767
|
-
|
|
3768
|
-
|
|
3769
|
-
|
|
3770
|
-
|
|
3771
|
-
|
|
3772
|
-
except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
|
|
3773
|
-
logging.error(err)
|
|
3774
|
-
return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
|
|
3775
|
-
runtime_context.secret_store = SecretStore()
|
|
4394
|
+
if options.provenance and runtime_context.research_obj:
|
|
4395
|
+
cwltool.cwlprov.writablebagfile.packed_workflow(
|
|
4396
|
+
runtime_context.research_obj,
|
|
4397
|
+
cwltool.main.print_pack(loading_context, uri),
|
|
4398
|
+
)
|
|
3776
4399
|
|
|
3777
|
-
|
|
3778
|
-
|
|
3779
|
-
|
|
3780
|
-
|
|
3781
|
-
|
|
3782
|
-
|
|
3783
|
-
|
|
3784
|
-
|
|
3785
|
-
|
|
3786
|
-
|
|
3787
|
-
|
|
3788
|
-
|
|
3789
|
-
|
|
3790
|
-
|
|
3791
|
-
|
|
4400
|
+
try:
|
|
4401
|
+
tool = cwltool.load_tool.make_tool(uri, loading_context)
|
|
4402
|
+
scan_for_unsupported_requirements(
|
|
4403
|
+
tool, bypass_file_store=options.bypass_file_store
|
|
4404
|
+
)
|
|
4405
|
+
except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
|
|
4406
|
+
logging.error(err)
|
|
4407
|
+
return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
|
|
4408
|
+
runtime_context.secret_store = SecretStore()
|
|
4409
|
+
|
|
4410
|
+
try:
|
|
4411
|
+
# Get the "order" for the execution of the root job. CWLTool
|
|
4412
|
+
# doesn't document this much, but this is an "order" in the
|
|
4413
|
+
# sense of a "specification" for running a single job. It
|
|
4414
|
+
# describes the inputs to the workflow.
|
|
4415
|
+
initialized_job_order = cwltool.main.init_job_order(
|
|
4416
|
+
job_order_object,
|
|
4417
|
+
options,
|
|
4418
|
+
tool,
|
|
4419
|
+
jobloader,
|
|
4420
|
+
sys.stdout,
|
|
4421
|
+
make_fs_access=runtime_context.make_fs_access,
|
|
4422
|
+
input_basedir=options.basedir,
|
|
4423
|
+
secret_store=runtime_context.secret_store,
|
|
4424
|
+
input_required=True,
|
|
4425
|
+
)
|
|
4426
|
+
except SystemExit as err:
|
|
4427
|
+
if err.code == 2: # raised by argparse's parse_args() function
|
|
4428
|
+
print(
|
|
4429
|
+
"\nIf both a CWL file and an input object (YAML/JSON) file were "
|
|
4430
|
+
"provided, the problem may be the argument order."
|
|
4431
|
+
+ usage_message,
|
|
4432
|
+
file=sys.stderr,
|
|
3792
4433
|
)
|
|
3793
|
-
|
|
3794
|
-
if err.code == 2: # raised by argparse's parse_args() function
|
|
3795
|
-
print(
|
|
3796
|
-
"\nIf both a CWL file and an input object (YAML/JSON) file were "
|
|
3797
|
-
"provided, this may be the argument order." + usage_message,
|
|
3798
|
-
file=sys.stderr,
|
|
3799
|
-
)
|
|
3800
|
-
raise
|
|
4434
|
+
raise
|
|
3801
4435
|
|
|
3802
|
-
|
|
3803
|
-
|
|
3804
|
-
|
|
3805
|
-
for inp in tool.tool["inputs"]:
|
|
3806
|
-
if (
|
|
3807
|
-
shortname(inp["id"]) in initialized_job_order
|
|
3808
|
-
and inp["type"] == "File"
|
|
3809
|
-
):
|
|
3810
|
-
cast(CWLObjectType, initialized_job_order[shortname(inp["id"])])[
|
|
3811
|
-
"streamable"
|
|
3812
|
-
] = inp.get("streamable", False)
|
|
3813
|
-
# TODO also for nested types that contain streamable Files
|
|
3814
|
-
|
|
3815
|
-
runtime_context.use_container = not options.no_container
|
|
3816
|
-
runtime_context.tmp_outdir_prefix = os.path.realpath(tmp_outdir_prefix)
|
|
3817
|
-
runtime_context.job_script_provider = job_script_provider
|
|
3818
|
-
runtime_context.force_docker_pull = options.force_docker_pull
|
|
3819
|
-
runtime_context.no_match_user = options.no_match_user
|
|
3820
|
-
runtime_context.no_read_only = options.no_read_only
|
|
3821
|
-
runtime_context.basedir = options.basedir
|
|
3822
|
-
if not options.bypass_file_store:
|
|
3823
|
-
# If we're using the file store we need to start moving output
|
|
3824
|
-
# files now.
|
|
3825
|
-
runtime_context.move_outputs = "move"
|
|
3826
|
-
|
|
3827
|
-
# We instantiate an early builder object here to populate indirect
|
|
3828
|
-
# secondaryFile references using cwltool's library because we need
|
|
3829
|
-
# to resolve them before toil imports them into the filestore.
|
|
3830
|
-
# A second builder will be built in the job's run method when toil
|
|
3831
|
-
# actually starts the cwl job.
|
|
3832
|
-
# Note that this accesses input files for tools, so the
|
|
3833
|
-
# ToilFsAccess needs to be set up if we want to be able to use
|
|
3834
|
-
# URLs.
|
|
3835
|
-
builder = tool._init_job(initialized_job_order, runtime_context)
|
|
4436
|
+
# Leave the defaults un-filled in the top-level order. The tool or
|
|
4437
|
+
# workflow will fill them when it runs
|
|
3836
4438
|
|
|
4439
|
+
for inp in tool.tool["inputs"]:
|
|
4440
|
+
if (
|
|
4441
|
+
shortname(inp["id"]) in initialized_job_order
|
|
4442
|
+
and inp["type"] == "File"
|
|
4443
|
+
):
|
|
4444
|
+
cast(CWLObjectType, initialized_job_order[shortname(inp["id"])])[
|
|
4445
|
+
"streamable"
|
|
4446
|
+
] = inp.get("streamable", False)
|
|
4447
|
+
# TODO also for nested types that contain streamable Files
|
|
4448
|
+
|
|
4449
|
+
runtime_context.use_container = not options.no_container
|
|
4450
|
+
runtime_context.tmp_outdir_prefix = os.path.realpath(tmp_outdir_prefix)
|
|
4451
|
+
runtime_context.job_script_provider = job_script_provider
|
|
4452
|
+
runtime_context.force_docker_pull = options.force_docker_pull
|
|
4453
|
+
runtime_context.no_match_user = options.no_match_user
|
|
4454
|
+
runtime_context.no_read_only = options.no_read_only
|
|
4455
|
+
runtime_context.basedir = options.basedir
|
|
4456
|
+
if not options.bypass_file_store:
|
|
4457
|
+
# If we're using the file store we need to start moving output
|
|
4458
|
+
# files now.
|
|
4459
|
+
runtime_context.move_outputs = "move"
|
|
4460
|
+
|
|
4461
|
+
# We instantiate an early builder object here to populate indirect
|
|
4462
|
+
# secondaryFile references using cwltool's library because we need
|
|
4463
|
+
# to resolve them before toil imports them into the filestore.
|
|
4464
|
+
# A second builder will be built in the job's run method when toil
|
|
4465
|
+
# actually starts the cwl job.
|
|
4466
|
+
# Note that this accesses input files for tools, so the
|
|
4467
|
+
# ToilFsAccess needs to be set up if we want to be able to use
|
|
4468
|
+
# URLs.
|
|
4469
|
+
builder = tool._init_job(initialized_job_order, runtime_context)
|
|
4470
|
+
if not isinstance(tool, cwltool.workflow.Workflow):
|
|
3837
4471
|
# make sure this doesn't add listing items; if shallow_listing is
|
|
3838
4472
|
# selected, it will discover dirs one deep and then again later on
|
|
3839
|
-
# (
|
|
3840
|
-
#
|
|
4473
|
+
# (when the cwltool builder gets constructed from the job in the
|
|
4474
|
+
# CommandLineTool's job() method,
|
|
4475
|
+
# see https://github.com/common-workflow-language/cwltool/blob/9cda157cb4380e9d30dec29f0452c56d0c10d064/cwltool/command_line_tool.py#L951),
|
|
4476
|
+
# producing 2+ deep listings instead of only 1.
|
|
4477
|
+
# ExpressionTool also uses a builder, see https://github.com/common-workflow-language/cwltool/blob/9cda157cb4380e9d30dec29f0452c56d0c10d064/cwltool/command_line_tool.py#L207
|
|
4478
|
+
# Workflows don't need this because they don't go through CommandLineTool or ExpressionTool
|
|
3841
4479
|
builder.loadListing = "no_listing"
|
|
3842
4480
|
|
|
3843
|
-
|
|
3844
|
-
|
|
3845
|
-
|
|
3846
|
-
|
|
3847
|
-
|
|
4481
|
+
# make sure this doesn't add listing items; if shallow_listing is
|
|
4482
|
+
# selected, it will discover dirs one deep and then again later on
|
|
4483
|
+
# (probably when the cwltool builder gets ahold of the job in the
|
|
4484
|
+
# CWL job's run()), producing 2+ deep listings instead of only 1.
|
|
4485
|
+
builder.loadListing = "no_listing"
|
|
3848
4486
|
|
|
3849
|
-
|
|
3850
|
-
|
|
3851
|
-
|
|
3852
|
-
|
|
3853
|
-
|
|
3854
|
-
file_import_function = cast(
|
|
3855
|
-
Callable[[str], FileID],
|
|
3856
|
-
functools.partial(toil.import_file, symlink=True),
|
|
3857
|
-
)
|
|
4487
|
+
builder.bind_input(
|
|
4488
|
+
tool.inputs_record_schema,
|
|
4489
|
+
initialized_job_order,
|
|
4490
|
+
discover_secondaryFiles=True,
|
|
4491
|
+
)
|
|
3858
4492
|
|
|
3859
|
-
|
|
3860
|
-
|
|
3861
|
-
|
|
3862
|
-
fs_access = ToilFsAccess(options.basedir)
|
|
3863
|
-
import_files(
|
|
3864
|
-
file_import_function,
|
|
3865
|
-
fs_access,
|
|
3866
|
-
fileindex,
|
|
3867
|
-
existing,
|
|
3868
|
-
initialized_job_order,
|
|
3869
|
-
mark_broken=True,
|
|
3870
|
-
skip_remote=options.reference_inputs,
|
|
3871
|
-
bypass_file_store=options.bypass_file_store,
|
|
3872
|
-
log_level=logging.INFO,
|
|
3873
|
-
)
|
|
3874
|
-
# Import all the files associated with tools (binaries, etc.).
|
|
3875
|
-
# Not sure why you would have an optional secondary file here, but
|
|
3876
|
-
# the spec probably needs us to support them.
|
|
3877
|
-
logger.info("Importing tool-associated files...")
|
|
3878
|
-
visitSteps(
|
|
3879
|
-
tool,
|
|
3880
|
-
functools.partial(
|
|
3881
|
-
import_files,
|
|
3882
|
-
file_import_function,
|
|
3883
|
-
fs_access,
|
|
3884
|
-
fileindex,
|
|
3885
|
-
existing,
|
|
3886
|
-
mark_broken=True,
|
|
3887
|
-
skip_remote=options.reference_inputs,
|
|
3888
|
-
bypass_file_store=options.bypass_file_store,
|
|
3889
|
-
log_level=logging.INFO,
|
|
3890
|
-
),
|
|
3891
|
-
)
|
|
4493
|
+
logger.info("Creating root job")
|
|
4494
|
+
logger.debug("Root tool: %s", tool)
|
|
4495
|
+
tool = remove_pickle_problems(tool)
|
|
3892
4496
|
|
|
3893
|
-
|
|
3894
|
-
|
|
3895
|
-
|
|
3896
|
-
|
|
3897
|
-
# the file store) or available on disk (for when we don't).
|
|
3898
|
-
# This will properly make them cause an error later if they
|
|
3899
|
-
# were required.
|
|
3900
|
-
rm_unprocessed_secondary_files(param_value)
|
|
3901
|
-
|
|
3902
|
-
logger.info("Creating root job")
|
|
3903
|
-
logger.debug("Root tool: %s", tool)
|
|
4497
|
+
with Toil(options, workflow_name=workflow_name, trs_spec=trs_spec) as toil:
|
|
4498
|
+
if options.restart:
|
|
4499
|
+
outobj = toil.restart()
|
|
4500
|
+
else:
|
|
3904
4501
|
try:
|
|
3905
|
-
wf1
|
|
4502
|
+
wf1 = makeRootJob(
|
|
3906
4503
|
tool=tool,
|
|
3907
4504
|
jobobj={},
|
|
3908
4505
|
runtime_context=runtime_context,
|
|
3909
|
-
|
|
3910
|
-
|
|
4506
|
+
initialized_job_order=initialized_job_order,
|
|
4507
|
+
options=options,
|
|
4508
|
+
toil=toil,
|
|
3911
4509
|
)
|
|
3912
4510
|
except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
|
|
3913
4511
|
logging.error(err)
|
|
3914
4512
|
return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
|
|
3915
|
-
wf1.cwljob = initialized_job_order
|
|
3916
4513
|
logger.info("Starting workflow")
|
|
3917
4514
|
outobj = toil.start(wf1)
|
|
3918
4515
|
|
|
@@ -3929,7 +4526,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3929
4526
|
outobj,
|
|
3930
4527
|
outdir,
|
|
3931
4528
|
destBucket=options.destBucket,
|
|
3932
|
-
log_level=logging.INFO
|
|
4529
|
+
log_level=logging.INFO,
|
|
3933
4530
|
)
|
|
3934
4531
|
logger.info("Stored workflow outputs")
|
|
3935
4532
|
|
|
@@ -3992,8 +4589,14 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3992
4589
|
else:
|
|
3993
4590
|
logging.error(err)
|
|
3994
4591
|
return 1
|
|
3995
|
-
except (
|
|
3996
|
-
|
|
4592
|
+
except (
|
|
4593
|
+
InsufficientSystemResources,
|
|
4594
|
+
LocatorException,
|
|
4595
|
+
InvalidImportExportUrlException,
|
|
4596
|
+
UnimplementedURLException,
|
|
4597
|
+
JobTooBigError,
|
|
4598
|
+
FileNotFoundError
|
|
4599
|
+
) as err:
|
|
3997
4600
|
logging.error(err)
|
|
3998
4601
|
return 1
|
|
3999
4602
|
|