toil 7.0.0__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +121 -83
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +38 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +489 -137
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +630 -359
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1114 -532
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +988 -315
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +727 -403
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +193 -58
- toil/lib/aws/utils.py +238 -218
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +99 -11
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +65 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +115 -77
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/options/common.py +834 -401
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +148 -64
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +93 -47
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/cwlTest.py +271 -71
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +11 -11
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3513 -1052
- toil/worker.py +269 -128
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py
CHANGED
@@ -1,4 +1,5 @@
 """Implemented support for Common Workflow Language (CWL) for Toil."""
+
 # Copyright (C) 2015 Curoverse, Inc
 # Copyright (C) 2015-2021 Regents of the University of California
 # Copyright (C) 2019-2020 Seven Bridges
@@ -33,25 +34,26 @@ import stat
 import sys
 import textwrap
 import uuid
+from collections.abc import Iterator, Mapping, MutableMapping, MutableSequence
 from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
 from threading import Thread
-from typing import (
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+from typing import (
+    IO,
+    Any,
+    Callable,
+    Iterator,
+    Mapping,
+    MutableMapping,
+    MutableSequence,
+    Optional,
+    TextIO,
+    Tuple,
+    TypeVar,
+    Union,
+    cast,
+    Literal,
+    Protocol,
+)
 from urllib.parse import quote, unquote, urlparse, urlsplit
 
 import cwl_utils.errors
@@ -65,59 +67,87 @@ import cwltool.load_tool
 import cwltool.main
 import cwltool.resolver
 import schema_salad.ref_resolver
+
+# This is also in configargparse but MyPy doesn't know it
+from argparse import RawDescriptionHelpFormatter
 from configargparse import ArgParser, Namespace
 from cwltool.loghandler import _logger as cwllogger
 from cwltool.loghandler import defaultStreamHandler
 from cwltool.mpi import MpiConfig
 from cwltool.mutation import MutationManager
 from cwltool.pathmapper import MapperEnt, PathMapper
-from cwltool.process import (
-
-
-
-
+from cwltool.process import (
+    Process,
+    add_sizes,
+    compute_checksums,
+    fill_in_defaults,
+    shortname,
+)
 from cwltool.secrets import SecretStore
-from cwltool.
-
+from cwltool.singularity import SingularityCommandLineJob
+from cwltool.software_requirements import (
+    DependenciesConfiguration,
+    get_container_from_software_requirements,
+)
 from cwltool.stdfsaccess import StdFsAccess, abspath
-from cwltool.utils import (
-
-
-
-
-
-
-
-
+from cwltool.utils import (
+    CWLObjectType,
+    CWLOutputType,
+    DirectoryType,
+    adjustDirObjs,
+    aslist,
+    downloadHttpFile,
+    get_listing,
+    normalizeFilesDirs,
+    visit_class,
+)
 from ruamel.yaml.comments import CommentedMap, CommentedSeq
 from schema_salad.avro.schema import Names
 from schema_salad.exceptions import ValidationException
 from schema_salad.ref_resolver import file_uri, uri_file_path
 from schema_salad.sourceline import SourceLine
-from typing_extensions import Literal
 
 from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
 from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
-from toil.common import Toil, addOptions
+from toil.common import Config, Toil, addOptions
 from toil.cwl import check_cwltool_version
+from toil.lib.integration import resolve_workflow
+from toil.lib.misc import call_command
 from toil.provisioners.clusterScaler import JobTooBigError
 
 check_cwltool_version()
-from toil.cwl.utils import (
-
-
-
-
+from toil.cwl.utils import (
+    CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
+    CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
+    download_structure,
+    get_from_structure,
+    visit_cwl_class_and_reduce,
+)
 from toil.exceptions import FailedJobsException
 from toil.fileStores import FileID
 from toil.fileStores.abstractFileStore import AbstractFileStore
-from toil.job import
-
-
+from toil.job import (
+    AcceleratorRequirement,
+    Job,
+    Promise,
+    Promised,
+    unwrap,
+    ImportsJob,
+    get_file_sizes,
+    FileMetadata,
+    WorkerImportJob,
+)
+from toil.jobStores.abstractJobStore import (
+    AbstractJobStore,
+    NoSuchFileException,
+    InvalidImportExportUrlException,
+    LocatorException,
+)
+from toil.lib.exceptions import UnimplementedURLException
 from toil.jobStores.fileJobStore import FileJobStore
 from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
 from toil.lib.io import mkdtemp
-from toil.lib.threading import ExceptionalThread
+from toil.lib.threading import ExceptionalThread, global_mutex
 from toil.statsAndLogging import DEFAULT_LOGLEVEL
 
 logger = logging.getLogger(__name__)
@@ -149,7 +179,7 @@ def cwltoil_was_removed() -> None:
 # output object to the correct key of the input object.
 
 
-class UnresolvedDict(
+class UnresolvedDict(dict[Any, Any]):
     """Tag to indicate a dict contains promises that must be resolved."""
 
 
@@ -184,7 +214,7 @@ def filter_skip_null(name: str, value: Any) -> Any:
     return value
 
 
-def _filter_skip_null(value: Any, err_flag:
+def _filter_skip_null(value: Any, err_flag: list[bool]) -> Any:
    """
    Private implementation for recursively filtering out SkipNull objects from 'value'.
 
@@ -233,18 +263,50 @@ def ensure_no_collisions(
     seen_names.add(wanted_name)
 
 
+def try_prepull(
+    cwl_tool_uri: str, runtime_context: cwltool.context.RuntimeContext, batchsystem: str
+) -> None:
+    """
+    Try to prepull all containers in a CWL workflow with Singularity or Docker.
+    This will not prepull the default container specified on the command line.
+    :param cwl_tool_uri: CWL workflow URL. Fragments are accepted as well
+    :param runtime_context: runtime context of cwltool
+    :param batchsystem: type of Toil batchsystem
+    :return:
+    """
+    if runtime_context.singularity:
+        if "CWL_SINGULARITY_CACHE" in os.environ:
+            logger.info("Prepulling the workflow's containers with Singularity...")
+            call_command(
+                [
+                    "cwl-docker-extract",
+                    "--singularity",
+                    "--dir",
+                    os.environ["CWL_SINGULARITY_CACHE"],
+                    cwl_tool_uri,
+                ]
+            )
+    elif not runtime_context.user_space_docker_cmd and not runtime_context.podman:
+        # For udocker and podman prefetching is unimplemented
+        # This is docker
+        if batchsystem == "single_machine":
+            # Only on single machine will the docker daemon be accessible by all workers and the leader
+            logger.info("Prepulling the workflow's containers with Docker...")
+            call_command(["cwl-docker-extract", cwl_tool_uri])
+
+
 class Conditional:
     """
     Object holding conditional expression until we are ready to evaluate it.
 
-    Evaluation occurs
+    Evaluation occurs before the enclosing step's inputs are type-checked.
     """
 
     def __init__(
         self,
         expression: Optional[str] = None,
-        outputs: Union[
-        requirements: Optional[
+        outputs: Union[dict[str, CWLOutputType], None] = None,
+        requirements: Optional[list[CWLObjectType]] = None,
         container_engine: str = "docker",
     ):
         """
@@ -289,7 +351,7 @@ class Conditional:
                 "'%s' evaluated to a non-boolean value" % self.expression
             )
 
-    def skipped_outputs(self) ->
+    def skipped_outputs(self) -> dict[str, SkipNull]:
         """Generate a dict of SkipNull objects corresponding to the output structure."""
         outobj = {}
 
@@ -309,14 +371,14 @@ class Conditional:
 class ResolveSource:
     """Apply linkMerge and pickValue operators to values coming into a port."""
 
-    promise_tuples: Union[
+    promise_tuples: Union[list[tuple[str, Promise]], tuple[str, Promise]]
 
     def __init__(
         self,
         name: str,
-        input:
+        input: dict[str, CWLObjectType],
         source_key: str,
-        promises:
+        promises: dict[str, Job],
     ):
         """
         Construct a container object.
@@ -375,7 +437,7 @@ class ResolveSource:
             )
         else:
             name, rv = self.promise_tuples
-            result = cast(
+            result = cast(dict[str, Any], rv).get(name)
 
         result = self.pick_value(result)
         result = filter_skip_null(self.name, result)
@@ -383,7 +445,7 @@ class ResolveSource:
 
     def link_merge(
         self, values: CWLObjectType
-    ) -> Union[
+    ) -> Union[list[CWLOutputType], CWLOutputType]:
         """
         Apply linkMerge operator to `values` object.
 
@@ -396,7 +458,7 @@ class ResolveSource:
             return values
 
         elif link_merge_type == "merge_flattened":
-            result:
+            result: list[CWLOutputType] = []
             for v in values:
                 if isinstance(v, MutableSequence):
                     result.extend(v)
@@ -409,7 +471,7 @@ class ResolveSource:
                 f"Unsupported linkMerge '{link_merge_type}' on {self.name}."
             )
 
-    def pick_value(self, values: Union[
+    def pick_value(self, values: Union[list[Union[str, SkipNull]], Any]) -> Any:
         """
         Apply pickValue operator to `values` object.
 
@@ -477,7 +539,7 @@ class StepValueFrom:
     """
 
     def __init__(
-        self, expr: str, source: Any, req:
+        self, expr: str, source: Any, req: list[CWLObjectType], container_engine: str
     ):
         """
         Instantiate an object to carry all know about this valueFrom expression.
@@ -609,7 +671,7 @@ class JustAValue:
 
 def resolve_dict_w_promises(
     dict_w_promises: Union[
-        UnresolvedDict, CWLObjectType,
+        UnresolvedDict, CWLObjectType, dict[str, Union[str, StepValueFrom]]
     ],
     file_store: Optional[AbstractFileStore] = None,
 ) -> CWLObjectType:
@@ -664,7 +726,7 @@ class ToilPathMapper(PathMapper):
 
     def __init__(
         self,
-        referenced_files:
+        referenced_files: list[CWLObjectType],
         basedir: str,
         stagedir: str,
         separateDirs: bool = True,
@@ -779,19 +841,44 @@ class ToilPathMapper(PathMapper):
         # TODO: why would we do that?
         stagedir = cast(Optional[str], obj.get("dirname")) or stagedir
 
-
-
-
-
-        )
+        if obj["class"] not in ("File", "Directory"):
+            # We only handle files and directories; only they have locations.
+            return
+
+        location = cast(str, obj["location"])
+        if location in self:
+            # If we've already mapped this, map it consistently.
+            tgt = self._pathmap[location].target
+            logger.debug(
+                "ToilPathMapper re-using target %s for path %s",
+                tgt,
+                location,
+            )
+        else:
+            # Decide where to put the file or directory, as an absolute path.
+            tgt = os.path.join(
+                stagedir,
+                cast(str, obj["basename"]),
+            )
+            if self.reversemap(tgt) is not None:
+                # If the target already exists in the pathmap, but we haven't yet
+                # mapped this, it means we have a conflict.
+                i = 2
+                new_tgt = f"{tgt}_{i}"
+                while self.reversemap(new_tgt) is not None:
+                    i += 1
+                    new_tgt = f"{tgt}_{i}"
+                logger.debug(
+                    "ToilPathMapper resolving mapping conflict: %s is now %s",
+                    tgt,
+                    new_tgt,
+                )
+                tgt = new_tgt
 
         if obj["class"] == "Directory":
             # Whether or not we've already mapped this path, we need to map all
             # children recursively.
 
-            # Grab its location
-            location = cast(str, obj["location"])
-
             logger.debug("ToilPathMapper visiting directory %s", location)
 
             # We want to check the directory to make sure it is not
@@ -877,7 +964,7 @@ class ToilPathMapper(PathMapper):
 
             # Keep recursing
             self.visitlisting(
-                cast(
+                cast(list[CWLObjectType], obj.get("listing", [])),
                 tgt,
                 basedir,
                 copy=copy,
@@ -885,23 +972,21 @@ class ToilPathMapper(PathMapper):
             )
 
         elif obj["class"] == "File":
-
+            logger.debug("ToilPathMapper visiting file %s", location)
 
-
-
-            if path in self._pathmap:
+            if location in self._pathmap:
                 # Don't map the same file twice
                 logger.debug(
                     "ToilPathMapper stopping recursion because we have already "
                     "mapped file: %s",
-
+                    location,
                 )
                 return
 
-            ab = abspath(
-            if "contents" in obj and
+            ab = abspath(location, basedir)
+            if "contents" in obj and location.startswith("_:"):
                 # We are supposed to create this file
-                self._pathmap[
+                self._pathmap[location] = MapperEnt(
                     cast(str, obj["contents"]),
                     tgt,
                     "CreateWritableFile" if copy else "CreateFile",
@@ -919,14 +1004,16 @@ class ToilPathMapper(PathMapper):
             # URI for a local file it downloaded.
             if self.get_file:
                 deref = self.get_file(
-
+                    location,
+                    obj.get("streamable", False),
+                    self.streaming_allowed,
                 )
             else:
                 deref = ab
             if deref.startswith("file:"):
                 deref = schema_salad.ref_resolver.uri_file_path(deref)
             if urlsplit(deref).scheme in ["http", "https"]:
-                deref = downloadHttpFile(
+                deref = downloadHttpFile(location)
             elif urlsplit(deref).scheme != "toilfile":
                 # Dereference symbolic links
                 st = os.lstat(deref)
@@ -944,42 +1031,18 @@ class ToilPathMapper(PathMapper):
             # reference, we just pass that along.
 
             """Link or copy files to their targets. Create them as needed."""
-            targets: Dict[str, str] = {}
-            for _, value in self._pathmap.items():
-                # If the target already exists in the pathmap, it means we have a conflict. But we didn't change tgt to reflect new name.
-                if value.target == tgt:  # Conflict detected in the pathmap
-                    i = 2
-                    new_tgt = f"{tgt}_{i}"
-                    while new_tgt in targets:
-                        i += 1
-                        new_tgt = f"{tgt}_{i}"
-                    targets[new_tgt] = new_tgt
-
-            for _, value_conflict in targets.items():
-                logger.debug(
-                    "ToilPathMapper adding file mapping for conflict %s -> %s",
-                    deref,
-                    value_conflict,
-                )
-                self._pathmap[path] = MapperEnt(
-                    deref,
-                    value_conflict,
-                    "WritableFile" if copy else "File",
-                    staged,
-                )
-            # No conflicts detected so we can write out the original name.
-            if not targets:
-                logger.debug(
-                    "ToilPathMapper adding file mapping %s -> %s", deref, tgt
-                )
 
-
-
-
+            logger.debug(
+                "ToilPathMapper adding file mapping %s -> %s", deref, tgt
+            )
+
+            self._pathmap[location] = MapperEnt(
+                deref, tgt, "WritableFile" if copy else "File", staged
+            )
 
             # Handle all secondary files that need to be next to this one.
             self.visitlisting(
-                cast(
+                cast(list[CWLObjectType], obj.get("secondaryFiles", [])),
                 stagedir,
                 basedir,
                 copy=copy,
@@ -1005,6 +1068,32 @@ class ToilSingleJobExecutor(cwltool.executors.SingleJobExecutor):
     ) -> None:
         """run_jobs from SingleJobExecutor, but not in a top level runtime context."""
         runtime_context.toplevel = False
+        if isinstance(
+            process, cwltool.command_line_tool.CommandLineTool
+        ) and isinstance(
+            process.make_job_runner(runtime_context), SingularityCommandLineJob
+        ):
+            # Set defaults for singularity cache environment variables, similar to what we do in wdltoil
+            # Use the same place as the default singularity cache directory
+            singularity_cache = os.path.join(os.path.expanduser("~"), ".singularity")
+            os.environ["SINGULARITY_CACHEDIR"] = os.environ.get(
+                "SINGULARITY_CACHEDIR", singularity_cache
+            )
+
+            # If singularity is detected, prepull the image to ensure locking
+            (docker_req, docker_is_req) = process.get_requirement(
+                feature="DockerRequirement"
+            )
+            with global_mutex(
+                os.environ["SINGULARITY_CACHEDIR"], "toil_singularity_cache_mutex"
+            ):
+                SingularityCommandLineJob.get_image(
+                    dockerRequirement=cast(dict[str, str], docker_req),
+                    pull_image=runtime_context.pull_image,
+                    force_pull=runtime_context.force_docker_pull,
+                    tmp_outdir_prefix=runtime_context.tmp_outdir_prefix,
+                )
+
         return super().run_jobs(process, job_order_object, logger, runtime_context)
 
 
@@ -1019,7 +1108,7 @@ class ToilTool:
         # Reserve a spot for the Toil job that ends up executing this tool.
         self._toil_job: Optional[Job] = None
         # Remember path mappers we have used so we can interrogate them later to find out what the job mapped.
-        self._path_mappers:
+        self._path_mappers: list[cwltool.pathmapper.PathMapper] = []
 
     def connect_toil_job(self, job: Job) -> None:
         """
@@ -1031,7 +1120,7 @@ class ToilTool:
 
     def make_path_mapper(
         self,
-        reffiles:
+        reffiles: list[Any],
         stagedir: str,
         runtimeContext: cwltool.context.RuntimeContext,
         separateDirs: bool,
@@ -1089,13 +1178,15 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
         # Make a table of all the places we mapped files to when downloading the inputs.
 
         # We want to hint which host paths and container (if any) paths correspond
-        host_and_job_paths:
+        host_and_job_paths: list[tuple[str, str]] = []
 
         for pm in self._path_mappers:
             for _, mapper_entry in pm.items_exclude_children():
                 # We know that mapper_entry.target as seen by the task is
                 # mapper_entry.resolved on the host.
-                host_and_job_paths.append(
+                host_and_job_paths.append(
+                    (mapper_entry.resolved, mapper_entry.target)
+                )
 
         # Notice that we have downloaded our inputs. Explain which files
         # those are here and what the task will expect to call them.
@@ -1127,7 +1218,7 @@ def toil_make_tool(
 # URI instead of raising an error right away, in case it is optional.
 MISSING_FILE = "missing://"
 
-DirectoryContents =
+DirectoryContents = dict[str, Union[str, "DirectoryContents"]]
 
 
 def check_directory_dict_invariants(contents: DirectoryContents) -> None:
@@ -1149,7 +1240,7 @@ def check_directory_dict_invariants(contents: DirectoryContents) -> None:
 
 def decode_directory(
     dir_path: str,
-) ->
+) -> tuple[DirectoryContents, Optional[str], str]:
     """
     Decode a directory from a "toildir:" path to a directory (or a file in it).
 
@@ -1224,7 +1315,7 @@ class ToilFsAccess(StdFsAccess):
         # they know what will happen.
         # Also maps files and directories from external URLs to downloaded
         # locations.
-        self.dir_to_download:
+        self.dir_to_download: dict[str, str] = {}
 
         super().__init__(basedir)
 
@@ -1347,14 +1438,16 @@ class ToilFsAccess(StdFsAccess):
         destination = super()._abs(destination)
         return destination
 
-    def glob(self, pattern: str) ->
+    def glob(self, pattern: str) -> list[str]:
         parse = urlparse(pattern)
         if parse.scheme == "file":
             pattern = os.path.abspath(unquote(parse.path))
         elif parse.scheme == "":
             pattern = os.path.abspath(pattern)
         else:
-            raise RuntimeError(
+            raise RuntimeError(
+                f"Cannot efficiently support globbing on {parse.scheme} URIs"
+            )
 
         # Actually do the glob
         return [schema_salad.ref_resolver.file_uri(f) for f in glob.glob(pattern)]
@@ -1391,12 +1484,12 @@ class ToilFsAccess(StdFsAccess):
         else:
             # This should be supported by a job store.
             byte_stream = AbstractJobStore.open_url(fn)
-            if
+            if "b" in mode:
                 # Pass stream along in binary
                 return byte_stream
             else:
                 # Wrap it in a text decoder
-                return io.TextIOWrapper(byte_stream, encoding=
+                return io.TextIOWrapper(byte_stream, encoding="utf-8")
 
     def exists(self, path: str) -> bool:
         """Test for file existence."""
@@ -1503,7 +1596,7 @@ class ToilFsAccess(StdFsAccess):
             logger.debug("AbstractJobStore said: %s", status)
             return status
 
-    def listdir(self, fn: str) ->
+    def listdir(self, fn: str) -> list[str]:
         # This needs to return full URLs for everything in the directory.
         # URLs are not allowed to end in '/', even for subdirectories.
         logger.debug("ToilFsAccess listing %s", fn)
@@ -1524,7 +1617,9 @@ class ToilFsAccess(StdFsAccess):
             if got is None:
                 raise RuntimeError(f"Cannot list nonexistent directory: {fn}")
             if isinstance(got, str):
-                raise RuntimeError(
+                raise RuntimeError(
+                    f"Cannot list file or dubdirectory of a file: {fn}"
+                )
             here = got
             # List all the things in here and make full URIs to them
             return [os.path.join(fn, k) for k in here.keys()]
@@ -1534,7 +1629,7 @@ class ToilFsAccess(StdFsAccess):
                 for entry in AbstractJobStore.list_url(fn)
             ]
 
-    def join(self, path, *paths
+    def join(self, path: str, *paths: str) -> str:
         # This falls back on os.path.join
         return super().join(path, *paths)
 
@@ -1547,12 +1642,12 @@ class ToilFsAccess(StdFsAccess):
 
 def toil_get_file(
     file_store: AbstractFileStore,
-    index:
-    existing:
+    index: dict[str, str],
+    existing: dict[str, str],
     uri: str,
     streamable: bool = False,
     streaming_allowed: bool = True,
-    pipe_threads: Optional[
+    pipe_threads: Optional[list[tuple[Thread, int]]] = None,
 ) -> str:
     """
     Set up the given file or directory from the Toil jobstore at a file URI
@@ -1653,9 +1748,7 @@ def toil_get_file(
         and streamable
         and not isinstance(file_store.jobStore, FileJobStore)
     ):
-        logger.debug(
-            "Streaming file %s", uri
-        )
+        logger.debug("Streaming file %s", uri)
         src_path = file_store.getLocalTempFileName()
         os.mkfifo(src_path)
         th = ExceptionalThread(
@@ -1677,34 +1770,35 @@ def toil_get_file(
     if uri.startswith("toilfile:"):
         # Download from the file store
         file_store_id = FileID.unpack(uri[len("toilfile:") :])
-        src_path = file_store.readGlobalFile(
-            file_store_id, symlink=True
-        )
+        src_path = file_store.readGlobalFile(file_store_id, symlink=True)
     else:
         # Download from the URI via the job store.
 
         # Figure out where it goes.
         src_path = file_store.getLocalTempFileName()
         # Open that path exclusively to make sure we created it
-        with open(src_path,
+        with open(src_path, "xb") as fh:
             # Download into the file
-
-
-
-
+            size, executable = AbstractJobStore.read_from_url(uri, fh)
+            if executable:
+                # Set the execute bit in the file's permissions
+                os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
 
     index[src_path] = uri
    existing[uri] = src_path
    return schema_salad.ref_resolver.file_uri(src_path)
 
-
-
-
-
+
+def convert_file_uri_to_toil_uri(
+    applyFunc: Callable[[str], FileID],
+    index: dict[str, str],
+    existing: dict[str, str],
     file_uri: str,
 ) -> str:
     """
-
+    Given a file URI, convert it to a toil file URI. Uses applyFunc to handle the conversion.
+
+    Runs once on every unique file URI.
 
     'existing' is a set of files retrieved as inputs from toil_get_file. This
     ensures they are mapped back as the same name if passed through.
@@ -1721,12 +1815,8 @@ def write_file(
     else:
         file_uri = existing.get(file_uri, file_uri)
         if file_uri not in index:
-            if not urlparse(file_uri).scheme:
-                rp = os.path.realpath(file_uri)
-            else:
-                rp = file_uri
             try:
-                index[file_uri] = "toilfile:" +
+                index[file_uri] = "toilfile:" + applyFunc(file_uri).pack()
                 existing[index[file_uri]] = file_uri
             except Exception as e:
                 logger.error("Got exception '%s' while copying '%s'", e, file_uri)
@@ -1745,17 +1835,93 @@ def path_to_loc(obj: CWLObjectType) -> None:
         del obj["path"]
 
 
-def
-
+def extract_file_uri_once(
+    fileindex: dict[str, str],
+    existing: dict[str, str],
+    file_metadata: CWLObjectType,
+    mark_broken: bool = False,
+    skip_remote: bool = False,
+) -> Optional[str]:
+    """
+    Extract the filename from a CWL file record.
+
+    This function matches the predefined function signature in visit_files, which ensures
+    that this function is called on all files inside a CWL object.
+
+    Ensures no duplicate files are returned according to fileindex. If a file has not been resolved already (and had file:// prepended)
+    then resolve symlinks.
+    :param fileindex: Forward mapping of filename
+    :param existing: Reverse mapping of filename. This function does not use this
+    :param file_metadata: CWL file record
+    :param mark_broken: Whether files should be marked as missing
+    :param skip_remote: Whether to skip remote files
+    :return:
+    """
+    location = cast(str, file_metadata["location"])
+    if (
+        location.startswith("toilfile:")
+        or location.startswith("toildir:")
+        or location.startswith("_:")
+    ):
+        return None
+    if location in fileindex:
+        file_metadata["location"] = fileindex[location]
+        return None
+    if not location and file_metadata["path"]:
+        file_metadata["location"] = location = schema_salad.ref_resolver.file_uri(
+            cast(str, file_metadata["path"])
+        )
+    if location.startswith("file://") and not os.path.isfile(
+        schema_salad.ref_resolver.uri_file_path(location)
+    ):
+        if mark_broken:
+            logger.debug("File %s is missing", file_metadata)
+            file_metadata["location"] = location = MISSING_FILE
+        else:
+            raise cwl_utils.errors.WorkflowException(
+                "File is missing: %s" % file_metadata
+            )
+    if location.startswith("file://") or not skip_remote:
+        # This is a local file or a remote file
+        if location not in fileindex:
+            # These dictionaries are meant to keep track of what we're going to import
+            # In the actual import, this is used as a bidirectional mapping from unvirtualized to virtualized
+            # For this case, keep track of the files to prevent returning duplicate files
+            # see write_file
+
+            # If there is not a scheme, this file has not been resolved yet or is a URL.
+            if not urlparse(location).scheme:
+                rp = os.path.realpath(location)
+            else:
+                rp = location
+            return rp
+    return None
+
+
+V = TypeVar("V", covariant=True)
+
+
+class VisitFunc(Protocol[V]):
+    def __call__(
+        self,
+        fileindex: dict[str, str],
+        existing: dict[str, str],
+        file_metadata: CWLObjectType,
+        mark_broken: bool,
+        skip_remote: bool,
+    ) -> V: ...
+
+
+def visit_files(
+    func: VisitFunc[V],
     fs_access: StdFsAccess,
-    fileindex:
-    existing:
+    fileindex: dict[str, str],
+    existing: dict[str, str],
     cwl_object: Optional[CWLObjectType],
     mark_broken: bool = False,
     skip_remote: bool = False,
     bypass_file_store: bool = False,
-
-) -> None:
+) -> list[V]:
     """
     Prepare all files and directories.
 
@@ -1801,18 +1967,12 @@ def import_files(
 
     :param log_level: Log imported files at the given level.
     """
+    func_return: list[Any] = list()
     tool_id = cwl_object.get("id", str(cwl_object)) if cwl_object else ""
 
     logger.debug("Importing files for %s", tool_id)
     logger.debug("Importing files in %s", cwl_object)
 
-    def import_and_log(url: str) -> FileID:
-        """
-        Upload a file and log that we are doing so.
-        """
-        logger.log(log_level, "Loading %s...", url)
-        return import_function(url)
-
     # We need to upload all files to the Toil filestore, and encode structure
     # recursively into all Directories' locations. But we cannot safely alter
     # the listing fields of Directory objects, because the handling required by
@@ -1830,13 +1990,13 @@ def import_files(
     if bypass_file_store:
         # Don't go on to actually import files or encode contents for
         # directories.
-        return
+        return func_return
 
     # Otherwise we actually want to put the things in the file store.
 
     def visit_file_or_directory_down(
         rec: CWLObjectType,
-    ) -> Optional[
+    ) -> Optional[list[CWLObjectType]]:
        """
        Visit each CWL File or Directory on the way down.
 
@@ -1863,7 +2023,7 @@ def import_files(
             ensure_no_collisions(cast(DirectoryType, rec))
 
             # Pull out the old listing, if any
-            old_listing = cast(Optional[
+            old_listing = cast(Optional[list[CWLObjectType]], rec.get("listing", None))
 
             if not cast(str, rec["location"]).startswith("_:"):
                 # This is a thing we can list and not just a literal, so we
@@ -1885,8 +2045,8 @@ def import_files(
 
     def visit_file_or_directory_up(
         rec: CWLObjectType,
-        down_result: Optional[
-        child_results:
+        down_result: Optional[list[CWLObjectType]],
+        child_results: list[DirectoryContents],
     ) -> DirectoryContents:
         """
         For a CWL File or Directory, make sure it is uploaded and it has a
@@ -1908,10 +2068,15 @@ def import_files(
             # This is a CWL File
 
             result: DirectoryContents = {}
-
-
-
-
+            # Run a function on the file and store the return
+            func_return.append(
+                func(
+                    fileindex,
+                    existing,
+                    rec,
+                    mark_broken=mark_broken,
+                    skip_remote=skip_remote,
+                )
             )
 
             # Make a record for this file under its name
@@ -1955,6 +2120,7 @@ def import_files(
         visit_file_or_directory_down,
         visit_file_or_directory_up,
     )
+    return func_return
 
 
 def upload_directory(
@@ -2013,52 +2179,34 @@ def upload_directory(
     directory_metadata["location"] = encode_directory(directory_contents)
 
 
-def
-
-    fileindex:
-    existing:
+def extract_and_convert_file_to_toil_uri(
+    convertfunc: Callable[[str], FileID],
+    fileindex: dict[str, str],
+    existing: dict[str, str],
     file_metadata: CWLObjectType,
     mark_broken: bool = False,
-    skip_remote: bool = False
+    skip_remote: bool = False,
 ) -> None:
     """
-
+    Extract the file URI out of a file object and convert it to a Toil URI.
+
+    Runs convertfunc on the file URI to handle conversion.
 
-
-    reference to the toil file store.
+    Is used to handle importing files into the jobstore.
 
     If a file doesn't exist, fails with an error, unless mark_broken is set, in
     which case the missing file is given a special sentinel location.
 
-    Unless skip_remote is set,
-
+    Unless skip_remote is set, also run on remote files and sets their locations
+    to toil URIs as well.
     """
-    location =
-
-
-
-
-
-        return
-    if location in fileindex:
-        file_metadata["location"] = fileindex[location]
-        return
-    if not location and file_metadata["path"]:
-        file_metadata["location"] = location = schema_salad.ref_resolver.file_uri(
-            cast(str, file_metadata["path"])
+    location = extract_file_uri_once(
+        fileindex, existing, file_metadata, mark_broken, skip_remote
+    )
+    if location is not None:
+        file_metadata["location"] = convert_file_uri_to_toil_uri(
+            convertfunc, fileindex, existing, location
         )
-    if location.startswith("file://") and not os.path.isfile(
-        schema_salad.ref_resolver.uri_file_path(location)
-    ):
-        if mark_broken:
-            logger.debug("File %s is missing", file_metadata)
-            file_metadata["location"] = location = MISSING_FILE
-        else:
-            raise cwl_utils.errors.WorkflowException("File is missing: %s" % file_metadata)
-
-    if location.startswith("file://") or not skip_remote:
-        # This is a local file, or we also need to download and re-upload remote files
-        file_metadata["location"] = write_file(uploadfunc, fileindex, existing, location)
 
     logger.debug("Sending file at: %s", file_metadata["location"])
 
@@ -2071,7 +2219,7 @@ def writeGlobalFileWrapper(file_store: AbstractFileStore, fileuri: str) -> FileI
 
 def remove_empty_listings(rec: CWLObjectType) -> None:
     if rec.get("class") != "Directory":
-        finddirs
+        finddirs: list[CWLObjectType] = []
         visit_class(rec, ("Directory",), finddirs.append)
         for f in finddirs:
             remove_empty_listings(f)
@@ -2091,7 +2239,7 @@ class CWLNamedJob(Job):
         cores: Union[float, None] = 1,
         memory: Union[int, str, None] = "1GiB",
         disk: Union[int, str, None] = "1MiB",
-        accelerators: Optional[
+        accelerators: Optional[list[AcceleratorRequirement]] = None,
         preemptible: Optional[bool] = None,
         tool_id: Optional[str] = None,
         parent_name: Optional[str] = None,
@@ -2166,10 +2314,10 @@ class ResolveIndirect(CWLNamedJob):
 
 def toilStageFiles(
     toil: Toil,
-    cwljob: Union[CWLObjectType,
+    cwljob: Union[CWLObjectType, list[CWLObjectType]],
     outdir: str,
     destBucket: Union[str, None] = None,
-    log_level: int = logging.DEBUG
+    log_level: int = logging.DEBUG,
 ) -> None:
     """
     Copy input files out of the global file store and update location and path.
@@ -2181,7 +2329,7 @@ def toilStageFiles(
     """
 
     def _collectDirEntries(
-        obj: Union[CWLObjectType,
+        obj: Union[CWLObjectType, list[CWLObjectType]]
     ) -> Iterator[CWLObjectType]:
         if isinstance(obj, dict):
             if obj.get("class") in ("File", "Directory"):
@@ -2263,13 +2411,17 @@ def toilStageFiles(
                     # TODO: Use direct S3 to S3 copy on exports as well
                     file_id_or_contents = (
                         "toilfile:"
-                        + toil.import_file(
+                        + toil.import_file(
+                            file_id_or_contents, symlink=False
+                        ).pack()
                     )
 
                 if file_id_or_contents.startswith("toilfile:"):
                     # This is something we can export
                     # TODO: Do we need to urlencode the parts before sending them to S3?
-                    dest_url = "/".join(
+                    dest_url = "/".join(
+                        s.strip("/") for s in [destBucket, baseName]
+                    )
                     logger.log(log_level, "Saving %s...", dest_url)
                     toil.export_file(
                         FileID.unpack(file_id_or_contents[len("toilfile:") :]),
@@ -2291,7 +2443,12 @@ def toilStageFiles(
                     # Probably staging and bypassing file store. Just copy.
                     logger.log(log_level, "Saving %s...", dest_url)
                     os.makedirs(os.path.dirname(p.target), exist_ok=True)
-
+                    try:
+                        shutil.copyfile(p.resolved, p.target)
+                    except shutil.SameFileError:
+                        # If outdir isn't set and we're passing through an input file/directory as the output,
+                        # the file doesn't need to be copied because it is already there
+                        pass
             else:
                 uri = p.resolved
                 if not uri.startswith("toilfile:"):
@@ -2364,26 +2521,31 @@ class CWLJobWrapper(CWLNamedJob):
             subjob_name="_wrapper",
             local=True,
         )
-        self.cwltool =
+        self.cwltool = tool
         self.cwljob = cwljob
         self.runtime_context = runtime_context
-        self.conditional = conditional
+        self.conditional = conditional or Conditional()
         self.parent_name = parent_name
 
     def run(self, file_store: AbstractFileStore) -> Any:
         """Create a child job with the correct resource requirements set."""
         cwljob = resolve_dict_w_promises(self.cwljob, file_store)
+
+        # Check confitional to license full evaluation of job inputs.
+        if self.conditional.is_false(cwljob):
+            return self.conditional.skipped_outputs()
+
         fill_in_defaults(
             self.cwltool.tool["inputs"],
             cwljob,
             self.runtime_context.make_fs_access(self.runtime_context.basedir or ""),
         )
+        # Don't forward the conditional. We checked it already.
         realjob = CWLJob(
             tool=self.cwltool,
             cwljob=cwljob,
             runtime_context=self.runtime_context,
             parent_name=self.parent_name,
-            conditional=self.conditional,
         )
         self.addChild(realjob)
         return realjob.rv()
@@ -2401,7 +2563,7 @@ class CWLJob(CWLNamedJob):
         conditional: Union[Conditional, None] = None,
     ):
         """Store the context for later execution."""
-        self.cwltool =
+        self.cwltool = tool
         self.conditional = conditional or Conditional()
 
         if runtime_context.builder:
@@ -2418,7 +2580,7 @@ class CWLJob(CWLNamedJob):
                 resources={},
                 mutation_manager=runtime_context.mutation_manager,
                 formatgraph=tool.formatgraph,
-                make_fs_access=cast(
+                make_fs_access=cast(type[StdFsAccess], runtime_context.make_fs_access),
                 fs_access=runtime_context.make_fs_access(""),
                 job_script_provider=runtime_context.job_script_provider,
                 timeout=runtime_context.eval_timeout,
@@ -2435,7 +2597,21 @@ class CWLJob(CWLNamedJob):
|
|
|
2435
2597
|
|
|
2436
2598
|
req = tool.evalResources(self.builder, runtime_context)
|
|
2437
2599
|
|
|
2438
|
-
|
|
2600
|
+
tool_own_resources = tool.get_requirement("ResourceRequirement")[0] or {}
|
|
2601
|
+
if "ramMin" in tool_own_resources or "ramMax" in tool_own_resources:
|
|
2602
|
+
# The tool is actually asking for memory.
|
|
2603
|
+
memory = int(req["ram"] * (2**20))
|
|
2604
|
+
else:
|
|
2605
|
+
# The tool is getting a default ram allocation.
|
|
2606
|
+
if getattr(runtime_context, "cwl_default_ram"):
|
|
2607
|
+
# We will respect the CWL spec and apply the default cwltool
|
|
2608
|
+
# computed, which might be different than Toil's default.
|
|
2609
|
+
memory = int(req["ram"] * (2**20))
|
|
2610
|
+
else:
|
|
2611
|
+
# We use a None requirement and the Toil default applies.
|
|
2612
|
+
memory = None
|
|
2613
|
+
|
|
2614
|
+
accelerators: Optional[list[AcceleratorRequirement]] = None
|
|
2439
2615
|
if req.get("cudaDeviceCount", 0) > 0:
|
|
2440
2616
|
# There's a CUDARequirement, which cwltool processed for us
|
|
2441
2617
|
# TODO: How is cwltool deciding what value to use between min and max?
|
|
@@ -2499,7 +2675,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2499
2675
|
|
|
2500
2676
|
super().__init__(
|
|
2501
2677
|
cores=req["cores"],
|
|
2502
|
-
memory=
|
|
2678
|
+
memory=memory,
|
|
2503
2679
|
disk=int(total_disk),
|
|
2504
2680
|
accelerators=accelerators,
|
|
2505
2681
|
preemptible=preemptible,
|
|
@@ -2513,7 +2689,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2513
2689
|
self.step_inputs = self.cwltool.tool["inputs"]
|
|
2514
2690
|
self.workdir: str = runtime_context.workdir # type: ignore[attr-defined]
|
|
2515
2691
|
|
|
2516
|
-
def required_env_vars(self, cwljob: Any) -> Iterator[
|
|
2692
|
+
def required_env_vars(self, cwljob: Any) -> Iterator[tuple[str, str]]:
|
|
2517
2693
|
"""Yield environment variables from EnvVarRequirement."""
|
|
2518
2694
|
if isinstance(cwljob, dict):
|
|
2519
2695
|
if cwljob.get("class") == "EnvVarRequirement":
|
|
@@ -2525,7 +2701,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2525
2701
|
for env_var in cwljob:
|
|
2526
2702
|
yield from self.required_env_vars(env_var)
|
|
2527
2703
|
|
|
2528
|
-
def populate_env_vars(self, cwljob: CWLObjectType) ->
|
|
2704
|
+
def populate_env_vars(self, cwljob: CWLObjectType) -> dict[str, str]:
|
|
2529
2705
|
"""
|
|
2530
2706
|
Prepare environment variables necessary at runtime for the job.
|
|
2531
2707
|
|
|
@@ -2541,9 +2717,9 @@ class CWLJob(CWLNamedJob):
|
|
|
2541
2717
|
required_env_vars = {}
|
|
2542
2718
|
# iterate over EnvVarRequirement env vars, if any
|
|
2543
2719
|
for k, v in self.required_env_vars(cwljob):
|
|
2544
|
-
required_env_vars[
|
|
2545
|
-
|
|
2546
|
-
|
|
2720
|
+
required_env_vars[k] = (
|
|
2721
|
+
v # will tell cwltool which env vars to take from the environment
|
|
2722
|
+
)
|
|
2547
2723
|
os.environ[k] = v
|
|
2548
2724
|
# needs to actually be populated in the environment as well or
|
|
2549
2725
|
# they're not used
|
|
@@ -2553,7 +2729,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2553
2729
|
# env var with the same name is found
|
|
2554
2730
|
for req in self.cwltool.requirements:
|
|
2555
2731
|
if req["class"] == "EnvVarRequirement":
|
|
2556
|
-
envDefs = cast(
|
|
2732
|
+
envDefs = cast(list[dict[str, str]], req["envDef"])
|
|
2557
2733
|
for env_def in envDefs:
|
|
2558
2734
|
env_name = env_def["envName"]
|
|
2559
2735
|
if env_name in required_env_vars:
|
|
@@ -2585,7 +2761,7 @@ class CWLJob(CWLNamedJob):
         for inp_id in immobile_cwljob_dict.keys():
             found = False
             for field in cast(
-                List[Dict[str, str]], self.cwltool.inputs_record_schema["fields"]
+                list[dict[str, str]], self.cwltool.inputs_record_schema["fields"]
             ):
                 if field["name"] == inp_id:
                     found = True
@@ -2600,8 +2776,8 @@ class CWLJob(CWLNamedJob):
             functools.partial(remove_empty_listings),
         )

-        index: Dict[str, str] = {}
-        existing: Dict[str, str] = {}
+        index: dict[str, str] = {}
+        existing: dict[str, str] = {}

         # Prepare the run instructions for cwltool
         runtime_context = self.runtime_context.copy()
@@ -2613,7 +2789,7 @@ class CWLJob(CWLNamedJob):
         # will come and grab this function for fetching files from the Toil
         # file store. pipe_threads is used for keeping track of separate
         # threads launched to stream files around.
-        pipe_threads: List[Tuple[Thread, int]] = []
+        pipe_threads: list[tuple[Thread, int]] = []
         setattr(
             runtime_context,
             "toil_get_file",
@@ -2647,7 +2823,7 @@ class CWLJob(CWLNamedJob):
         # function and a path_mapper type or factory function.

         runtime_context.make_fs_access = cast(
-            Type[StdFsAccess],
+            type[StdFsAccess],
             functools.partial(ToilFsAccess, file_store=file_store),
         )

@@ -2660,9 +2836,13 @@ class CWLJob(CWLNamedJob):
         # Collect standard output and standard error somewhere if they don't go to files.
         # We need to keep two FDs to these because cwltool will close what we give it.
         default_stdout = TemporaryFile()
-        runtime_context.default_stdout = os.fdopen(
+        runtime_context.default_stdout = os.fdopen(
+            os.dup(default_stdout.fileno()), "wb"
+        )
         default_stderr = TemporaryFile()
-        runtime_context.default_stderr = os.fdopen(
+        runtime_context.default_stderr = os.fdopen(
+            os.dup(default_stderr.fileno()), "wb"
+        )

         process_uuid = uuid.uuid4()  # noqa F841
         started_at = datetime.datetime.now()  # noqa F841
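The os.dup() calls above exist because cwltool closes whatever file object it is handed; duplicating the descriptor keeps the underlying TemporaryFile readable afterwards so the captured output can still be inspected and logged. A small self-contained illustration of the pattern:

    # Why os.dup matters: a consumer may close the handle we pass it, but a
    # duplicated descriptor keeps the underlying temporary file usable.
    import os
    from tempfile import TemporaryFile

    capture = TemporaryFile()
    handed_out = os.fdopen(os.dup(capture.fileno()), "wb")
    handed_out.write(b"tool output\n")
    handed_out.close()  # the consumer closes its copy; "capture" stays open

    capture.seek(0)
    assert capture.read() == b"tool output\n"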
@@ -2693,17 +2873,27 @@ class CWLJob(CWLNamedJob):
         default_stdout.seek(0, os.SEEK_END)
         if default_stdout.tell() > 0:
             default_stdout.seek(0)
-            file_store.log_user_stream(
+            file_store.log_user_stream(
+                self.description.unitName + ".stdout", default_stdout
+            )
             if status != "success":
                 default_stdout.seek(0)
-                logger.error(
+                logger.error(
+                    "Failed command standard output:\n%s",
+                    default_stdout.read().decode("utf-8", errors="replace"),
+                )
         default_stderr.seek(0, os.SEEK_END)
         if default_stderr.tell():
             default_stderr.seek(0)
-            file_store.log_user_stream(
+            file_store.log_user_stream(
+                self.description.unitName + ".stderr", default_stderr
+            )
             if status != "success":
                 default_stderr.seek(0)
-                logger.error(
+                logger.error(
+                    "Failed command standard error:\n%s",
+                    default_stderr.read().decode("utf-8", errors="replace"),
+                )

         if status != "success":
             raise cwl_utils.errors.WorkflowException(status)
@@ -2716,12 +2906,18 @@ class CWLJob(CWLNamedJob):
         fs_access = runtime_context.make_fs_access(runtime_context.basedir)

         # And a file importer that can go from a file:// URI to a Toil FileID
-        file_import_function =
+        def file_import_function(url: str, log_level: int = logging.DEBUG) -> FileID:
+            logger.log(log_level, "Loading %s...", url)
+            return writeGlobalFileWrapper(file_store, url)
+
+        file_upload_function = functools.partial(
+            extract_and_convert_file_to_toil_uri, file_import_function
+        )

         # Upload all the Files and set their and the Directories' locations, if
         # needed.
-
-
+        visit_files(
+            file_upload_function,
             fs_access,
             index,
             existing,
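The hunk above replaces a bare assignment with a named import callback bound into extract_and_convert_file_to_toil_uri via functools.partial, and routes the upload through visit_files. A sketch of that composition with the converter and the Toil FileID stubbed out; only functools.partial here is the real API:

    # Sketch of binding an import callback into a generic converter with
    # functools.partial; extract_and_convert_file_to_toil_uri is stubbed.
    import functools
    import logging

    logger = logging.getLogger(__name__)

    def import_url(url: str, log_level: int = logging.DEBUG) -> str:
        logger.log(log_level, "Loading %s...", url)
        return "toilfile:" + url  # stand-in for a real Toil FileID

    def extract_and_convert(importer, file_obj: dict) -> None:
        # Stand-in converter: rewrite the object's location via the importer.
        file_obj["location"] = importer(file_obj["location"])

    upload = functools.partial(extract_and_convert, import_url)
    f = {"class": "File", "location": "file:///tmp/x.txt"}
    upload(f)
    assert f["location"] == "toilfile:file:///tmp/x.txt"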
@@ -2751,6 +2947,74 @@ def get_container_engine(runtime_context: cwltool.context.RuntimeContext) -> str
     return "docker"


+def makeRootJob(
+    tool: Process,
+    jobobj: CWLObjectType,
+    runtime_context: cwltool.context.RuntimeContext,
+    initialized_job_order: CWLObjectType,
+    options: Namespace,
+    toil: Toil,
+) -> CWLNamedJob:
+    """
+    Create the Toil root Job object for the CWL tool. Is the same as makeJob() except this also handles import logic.
+
+    Actually creates what might be a subgraph of two jobs. The second of which may be the follow on of the first.
+    If only one job is created, it is returned twice.
+
+    :return:
+    """
+    if options.run_imports_on_workers:
+        filenames = extract_workflow_inputs(options, initialized_job_order, tool)
+        metadata = get_file_sizes(
+            filenames, toil._jobStore, include_remote_files=options.reference_inputs
+        )
+
+        # Mapping of files to metadata for files that will be imported on the worker
+        # This will consist of files that we were able to get a file size for
+        worker_metadata: dict[str, FileMetadata] = dict()
+        # Mapping of files to metadata for files that will be imported on the leader
+        # This will consist of files that we were not able to get a file size for
+        leader_metadata = dict()
+        for filename, file_data in metadata.items():
+            if file_data.size is None:
+                leader_metadata[filename] = file_data
+            else:
+                worker_metadata[filename] = file_data
+
+        # import the files for the leader first
+        path_to_fileid = WorkerImportJob.import_files(
+            list(leader_metadata.keys()), toil._jobStore
+        )
+
+        # then install the imported files before importing the other files
+        # this way the control flow can fall from the leader to workers
+        tool, initialized_job_order = CWLInstallImportsJob.fill_in_files(
+            initialized_job_order,
+            tool,
+            path_to_fileid,
+            options.basedir,
+            options.reference_inputs,
+            options.bypass_file_store,
+        )
+
+        import_job = CWLImportWrapper(
+            initialized_job_order, tool, runtime_context, worker_metadata, options
+        )
+        return import_job
+    else:
+        import_workflow_inputs(
+            toil._jobStore,
+            options,
+            initialized_job_order=initialized_job_order,
+            tool=tool,
+        )
+        root_job, followOn = makeJob(
+            tool, jobobj, runtime_context, None, None
+        )  # toplevel, no name needed
+        root_job.cwljob = initialized_job_order
+        return root_job
+
+
 def makeJob(
     tool: Process,
     jobobj: CWLObjectType,
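makeRootJob above partitions the inputs by whether a size could be determined: unsized files are imported right away on the leader, sized ones are handed to worker import jobs. A sketch of just that partitioning step, with FileMetadata reduced to a namedtuple for illustration:

    # Partitioning files between leader and worker imports, as in makeRootJob;
    # FileMetadata is a stand-in namedtuple, not the real Toil type.
    from collections import namedtuple

    FileMetadata = namedtuple("FileMetadata", ["size"])

    def split_imports(metadata: dict[str, FileMetadata]):
        worker: dict[str, FileMetadata] = {}
        leader: dict[str, FileMetadata] = {}
        for filename, data in metadata.items():
            (leader if data.size is None else worker)[filename] = data
        return leader, worker

    leader, worker = split_imports(
        {"a.txt": FileMetadata(10), "ftp://x/b": FileMetadata(None)}
    )
    assert list(leader) == ["ftp://x/b"] and list(worker) == ["a.txt"]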
@@ -2758,13 +3022,16 @@ def makeJob(
     parent_name: Optional[str],
     conditional: Union[Conditional, None],
 ) -> Union[
-    Tuple["CWLWorkflow", ResolveIndirect],
-    Tuple[CWLJob, CWLJob],
-    Tuple[CWLJobWrapper, CWLJobWrapper],
+    tuple["CWLWorkflow", ResolveIndirect],
+    tuple[CWLJob, CWLJob],
+    tuple[CWLJobWrapper, CWLJobWrapper],
 ]:
     """
     Create the correct Toil Job object for the CWL tool.

+    Actually creates what might be a subgraph of two jobs. The second of which may be the follow on of the first.
+    If only one job is created, it is returned twice.
+
     Types: workflow, job, or job wrapper for dynamic resource requirements.

     :return: "wfjob, followOn" if the input tool is a workflow, and "job, job" otherwise
@@ -2844,16 +3111,16 @@ class CWLScatter(Job):
     def flat_crossproduct_scatter(
         self,
         joborder: CWLObjectType,
-        scatter_keys: List[str],
-        outputs: List[Promised[CWLObjectType]],
+        scatter_keys: list[str],
+        outputs: list[Promised[CWLObjectType]],
         postScatterEval: Callable[[CWLObjectType], CWLObjectType],
     ) -> None:
         """Cartesian product of the inputs, then flattened."""
         scatter_key = shortname(scatter_keys[0])
-        for n in range(0, len(cast(List[CWLObjectType], joborder[scatter_key]))):
+        for n in range(0, len(cast(list[CWLObjectType], joborder[scatter_key]))):
             updated_joborder = copy.copy(joborder)
             updated_joborder[scatter_key] = cast(
-                List[CWLObjectType], joborder[scatter_key]
+                list[CWLObjectType], joborder[scatter_key]
             )[n]
             if len(scatter_keys) == 1:
                 updated_joborder = postScatterEval(updated_joborder)
@@ -2874,16 +3141,16 @@ class CWLScatter(Job):
     def nested_crossproduct_scatter(
         self,
         joborder: CWLObjectType,
-        scatter_keys: List[str],
+        scatter_keys: list[str],
         postScatterEval: Callable[[CWLObjectType], CWLObjectType],
-    ) -> List[Promised[CWLObjectType]]:
+    ) -> list[Promised[CWLObjectType]]:
         """Cartesian product of the inputs."""
         scatter_key = shortname(scatter_keys[0])
-        outputs: List[Promised[CWLObjectType]] = []
-        for n in range(0, len(cast(List[CWLObjectType], joborder[scatter_key]))):
+        outputs: list[Promised[CWLObjectType]] = []
+        for n in range(0, len(cast(list[CWLObjectType], joborder[scatter_key]))):
             updated_joborder = copy.copy(joborder)
             updated_joborder[scatter_key] = cast(
-                List[CWLObjectType], joborder[scatter_key]
+                list[CWLObjectType], joborder[scatter_key]
             )[n]
             if len(scatter_keys) == 1:
                 updated_joborder = postScatterEval(updated_joborder)
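The scatter hunks above only touch type hints, but the two traversals differ in shape: flat_crossproduct_scatter appends every combination into one shared list, while nested_crossproduct_scatter returns one list per scatter level. A plain-data sketch of both semantics, not using the real CWL types:

    # Plain-data sketch of flat vs nested cross-product scatter semantics.
    import copy

    def flat_scatter(joborder: dict, keys: list, out: list) -> None:
        key = keys[0]
        for value in joborder[key]:
            updated = copy.copy(joborder)
            updated[key] = value
            if len(keys) == 1:
                out.append(updated)  # flattened: one shared list
            else:
                flat_scatter(updated, keys[1:], out)

    def nested_scatter(joborder: dict, keys: list) -> list:
        key = keys[0]
        out = []
        for value in joborder[key]:
            updated = copy.copy(joborder)
            updated[key] = value
            out.append(updated if len(keys) == 1 else nested_scatter(updated, keys[1:]))
        return out  # nested: one list per level

    job = {"x": [1, 2], "y": ["a", "b"]}
    flat: list = []
    flat_scatter(job, ["x", "y"], flat)
    assert len(flat) == 4
    assert len(nested_scatter(job, ["x", "y"])) == 2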
@@ -2904,7 +3171,7 @@ class CWLScatter(Job):
         )
         return outputs

-    def run(self, file_store: AbstractFileStore) -> List[Promised[CWLObjectType]]:
+    def run(self, file_store: AbstractFileStore) -> list[Promised[CWLObjectType]]:
         """Generate the follow on scatter jobs."""
         cwljob = resolve_dict_w_promises(self.cwljob, file_store)

@@ -2916,7 +3183,7 @@ class CWLScatter(Job):
         scatterMethod = self.step.tool.get("scatterMethod", None)
         if len(scatter) == 1:
             scatterMethod = "dotproduct"
-        outputs: List[Promised[CWLObjectType]] = []
+        outputs: list[Promised[CWLObjectType]] = []

         valueFrom = {
             shortname(i["id"]): i["valueFrom"]
@@ -2948,11 +3215,11 @@ class CWLScatter(Job):

         if scatterMethod == "dotproduct":
             for i in range(
-                0, len(cast(List[CWLObjectType], cwljob[shortname(scatter[0])]))
+                0, len(cast(list[CWLObjectType], cwljob[shortname(scatter[0])]))
             ):
                 copyjob = copy.copy(cwljob)
                 for sc in [shortname(x) for x in scatter]:
-                    copyjob[sc] = cast(List[CWLObjectType], cwljob[sc])[i]
+                    copyjob[sc] = cast(list[CWLObjectType], cwljob[sc])[i]
                 copyjob = postScatterEval(copyjob)
                 subjob, follow_on = makeJob(
                     tool=self.step.embedded_tool,
@@ -2991,7 +3258,7 @@ class CWLGather(Job):
     def __init__(
         self,
         step: cwltool.workflow.WorkflowStep,
-        outputs: Promised[Union[CWLObjectType, List[CWLObjectType]]],
+        outputs: Promised[Union[CWLObjectType, list[CWLObjectType]]],
     ):
         """Collect our context for later gathering."""
         super().__init__(cores=1, memory="1GiB", disk="1MiB", local=True)
@@ -3000,24 +3267,24 @@ class CWLGather(Job):

     @staticmethod
     def extract(
-        obj: Union[CWLObjectType, List[CWLObjectType]], k: str
-    ) -> Union[CWLOutputType, List[CWLObjectType]]:
+        obj: Union[CWLObjectType, list[CWLObjectType]], k: str
+    ) -> Union[CWLOutputType, list[CWLObjectType]]:
         """
         Extract the given key from the obj.

         If the object is a list, extract it from all members of the list.
         """
         if isinstance(obj, Mapping):
-            return cast(Union[CWLOutputType, List[CWLObjectType]], obj.get(k))
+            return cast(Union[CWLOutputType, list[CWLObjectType]], obj.get(k))
         elif isinstance(obj, MutableSequence):
-            cp: List[CWLObjectType] = []
+            cp: list[CWLObjectType] = []
             for item in obj:
                 cp.append(cast(CWLObjectType, CWLGather.extract(item, k)))
             return cp
         else:
-            return cast(
+            return cast(list[CWLObjectType], [])

-    def run(self, file_store: AbstractFileStore) ->
+    def run(self, file_store: AbstractFileStore) -> dict[str, Any]:
         """Gather all the outputs of the scatter."""
         outobj = {}

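CWLGather.extract above recurses into sequences so a scattered step's outputs can be gathered uniformly whether the promise resolved to one object or a list of them. A hedged sketch of the same recursion over plain data:

    # Sketch of the CWLGather.extract recursion over mappings and sequences.
    from collections.abc import Mapping, MutableSequence

    def extract(obj, k):
        if isinstance(obj, Mapping):
            return obj.get(k)
        if isinstance(obj, MutableSequence):
            return [extract(item, k) for item in obj]
        return []

    assert extract({"out": 1}, "out") == 1
    assert extract([{"out": 1}, {"out": 2}], "out") == [1, 2]
    assert extract(None, "out") == []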
@@ -3028,8 +3295,8 @@ class CWLGather(Job):
             return shortname(n)

         # TODO: MyPy can't understand that this is the type we should get by unwrapping the promise
-        outputs: Union[CWLObjectType, List[CWLObjectType]] = cast(
-            Union[CWLObjectType, List[CWLObjectType]], unwrap(self.outputs)
+        outputs: Union[CWLObjectType, list[CWLObjectType]] = cast(
+            Union[CWLObjectType, list[CWLObjectType]], unwrap(self.outputs)
         )
         for k in [sn(i) for i in self.step.tool["out"]]:
             outobj[k] = self.extract(outputs, k)
@@ -3071,7 +3338,11 @@ ProcessType = TypeVar(


 def remove_pickle_problems(obj: ProcessType) -> ProcessType:
-    """
+    """
+    Doc_loader does not pickle correctly, causing Toil errors, remove from objects.
+
+    See github issue: https://github.com/mypyc/mypyc/issues/804
+    """
     if hasattr(obj, "doc_loader"):
         obj.doc_loader = None
     if isinstance(obj, cwltool.workflow.WorkflowStep):
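The expanded remove_pickle_problems docstring records why doc_loader is stripped: the loader object does not pickle, which breaks Toil's job serialization. A sketch of the strip-before-pickle pattern, using a lambda as the stand-in unpicklable attribute:

    # Sketch of stripping an unpicklable attribute before shipping an object
    # through pickle, as remove_pickle_problems does for doc_loader.
    import pickle

    class Tool:
        def __init__(self):
            self.doc_loader = lambda x: x  # lambdas do not pickle

    def remove_pickle_problems(obj):
        if hasattr(obj, "doc_loader"):
            obj.doc_loader = None
        return obj

    t = remove_pickle_problems(Tool())
    assert pickle.loads(pickle.dumps(t)).doc_loader is None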
@@ -3103,12 +3374,11 @@ class CWLWorkflow(CWLNamedJob):
         self.cwlwf = cwlwf
         self.cwljob = cwljob
         self.runtime_context = runtime_context
-        self.cwlwf = remove_pickle_problems(self.cwlwf)
         self.conditional = conditional or Conditional()

     def run(
         self, file_store: AbstractFileStore
-    ) -> Union[UnresolvedDict, Dict[str, SkipNull]]:
+    ) -> Union[UnresolvedDict, dict[str, SkipNull]]:
         """
         Convert a CWL Workflow graph into a Toil job graph.

@@ -3129,7 +3399,7 @@ class CWLWorkflow(CWLNamedJob):
         # that may be used as a "source" for a step input workflow output
         # parameter
         # to: the job that will produce that value.
-        promises: Dict[str, Job] = {}
+        promises: dict[str, Job] = {}

         parent_name = shortname(self.cwlwf.tool["id"])

@@ -3158,7 +3428,7 @@ class CWLWorkflow(CWLNamedJob):
                 stepinputs_fufilled = False
             if stepinputs_fufilled:
                 logger.debug("Ready to make job for workflow step %s", step_id)
-                jobobj: Dict[
+                jobobj: dict[
                     str, Union[ResolveSource, DefaultWithSource, StepValueFrom]
                 ] = {}

@@ -3292,30 +3562,348 @@ class CWLWorkflow(CWLNamedJob):
         return UnresolvedDict(outobj)


+class CWLInstallImportsJob(Job):
+    def __init__(
+        self,
+        initialized_job_order: Promised[CWLObjectType],
+        tool: Promised[Process],
+        basedir: str,
+        skip_remote: bool,
+        bypass_file_store: bool,
+        import_data: Promised[dict[str, FileID]],
+        **kwargs: Any,
+    ) -> None:
+        """
+        Job to take the entire CWL object and a mapping of filenames to the imported URIs
+        to convert all file locations to URIs.
+
+        This class is only used when runImportsOnWorkers is enabled.
+        """
+        super().__init__(local=True, **kwargs)
+        self.initialized_job_order = initialized_job_order
+        self.tool = tool
+        self.basedir = basedir
+        self.skip_remote = skip_remote
+        self.bypass_file_store = bypass_file_store
+        self.import_data = import_data
+
+    @staticmethod
+    def fill_in_files(
+        initialized_job_order: CWLObjectType,
+        tool: Process,
+        candidate_to_fileid: dict[str, FileID],
+        basedir: str,
+        skip_remote: bool,
+        bypass_file_store: bool,
+    ) -> tuple[Process, CWLObjectType]:
+        """
+        Given a mapping of filenames to Toil file IDs, replace the filename with the file IDs throughout the CWL object.
+        """
+        def fill_in_file(filename: str) -> FileID:
+            """
+            Return the file name's associated Toil file ID
+            """
+            return candidate_to_fileid[filename]
+
+        file_convert_function = functools.partial(
+            extract_and_convert_file_to_toil_uri, fill_in_file
+        )
+        fs_access = ToilFsAccess(basedir)
+        fileindex: dict[str, str] = {}
+        existing: dict[str, str] = {}
+        visit_files(
+            file_convert_function,
+            fs_access,
+            fileindex,
+            existing,
+            initialized_job_order,
+            mark_broken=True,
+            skip_remote=skip_remote,
+            bypass_file_store=bypass_file_store,
+        )
+        visitSteps(
+            tool,
+            functools.partial(
+                visit_files,
+                file_convert_function,
+                fs_access,
+                fileindex,
+                existing,
+                mark_broken=True,
+                skip_remote=skip_remote,
+                bypass_file_store=bypass_file_store,
+            ),
+        )
+
+        # We always expect to have processed all files that exist
+        for param_name, param_value in initialized_job_order.items():
+            # Loop through all the parameters for the workflow overall.
+            # Drop any files that aren't either imported (for when we use
+            # the file store) or available on disk (for when we don't).
+            # This will properly make them cause an error later if they
+            # were required.
+            rm_unprocessed_secondary_files(param_value)
+        return tool, initialized_job_order
+
+    def run(self, file_store: AbstractFileStore) -> Tuple[Process, CWLObjectType]:
+        """
+        Convert the filenames in the workflow inputs into the URIs
+        :return: Promise of transformed workflow inputs. A tuple of the job order and process
+        """
+        candidate_to_fileid: dict[str, FileID] = unwrap(self.import_data)
+
+        initialized_job_order = unwrap(self.initialized_job_order)
+        tool = unwrap(self.tool)
+        return CWLInstallImportsJob.fill_in_files(
+            initialized_job_order,
+            tool,
+            candidate_to_fileid,
+            self.basedir,
+            self.skip_remote,
+            self.bypass_file_store,
+        )
+
+
+class CWLImportWrapper(CWLNamedJob):
+    """
+    Job to organize importing files on workers instead of the leader. Responsible for extracting filenames and metadata,
+    calling ImportsJob, applying imports to the job objects, and scheduling the start workflow job
+
+    This class is only used when runImportsOnWorkers is enabled.
+    """
+
+    def __init__(
+        self,
+        initialized_job_order: CWLObjectType,
+        tool: Process,
+        runtime_context: cwltool.context.RuntimeContext,
+        file_to_data: dict[str, FileMetadata],
+        options: Namespace,
+    ):
+        super().__init__(local=False, disk=options.import_workers_threshold)
+        self.initialized_job_order = initialized_job_order
+        self.tool = tool
+        self.options = options
+        self.runtime_context = runtime_context
+        self.file_to_data = file_to_data
+
+    def run(self, file_store: AbstractFileStore) -> Any:
+        imports_job = ImportsJob(
+            self.file_to_data,
+            self.options.import_workers_threshold,
+            self.options.import_workers_disk,
+        )
+        self.addChild(imports_job)
+        install_imports_job = CWLInstallImportsJob(
+            initialized_job_order=self.initialized_job_order,
+            tool=self.tool,
+            basedir=self.options.basedir,
+            skip_remote=self.options.reference_inputs,
+            bypass_file_store=self.options.bypass_file_store,
+            import_data=imports_job.rv(0),
+        )
+        self.addChild(install_imports_job)
+        imports_job.addFollowOn(install_imports_job)
+
+        start_job = CWLStartJob(
+            install_imports_job.rv(0),
+            install_imports_job.rv(1),
+            runtime_context=self.runtime_context,
+        )
+        self.addChild(start_job)
+        install_imports_job.addFollowOn(start_job)
+
+        return start_job.rv()
+
+
+class CWLStartJob(CWLNamedJob):
+    """
+    Job responsible for starting the CWL workflow.
+
+    Takes in the workflow/tool and inputs after all files are imported
+    and creates jobs to run those workflows.
+    """
+
+    def __init__(
+        self,
+        tool: Promised[Process],
+        initialized_job_order: Promised[CWLObjectType],
+        runtime_context: cwltool.context.RuntimeContext,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.tool = tool
+        self.initialized_job_order = initialized_job_order
+        self.runtime_context = runtime_context
+
+    def run(self, file_store: AbstractFileStore) -> Any:
+        initialized_job_order = unwrap(self.initialized_job_order)
+        tool = unwrap(self.tool)
+        cwljob, _ = makeJob(
+            tool, initialized_job_order, self.runtime_context, None, None
+        )  # toplevel, no name needed
+        cwljob.cwljob = initialized_job_order
+        self.addChild(cwljob)
+        return cwljob.rv()
+
+
+def extract_workflow_inputs(
+    options: Namespace, initialized_job_order: CWLObjectType, tool: Process
+) -> list[str]:
+    """
+    Collect all the workflow input files to import later.
+    :param options: namespace
+    :param initialized_job_order: cwl object
+    :param tool: tool object
+    :return:
+    """
+    fileindex: dict[str, str] = {}
+    existing: dict[str, str] = {}
+
+    # Extract out all the input files' filenames
+    logger.info("Collecting input files...")
+    fs_access = ToilFsAccess(options.basedir)
+    filenames = visit_files(
+        extract_file_uri_once,
+        fs_access,
+        fileindex,
+        existing,
+        initialized_job_order,
+        mark_broken=True,
+        skip_remote=options.reference_inputs,
+        bypass_file_store=options.bypass_file_store,
+    )
+    # Extract filenames of all the files associated with tools (binaries, etc.).
+    logger.info("Collecting tool-associated files...")
+    tool_filenames = visitSteps(
+        tool,
+        functools.partial(
+            visit_files,
+            extract_file_uri_once,
+            fs_access,
+            fileindex,
+            existing,
+            mark_broken=True,
+            skip_remote=options.reference_inputs,
+            bypass_file_store=options.bypass_file_store,
+        ),
+    )
+    filenames.extend(tool_filenames)
+    return [file for file in filenames if file is not None]
+
+
+def import_workflow_inputs(
+    jobstore: AbstractJobStore,
+    options: Namespace,
+    initialized_job_order: CWLObjectType,
+    tool: Process,
+    log_level: int = logging.DEBUG,
+) -> None:
+    """
+    Import all workflow inputs on the leader.
+
+    Ran when not importing on workers.
+    :param jobstore: Toil jobstore
+    :param options: Namespace
+    :param initialized_job_order: CWL object
+    :param tool: CWL tool
+    :param log_level: log level
+    :return:
+    """
+    fileindex: dict[str, str] = {}
+    existing: dict[str, str] = {}
+
+    # Define something we can call to import a file and get its file
+    # ID.
+    def file_import_function(url: str) -> FileID:
+        logger.log(log_level, "Loading %s...", url)
+        return jobstore.import_file(url, symlink=True)
+
+    import_function = functools.partial(
+        extract_and_convert_file_to_toil_uri, file_import_function
+    )
+    # Import all the input files, some of which may be missing optional
+    # files.
+    logger.info("Importing input files...")
+    fs_access = ToilFsAccess(options.basedir)
+    visit_files(
+        import_function,
+        fs_access,
+        fileindex,
+        existing,
+        initialized_job_order,
+        mark_broken=True,
+        skip_remote=options.reference_inputs,
+        bypass_file_store=options.bypass_file_store,
+    )
+
+    # Make another function for importing tool files. This one doesn't allow
+    # symlinking, since the tools might be coming from storage not accessible
+    # to all nodes.
+    tool_import_function = functools.partial(
+        extract_and_convert_file_to_toil_uri,
+        cast(
+            Callable[[str], FileID],
+            functools.partial(jobstore.import_file, symlink=False),
+        ),
+    )
+
+    # Import all the files associated with tools (binaries, etc.).
+    # Not sure why you would have an optional secondary file here, but
+    # the spec probably needs us to support them.
+    logger.info("Importing tool-associated files...")
+    visitSteps(
+        tool,
+        functools.partial(
+            visit_files,
+            tool_import_function,
+            fs_access,
+            fileindex,
+            existing,
+            mark_broken=True,
+            skip_remote=options.reference_inputs,
+            bypass_file_store=options.bypass_file_store,
+        ),
+    )
+
+    # We always expect to have processed all files that exist
+    for param_name, param_value in initialized_job_order.items():
+        # Loop through all the parameters for the workflow overall.
+        # Drop any files that aren't either imported (for when we use
+        # the file store) or available on disk (for when we don't).
+        # This will properly make them cause an error later if they
+        # were required.
+        rm_unprocessed_secondary_files(param_value)
+
+
+T = TypeVar("T")
+
+
 def visitSteps(
     cmdline_tool: Process,
-    op: Callable[[CommentedMap], None],
-) -> None:
+    op: Callable[[CommentedMap], list[T]],
+) -> list[T]:
     """
     Iterate over a CWL Process object, running the op on each tool description
     CWL object.
     """
     if isinstance(cmdline_tool, cwltool.workflow.Workflow):
         # For workflows we need to dispatch on steps
+        ret = []
         for step in cmdline_tool.steps:
             # Handle the step's tool
-            op(step.tool)
+            ret.extend(op(step.tool))
             # Recures on the embedded tool; maybe it's a workflow.
-            visitSteps(step.embedded_tool, op)
+            recurse_ret = visitSteps(step.embedded_tool, op)
+            ret.extend(recurse_ret)
+        return ret
     elif isinstance(cmdline_tool, cwltool.process.Process):
         # All CWL Process objects (including CommandLineTool) will have tools
         # if they bothered to run the Process __init__.
-        op(cmdline_tool.tool)
-    else:
-        raise RuntimeError(
-            f"Unsupported type encountered in workflow "
-            f"traversal: {type(cmdline_tool)}"
-        )
+        return op(cmdline_tool.tool)
+    raise RuntimeError(
+        f"Unsupported type encountered in workflow " f"traversal: {type(cmdline_tool)}"
+    )


 def rm_unprocessed_secondary_files(job_params: Any) -> None:
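The visitSteps change at the end of the hunk above turns a side-effect-only traversal into one that accumulates and returns the op results, which the new import machinery uses to collect filenames across nested workflows. A sketch with the workflow/step shapes faked by small classes:

    # Sketch of the new accumulating visitSteps traversal; Workflow, Step,
    # and CommandLineTool are minimal stand-ins for the cwltool classes.
    from typing import Callable

    class Step:
        def __init__(self, tool, embedded_tool):
            self.tool, self.embedded_tool = tool, embedded_tool

    class Workflow:
        def __init__(self, steps):
            self.steps = steps

    class CommandLineTool:
        def __init__(self, tool):
            self.tool = tool

    def visit_steps(node, op: Callable[[dict], list]) -> list:
        if isinstance(node, Workflow):
            ret = []
            for step in node.steps:
                ret.extend(op(step.tool))  # the step's own tool object
                ret.extend(visit_steps(step.embedded_tool, op))  # recurse
            return ret
        return op(node.tool)  # plain Process

    wf = Workflow([Step({"id": "s1"}, CommandLineTool({"id": "t1"}))])
    assert visit_steps(wf, lambda t: [t["id"]]) == ["s1", "t1"]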
@@ -3328,7 +3916,7 @@ def rm_unprocessed_secondary_files(job_params: Any) -> None:

 def filtered_secondary_files(
     unfiltered_secondary_files: CWLObjectType,
-) -> List[CWLObjectType]:
+) -> list[CWLObjectType]:
     """
     Remove unprocessed secondary files.

@@ -3349,28 +3937,33 @@ def filtered_secondary_files(
     intermediate_secondary_files = []
     final_secondary_files = []
     # remove secondary files still containing interpolated strings
-    for sf in cast(List[CWLObjectType], unfiltered_secondary_files["secondaryFiles"]):
+    for sf in cast(list[CWLObjectType], unfiltered_secondary_files["secondaryFiles"]):
         sf_bn = cast(str, sf.get("basename", ""))
         sf_loc = cast(str, sf.get("location", ""))
         if ("$(" not in sf_bn) and ("${" not in sf_bn):
             if ("$(" not in sf_loc) and ("${" not in sf_loc):
                 intermediate_secondary_files.append(sf)
             else:
-                logger.debug(
+                logger.debug(
+                    "Secondary file %s is dropped because it has an uninterpolated location",
+                    sf,
+                )
         else:
-            logger.debug(
+            logger.debug(
+                "Secondary file %s is dropped because it has an uninterpolated basename",
+                sf,
+            )
     # remove secondary files that are not present in the filestore or pointing
     # to existant things on disk
     for sf in intermediate_secondary_files:
         sf_loc = cast(str, sf.get("location", ""))
-        if (
-            sf_loc != MISSING_FILE
-            or sf.get("class", "") == "Directory"
-        ):
+        if sf_loc != MISSING_FILE or sf.get("class", "") == "Directory":
             # Pass imported files, and all Directories
             final_secondary_files.append(sf)
         else:
-            logger.debug(
+            logger.debug(
+                "Secondary file %s is dropped because it is known to be missing", sf
+            )
     return final_secondary_files


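filtered_secondary_files above works in two passes: drop entries whose basename or location still contains an uninterpolated $() or ${} expression, then drop entries whose location is the known-missing marker, while Directories always pass. A hedged sketch of the same filtering over plain dicts, with a stand-in marker instead of Toil's MISSING_FILE:

    # Two-stage secondaryFiles filter sketch; MISSING is a stand-in marker.
    MISSING = "missing://"

    def filter_secondary_files(sfs: list[dict]) -> list[dict]:
        interpolated = [
            sf for sf in sfs
            if all(tok not in sf.get("basename", "") and tok not in sf.get("location", "")
                   for tok in ("$(", "${"))
        ]
        return [
            sf for sf in interpolated
            if sf.get("location", "") != MISSING or sf.get("class", "") == "Directory"
        ]

    sfs = [{"basename": "a.bai", "location": "file:///a.bai"},
           {"basename": "$(inputs.x).bai", "location": "x"},
           {"basename": "b.bai", "location": MISSING}]
    assert filter_secondary_files(sfs) == [{"basename": "a.bai", "location": "file:///a.bai"}]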
@@ -3475,8 +4068,6 @@ def determine_load_listing(
 class NoAvailableJobStoreException(Exception):
     """Indicates that no job store name is available."""

-    pass
-

 def generate_default_job_store(
     batch_system_name: Optional[str],
@@ -3544,37 +4135,64 @@ def generate_default_job_store(

 usage_message = "\n\n" + textwrap.dedent(
     """
-
-
-
-
-
-
-
-
-
-
-
+            NOTE: If you're trying to specify a jobstore, you must use --jobStore, not a positional argument.
+
+            Usage: toil-cwl-runner [options] <workflow> [<input file>] [workflow options]
+
+            Example: toil-cwl-runner \\
+                         --jobStore aws:us-west-2:jobstore \\
+                         --realTimeLogging \\
+                         --logInfo \\
+                         example.cwl \\
+                         example-job.yaml \\
+                         --wf_input="hello world"
+            """[
         1:
     ]
 )

-def get_options(args: List[str]) -> Namespace:
+
+def get_options(args: list[str]) -> Namespace:
     """
     Parse given args and properly add non-Toil arguments into the cwljob of the Namespace.
     :param args: List of args from command line
     :return: options namespace
     """
-
+    # We can't allow abbreviations in case the workflow defines an option that
+    # is a prefix of a Toil option.
+    parser = ArgParser(
+        allow_abbrev=False,
+        usage="%(prog)s [options] WORKFLOW [INFILE] [WF_OPTIONS...]",
+        description=textwrap.dedent(
+            """
+            positional arguments:
+
+            WORKFLOW            CWL file to run.
+
+            INFILE              YAML or JSON file of workflow inputs.
+
+            WF_OPTIONS          Additional inputs to the workflow as command-line
+                                flags. If CWL workflow takes an input, the name of the
+                                input can be used as an option. For example:
+
+                                %(prog)s workflow.cwl --file1 file
+
+                                If an input has the same name as a Toil option, pass
+                                '--' before it.
+            """
+        ),
+        formatter_class=RawDescriptionHelpFormatter,
+    )
+
     addOptions(parser, jobstore_as_flag=True, cwl=True)
     options: Namespace
-    options,
-    options.cwljob
+    options, extra = parser.parse_known_args(args)
+    options.cwljob = extra

     return options


-def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
+def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
     """Run the main loop for toil-cwl-runner."""
     # Remove cwltool logger's stream handler so it uses Toil's
     cwllogger.removeHandler(defaultStreamHandler)
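get_options above now builds the parser with allow_abbrev=False and routes everything argparse does not recognize into options.cwljob via parse_known_args, so workflow inputs can be passed as flags without colliding with abbreviated Toil options. A minimal sketch with a single stand-in Toil flag:

    # Sketch of splitting known Toil options from workflow-level arguments.
    from argparse import ArgumentParser

    parser = ArgumentParser(allow_abbrev=False)
    parser.add_argument("--jobStore")  # stand-in for the real Toil options

    options, extra = parser.parse_known_args(
        ["--jobStore", "./store", "wf.cwl", "--wf_input", "hello"]
    )
    options.cwljob = extra
    assert options.jobStore == "./store"
    assert options.cwljob == ["wf.cwl", "--wf_input", "hello"]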
@@ -3586,25 +4204,21 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:

     # Do cwltool setup
     cwltool.main.setup_schema(args=options, custom_schema_callback=None)
-    tmpdir_prefix = options.tmpdir_prefix =
-
-
-
-
-        # workdir and the default job store under it
-        workdir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
-    else:
-        # Use a directory in the default tmpdir
-        workdir = mkdtemp()
-        # Make sure workdir doesn't exist so it can be a job store
-        os.rmdir(workdir)
+    tmpdir_prefix = options.tmpdir_prefix = (
+        options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
+    )
+    tmp_outdir_prefix = options.tmp_outdir_prefix or tmpdir_prefix
+    workdir = options.workDir or tmp_outdir_prefix

     if options.jobStore is None:
+        jobstore = cwltool.utils.create_tmp_dir(tmp_outdir_prefix)
+        # Make sure directory doesn't exist so it can be a job store
+        os.rmdir(jobstore)
         # Pick a default job store specifier appropriate to our choice of batch
         # system and provisioner and installed modules, given this available
         # local directory name. Fail if no good default can be used.
         options.jobStore = generate_default_job_store(
-            options.batchSystem, options.provisioner, workdir
+            options.batchSystem, options.provisioner, jobstore
         )

     options.doc_cache = True
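The default job store trick above creates a temporary directory just to reserve a unique, provably creatable name, then removes it so the path is free for the job store. A sketch using tempfile.mkdtemp in place of cwltool.utils.create_tmp_dir (that equivalence is an assumption of this sketch):

    # Reserve a unique directory name, then free it for later use.
    import os
    import tempfile

    candidate = tempfile.mkdtemp(prefix="tmp")
    os.rmdir(candidate)  # name is now unused but was proven creatable
    assert not os.path.exists(candidate)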
@@ -3612,17 +4226,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
     options.do_validate = True
     options.pack = False
     options.print_subgraph = False
-    if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.workDir is None:
-        # We need to override workDir because by default Toil will pick
-        # somewhere under the system temp directory if unset, ignoring
-        # --tmpdir-prefix.
-        #
-        # If set, workDir needs to exist, so we directly use the prefix
-        options.workDir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
-    if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.coordination_dir is None:
-        # override coordination_dir as default Toil will pick somewhere else
-        # ignoring --tmpdir_prefix
-        options.coordination_dir = cwltool.utils.create_tmp_dir(tmpdir_prefix)

     if options.batchSystem == "kubernetes":
         # Containers under Kubernetes can only run in Singularity
@@ -3640,12 +4243,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
     logger.debug(f"Final job store {options.jobStore} and workDir {options.workDir}")

     outdir = os.path.abspath(options.outdir or os.getcwd())
-    tmp_outdir_prefix = os.path.abspath(
-        options.tmp_outdir_prefix or DEFAULT_TMPDIR_PREFIX
-    )
-
-    fileindex: Dict[str, str] = {}
-    existing: Dict[str, str] = {}
     conf_file = getattr(options, "beta_dependency_resolvers_configuration", None)
     use_conda_dependencies = getattr(options, "beta_conda_dependencies", None)
     job_script_provider = None
@@ -3660,6 +4257,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
     )
     runtime_context.workdir = workdir  # type: ignore[attr-defined]
     runtime_context.outdir = outdir
+    setattr(runtime_context, "cwl_default_ram", options.cwl_default_ram)
     runtime_context.move_outputs = "leave"
     runtime_context.rm_tmpdir = False
     runtime_context.streaming_allowed = not options.disable_streaming
@@ -3694,225 +4292,204 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
     runtime_context.research_obj = research_obj

     try:
-        with Toil(options) as toil:
-            if options.restart:
-                outobj = toil.restart()
-            else:
-                loading_context.hints = [
-                    {
-                        "class": "ResourceRequirement",
-                        "coresMin": toil.config.defaultCores,
-                        "ramMin": toil.config.defaultMemory / (2**20),
-                        "outdirMin": toil.config.defaultDisk / (2**20),
-                        "tmpdirMin": 0,
-                    }
-                ]
-                loading_context.construct_tool_object = toil_make_tool
-                loading_context.strict = not options.not_strict
-                options.workflow = options.cwltool
-                options.job_order = options.cwljob

-
-
-
-
-
-                )
-            except ValidationException:
-                print(
-                    "\nYou may be getting this error because your arguments are incorrect or out of order."
-                    + usage_message,
-                    file=sys.stderr,
-                )
-                raise
+        if not options.restart:
+            # Make a version of the config based on the initial options, for
+            # setting up CWL option stuff
+            expected_config = Config()
+            expected_config.setOptions(options)

-
-
-
-
-
+            # Before showing the options to any cwltool stuff that wants to
+            # load the workflow, transform options.cwltool, where our
+            # argument for what to run is, to handle Dockstore workflows.
+            options.cwltool = resolve_workflow(options.cwltool)
+
+            # TODO: why are we doing this? Does this get applied to all
+            # tools as a default or something?
+            loading_context.hints = [
+                {
+                    "class": "ResourceRequirement",
+                    "coresMin": expected_config.defaultCores,
+                    # Don't include any RAM requirement because we want to
+                    # know when tools don't manually ask for RAM.
+                    "outdirMin": expected_config.defaultDisk / (2**20),
+                    "tmpdirMin": 0,
+                }
+            ]
+            loading_context.construct_tool_object = toil_make_tool
+            loading_context.strict = not options.not_strict
+            options.workflow = options.cwltool
+            options.job_order = options.cwljob
+
+            try:
+                uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
+                    options.cwltool,
+                    loading_context.resolver,
                     loading_context.fetcher_constructor,
-                    loading_context.overrides_list,
-                    tool_file_uri,
                 )
-
-
-
-
-
-                ),
-                tool_file_uri,
-            )
-        )
-
-        loading_context, workflowobj, uri = cwltool.load_tool.fetch_document(
-            uri, loading_context
+            except ValidationException:
+                print(
+                    "\nYou may be getting this error because your arguments are incorrect or out of order."
+                    + usage_message,
+                    file=sys.stderr,
                 )
-
-
+                raise
+
+            # Attempt to prepull the containers
+            if not options.no_prepull and not options.no_container:
+                try_prepull(uri, runtime_context, expected_config.batchSystem)
+
+            options.tool_help = None
+            options.debug = options.logLevel == "DEBUG"
+            job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
+                options,
+                sys.stdin,
+                loading_context.fetcher_constructor,
+                loading_context.overrides_list,
+                tool_file_uri,
+            )
+            if options.overrides:
+                loading_context.overrides_list.extend(
+                    cwltool.load_tool.load_overrides(
+                        schema_salad.ref_resolver.file_uri(
+                            os.path.abspath(options.overrides)
+                        ),
+                        tool_file_uri,
+                    )
                 )
-        if not loading_context.loader:
-            raise RuntimeError("cwltool loader is not set.")
-        processobj, metadata = loading_context.loader.resolve_ref(uri)
-        processobj = cast(Union[CommentedMap, CommentedSeq], processobj)

-
+            loading_context, workflowobj, uri = cwltool.load_tool.fetch_document(
+                uri, loading_context
+            )
+            loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
+                loading_context, workflowobj, uri
+            )
+            if not loading_context.loader:
+                raise RuntimeError("cwltool loader is not set.")
+            processobj, metadata = loading_context.loader.resolve_ref(uri)
+            processobj = cast(Union[CommentedMap, CommentedSeq], processobj)

-
-        cwltool.cwlprov.writablebagfile.packed_workflow(
-            runtime_context.research_obj,
-            cwltool.main.print_pack(loading_context, uri),
-        )
+            document_loader = loading_context.loader

-
-
-
-
-
-        except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
-            logging.error(err)
-            return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
-        runtime_context.secret_store = SecretStore()
+            if options.provenance and runtime_context.research_obj:
+                cwltool.cwlprov.writablebagfile.packed_workflow(
+                    runtime_context.research_obj,
+                    cwltool.main.print_pack(loading_context, uri),
+                )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            try:
+                tool = cwltool.load_tool.make_tool(uri, loading_context)
+                scan_for_unsupported_requirements(
+                    tool, bypass_file_store=options.bypass_file_store
+                )
+            except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
+                logging.error(err)
+                return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
+            runtime_context.secret_store = SecretStore()
+
+            try:
+                # Get the "order" for the execution of the root job. CWLTool
+                # doesn't document this much, but this is an "order" in the
+                # sense of a "specification" for running a single job. It
+                # describes the inputs to the workflow.
+                initialized_job_order = cwltool.main.init_job_order(
+                    job_order_object,
+                    options,
+                    tool,
+                    jobloader,
+                    sys.stdout,
+                    make_fs_access=runtime_context.make_fs_access,
+                    input_basedir=options.basedir,
+                    secret_store=runtime_context.secret_store,
+                    input_required=True,
+                )
+            except SystemExit as err:
+                if err.code == 2:  # raised by argparse's parse_args() function
+                    print(
+                        "\nIf both a CWL file and an input object (YAML/JSON) file were "
+                        "provided, the problem may be the argument order."
+                        + usage_message,
+                        file=sys.stderr,
                     )
-
-        if err.code == 2:  # raised by argparse's parse_args() function
-            print(
-                "\nIf both a CWL file and an input object (YAML/JSON) file were "
-                "provided, this may be the argument order." + usage_message,
-                file=sys.stderr,
-            )
-            raise
+                raise

-
-
-
-            for inp in tool.tool["inputs"]:
-                if (
-                    shortname(inp["id"]) in initialized_job_order
-                    and inp["type"] == "File"
-                ):
-                    cast(CWLObjectType, initialized_job_order[shortname(inp["id"])])[
-                        "streamable"
-                    ] = inp.get("streamable", False)
-                # TODO also for nested types that contain streamable Files
-
-            runtime_context.use_container = not options.no_container
-            runtime_context.tmp_outdir_prefix = os.path.realpath(tmp_outdir_prefix)
-            runtime_context.job_script_provider = job_script_provider
-            runtime_context.force_docker_pull = options.force_docker_pull
-            runtime_context.no_match_user = options.no_match_user
-            runtime_context.no_read_only = options.no_read_only
-            runtime_context.basedir = options.basedir
-            if not options.bypass_file_store:
-                # If we're using the file store we need to start moving output
-                # files now.
-                runtime_context.move_outputs = "move"
-
-            # We instantiate an early builder object here to populate indirect
-            # secondaryFile references using cwltool's library because we need
-            # to resolve them before toil imports them into the filestore.
-            # A second builder will be built in the job's run method when toil
-            # actually starts the cwl job.
-            # Note that this accesses input files for tools, so the
-            # ToilFsAccess needs to be set up if we want to be able to use
-            # URLs.
-            builder = tool._init_job(initialized_job_order, runtime_context)
+            # Leave the defaults un-filled in the top-level order. The tool or
+            # workflow will fill them when it runs

+            for inp in tool.tool["inputs"]:
+                if (
+                    shortname(inp["id"]) in initialized_job_order
+                    and inp["type"] == "File"
+                ):
+                    cast(CWLObjectType, initialized_job_order[shortname(inp["id"])])[
+                        "streamable"
+                    ] = inp.get("streamable", False)
+                # TODO also for nested types that contain streamable Files
+
+            runtime_context.use_container = not options.no_container
+            runtime_context.tmp_outdir_prefix = os.path.realpath(tmp_outdir_prefix)
+            runtime_context.job_script_provider = job_script_provider
+            runtime_context.force_docker_pull = options.force_docker_pull
+            runtime_context.no_match_user = options.no_match_user
+            runtime_context.no_read_only = options.no_read_only
+            runtime_context.basedir = options.basedir
+            if not options.bypass_file_store:
+                # If we're using the file store we need to start moving output
+                # files now.
+                runtime_context.move_outputs = "move"
+
+            # We instantiate an early builder object here to populate indirect
+            # secondaryFile references using cwltool's library because we need
+            # to resolve them before toil imports them into the filestore.
+            # A second builder will be built in the job's run method when toil
+            # actually starts the cwl job.
+            # Note that this accesses input files for tools, so the
+            # ToilFsAccess needs to be set up if we want to be able to use
+            # URLs.
+            builder = tool._init_job(initialized_job_order, runtime_context)
+            if not isinstance(tool, cwltool.workflow.Workflow):
                # make sure this doesn't add listing items; if shallow_listing is
                # selected, it will discover dirs one deep and then again later on
-                # (
-                #
+                # (when the cwltool builder gets constructed from the job in the
+                # CommandLineTool's job() method,
+                # see https://github.com/common-workflow-language/cwltool/blob/9cda157cb4380e9d30dec29f0452c56d0c10d064/cwltool/command_line_tool.py#L951),
+                # producing 2+ deep listings instead of only 1.
+                # ExpressionTool also uses a builder, see https://github.com/common-workflow-language/cwltool/blob/9cda157cb4380e9d30dec29f0452c56d0c10d064/cwltool/command_line_tool.py#L207
+                # Workflows don't need this because they don't go through CommandLineTool or ExpressionTool
                builder.loadListing = "no_listing"

-
-
-
-
-
+                # make sure this doesn't add listing items; if shallow_listing is
+                # selected, it will discover dirs one deep and then again later on
+                # (probably when the cwltool builder gets ahold of the job in the
+                # CWL job's run()), producing 2+ deep listings instead of only 1.
+                builder.loadListing = "no_listing"

-
-
-
-
-
-            file_import_function = cast(
-                Callable[[str], FileID],
-                functools.partial(toil.import_file, symlink=True),
-            )
+            builder.bind_input(
+                tool.inputs_record_schema,
+                initialized_job_order,
+                discover_secondaryFiles=True,
+            )

-
-
-
-            fs_access = ToilFsAccess(options.basedir)
-            import_files(
-                file_import_function,
-                fs_access,
-                fileindex,
-                existing,
-                initialized_job_order,
-                mark_broken=True,
-                skip_remote=options.reference_inputs,
-                bypass_file_store=options.bypass_file_store,
-                log_level=logging.INFO,
-            )
-            # Import all the files associated with tools (binaries, etc.).
-            # Not sure why you would have an optional secondary file here, but
-            # the spec probably needs us to support them.
-            logger.info("Importing tool-associated files...")
-            visitSteps(
-                tool,
-                functools.partial(
-                    import_files,
-                    file_import_function,
-                    fs_access,
-                    fileindex,
-                    existing,
-                    mark_broken=True,
-                    skip_remote=options.reference_inputs,
-                    bypass_file_store=options.bypass_file_store,
-                    log_level=logging.INFO,
-                ),
-            )
+            logger.info("Creating root job")
+            logger.debug("Root tool: %s", tool)
+            tool = remove_pickle_problems(tool)

-
-
-
-
-            # the file store) or available on disk (for when we don't).
-            # This will properly make them cause an error later if they
-            # were required.
-            rm_unprocessed_secondary_files(param_value)
-
-            logger.info("Creating root job")
-            logger.debug("Root tool: %s", tool)
+        with Toil(options) as toil:
+            if options.restart:
+                outobj = toil.restart()
+            else:
                 try:
-                    wf1
+                    wf1 = makeRootJob(
                         tool=tool,
                         jobobj={},
                         runtime_context=runtime_context,
-
-
+                        initialized_job_order=initialized_job_order,
+                        options=options,
+                        toil=toil,
                     )
                 except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
                     logging.error(err)
                     return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
-                wf1.cwljob = initialized_job_order
                 logger.info("Starting workflow")
                 outobj = toil.start(wf1)

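The restructured main() above hoists all workflow loading and import preparation out of the Toil context manager; the context now only restarts an existing run or starts the freshly built root job. A control-flow sketch with Toil and the root job stubbed:

    # Control-flow sketch of the reorganized main(); FakeToil and the root
    # job string are stand-ins, not the real Toil or makeRootJob objects.
    class FakeToil:
        def __enter__(self):
            return self

        def __exit__(self, *exc):
            return False

        def restart(self):
            return {"out": "restarted"}

        def start(self, root):
            return {"out": root}

    def run(restart: bool):
        root = "root-job"  # stand-in for makeRootJob(...)
        with FakeToil() as toil:
            if restart:
                return toil.restart()
            return toil.start(root)

    assert run(False) == {"out": "root-job"}
    assert run(True) == {"out": "restarted"}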
@@ -3929,7 +4506,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
                     outobj,
                     outdir,
                     destBucket=options.destBucket,
-                    log_level=logging.INFO
+                    log_level=logging.INFO,
                 )
                 logger.info("Stored workflow outputs")

@@ -3992,8 +4569,13 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
         else:
             logging.error(err)
             return 1
-    except (
-
+    except (
+        InsufficientSystemResources,
+        LocatorException,
+        InvalidImportExportUrlException,
+        UnimplementedURLException,
+        JobTooBigError,
+    ) as err:
         logging.error(err)
         return 1
