toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py
CHANGED
@@ -1,4 +1,5 @@
 """Implemented support for Common Workflow Language (CWL) for Toil."""
+
 # Copyright (C) 2015 Curoverse, Inc
 # Copyright (C) 2015-2021 Regents of the University of California
 # Copyright (C) 2019-2020 Seven Bridges
@@ -29,31 +30,29 @@ import logging
 import os
 import pprint
 import shutil
-import socket
 import stat
 import sys
 import textwrap
 import uuid
-from
+from collections.abc import Iterator, Mapping, MutableMapping, MutableSequence
+from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
 from threading import Thread
 from typing import (
     IO,
     Any,
     Callable,
-    Dict,
     Iterator,
-    List,
     Mapping,
     MutableMapping,
     MutableSequence,
     Optional,
     TextIO,
     Tuple,
-    Type,
     TypeVar,
     Union,
     cast,
-
+    Literal,
+    Protocol,
 )
 from urllib.parse import quote, unquote, urlparse, urlsplit
 
@@ -68,7 +67,10 @@ import cwltool.load_tool
 import cwltool.main
 import cwltool.resolver
 import schema_salad.ref_resolver
-
+
+# This is also in configargparse but MyPy doesn't know it
+from argparse import RawDescriptionHelpFormatter
+from configargparse import ArgParser, Namespace
 from cwltool.loghandler import _logger as cwllogger
 from cwltool.loghandler import defaultStreamHandler
 from cwltool.mpi import MpiConfig
@@ -82,6 +84,7 @@ from cwltool.process import (
     shortname,
 )
 from cwltool.secrets import SecretStore
+from cwltool.singularity import SingularityCommandLineJob
 from cwltool.software_requirements import (
     DependenciesConfiguration,
     get_container_from_software_requirements,
@@ -103,11 +106,14 @@ from schema_salad.avro.schema import Names
 from schema_salad.exceptions import ValidationException
 from schema_salad.ref_resolver import file_uri, uri_file_path
 from schema_salad.sourceline import SourceLine
-from typing_extensions import Literal
 
+from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
 from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
-from toil.common import Toil, addOptions
+from toil.common import Config, Toil, addOptions
 from toil.cwl import check_cwltool_version
+from toil.lib.integration import resolve_workflow
+from toil.lib.misc import call_command
+from toil.provisioners.clusterScaler import JobTooBigError
 
 check_cwltool_version()
 from toil.cwl.utils import (
@@ -120,12 +126,28 @@ from toil.cwl.utils import (
 from toil.exceptions import FailedJobsException
 from toil.fileStores import FileID
 from toil.fileStores.abstractFileStore import AbstractFileStore
-from toil.job import
-
+from toil.job import (
+    AcceleratorRequirement,
+    Job,
+    Promise,
+    Promised,
+    unwrap,
+    ImportsJob,
+    get_file_sizes,
+    FileMetadata,
+    WorkerImportJob,
+)
+from toil.jobStores.abstractJobStore import (
+    AbstractJobStore,
+    NoSuchFileException,
+    InvalidImportExportUrlException,
+    LocatorException,
+)
+from toil.lib.exceptions import UnimplementedURLException
 from toil.jobStores.fileJobStore import FileJobStore
 from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
 from toil.lib.io import mkdtemp
-from toil.lib.threading import ExceptionalThread
+from toil.lib.threading import ExceptionalThread, global_mutex
 from toil.statsAndLogging import DEFAULT_LOGLEVEL
 
 logger = logging.getLogger(__name__)
@@ -157,7 +179,7 @@ def cwltoil_was_removed() -> None:
 # output object to the correct key of the input object.
 
 
-class UnresolvedDict(
+class UnresolvedDict(dict[Any, Any]):
     """Tag to indicate a dict contains promises that must be resolved."""
 
 
@@ -192,7 +214,7 @@ def filter_skip_null(name: str, value: Any) -> Any:
     return value
 
 
-def _filter_skip_null(value: Any, err_flag:
+def _filter_skip_null(value: Any, err_flag: list[bool]) -> Any:
    """
    Private implementation for recursively filtering out SkipNull objects from 'value'.

@@ -241,18 +263,50 @@ def ensure_no_collisions(
         seen_names.add(wanted_name)
 
 
+def try_prepull(
+    cwl_tool_uri: str, runtime_context: cwltool.context.RuntimeContext, batchsystem: str
+) -> None:
+    """
+    Try to prepull all containers in a CWL workflow with Singularity or Docker.
+
+    This will not prepull the default container specified on the command line.
+    :param cwl_tool_uri: CWL workflow URL. Fragments are accepted as well
+    :param runtime_context: runtime context of cwltool
+    :param batchsystem: type of Toil batchsystem
+    :return:
+    """
+    if runtime_context.singularity:
+        if "CWL_SINGULARITY_CACHE" in os.environ:
+            logger.info("Prepulling the workflow's containers with Singularity...")
+            call_command(
+                [
+                    "cwl-docker-extract",
+                    "--singularity",
+                    "--dir",
+                    os.environ["CWL_SINGULARITY_CACHE"],
+                    cwl_tool_uri,
+                ]
+            )
+    elif not runtime_context.user_space_docker_cmd and not runtime_context.podman:
+        # For udocker and podman prefetching is unimplemented
+        # This is docker
+        if batchsystem == "single_machine":
+            # Only on single machine will the docker daemon be accessible by all workers and the leader
+            logger.info("Prepulling the workflow's containers with Docker...")
+            call_command(["cwl-docker-extract", cwl_tool_uri])
+
+
 class Conditional:
     """
     Object holding conditional expression until we are ready to evaluate it.
 
-    Evaluation occurs
+    Evaluation occurs before the enclosing step's inputs are type-checked.
     """
 
     def __init__(
         self,
         expression: Optional[str] = None,
-        outputs: Union[
-        requirements: Optional[
+        outputs: Union[dict[str, CWLOutputType], None] = None,
+        requirements: Optional[list[CWLObjectType]] = None,
         container_engine: str = "docker",
     ):
         """
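Note: `try_prepull` shells out to the `cwl-docker-extract` tool from cwl-utils. Below is a minimal sketch of invoking it ahead of a run, assuming a cwltool `RuntimeContext` configured for Singularity; the workflow URI and cache path are made up:

```python
import os
import cwltool.context

runtime_context = cwltool.context.RuntimeContext()
runtime_context.singularity = True  # as if --singularity were passed

# Prepulling into a shared Singularity cache only happens when this is set.
os.environ.setdefault("CWL_SINGULARITY_CACHE", "/shared/singularity-cache")

try_prepull(
    "file:///data/workflow.cwl",  # hypothetical workflow URI
    runtime_context,
    batchsystem="single_machine",
)
```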
@@ -297,7 +351,7 @@ class Conditional:
                 "'%s' evaluated to a non-boolean value" % self.expression
             )
 
-    def skipped_outputs(self) ->
+    def skipped_outputs(self) -> dict[str, SkipNull]:
         """Generate a dict of SkipNull objects corresponding to the output structure."""
         outobj = {}
 
@@ -317,14 +371,14 @@ class Conditional:
 class ResolveSource:
     """Apply linkMerge and pickValue operators to values coming into a port."""
 
-    promise_tuples: Union[
+    promise_tuples: Union[list[tuple[str, Promise]], tuple[str, Promise]]
 
     def __init__(
         self,
         name: str,
-        input:
+        input: dict[str, CWLObjectType],
         source_key: str,
-        promises:
+        promises: dict[str, Job],
     ):
         """
         Construct a container object.
@@ -383,7 +437,7 @@ class ResolveSource:
             )
         else:
             name, rv = self.promise_tuples
-            result = cast(
+            result = cast(dict[str, Any], rv).get(name)
 
         result = self.pick_value(result)
         result = filter_skip_null(self.name, result)
@@ -391,7 +445,7 @@
 
     def link_merge(
         self, values: CWLObjectType
-    ) -> Union[
+    ) -> Union[list[CWLOutputType], CWLOutputType]:
         """
         Apply linkMerge operator to `values` object.
 
@@ -404,7 +458,7 @@
             return values
 
         elif link_merge_type == "merge_flattened":
-            result:
+            result: list[CWLOutputType] = []
             for v in values:
                 if isinstance(v, MutableSequence):
                     result.extend(v)
@@ -417,7 +471,7 @@
                 f"Unsupported linkMerge '{link_merge_type}' on {self.name}."
             )
 
-    def pick_value(self, values: Union[
+    def pick_value(self, values: Union[list[Union[str, SkipNull]], Any]) -> Any:
         """
         Apply pickValue operator to `values` object.
 
@@ -485,7 +539,7 @@ class StepValueFrom:
         """
 
     def __init__(
-        self, expr: str, source: Any, req:
+        self, expr: str, source: Any, req: list[CWLObjectType], container_engine: str
     ):
         """
         Instantiate an object to carry all know about this valueFrom expression.
@@ -617,7 +671,7 @@ class JustAValue:
 
 def resolve_dict_w_promises(
     dict_w_promises: Union[
-        UnresolvedDict, CWLObjectType,
+        UnresolvedDict, CWLObjectType, dict[str, Union[str, StepValueFrom]]
     ],
     file_store: Optional[AbstractFileStore] = None,
 ) -> CWLObjectType:
@@ -672,7 +726,7 @@ class ToilPathMapper(PathMapper):
 
     def __init__(
         self,
-        referenced_files:
+        referenced_files: list[CWLObjectType],
         basedir: str,
         stagedir: str,
         separateDirs: bool = True,
@@ -787,19 +841,44 @@ class ToilPathMapper(PathMapper):
         # TODO: why would we do that?
         stagedir = cast(Optional[str], obj.get("dirname")) or stagedir
 
-
-
-
-
-        )
+        if obj["class"] not in ("File", "Directory"):
+            # We only handle files and directories; only they have locations.
+            return
+
+        location = cast(str, obj["location"])
+        if location in self:
+            # If we've already mapped this, map it consistently.
+            tgt = self._pathmap[location].target
+            logger.debug(
+                "ToilPathMapper re-using target %s for path %s",
+                tgt,
+                location,
+            )
+        else:
+            # Decide where to put the file or directory, as an absolute path.
+            tgt = os.path.join(
+                stagedir,
+                cast(str, obj["basename"]),
+            )
+            if self.reversemap(tgt) is not None:
+                # If the target already exists in the pathmap, but we haven't yet
+                # mapped this, it means we have a conflict.
+                i = 2
+                new_tgt = f"{tgt}_{i}"
+                while self.reversemap(new_tgt) is not None:
+                    i += 1
+                    new_tgt = f"{tgt}_{i}"
+                logger.debug(
+                    "ToilPathMapper resolving mapping conflict: %s is now %s",
+                    tgt,
+                    new_tgt,
+                )
+                tgt = new_tgt
 
         if obj["class"] == "Directory":
             # Whether or not we've already mapped this path, we need to map all
             # children recursively.
 
-            # Grab its location
-            location = cast(str, obj["location"])
-
             logger.debug("ToilPathMapper visiting directory %s", location)
 
             # We want to check the directory to make sure it is not
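Note: the conflict handling above picks a fresh staging target by appending a numeric suffix until `reversemap()` no longer knows the name. The same naming scheme in isolation (a standalone sketch, not the real `PathMapper`):

```python
def pick_free_target(tgt: str, taken: set[str]) -> str:
    # Try tgt_2, tgt_3, ... until a name is unused, as the mapper does.
    i = 2
    new_tgt = f"{tgt}_{i}"
    while new_tgt in taken:
        i += 1
        new_tgt = f"{tgt}_{i}"
    return new_tgt

# Two different inputs both named reads.fastq staged to one directory:
taken = {"/stage/reads.fastq", "/stage/reads.fastq_2"}
print(pick_free_target("/stage/reads.fastq", taken))  # /stage/reads.fastq_3
```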
@@ -885,7 +964,7 @@ class ToilPathMapper(PathMapper):
 
             # Keep recursing
             self.visitlisting(
-                cast(
+                cast(list[CWLObjectType], obj.get("listing", [])),
                 tgt,
                 basedir,
                 copy=copy,
@@ -893,23 +972,21 @@
             )
 
         elif obj["class"] == "File":
-
-
-            logger.debug("ToilPathMapper visiting file %s", path)
+            logger.debug("ToilPathMapper visiting file %s", location)
 
-            if
+            if location in self._pathmap:
                 # Don't map the same file twice
                 logger.debug(
                     "ToilPathMapper stopping recursion because we have already "
                     "mapped file: %s",
-
+                    location,
                 )
                 return
 
-            ab = abspath(
-            if "contents" in obj and
+            ab = abspath(location, basedir)
+            if "contents" in obj and location.startswith("_:"):
                 # We are supposed to create this file
-                self._pathmap[
+                self._pathmap[location] = MapperEnt(
                     cast(str, obj["contents"]),
                     tgt,
                     "CreateWritableFile" if copy else "CreateFile",
@@ -927,14 +1004,16 @@
             # URI for a local file it downloaded.
             if self.get_file:
                 deref = self.get_file(
-
+                    location,
+                    obj.get("streamable", False),
+                    self.streaming_allowed,
                 )
             else:
                 deref = ab
             if deref.startswith("file:"):
                 deref = schema_salad.ref_resolver.uri_file_path(deref)
             if urlsplit(deref).scheme in ["http", "https"]:
-                deref = downloadHttpFile(
+                deref = downloadHttpFile(location)
             elif urlsplit(deref).scheme != "toilfile":
                 # Dereference symbolic links
                 st = os.lstat(deref)
@@ -952,42 +1031,18 @@
             # reference, we just pass that along.
 
             """Link or copy files to their targets. Create them as needed."""
-            targets: Dict[str, str] = {}
-            for _, value in self._pathmap.items():
-                # If the target already exists in the pathmap, it means we have a conflict. But we didn't change tgt to reflect new name.
-                if value.target == tgt:  # Conflict detected in the pathmap
-                    i = 2
-                    new_tgt = f"{tgt}_{i}"
-                    while new_tgt in targets:
-                        i += 1
-                        new_tgt = f"{tgt}_{i}"
-                    targets[new_tgt] = new_tgt
-
-            for _, value_conflict in targets.items():
-                logger.debug(
-                    "ToilPathMapper adding file mapping for conflict %s -> %s",
-                    deref,
-                    value_conflict,
-                )
-                self._pathmap[path] = MapperEnt(
-                    deref,
-                    value_conflict,
-                    "WritableFile" if copy else "File",
-                    staged,
-                )
-            # No conflicts detected so we can write out the original name.
-            if not targets:
-                logger.debug(
-                    "ToilPathMapper adding file mapping %s -> %s", deref, tgt
 
-
-
-
+            logger.debug(
+                "ToilPathMapper adding file mapping %s -> %s", deref, tgt
+            )
+
+            self._pathmap[location] = MapperEnt(
+                deref, tgt, "WritableFile" if copy else "File", staged
+            )
 
             # Handle all secondary files that need to be next to this one.
             self.visitlisting(
-                cast(
+                cast(list[CWLObjectType], obj.get("secondaryFiles", [])),
                 stagedir,
                 basedir,
                 copy=copy,
@@ -1013,15 +1068,59 @@ class ToilSingleJobExecutor(cwltool.executors.SingleJobExecutor):
     ) -> None:
         """run_jobs from SingleJobExecutor, but not in a top level runtime context."""
         runtime_context.toplevel = False
+        if isinstance(
+            process, cwltool.command_line_tool.CommandLineTool
+        ) and isinstance(
+            process.make_job_runner(runtime_context), SingularityCommandLineJob
+        ):
+            # Set defaults for singularity cache environment variables, similar to what we do in wdltoil
+            # Use the same place as the default singularity cache directory
+            singularity_cache = os.path.join(os.path.expanduser("~"), ".singularity")
+            os.environ["SINGULARITY_CACHEDIR"] = os.environ.get(
+                "SINGULARITY_CACHEDIR", singularity_cache
+            )
+
+            # If singularity is detected, prepull the image to ensure locking
+            (docker_req, docker_is_req) = process.get_requirement(
+                feature="DockerRequirement"
+            )
+            with global_mutex(
+                os.environ["SINGULARITY_CACHEDIR"], "toil_singularity_cache_mutex"
+            ):
+                SingularityCommandLineJob.get_image(
+                    dockerRequirement=cast(dict[str, str], docker_req),
+                    pull_image=runtime_context.pull_image,
+                    force_pull=runtime_context.force_docker_pull,
+                    tmp_outdir_prefix=runtime_context.tmp_outdir_prefix,
+                )
+
         return super().run_jobs(process, job_order_object, logger, runtime_context)
 
 
 class ToilTool:
     """Mixin to hook Toil into a cwltool tool type."""
 
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        """
+        Init hook to set up member variables.
+        """
+        super().__init__(*args, **kwargs)
+        # Reserve a spot for the Toil job that ends up executing this tool.
+        self._toil_job: Optional[Job] = None
+        # Remember path mappers we have used so we can interrogate them later to find out what the job mapped.
+        self._path_mappers: list[cwltool.pathmapper.PathMapper] = []
+
+    def connect_toil_job(self, job: Job) -> None:
+        """
+        Attach the Toil tool to the Toil job that is executing it. This allows
+        it to use the Toil job to stop at certain points if debugging flags are
+        set.
+        """
+        self._toil_job = job
+
     def make_path_mapper(
         self,
-        reffiles:
+        reffiles: list[Any],
         stagedir: str,
         runtimeContext: cwltool.context.RuntimeContext,
         separateDirs: bool,
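Note: `global_mutex` from `toil.lib.threading` serializes the Singularity pull across processes that share the cache directory. The locking pattern in isolation, with a hypothetical critical section:

```python
from toil.lib.threading import global_mutex

cache_dir = "/shared/singularity-cache"  # assumed shared directory

# Only one process at a time may hold the named mutex for this directory;
# concurrent workers wait instead of racing to pull the same image.
with global_mutex(cache_dir, "toil_singularity_cache_mutex"):
    pull_image_into_cache()  # hypothetical placeholder for the cached pull
```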
@@ -1029,12 +1128,12 @@ class ToilTool:
         """Create the appropriate PathMapper for the situation."""
         if getattr(runtimeContext, "bypass_file_store", False):
             # We only need to understand cwltool's supported URIs
-
+            mapper = PathMapper(
                 reffiles, runtimeContext.basedir, stagedir, separateDirs=separateDirs
             )
         else:
             # We need to be able to read from Toil-provided URIs
-
+            mapper = ToilPathMapper(
                 reffiles,
                 runtimeContext.basedir,
                 stagedir,
@@ -1043,6 +1142,10 @@ class ToilTool:
                 streaming_allowed=runtimeContext.streaming_allowed,
             )
 
+        # Remember the path mappers
+        self._path_mappers.append(mapper)
+        return mapper
+
     def __str__(self) -> str:
         """Return string representation of this tool type."""
         return f'{self.__class__.__name__}({repr(getattr(self, "tool", {}).get("id", "???"))})'
@@ -1059,17 +1162,36 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
         name conflicts at the top level of the work directory.
         """
 
+        # Set up the initial work dir with all its files
         super()._initialworkdir(j, builder)
 
         # The initial work dir listing is now in j.generatefiles["listing"]
-        # Also j.
+        # Also j.generatefiles is a CWL Directory.
         # So check the initial working directory.
-        logger.
+        logger.debug("Initial work dir: %s", j.generatefiles)
         ensure_no_collisions(
             j.generatefiles,
             "the job's working directory as specified by the InitialWorkDirRequirement",
         )
 
+        if self._toil_job is not None:
+            # Make a table of all the places we mapped files to when downloading the inputs.
+
+            # We want to hint which host paths and container (if any) paths correspond
+            host_and_job_paths: list[tuple[str, str]] = []
+
+            for pm in self._path_mappers:
+                for _, mapper_entry in pm.items_exclude_children():
+                    # We know that mapper_entry.target as seen by the task is
+                    # mapper_entry.resolved on the host.
+                    host_and_job_paths.append(
+                        (mapper_entry.resolved, mapper_entry.target)
+                    )
+
+            # Notice that we have downloaded our inputs. Explain which files
+            # those are here and what the task will expect to call them.
+            self._toil_job.files_downloaded_hook(host_and_job_paths)
+
 
 class ToilExpressionTool(ToilTool, cwltool.command_line_tool.ExpressionTool):
     """Subclass the cwltool expression tool to provide the custom ToilPathMapper."""
@@ -1092,7 +1214,11 @@ def toil_make_tool(
     return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)
 
 
-
+# When a file we want to have is missing, we can give it this sentinal location
+# URI instead of raising an error right away, in case it is optional.
+MISSING_FILE = "missing://"
+
+DirectoryContents = dict[str, Union[str, "DirectoryContents"]]
 
 
 def check_directory_dict_invariants(contents: DirectoryContents) -> None:
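Note: `DirectoryContents` is a recursive alias, so an encoded directory tree is just nested dicts whose leaves are file-reference strings. An illustrative value (the packed IDs are made up):

```python
contents: DirectoryContents = {
    "inputs": {                            # a subdirectory is another dict
        "reads.fastq": "toilfile:abc123",  # hypothetical packed FileID
        "reads.fastq.bai": "toilfile:def456",
    },
    "config.yml": "toilfile:0a1b2c",       # a file at the top level
}
```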
@@ -1114,7 +1240,7 @@ def check_directory_dict_invariants(contents: DirectoryContents) -> None:
 
 def decode_directory(
     dir_path: str,
-) ->
+) -> tuple[DirectoryContents, Optional[str], str]:
     """
     Decode a directory from a "toildir:" path to a directory (or a file in it).
 
@@ -1189,7 +1315,7 @@ class ToilFsAccess(StdFsAccess):
         # they know what will happen.
         # Also maps files and directories from external URLs to downloaded
         # locations.
-        self.dir_to_download:
+        self.dir_to_download: dict[str, str] = {}
 
         super().__init__(basedir)
 
@@ -1312,14 +1438,16 @@ class ToilFsAccess(StdFsAccess):
         destination = super()._abs(destination)
         return destination
 
-    def glob(self, pattern: str) ->
+    def glob(self, pattern: str) -> list[str]:
         parse = urlparse(pattern)
         if parse.scheme == "file":
             pattern = os.path.abspath(unquote(parse.path))
         elif parse.scheme == "":
             pattern = os.path.abspath(pattern)
         else:
-            raise RuntimeError(
+            raise RuntimeError(
+                f"Cannot efficiently support globbing on {parse.scheme} URIs"
+            )
 
         # Actually do the glob
         return [schema_salad.ref_resolver.file_uri(f) for f in glob.glob(pattern)]
@@ -1356,12 +1484,12 @@ class ToilFsAccess(StdFsAccess):
         else:
             # This should be supported by a job store.
             byte_stream = AbstractJobStore.open_url(fn)
-            if
+            if "b" in mode:
                 # Pass stream along in binary
                 return byte_stream
             else:
                 # Wrap it in a text decoder
-                return io.TextIOWrapper(byte_stream, encoding=
+                return io.TextIOWrapper(byte_stream, encoding="utf-8")
 
     def exists(self, path: str) -> bool:
         """Test for file existence."""
@@ -1468,7 +1596,7 @@ class ToilFsAccess(StdFsAccess):
         logger.debug("AbstractJobStore said: %s", status)
         return status
 
-    def listdir(self, fn: str) ->
+    def listdir(self, fn: str) -> list[str]:
         # This needs to return full URLs for everything in the directory.
         # URLs are not allowed to end in '/', even for subdirectories.
         logger.debug("ToilFsAccess listing %s", fn)
@@ -1489,7 +1617,9 @@ class ToilFsAccess(StdFsAccess):
             if got is None:
                 raise RuntimeError(f"Cannot list nonexistent directory: {fn}")
             if isinstance(got, str):
-                raise RuntimeError(
+                raise RuntimeError(
+                    f"Cannot list file or dubdirectory of a file: {fn}"
+                )
             here = got
             # List all the things in here and make full URIs to them
             return [os.path.join(fn, k) for k in here.keys()]
@@ -1499,7 +1629,7 @@ class ToilFsAccess(StdFsAccess):
                 for entry in AbstractJobStore.list_url(fn)
             ]
 
-    def join(self, path, *paths
+    def join(self, path: str, *paths: str) -> str:
         # This falls back on os.path.join
         return super().join(path, *paths)
 
@@ -1512,12 +1642,12 @@ class ToilFsAccess(StdFsAccess):
 
 def toil_get_file(
     file_store: AbstractFileStore,
-    index:
-    existing:
+    index: dict[str, str],
+    existing: dict[str, str],
     uri: str,
     streamable: bool = False,
     streaming_allowed: bool = True,
-    pipe_threads: Optional[
+    pipe_threads: Optional[list[tuple[Thread, int]]] = None,
 ) -> str:
     """
     Set up the given file or directory from the Toil jobstore at a file URI
@@ -1618,9 +1748,7 @@ def toil_get_file(
         and streamable
         and not isinstance(file_store.jobStore, FileJobStore)
     ):
-        logger.debug(
-            "Streaming file %s", uri
-        )
+        logger.debug("Streaming file %s", uri)
         src_path = file_store.getLocalTempFileName()
         os.mkfifo(src_path)
         th = ExceptionalThread(
@@ -1642,34 +1770,35 @@
     if uri.startswith("toilfile:"):
         # Download from the file store
         file_store_id = FileID.unpack(uri[len("toilfile:") :])
-        src_path = file_store.readGlobalFile(
-            file_store_id, symlink=True
-        )
+        src_path = file_store.readGlobalFile(file_store_id, symlink=True)
     else:
         # Download from the URI via the job store.
 
         # Figure out where it goes.
         src_path = file_store.getLocalTempFileName()
         # Open that path exclusively to make sure we created it
-        with open(src_path,
+        with open(src_path, "xb") as fh:
             # Download into the file
-
-
-
-
+            size, executable = AbstractJobStore.read_from_url(uri, fh)
+            if executable:
+                # Set the execute bit in the file's permissions
+                os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
 
     index[src_path] = uri
     existing[uri] = src_path
     return schema_salad.ref_resolver.file_uri(src_path)
 
-
-
-
-
+
+def convert_file_uri_to_toil_uri(
+    applyFunc: Callable[[str], FileID],
+    index: dict[str, str],
+    existing: dict[str, str],
     file_uri: str,
 ) -> str:
     """
-
+    Given a file URI, convert it to a toil file URI. Uses applyFunc to handle the conversion.
+
+    Runs once on every unique file URI.
 
     'existing' is a set of files retrieved as inputs from toil_get_file. This
     ensures they are mapped back as the same name if passed through.
@@ -1686,12 +1815,8 @@ def write_file(
     else:
         file_uri = existing.get(file_uri, file_uri)
         if file_uri not in index:
-            if not urlparse(file_uri).scheme:
-                rp = os.path.realpath(file_uri)
-            else:
-                rp = file_uri
             try:
-                index[file_uri] = "toilfile:" +
+                index[file_uri] = "toilfile:" + applyFunc(file_uri).pack()
                 existing[index[file_uri]] = file_uri
             except Exception as e:
                 logger.error("Got exception '%s' while copying '%s'", e, file_uri)
@@ -1710,17 +1835,93 @@ def path_to_loc(obj: CWLObjectType) -> None:
         del obj["path"]
 
 
-def
-
+def extract_file_uri_once(
+    fileindex: dict[str, str],
+    existing: dict[str, str],
+    file_metadata: CWLObjectType,
+    mark_broken: bool = False,
+    skip_remote: bool = False,
+) -> Optional[str]:
+    """
+    Extract the filename from a CWL file record.
+
+    This function matches the predefined function signature in visit_files, which ensures
+    that this function is called on all files inside a CWL object.
+
+    Ensures no duplicate files are returned according to fileindex. If a file has not been resolved already (and had file:// prepended)
+    then resolve symlinks.
+    :param fileindex: Forward mapping of filename
+    :param existing: Reverse mapping of filename. This function does not use this
+    :param file_metadata: CWL file record
+    :param mark_broken: Whether files should be marked as missing
+    :param skip_remote: Whether to skip remote files
+    :return:
+    """
+    location = cast(str, file_metadata["location"])
+    if (
+        location.startswith("toilfile:")
+        or location.startswith("toildir:")
+        or location.startswith("_:")
+    ):
+        return None
+    if location in fileindex:
+        file_metadata["location"] = fileindex[location]
+        return None
+    if not location and file_metadata["path"]:
+        file_metadata["location"] = location = schema_salad.ref_resolver.file_uri(
+            cast(str, file_metadata["path"])
+        )
+    if location.startswith("file://") and not os.path.isfile(
+        schema_salad.ref_resolver.uri_file_path(location)
+    ):
+        if mark_broken:
+            logger.debug("File %s is missing", file_metadata)
+            file_metadata["location"] = location = MISSING_FILE
+        else:
+            raise cwl_utils.errors.WorkflowException(
+                "File is missing: %s" % file_metadata
+            )
+    if location.startswith("file://") or not skip_remote:
+        # This is a local file or a remote file
+        if location not in fileindex:
+            # These dictionaries are meant to keep track of what we're going to import
+            # In the actual import, this is used as a bidirectional mapping from unvirtualized to virtualized
+            # For this case, keep track of the files to prevent returning duplicate files
+            # see write_file
+
+            # If there is not a scheme, this file has not been resolved yet or is a URL.
+            if not urlparse(location).scheme:
+                rp = os.path.realpath(location)
+            else:
+                rp = location
+            return rp
+    return None
+
+
+V = TypeVar("V", covariant=True)
+
+
+class VisitFunc(Protocol[V]):
+    def __call__(
+        self,
+        fileindex: dict[str, str],
+        existing: dict[str, str],
+        file_metadata: CWLObjectType,
+        mark_broken: bool,
+        skip_remote: bool,
+    ) -> V: ...
+
+
+def visit_files(
+    func: VisitFunc[V],
     fs_access: StdFsAccess,
-    fileindex:
-    existing:
+    fileindex: dict[str, str],
+    existing: dict[str, str],
     cwl_object: Optional[CWLObjectType],
-
+    mark_broken: bool = False,
     skip_remote: bool = False,
     bypass_file_store: bool = False,
-
-) -> None:
+) -> list[V]:
     """
     Prepare all files and directories.
 
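Note: because `VisitFunc` is a `Protocol`, any callable with this shape can be passed to `visit_files`; `extract_file_uri_once` above is one conforming implementation. A sketch of a custom visitor that merely collects locations, assuming `fs_access` and `job_order` already exist in scope:

```python
from typing import Optional, cast

def collect_location(
    fileindex: dict[str, str],
    existing: dict[str, str],
    file_metadata: CWLObjectType,
    mark_broken: bool,
    skip_remote: bool,
) -> Optional[str]:
    # Trivial visitor: report each File's location without changing it.
    return cast(Optional[str], file_metadata.get("location"))

# visit_files applies the visitor to every File in the CWL object and
# returns the per-file results as a list.
locations = visit_files(collect_location, fs_access, {}, {}, job_order)
```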
@@ -1735,10 +1936,10 @@ def import_files(
     Preserves any listing fields.
 
     If a file cannot be found (like if it is an optional secondary file that
-    doesn't exist), fails, unless
-
+    doesn't exist), fails, unless mark_broken is set, in which case it applies
+    a sentinel location.
 
-    Also does some
+    Also does some miscellaneous normalization.
 
     :param import_function: The function used to upload a URI and get a
         Toil FileID for it.
@@ -1754,8 +1955,9 @@
 
     :param cwl_object: CWL tool (or workflow order) we are importing files for
 
-    :param
-        don't exist,
+    :param mark_broken: If True, when files can't be imported because they e.g.
+        don't exist, set their locations to MISSING_FILE rather than failing
+        with an error.
 
     :param skp_remote: If True, leave remote URIs in place instead of importing
         files.
@@ -1765,18 +1967,12 @@
 
     :param log_level: Log imported files at the given level.
     """
+    func_return: list[Any] = list()
     tool_id = cwl_object.get("id", str(cwl_object)) if cwl_object else ""
 
     logger.debug("Importing files for %s", tool_id)
     logger.debug("Importing files in %s", cwl_object)
 
-    def import_and_log(url: str) -> FileID:
-        """
-        Upload a file and log that we are doing so.
-        """
-        logger.log(log_level, "Loading %s...", url)
-        return import_function(url)
-
     # We need to upload all files to the Toil filestore, and encode structure
     # recursively into all Directories' locations. But we cannot safely alter
     # the listing fields of Directory objects, because the handling required by
@@ -1794,13 +1990,13 @@
     if bypass_file_store:
         # Don't go on to actually import files or encode contents for
         # directories.
-        return
+        return func_return
 
     # Otherwise we actually want to put the things in the file store.
 
     def visit_file_or_directory_down(
         rec: CWLObjectType,
-    ) -> Optional[
+    ) -> Optional[list[CWLObjectType]]:
         """
         Visit each CWL File or Directory on the way down.
 
@@ -1827,7 +2023,7 @@
             ensure_no_collisions(cast(DirectoryType, rec))
 
             # Pull out the old listing, if any
-            old_listing = cast(Optional[
+            old_listing = cast(Optional[list[CWLObjectType]], rec.get("listing", None))
 
             if not cast(str, rec["location"]).startswith("_:"):
                 # This is a thing we can list and not just a literal, so we
@@ -1849,8 +2045,8 @@
 
     def visit_file_or_directory_up(
         rec: CWLObjectType,
-        down_result: Optional[
-        child_results:
+        down_result: Optional[list[CWLObjectType]],
+        child_results: list[DirectoryContents],
     ) -> DirectoryContents:
         """
         For a CWL File or Directory, make sure it is uploaded and it has a
@@ -1872,10 +2068,15 @@
             # This is a CWL File
 
             result: DirectoryContents = {}
-
-
-
-
+            # Run a function on the file and store the return
+            func_return.append(
+                func(
+                    fileindex,
+                    existing,
+                    rec,
+                    mark_broken=mark_broken,
+                    skip_remote=skip_remote,
+                )
             )
 
             # Make a record for this file under its name
@@ -1904,7 +2105,7 @@
                 contents.update(child_result)
 
             # Upload the directory itself, which will adjust its location.
-            upload_directory(rec, contents,
+            upload_directory(rec, contents, mark_broken=mark_broken)
 
             # Show those contents as being under our name in our parent.
             return {cast(str, rec["basename"]): contents}
@@ -1919,12 +2120,13 @@
         visit_file_or_directory_down,
         visit_file_or_directory_up,
     )
+    return func_return
 
 
 def upload_directory(
     directory_metadata: CWLObjectType,
     directory_contents: DirectoryContents,
-
+    mark_broken: bool = False,
 ) -> None:
     """
     Upload a Directory object.
@@ -1936,6 +2138,9 @@ def upload_directory(
     Makes sure the directory actually exists, and rewrites its location to be
     something we can use on another machine.
 
+    If mark_broken is set, ignores missing directories and replaces them with
+    directories containing the given (possibly empty) contents.
+
     We can't rely on the directory's listing as visible to the next tool as a
     complete recursive description of the files we will need to present to the
     tool, since some tools require it to be cleared or single-level but still
@@ -1956,8 +2161,8 @@
     if location.startswith("file://") and not os.path.isdir(
         schema_salad.ref_resolver.uri_file_path(location)
     ):
-        if
-
+        if mark_broken:
+            logger.debug("Directory %s is missing as a whole", directory_metadata)
         else:
             raise cwl_utils.errors.WorkflowException(
                 "Directory is missing: %s" % directory_metadata["location"]
@@ -1974,48 +2179,34 @@
     directory_metadata["location"] = encode_directory(directory_contents)
 
 
-def
-
-    fileindex:
-    existing:
+def extract_and_convert_file_to_toil_uri(
+    convertfunc: Callable[[str], FileID],
+    fileindex: dict[str, str],
+    existing: dict[str, str],
     file_metadata: CWLObjectType,
-
-    skip_remote: bool = False
+    mark_broken: bool = False,
+    skip_remote: bool = False,
 ) -> None:
     """
-
+    Extract the file URI out of a file object and convert it to a Toil URI.
 
-
-
-
-
-
+    Runs convertfunc on the file URI to handle conversion.
+
+    Is used to handle importing files into the jobstore.
+
+    If a file doesn't exist, fails with an error, unless mark_broken is set, in
+    which case the missing file is given a special sentinel location.
+
+    Unless skip_remote is set, also run on remote files and sets their locations
+    to toil URIs as well.
     """
-    location =
-
-
-
-
-
-    return
-    if location in fileindex:
-        file_metadata["location"] = fileindex[location]
-        return
-    if not location and file_metadata["path"]:
-        file_metadata["location"] = location = schema_salad.ref_resolver.file_uri(
-            cast(str, file_metadata["path"])
+    location = extract_file_uri_once(
+        fileindex, existing, file_metadata, mark_broken, skip_remote
+    )
+    if location is not None:
+        file_metadata["location"] = convert_file_uri_to_toil_uri(
+            convertfunc, fileindex, existing, location
         )
-    if location.startswith("file://") and not os.path.isfile(
-        schema_salad.ref_resolver.uri_file_path(location)
-    ):
-        if skip_broken:
-            return
-        else:
-            raise cwl_utils.errors.WorkflowException("File is missing: %s" % location)
-
-    if location.startswith("file://") or not skip_remote:
-        # This is a local file, or we also need to download and re-upload remote files
-        file_metadata["location"] = write_file(uploadfunc, fileindex, existing, location)
 
     logger.debug("Sending file at: %s", file_metadata["location"])
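Note: `extract_and_convert_file_to_toil_uri` takes its conversion callable first, so binding that argument with `functools.partial` leaves exactly the `VisitFunc` signature. A sketch, assuming a `file_store` in scope and using `writeGlobalFileWrapper` (whose signature appears in the next hunk header) as the uploader:

```python
from functools import partial

# writeGlobalFileWrapper(file_store, fileuri) uploads one URI; binding the
# file store yields the Callable[[str], FileID] expected as convertfunc.
convertfunc = partial(writeGlobalFileWrapper, file_store)

# Binding convertfunc produces a VisitFunc-shaped callable that visit_files
# can apply to import every File in a CWL object.
import_visitor = partial(extract_and_convert_file_to_toil_uri, convertfunc)
```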
@@ -2028,7 +2219,7 @@ def writeGlobalFileWrapper(file_store: AbstractFileStore, fileuri: str) -> FileID:
 
 def remove_empty_listings(rec: CWLObjectType) -> None:
     if rec.get("class") != "Directory":
-        finddirs
+        finddirs: list[CWLObjectType] = []
         visit_class(rec, ("Directory",), finddirs.append)
         for f in finddirs:
             remove_empty_listings(f)
@@ -2048,7 +2239,7 @@ class CWLNamedJob(Job):
         cores: Union[float, None] = 1,
         memory: Union[int, str, None] = "1GiB",
         disk: Union[int, str, None] = "1MiB",
-        accelerators: Optional[
+        accelerators: Optional[list[AcceleratorRequirement]] = None,
         preemptible: Optional[bool] = None,
         tool_id: Optional[str] = None,
         parent_name: Optional[str] = None,
@@ -2123,10 +2314,10 @@ class ResolveIndirect(CWLNamedJob):
 
 def toilStageFiles(
     toil: Toil,
-    cwljob: Union[CWLObjectType,
+    cwljob: Union[CWLObjectType, list[CWLObjectType]],
     outdir: str,
     destBucket: Union[str, None] = None,
-    log_level: int = logging.DEBUG
+    log_level: int = logging.DEBUG,
 ) -> None:
     """
     Copy input files out of the global file store and update location and path.
@@ -2134,11 +2325,11 @@ def toilStageFiles(
     :param destBucket: If set, export to this base URL instead of to the local
         filesystem.
 
-    :param log_level: Log each file
+    :param log_level: Log each file transferred at the given level.
     """
 
     def _collectDirEntries(
-        obj: Union[CWLObjectType,
+        obj: Union[CWLObjectType, list[CWLObjectType]]
     ) -> Iterator[CWLObjectType]:
         if isinstance(obj, dict):
             if obj.get("class") in ("File", "Directory"):
@@ -2220,13 +2411,17 @@ def toilStageFiles(
                     # TODO: Use direct S3 to S3 copy on exports as well
                     file_id_or_contents = (
                         "toilfile:"
-                        + toil.import_file(
+                        + toil.import_file(
+                            file_id_or_contents, symlink=False
+                        ).pack()
                     )
 
                 if file_id_or_contents.startswith("toilfile:"):
                     # This is something we can export
                     # TODO: Do we need to urlencode the parts before sending them to S3?
-                    dest_url = "/".join(
+                    dest_url = "/".join(
+                        s.strip("/") for s in [destBucket, baseName]
+                    )
                     logger.log(log_level, "Saving %s...", dest_url)
                     toil.export_file(
                         FileID.unpack(file_id_or_contents[len("toilfile:") :]),
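Note: stripping slashes from each part before joining keeps the destination URL free of doubled separators no matter how the bucket URL was written. A quick illustration with made-up values:

```python
destBucket = "s3://my-results/"  # hypothetical destination bucket
baseName = "/outputs/final.bam"  # hypothetical name under the bucket

dest_url = "/".join(s.strip("/") for s in [destBucket, baseName])
print(dest_url)  # s3://my-results/outputs/final.bam (no doubled slash)
```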
@@ -2248,7 +2443,12 @@
                         # Probably staging and bypassing file store. Just copy.
                         logger.log(log_level, "Saving %s...", dest_url)
                         os.makedirs(os.path.dirname(p.target), exist_ok=True)
-
+                        try:
+                            shutil.copyfile(p.resolved, p.target)
+                        except shutil.SameFileError:
+                            # If outdir isn't set and we're passing through an input file/directory as the output,
+                            # the file doesn't need to be copied because it is already there
+                            pass
                     else:
                         uri = p.resolved
                         if not uri.startswith("toilfile:"):
@@ -2321,26 +2521,31 @@ class CWLJobWrapper(CWLNamedJob):
             subjob_name="_wrapper",
             local=True,
         )
-        self.cwltool =
+        self.cwltool = tool
         self.cwljob = cwljob
         self.runtime_context = runtime_context
-        self.conditional = conditional
+        self.conditional = conditional or Conditional()
        self.parent_name = parent_name

    def run(self, file_store: AbstractFileStore) -> Any:
        """Create a child job with the correct resource requirements set."""
        cwljob = resolve_dict_w_promises(self.cwljob, file_store)
+
+        # Check confitional to license full evaluation of job inputs.
+        if self.conditional.is_false(cwljob):
+            return self.conditional.skipped_outputs()
+
        fill_in_defaults(
            self.cwltool.tool["inputs"],
            cwljob,
            self.runtime_context.make_fs_access(self.runtime_context.basedir or ""),
        )
+        # Don't forward the conditional. We checked it already.
        realjob = CWLJob(
            tool=self.cwltool,
            cwljob=cwljob,
            runtime_context=self.runtime_context,
            parent_name=self.parent_name,
-            conditional=self.conditional,
        )
        self.addChild(realjob)
        return realjob.rv()
@@ -2358,7 +2563,7 @@ class CWLJob(CWLNamedJob):
         conditional: Union[Conditional, None] = None,
     ):
         """Store the context for later execution."""
-        self.cwltool =
+        self.cwltool = tool
         self.conditional = conditional or Conditional()
 
         if runtime_context.builder:
@@ -2375,7 +2580,7 @@ class CWLJob(CWLNamedJob):
                 resources={},
                 mutation_manager=runtime_context.mutation_manager,
                 formatgraph=tool.formatgraph,
-                make_fs_access=cast(
+                make_fs_access=cast(type[StdFsAccess], runtime_context.make_fs_access),
                 fs_access=runtime_context.make_fs_access(""),
                 job_script_provider=runtime_context.job_script_provider,
                 timeout=runtime_context.eval_timeout,
@@ -2392,7 +2597,21 @@ class CWLJob(CWLNamedJob):
|
|
|
2392
2597
|
|
|
2393
2598
|
req = tool.evalResources(self.builder, runtime_context)
|
|
2394
2599
|
|
|
2395
|
-
|
|
2600
|
+
tool_own_resources = tool.get_requirement("ResourceRequirement")[0] or {}
|
|
2601
|
+
if "ramMin" in tool_own_resources or "ramMax" in tool_own_resources:
|
|
2602
|
+
# The tool is actually asking for memory.
|
|
2603
|
+
memory = int(req["ram"] * (2**20))
|
|
2604
|
+
else:
|
|
2605
|
+
# The tool is getting a default ram allocation.
|
|
2606
|
+
if getattr(runtime_context, "cwl_default_ram"):
|
|
2607
|
+
# We will respect the CWL spec and apply the default cwltool
|
|
2608
|
+
# computed, which might be different than Toil's default.
|
|
2609
|
+
memory = int(req["ram"] * (2**20))
|
|
2610
|
+
else:
|
|
2611
|
+
# We use a None requirement and the Toil default applies.
|
|
2612
|
+
memory = None
|
|
2613
|
+
|
|
2614
|
+
accelerators: Optional[list[AcceleratorRequirement]] = None
|
|
2396
2615
|
if req.get("cudaDeviceCount", 0) > 0:
|
|
2397
2616
|
# There's a CUDARequirement, which cwltool processed for us
|
|
2398
2617
|
# TODO: How is cwltool deciding what value to use between min and max?
|
|
@@ -2456,7 +2675,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2456
2675
|
|
|
2457
2676
|
super().__init__(
|
|
2458
2677
|
cores=req["cores"],
|
|
2459
|
-
memory=
|
|
2678
|
+
memory=memory,
|
|
2460
2679
|
disk=int(total_disk),
|
|
2461
2680
|
accelerators=accelerators,
|
|
2462
2681
|
preemptible=preemptible,
|
|
@@ -2470,7 +2689,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2470
2689
|
self.step_inputs = self.cwltool.tool["inputs"]
|
|
2471
2690
|
self.workdir: str = runtime_context.workdir # type: ignore[attr-defined]
|
|
2472
2691
|
|
|
2473
|
-
def required_env_vars(self, cwljob: Any) -> Iterator[
|
|
2692
|
+
def required_env_vars(self, cwljob: Any) -> Iterator[tuple[str, str]]:
|
|
2474
2693
|
"""Yield environment variables from EnvVarRequirement."""
|
|
2475
2694
|
if isinstance(cwljob, dict):
|
|
2476
2695
|
if cwljob.get("class") == "EnvVarRequirement":
|
|
@@ -2482,7 +2701,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2482
2701
|
for env_var in cwljob:
|
|
2483
2702
|
yield from self.required_env_vars(env_var)
|
|
2484
2703
|
|
|
2485
|
-
def populate_env_vars(self, cwljob: CWLObjectType) ->
|
|
2704
|
+
def populate_env_vars(self, cwljob: CWLObjectType) -> dict[str, str]:
|
|
2486
2705
|
"""
|
|
2487
2706
|
Prepare environment variables necessary at runtime for the job.
|
|
2488
2707
|
|
|
@@ -2498,9 +2717,9 @@ class CWLJob(CWLNamedJob):
|
|
|
2498
2717
|
required_env_vars = {}
|
|
2499
2718
|
# iterate over EnvVarRequirement env vars, if any
|
|
2500
2719
|
for k, v in self.required_env_vars(cwljob):
|
|
2501
|
-
required_env_vars[
|
|
2502
|
-
|
|
2503
|
-
|
|
2720
|
+
required_env_vars[k] = (
|
|
2721
|
+
v # will tell cwltool which env vars to take from the environment
|
|
2722
|
+
)
|
|
2504
2723
|
os.environ[k] = v
|
|
2505
2724
|
# needs to actually be populated in the environment as well or
|
|
2506
2725
|
# they're not used
|
|
@@ -2510,7 +2729,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2510
2729
|
# env var with the same name is found
|
|
2511
2730
|
for req in self.cwltool.requirements:
|
|
2512
2731
|
if req["class"] == "EnvVarRequirement":
|
|
2513
|
-
envDefs = cast(
|
|
2732
|
+
envDefs = cast(list[dict[str, str]], req["envDef"])
|
|
2514
2733
|
for env_def in envDefs:
|
|
2515
2734
|
env_name = env_def["envName"]
|
|
2516
2735
|
if env_name in required_env_vars:
|
|
@@ -2542,7 +2761,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2542
2761
|
for inp_id in immobile_cwljob_dict.keys():
|
|
2543
2762
|
found = False
|
|
2544
2763
|
for field in cast(
|
|
2545
|
-
|
|
2764
|
+
list[dict[str, str]], self.cwltool.inputs_record_schema["fields"]
|
|
2546
2765
|
):
|
|
2547
2766
|
if field["name"] == inp_id:
|
|
2548
2767
|
found = True
|
|
@@ -2557,8 +2776,8 @@ class CWLJob(CWLNamedJob):
|
|
|
2557
2776
|
functools.partial(remove_empty_listings),
|
|
2558
2777
|
)
|
|
2559
2778
|
|
|
2560
|
-
index:
|
|
2561
|
-
existing:
|
|
2779
|
+
index: dict[str, str] = {}
|
|
2780
|
+
existing: dict[str, str] = {}
|
|
2562
2781
|
|
|
2563
2782
|
# Prepare the run instructions for cwltool
|
|
2564
2783
|
runtime_context = self.runtime_context.copy()
|
|
@@ -2570,7 +2789,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2570
2789
|
# will come and grab this function for fetching files from the Toil
|
|
2571
2790
|
# file store. pipe_threads is used for keeping track of separate
|
|
2572
2791
|
# threads launched to stream files around.
|
|
2573
|
-
pipe_threads:
|
|
2792
|
+
pipe_threads: list[tuple[Thread, int]] = []
|
|
2574
2793
|
setattr(
|
|
2575
2794
|
runtime_context,
|
|
2576
2795
|
"toil_get_file",
|
|
@@ -2604,7 +2823,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2604
2823
|
# function and a path_mapper type or factory function.
|
|
2605
2824
|
|
|
2606
2825
|
runtime_context.make_fs_access = cast(
|
|
2607
|
-
|
|
2826
|
+
type[StdFsAccess],
|
|
2608
2827
|
functools.partial(ToilFsAccess, file_store=file_store),
|
|
2609
2828
|
)
|
|
2610
2829
|
|
|
@@ -2614,6 +2833,17 @@ class CWLJob(CWLNamedJob):
|
|
|
2614
2833
|
streaming_allowed=runtime_context.streaming_allowed,
|
|
2615
2834
|
)
|
|
2616
2835
|
|
|
2836
|
+
# Collect standard output and standard error somewhere if they don't go to files.
|
|
2837
|
+
# We need to keep two FDs to these because cwltool will close what we give it.
|
|
2838
|
+
default_stdout = TemporaryFile()
|
|
2839
|
+
runtime_context.default_stdout = os.fdopen(
|
|
2840
|
+
os.dup(default_stdout.fileno()), "wb"
|
|
2841
|
+
)
|
|
2842
|
+
default_stderr = TemporaryFile()
|
|
2843
|
+
runtime_context.default_stderr = os.fdopen(
|
|
2844
|
+
os.dup(default_stderr.fileno()), "wb"
|
|
2845
|
+
)
|
|
2846
|
+
|
|
2617
2847
|
process_uuid = uuid.uuid4() # noqa F841
|
|
2618
2848
|
started_at = datetime.datetime.now() # noqa F841
|
|
2619
2849
|
|
|
@@ -2622,13 +2852,49 @@ class CWLJob(CWLNamedJob):
|
|
|
2622
2852
|
logger.debug("Running tool %s with order: %s", self.cwltool, self.cwljob)
|
|
2623
2853
|
|
|
2624
2854
|
runtime_context.name = self.description.unitName
|
|
2625
|
-
|
|
2626
|
-
|
|
2627
|
-
|
|
2628
|
-
|
|
2629
|
-
|
|
2630
|
-
|
|
2631
|
-
|
|
2855
|
+
|
|
2856
|
+
if isinstance(self.cwltool, ToilTool):
|
|
2857
|
+
# Connect the CWL tool to us so it can call into the Toil job when
|
|
2858
|
+
# it reaches points where we might need to debug it.
|
|
2859
|
+
self.cwltool.connect_toil_job(self)
|
|
2860
|
+
|
|
2861
|
+
status = "did_not_run"
|
|
2862
|
+
try:
|
|
2863
|
+
output, status = ToilSingleJobExecutor().execute(
|
|
2864
|
+
process=self.cwltool,
|
|
2865
|
+
job_order_object=cwljob,
|
|
2866
|
+
runtime_context=runtime_context,
|
|
2867
|
+
logger=cwllogger,
|
|
2868
|
+
)
|
|
2869
|
+
finally:
|
|
2870
|
+
ended_at = datetime.datetime.now() # noqa F841
|
|
2871
|
+
|
|
2872
|
+
# Log any output/error data
|
|
2873
|
+
default_stdout.seek(0, os.SEEK_END)
|
|
2874
|
+
if default_stdout.tell() > 0:
|
|
2875
|
+
default_stdout.seek(0)
|
|
2876
|
+
file_store.log_user_stream(
|
|
2877
|
+
self.description.unitName + ".stdout", default_stdout
|
|
2878
|
+
)
|
|
2879
|
+
if status != "success":
|
|
2880
|
+
default_stdout.seek(0)
|
|
2881
|
+
logger.error(
|
|
2882
|
+
"Failed command standard output:\n%s",
|
|
2883
|
+
default_stdout.read().decode("utf-8", errors="replace"),
|
|
2884
|
+
)
|
|
2885
|
+
default_stderr.seek(0, os.SEEK_END)
|
|
2886
|
+
if default_stderr.tell():
|
|
2887
|
+
default_stderr.seek(0)
|
|
2888
|
+
file_store.log_user_stream(
|
|
2889
|
+
self.description.unitName + ".stderr", default_stderr
|
|
2890
|
+
)
|
|
2891
|
+
if status != "success":
|
|
2892
|
+
default_stderr.seek(0)
|
|
2893
|
+
logger.error(
|
|
2894
|
+
"Failed command standard error:\n%s",
|
|
2895
|
+
default_stderr.read().decode("utf-8", errors="replace"),
|
|
2896
|
+
)
|
|
2897
|
+
|
|
2632
2898
|
if status != "success":
|
|
2633
2899
|
raise cwl_utils.errors.WorkflowException(status)
|
|
2634
2900
|
|
|
@@ -2640,12 +2906,18 @@ class CWLJob(CWLNamedJob):
|
|
|
2640
2906
|
fs_access = runtime_context.make_fs_access(runtime_context.basedir)
|
|
2641
2907
|
|
|
2642
2908
|
# And a file importer that can go from a file:// URI to a Toil FileID
|
|
2643
|
-
file_import_function =
|
|
2909
|
+
def file_import_function(url: str, log_level: int = logging.DEBUG) -> FileID:
|
|
2910
|
+
logger.log(log_level, "Loading %s...", url)
|
|
2911
|
+
return writeGlobalFileWrapper(file_store, url)
|
|
2912
|
+
|
|
2913
|
+
file_upload_function = functools.partial(
|
|
2914
|
+
extract_and_convert_file_to_toil_uri, file_import_function
|
|
2915
|
+
)
|
|
2644
2916
|
|
|
2645
2917
|
# Upload all the Files and set their and the Directories' locations, if
|
|
2646
2918
|
# needed.
|
|
2647
|
-
|
|
2648
|
-
|
|
2919
|
+
visit_files(
|
|
2920
|
+
file_upload_function,
|
|
2649
2921
|
fs_access,
|
|
2650
2922
|
index,
|
|
2651
2923
|
existing,
|
|
@@ -2675,6 +2947,74 @@ def get_container_engine(runtime_context: cwltool.context.RuntimeContext) -> str
|
|
|
2675
2947
|
return "docker"
|
|
2676
2948
|
|
|
2677
2949
|
|
|
2950
|
+
def makeRootJob(
|
|
2951
|
+
tool: Process,
|
|
2952
|
+
jobobj: CWLObjectType,
|
|
2953
|
+
runtime_context: cwltool.context.RuntimeContext,
|
|
2954
|
+
initialized_job_order: CWLObjectType,
|
|
2955
|
+
options: Namespace,
|
|
2956
|
+
toil: Toil,
|
|
2957
|
+
) -> CWLNamedJob:
|
|
2958
|
+
"""
|
|
2959
|
+
Create the Toil root Job object for the CWL tool. Is the same as makeJob() except this also handles import logic.
|
|
2960
|
+
|
|
2961
|
+
Actually creates what might be a subgraph of two jobs. The second of which may be the follow on of the first.
|
|
2962
|
+
If only one job is created, it is returned twice.
|
|
2963
|
+
|
|
2964
|
+
:return:
|
|
2965
|
+
"""
|
|
2966
|
+
if options.run_imports_on_workers:
|
|
2967
|
+
filenames = extract_workflow_inputs(options, initialized_job_order, tool)
|
|
2968
|
+
metadata = get_file_sizes(
|
|
2969
|
+
filenames, toil._jobStore, include_remote_files=options.reference_inputs
|
|
2970
|
+
)
|
|
2971
|
+
|
|
2972
|
+
# Mapping of files to metadata for files that will be imported on the worker
|
|
2973
|
+
# This will consist of files that we were able to get a file size for
|
|
2974
|
+
worker_metadata: dict[str, FileMetadata] = dict()
|
|
2975
|
+
# Mapping of files to metadata for files that will be imported on the leader
|
|
2976
|
+
# This will consist of files that we were not able to get a file size for
|
|
2977
|
+
leader_metadata = dict()
|
|
2978
|
+
for filename, file_data in metadata.items():
|
|
2979
|
+
if file_data.size is None:
|
|
2980
|
+
leader_metadata[filename] = file_data
|
|
2981
|
+
else:
|
|
2982
|
+
worker_metadata[filename] = file_data
|
|
2983
|
+
|
|
2984
|
+
# import the files for the leader first
|
|
2985
|
+
path_to_fileid = WorkerImportJob.import_files(
|
|
2986
|
+
list(leader_metadata.keys()), toil._jobStore
|
|
2987
|
+
)
|
|
2988
|
+
|
|
2989
|
+
# then install the imported files before importing the other files
|
|
2990
|
+
# this way the control flow can fall from the leader to workers
|
|
2991
|
+
tool, initialized_job_order = CWLInstallImportsJob.fill_in_files(
|
|
2992
|
+
initialized_job_order,
|
|
2993
|
+
tool,
|
|
2994
|
+
path_to_fileid,
|
|
2995
|
+
options.basedir,
|
|
2996
|
+
options.reference_inputs,
|
|
2997
|
+
options.bypass_file_store,
|
|
2998
|
+
)
|
|
2999
|
+
|
|
3000
|
+
import_job = CWLImportWrapper(
|
|
3001
|
+
initialized_job_order, tool, runtime_context, worker_metadata, options
|
|
3002
|
+
)
|
|
3003
|
+
return import_job
|
|
3004
|
+
else:
|
|
3005
|
+
import_workflow_inputs(
|
|
3006
|
+
toil._jobStore,
|
|
3007
|
+
options,
|
|
3008
|
+
initialized_job_order=initialized_job_order,
|
|
3009
|
+
tool=tool,
|
|
3010
|
+
)
|
|
3011
|
+
root_job, followOn = makeJob(
|
|
3012
|
+
tool, jobobj, runtime_context, None, None
|
|
3013
|
+
) # toplevel, no name needed
|
|
3014
|
+
root_job.cwljob = initialized_job_order
|
|
3015
|
+
return root_job
|
|
3016
|
+
|
|
3017
|
+
|
|
2678
3018
|
def makeJob(
|
|
2679
3019
|
tool: Process,
|
|
2680
3020
|
jobobj: CWLObjectType,
|
|
@@ -2682,13 +3022,16 @@ def makeJob(
|
|
|
2682
3022
|
parent_name: Optional[str],
|
|
2683
3023
|
conditional: Union[Conditional, None],
|
|
2684
3024
|
) -> Union[
|
|
2685
|
-
|
|
2686
|
-
|
|
2687
|
-
|
|
3025
|
+
tuple["CWLWorkflow", ResolveIndirect],
|
|
3026
|
+
tuple[CWLJob, CWLJob],
|
|
3027
|
+
tuple[CWLJobWrapper, CWLJobWrapper],
|
|
2688
3028
|
]:
|
|
2689
3029
|
"""
|
|
2690
3030
|
Create the correct Toil Job object for the CWL tool.
|
|
2691
3031
|
|
|
3032
|
+
Actually creates what might be a subgraph of two jobs. The second of which may be the follow on of the first.
|
|
3033
|
+
If only one job is created, it is returned twice.
|
|
3034
|
+
|
|
2692
3035
|
Types: workflow, job, or job wrapper for dynamic resource requirements.
|
|
2693
3036
|
|
|
2694
3037
|
:return: "wfjob, followOn" if the input tool is a workflow, and "job, job" otherwise
|
|
@@ -2768,16 +3111,16 @@ class CWLScatter(Job):
|
|
|
2768
3111
|
def flat_crossproduct_scatter(
|
|
2769
3112
|
self,
|
|
2770
3113
|
joborder: CWLObjectType,
|
|
2771
|
-
scatter_keys:
|
|
2772
|
-
outputs:
|
|
3114
|
+
scatter_keys: list[str],
|
|
3115
|
+
outputs: list[Promised[CWLObjectType]],
|
|
2773
3116
|
postScatterEval: Callable[[CWLObjectType], CWLObjectType],
|
|
2774
3117
|
) -> None:
|
|
2775
3118
|
"""Cartesian product of the inputs, then flattened."""
|
|
2776
3119
|
scatter_key = shortname(scatter_keys[0])
|
|
2777
|
-
for n in range(0, len(cast(
|
|
3120
|
+
for n in range(0, len(cast(list[CWLObjectType], joborder[scatter_key]))):
|
|
2778
3121
|
updated_joborder = copy.copy(joborder)
|
|
2779
3122
|
updated_joborder[scatter_key] = cast(
|
|
2780
|
-
|
|
3123
|
+
list[CWLObjectType], joborder[scatter_key]
|
|
2781
3124
|
)[n]
|
|
2782
3125
|
if len(scatter_keys) == 1:
|
|
2783
3126
|
updated_joborder = postScatterEval(updated_joborder)
|
|
@@ -2798,16 +3141,16 @@ class CWLScatter(Job):
|
|
|
2798
3141
|
def nested_crossproduct_scatter(
|
|
2799
3142
|
self,
|
|
2800
3143
|
joborder: CWLObjectType,
|
|
2801
|
-
scatter_keys:
|
|
3144
|
+
scatter_keys: list[str],
|
|
2802
3145
|
postScatterEval: Callable[[CWLObjectType], CWLObjectType],
|
|
2803
|
-
) ->
|
|
3146
|
+
) -> list[Promised[CWLObjectType]]:
|
|
2804
3147
|
"""Cartesian product of the inputs."""
|
|
2805
3148
|
scatter_key = shortname(scatter_keys[0])
|
|
2806
|
-
outputs:
|
|
2807
|
-
for n in range(0, len(cast(
|
|
3149
|
+
outputs: list[Promised[CWLObjectType]] = []
|
|
3150
|
+
for n in range(0, len(cast(list[CWLObjectType], joborder[scatter_key]))):
|
|
2808
3151
|
updated_joborder = copy.copy(joborder)
|
|
2809
3152
|
updated_joborder[scatter_key] = cast(
|
|
2810
|
-
|
|
3153
|
+
list[CWLObjectType], joborder[scatter_key]
|
|
2811
3154
|
)[n]
|
|
2812
3155
|
if len(scatter_keys) == 1:
|
|
2813
3156
|
updated_joborder = postScatterEval(updated_joborder)
|
|
@@ -2828,7 +3171,7 @@ class CWLScatter(Job):
|
|
|
2828
3171
|
)
|
|
2829
3172
|
return outputs
|
|
2830
3173
|
|
|
2831
|
-
def run(self, file_store: AbstractFileStore) ->
|
|
3174
|
+
def run(self, file_store: AbstractFileStore) -> list[Promised[CWLObjectType]]:
|
|
2832
3175
|
"""Generate the follow on scatter jobs."""
|
|
2833
3176
|
cwljob = resolve_dict_w_promises(self.cwljob, file_store)
|
|
2834
3177
|
|
|
@@ -2840,7 +3183,7 @@ class CWLScatter(Job):
|
|
|
2840
3183
|
scatterMethod = self.step.tool.get("scatterMethod", None)
|
|
2841
3184
|
if len(scatter) == 1:
|
|
2842
3185
|
scatterMethod = "dotproduct"
|
|
2843
|
-
outputs:
|
|
3186
|
+
outputs: list[Promised[CWLObjectType]] = []
|
|
2844
3187
|
|
|
2845
3188
|
valueFrom = {
|
|
2846
3189
|
shortname(i["id"]): i["valueFrom"]
|
|
@@ -2872,11 +3215,11 @@ class CWLScatter(Job):
|
|
|
2872
3215
|
|
|
2873
3216
|
if scatterMethod == "dotproduct":
|
|
2874
3217
|
for i in range(
|
|
2875
|
-
0, len(cast(
|
|
3218
|
+
0, len(cast(list[CWLObjectType], cwljob[shortname(scatter[0])]))
|
|
2876
3219
|
):
|
|
2877
3220
|
copyjob = copy.copy(cwljob)
|
|
2878
3221
|
for sc in [shortname(x) for x in scatter]:
|
|
2879
|
-
copyjob[sc] = cast(
|
|
3222
|
+
copyjob[sc] = cast(list[CWLObjectType], cwljob[sc])[i]
|
|
2880
3223
|
copyjob = postScatterEval(copyjob)
|
|
2881
3224
|
subjob, follow_on = makeJob(
|
|
2882
3225
|
tool=self.step.embedded_tool,
|
|
@@ -2915,7 +3258,7 @@ class CWLGather(Job):
|
|
|
2915
3258
|
def __init__(
|
|
2916
3259
|
self,
|
|
2917
3260
|
step: cwltool.workflow.WorkflowStep,
|
|
2918
|
-
outputs: Promised[Union[CWLObjectType,
|
|
3261
|
+
outputs: Promised[Union[CWLObjectType, list[CWLObjectType]]],
|
|
2919
3262
|
):
|
|
2920
3263
|
"""Collect our context for later gathering."""
|
|
2921
3264
|
super().__init__(cores=1, memory="1GiB", disk="1MiB", local=True)
|
|
@@ -2924,24 +3267,24 @@ class CWLGather(Job):
|
|
|
2924
3267
|
|
|
2925
3268
|
@staticmethod
|
|
2926
3269
|
def extract(
|
|
2927
|
-
obj: Union[CWLObjectType,
|
|
2928
|
-
) -> Union[CWLOutputType,
|
|
3270
|
+
obj: Union[CWLObjectType, list[CWLObjectType]], k: str
|
|
3271
|
+
) -> Union[CWLOutputType, list[CWLObjectType]]:
|
|
2929
3272
|
"""
|
|
2930
3273
|
Extract the given key from the obj.
|
|
2931
3274
|
|
|
2932
3275
|
If the object is a list, extract it from all members of the list.
|
|
2933
3276
|
"""
|
|
2934
3277
|
if isinstance(obj, Mapping):
|
|
2935
|
-
return cast(Union[CWLOutputType,
|
|
3278
|
+
return cast(Union[CWLOutputType, list[CWLObjectType]], obj.get(k))
|
|
2936
3279
|
elif isinstance(obj, MutableSequence):
|
|
2937
|
-
cp:
|
|
3280
|
+
cp: list[CWLObjectType] = []
|
|
2938
3281
|
for item in obj:
|
|
2939
3282
|
cp.append(cast(CWLObjectType, CWLGather.extract(item, k)))
|
|
2940
3283
|
return cp
|
|
2941
3284
|
else:
|
|
2942
|
-
return cast(
|
|
3285
|
+
return cast(list[CWLObjectType], [])
|
|
2943
3286
|
|
|
2944
|
-
def run(self, file_store: AbstractFileStore) ->
|
|
3287
|
+
def run(self, file_store: AbstractFileStore) -> dict[str, Any]:
|
|
2945
3288
|
"""Gather all the outputs of the scatter."""
|
|
2946
3289
|
outobj = {}
|
|
2947
3290
|
|
|
@@ -2952,8 +3295,8 @@ class CWLGather(Job):
|
|
|
2952
3295
|
return shortname(n)
|
|
2953
3296
|
|
|
2954
3297
|
# TODO: MyPy can't understand that this is the type we should get by unwrapping the promise
|
|
2955
|
-
outputs: Union[CWLObjectType,
|
|
2956
|
-
Union[CWLObjectType,
|
|
3298
|
+
outputs: Union[CWLObjectType, list[CWLObjectType]] = cast(
|
|
3299
|
+
Union[CWLObjectType, list[CWLObjectType]], unwrap(self.outputs)
|
|
2957
3300
|
)
|
|
2958
3301
|
for k in [sn(i) for i in self.step.tool["out"]]:
|
|
2959
3302
|
outobj[k] = self.extract(outputs, k)
|
|
@@ -2995,7 +3338,11 @@ ProcessType = TypeVar(
|
|
|
2995
3338
|
|
|
2996
3339
|
|
|
2997
3340
|
def remove_pickle_problems(obj: ProcessType) -> ProcessType:
|
|
2998
|
-
"""
|
|
3341
|
+
"""
|
|
3342
|
+
Doc_loader does not pickle correctly, causing Toil errors, remove from objects.
|
|
3343
|
+
|
|
3344
|
+
See github issue: https://github.com/mypyc/mypyc/issues/804
|
|
3345
|
+
"""
|
|
2999
3346
|
if hasattr(obj, "doc_loader"):
|
|
3000
3347
|
obj.doc_loader = None
|
|
3001
3348
|
if isinstance(obj, cwltool.workflow.WorkflowStep):
|
|
@@ -3027,12 +3374,11 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
3027
3374
|
self.cwlwf = cwlwf
|
|
3028
3375
|
self.cwljob = cwljob
|
|
3029
3376
|
self.runtime_context = runtime_context
|
|
3030
|
-
self.cwlwf = remove_pickle_problems(self.cwlwf)
|
|
3031
3377
|
self.conditional = conditional or Conditional()
|
|
3032
3378
|
|
|
3033
3379
|
def run(
|
|
3034
3380
|
self, file_store: AbstractFileStore
|
|
3035
|
-
) -> Union[UnresolvedDict,
|
|
3381
|
+
) -> Union[UnresolvedDict, dict[str, SkipNull]]:
|
|
3036
3382
|
"""
|
|
3037
3383
|
Convert a CWL Workflow graph into a Toil job graph.
|
|
3038
3384
|
|
|
@@ -3053,7 +3399,7 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
3053
3399
|
# that may be used as a "source" for a step input workflow output
|
|
3054
3400
|
# parameter
|
|
3055
3401
|
# to: the job that will produce that value.
|
|
3056
|
-
promises:
|
|
3402
|
+
promises: dict[str, Job] = {}
|
|
3057
3403
|
|
|
3058
3404
|
parent_name = shortname(self.cwlwf.tool["id"])
|
|
3059
3405
|
|
|
@@ -3082,7 +3428,7 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
3082
3428
|
stepinputs_fufilled = False
|
|
3083
3429
|
if stepinputs_fufilled:
|
|
3084
3430
|
logger.debug("Ready to make job for workflow step %s", step_id)
|
|
3085
|
-
jobobj:
|
|
3431
|
+
jobobj: dict[
|
|
3086
3432
|
str, Union[ResolveSource, DefaultWithSource, StepValueFrom]
|
|
3087
3433
|
] = {}
|
|
3088
3434
|
|
|
@@ -3216,30 +3562,348 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
3216
3562
|
return UnresolvedDict(outobj)
|
|
3217
3563
|
|
|
3218
3564
|
|
|
3565
|
+
class CWLInstallImportsJob(Job):
|
|
3566
|
+
def __init__(
|
|
3567
|
+
self,
|
|
3568
|
+
initialized_job_order: Promised[CWLObjectType],
|
|
3569
|
+
tool: Promised[Process],
|
|
3570
|
+
basedir: str,
|
|
3571
|
+
skip_remote: bool,
|
|
3572
|
+
bypass_file_store: bool,
|
|
3573
|
+
import_data: Promised[dict[str, FileID]],
|
|
3574
|
+
**kwargs: Any,
|
|
3575
|
+
) -> None:
|
|
3576
|
+
"""
|
|
3577
|
+
Job to take the entire CWL object and a mapping of filenames to the imported URIs
|
|
3578
|
+
to convert all file locations to URIs.
|
|
3579
|
+
|
|
3580
|
+
This class is only used when runImportsOnWorkers is enabled.
|
|
3581
|
+
"""
|
|
3582
|
+
super().__init__(local=True, **kwargs)
|
|
3583
|
+
self.initialized_job_order = initialized_job_order
|
|
3584
|
+
self.tool = tool
|
|
3585
|
+
self.basedir = basedir
|
|
3586
|
+
self.skip_remote = skip_remote
|
|
3587
|
+
self.bypass_file_store = bypass_file_store
|
|
3588
|
+
self.import_data = import_data
|
|
3589
|
+
|
|
3590
|
+
@staticmethod
|
|
3591
|
+
def fill_in_files(
|
|
3592
|
+
initialized_job_order: CWLObjectType,
|
|
3593
|
+
tool: Process,
|
|
3594
|
+
candidate_to_fileid: dict[str, FileID],
|
|
3595
|
+
basedir: str,
|
|
3596
|
+
skip_remote: bool,
|
|
3597
|
+
bypass_file_store: bool,
|
|
3598
|
+
) -> tuple[Process, CWLObjectType]:
|
|
3599
|
+
"""
|
|
3600
|
+
Given a mapping of filenames to Toil file IDs, replace the filename with the file IDs throughout the CWL object.
|
|
3601
|
+
"""
|
|
3602
|
+
def fill_in_file(filename: str) -> FileID:
|
|
3603
|
+
"""
|
|
3604
|
+
Return the file name's associated Toil file ID
|
|
3605
|
+
"""
|
|
3606
|
+
return candidate_to_fileid[filename]
|
|
3607
|
+
|
|
3608
|
+
file_convert_function = functools.partial(
|
|
3609
|
+
extract_and_convert_file_to_toil_uri, fill_in_file
|
|
3610
|
+
)
|
|
3611
|
+
fs_access = ToilFsAccess(basedir)
|
|
3612
|
+
fileindex: dict[str, str] = {}
|
|
3613
|
+
existing: dict[str, str] = {}
|
|
3614
|
+
visit_files(
|
|
3615
|
+
file_convert_function,
|
|
3616
|
+
fs_access,
|
|
3617
|
+
fileindex,
|
|
3618
|
+
existing,
|
|
3619
|
+
initialized_job_order,
|
|
3620
|
+
mark_broken=True,
|
|
3621
|
+
skip_remote=skip_remote,
|
|
3622
|
+
bypass_file_store=bypass_file_store,
|
|
3623
|
+
)
|
|
3624
|
+
visitSteps(
|
|
3625
|
+
tool,
|
|
3626
|
+
functools.partial(
|
|
3627
|
+
visit_files,
|
|
3628
|
+
file_convert_function,
|
|
3629
|
+
fs_access,
|
|
3630
|
+
fileindex,
|
|
3631
|
+
existing,
|
|
3632
|
+
mark_broken=True,
|
|
3633
|
+
skip_remote=skip_remote,
|
|
3634
|
+
bypass_file_store=bypass_file_store,
|
|
3635
|
+
),
|
|
3636
|
+
)
|
|
3637
|
+
|
|
3638
|
+
# We always expect to have processed all files that exist
|
|
3639
|
+
for param_name, param_value in initialized_job_order.items():
|
|
3640
|
+
# Loop through all the parameters for the workflow overall.
|
|
3641
|
+
# Drop any files that aren't either imported (for when we use
|
|
3642
|
+
# the file store) or available on disk (for when we don't).
|
|
3643
|
+
# This will properly make them cause an error later if they
|
|
3644
|
+
# were required.
|
|
3645
|
+
rm_unprocessed_secondary_files(param_value)
|
|
3646
|
+
return tool, initialized_job_order
|
|
3647
|
+
|
|
3648
|
+
def run(self, file_store: AbstractFileStore) -> Tuple[Process, CWLObjectType]:
|
|
3649
|
+
"""
|
|
3650
|
+
Convert the filenames in the workflow inputs into the URIs
|
|
3651
|
+
:return: Promise of transformed workflow inputs. A tuple of the job order and process
|
|
3652
|
+
"""
|
|
3653
|
+
candidate_to_fileid: dict[str, FileID] = unwrap(self.import_data)
|
|
3654
|
+
|
|
3655
|
+
initialized_job_order = unwrap(self.initialized_job_order)
|
|
3656
|
+
tool = unwrap(self.tool)
|
|
3657
|
+
return CWLInstallImportsJob.fill_in_files(
|
|
3658
|
+
initialized_job_order,
|
|
3659
|
+
tool,
|
|
3660
|
+
candidate_to_fileid,
|
|
3661
|
+
self.basedir,
|
|
3662
|
+
self.skip_remote,
|
|
3663
|
+
self.bypass_file_store,
|
|
3664
|
+
)
|
|
3665
|
+
|
|
3666
|
+
|
|
3667
|
+
class CWLImportWrapper(CWLNamedJob):
|
|
3668
|
+
"""
|
|
3669
|
+
Job to organize importing files on workers instead of the leader. Responsible for extracting filenames and metadata,
|
|
3670
|
+
calling ImportsJob, applying imports to the job objects, and scheduling the start workflow job
|
|
3671
|
+
|
|
3672
|
+
This class is only used when runImportsOnWorkers is enabled.
|
|
3673
|
+
"""
|
|
3674
|
+
|
|
3675
|
+
def __init__(
|
|
3676
|
+
self,
|
|
3677
|
+
initialized_job_order: CWLObjectType,
|
|
3678
|
+
tool: Process,
|
|
3679
|
+
runtime_context: cwltool.context.RuntimeContext,
|
|
3680
|
+
file_to_data: dict[str, FileMetadata],
|
|
3681
|
+
options: Namespace,
|
|
3682
|
+
):
|
|
3683
|
+
super().__init__(local=False, disk=options.import_workers_threshold)
|
|
3684
|
+
self.initialized_job_order = initialized_job_order
|
|
3685
|
+
self.tool = tool
|
|
3686
|
+
self.options = options
|
|
3687
|
+
self.runtime_context = runtime_context
|
|
3688
|
+
self.file_to_data = file_to_data
|
|
3689
|
+
|
|
3690
|
+
def run(self, file_store: AbstractFileStore) -> Any:
|
|
3691
|
+
imports_job = ImportsJob(
|
|
3692
|
+
self.file_to_data,
|
|
3693
|
+
self.options.import_workers_threshold,
|
|
3694
|
+
self.options.import_workers_disk,
|
|
3695
|
+
)
|
|
3696
|
+
self.addChild(imports_job)
|
|
3697
|
+
install_imports_job = CWLInstallImportsJob(
|
|
3698
|
+
initialized_job_order=self.initialized_job_order,
|
|
3699
|
+
tool=self.tool,
|
|
3700
|
+
basedir=self.options.basedir,
|
|
3701
|
+
skip_remote=self.options.reference_inputs,
|
|
3702
|
+
bypass_file_store=self.options.bypass_file_store,
|
|
3703
|
+
import_data=imports_job.rv(0),
|
|
3704
|
+
)
|
|
3705
|
+
self.addChild(install_imports_job)
|
|
3706
|
+
imports_job.addFollowOn(install_imports_job)
|
|
3707
|
+
|
|
3708
|
+
start_job = CWLStartJob(
|
|
3709
|
+
install_imports_job.rv(0),
|
|
3710
|
+
install_imports_job.rv(1),
|
|
3711
|
+
runtime_context=self.runtime_context,
|
|
3712
|
+
)
|
|
3713
|
+
self.addChild(start_job)
|
|
3714
|
+
install_imports_job.addFollowOn(start_job)
|
|
3715
|
+
|
|
3716
|
+
return start_job.rv()
|
|
3717
|
+
|
|
3718
|
+
|
|
3719
|
+
class CWLStartJob(CWLNamedJob):
|
|
3720
|
+
"""
|
|
3721
|
+
Job responsible for starting the CWL workflow.
|
|
3722
|
+
|
|
3723
|
+
Takes in the workflow/tool and inputs after all files are imported
|
|
3724
|
+
and creates jobs to run those workflows.
|
|
3725
|
+
"""
|
|
3726
|
+
|
|
3727
|
+
def __init__(
|
|
3728
|
+
self,
|
|
3729
|
+
tool: Promised[Process],
|
|
3730
|
+
initialized_job_order: Promised[CWLObjectType],
|
|
3731
|
+
runtime_context: cwltool.context.RuntimeContext,
|
|
3732
|
+
**kwargs: Any,
|
|
3733
|
+
) -> None:
|
|
3734
|
+
super().__init__(**kwargs)
|
|
3735
|
+
self.tool = tool
|
|
3736
|
+
self.initialized_job_order = initialized_job_order
|
|
3737
|
+
self.runtime_context = runtime_context
|
|
3738
|
+
|
|
3739
|
+
def run(self, file_store: AbstractFileStore) -> Any:
|
|
3740
|
+
initialized_job_order = unwrap(self.initialized_job_order)
|
|
3741
|
+
tool = unwrap(self.tool)
|
|
3742
|
+
cwljob, _ = makeJob(
|
|
3743
|
+
tool, initialized_job_order, self.runtime_context, None, None
|
|
3744
|
+
) # toplevel, no name needed
|
|
3745
|
+
cwljob.cwljob = initialized_job_order
|
|
3746
|
+
self.addChild(cwljob)
|
|
3747
|
+
return cwljob.rv()
|
|
3748
|
+
|
|
3749
|
+
|
|
3750
|
+
def extract_workflow_inputs(
|
|
3751
|
+
options: Namespace, initialized_job_order: CWLObjectType, tool: Process
|
|
3752
|
+
) -> list[str]:
|
|
3753
|
+
"""
|
|
3754
|
+
Collect all the workflow input files to import later.
|
|
3755
|
+
:param options: namespace
|
|
3756
|
+
:param initialized_job_order: cwl object
|
|
3757
|
+
:param tool: tool object
|
|
3758
|
+
:return:
|
|
3759
|
+
"""
|
|
3760
|
+
fileindex: dict[str, str] = {}
|
|
3761
|
+
existing: dict[str, str] = {}
|
|
3762
|
+
|
|
3763
|
+
# Extract out all the input files' filenames
|
|
3764
|
+
logger.info("Collecting input files...")
|
|
3765
|
+
fs_access = ToilFsAccess(options.basedir)
|
|
3766
|
+
filenames = visit_files(
|
|
3767
|
+
extract_file_uri_once,
|
|
3768
|
+
fs_access,
|
|
3769
|
+
fileindex,
|
|
3770
|
+
existing,
|
|
3771
|
+
initialized_job_order,
|
|
3772
|
+
mark_broken=True,
|
|
3773
|
+
skip_remote=options.reference_inputs,
|
|
3774
|
+
bypass_file_store=options.bypass_file_store,
|
|
3775
|
+
)
|
|
3776
|
+
# Extract filenames of all the files associated with tools (binaries, etc.).
|
|
3777
|
+
logger.info("Collecting tool-associated files...")
|
|
3778
|
+
tool_filenames = visitSteps(
|
|
3779
|
+
tool,
|
|
3780
|
+
functools.partial(
|
|
3781
|
+
visit_files,
|
|
3782
|
+
extract_file_uri_once,
|
|
3783
|
+
fs_access,
|
|
3784
|
+
fileindex,
|
|
3785
|
+
existing,
|
|
3786
|
+
mark_broken=True,
|
|
3787
|
+
skip_remote=options.reference_inputs,
|
|
3788
|
+
bypass_file_store=options.bypass_file_store,
|
|
3789
|
+
),
|
|
3790
|
+
)
|
|
3791
|
+
filenames.extend(tool_filenames)
|
|
3792
|
+
return [file for file in filenames if file is not None]
|
|
3793
|
+
|
|
3794
|
+
|
|
3795
|
+
def import_workflow_inputs(
|
|
3796
|
+
jobstore: AbstractJobStore,
|
|
3797
|
+
options: Namespace,
|
|
3798
|
+
initialized_job_order: CWLObjectType,
|
|
3799
|
+
tool: Process,
|
|
3800
|
+
log_level: int = logging.DEBUG,
|
|
3801
|
+
) -> None:
|
|
3802
|
+
"""
|
|
3803
|
+
Import all workflow inputs on the leader.
|
|
3804
|
+
|
|
3805
|
+
Ran when not importing on workers.
|
|
3806
|
+
:param jobstore: Toil jobstore
|
|
3807
|
+
:param options: Namespace
|
|
3808
|
+
:param initialized_job_order: CWL object
|
|
3809
|
+
:param tool: CWL tool
|
|
3810
|
+
:param log_level: log level
|
|
3811
|
+
:return:
|
|
3812
|
+
"""
|
|
3813
|
+
fileindex: dict[str, str] = {}
|
|
3814
|
+
existing: dict[str, str] = {}
|
|
3815
|
+
|
|
3816
|
+
# Define something we can call to import a file and get its file
|
|
3817
|
+
# ID.
|
|
3818
|
+
def file_import_function(url: str) -> FileID:
|
|
3819
|
+
logger.log(log_level, "Loading %s...", url)
|
|
3820
|
+
return jobstore.import_file(url, symlink=True)
|
|
3821
|
+
|
|
3822
|
+
import_function = functools.partial(
|
|
3823
|
+
extract_and_convert_file_to_toil_uri, file_import_function
|
|
3824
|
+
)
|
|
3825
|
+
# Import all the input files, some of which may be missing optional
|
|
3826
|
+
# files.
|
|
3827
|
+
logger.info("Importing input files...")
|
|
3828
|
+
fs_access = ToilFsAccess(options.basedir)
|
|
3829
|
+
visit_files(
|
|
3830
|
+
import_function,
|
|
3831
|
+
fs_access,
|
|
3832
|
+
fileindex,
|
|
3833
|
+
existing,
|
|
3834
|
+
initialized_job_order,
|
|
3835
|
+
mark_broken=True,
|
|
3836
|
+
skip_remote=options.reference_inputs,
|
|
3837
|
+
bypass_file_store=options.bypass_file_store,
|
|
3838
|
+
)
|
|
3839
|
+
|
|
3840
|
+
# Make another function for importing tool files. This one doesn't allow
|
|
3841
|
+
# symlinking, since the tools might be coming from storage not accessible
|
|
3842
|
+
# to all nodes.
|
|
3843
|
+
tool_import_function = functools.partial(
|
|
3844
|
+
extract_and_convert_file_to_toil_uri,
|
|
3845
|
+
cast(
|
|
3846
|
+
Callable[[str], FileID],
|
|
3847
|
+
functools.partial(jobstore.import_file, symlink=False),
|
|
3848
|
+
),
|
|
3849
|
+
)
|
|
3850
|
+
|
|
3851
|
+
# Import all the files associated with tools (binaries, etc.).
|
|
3852
|
+
# Not sure why you would have an optional secondary file here, but
|
|
3853
|
+
# the spec probably needs us to support them.
|
|
3854
|
+
logger.info("Importing tool-associated files...")
|
|
3855
|
+
visitSteps(
|
|
3856
|
+
tool,
|
|
3857
|
+
functools.partial(
|
|
3858
|
+
visit_files,
|
|
3859
|
+
tool_import_function,
|
|
3860
|
+
fs_access,
|
|
3861
|
+
fileindex,
|
|
3862
|
+
existing,
|
|
3863
|
+
mark_broken=True,
|
|
3864
|
+
skip_remote=options.reference_inputs,
|
|
3865
|
+
bypass_file_store=options.bypass_file_store,
|
|
3866
|
+
),
|
|
3867
|
+
)
|
|
3868
|
+
|
|
3869
|
+
# We always expect to have processed all files that exist
|
|
3870
|
+
for param_name, param_value in initialized_job_order.items():
|
|
3871
|
+
# Loop through all the parameters for the workflow overall.
|
|
3872
|
+
# Drop any files that aren't either imported (for when we use
|
|
3873
|
+
# the file store) or available on disk (for when we don't).
|
|
3874
|
+
# This will properly make them cause an error later if they
|
|
3875
|
+
# were required.
|
|
3876
|
+
rm_unprocessed_secondary_files(param_value)
|
|
3877
|
+
|
|
3878
|
+
|
|
3879
|
+
T = TypeVar("T")
|
|
3880
|
+
|
|
3881
|
+
|
|
3219
3882
|
def visitSteps(
|
|
3220
3883
|
cmdline_tool: Process,
|
|
3221
|
-
op: Callable[[CommentedMap],
|
|
3222
|
-
) ->
|
|
3884
|
+
op: Callable[[CommentedMap], list[T]],
|
|
3885
|
+
) -> list[T]:
|
|
3223
3886
|
"""
|
|
3224
3887
|
Iterate over a CWL Process object, running the op on each tool description
|
|
3225
3888
|
CWL object.
|
|
3226
3889
|
"""
|
|
3227
3890
|
if isinstance(cmdline_tool, cwltool.workflow.Workflow):
|
|
3228
3891
|
# For workflows we need to dispatch on steps
|
|
3892
|
+
ret = []
|
|
3229
3893
|
for step in cmdline_tool.steps:
|
|
3230
3894
|
# Handle the step's tool
|
|
3231
|
-
op(step.tool)
|
|
3895
|
+
ret.extend(op(step.tool))
|
|
3232
3896
|
# Recures on the embedded tool; maybe it's a workflow.
|
|
3233
|
-
visitSteps(step.embedded_tool, op)
|
|
3897
|
+
recurse_ret = visitSteps(step.embedded_tool, op)
|
|
3898
|
+
ret.extend(recurse_ret)
|
|
3899
|
+
return ret
|
|
3234
3900
|
elif isinstance(cmdline_tool, cwltool.process.Process):
|
|
3235
3901
|
# All CWL Process objects (including CommandLineTool) will have tools
|
|
3236
3902
|
# if they bothered to run the Process __init__.
|
|
3237
|
-
op(cmdline_tool.tool)
|
|
3238
|
-
|
|
3239
|
-
|
|
3240
|
-
|
|
3241
|
-
f"traversal: {type(cmdline_tool)}"
|
|
3242
|
-
)
|
|
3903
|
+
return op(cmdline_tool.tool)
|
|
3904
|
+
raise RuntimeError(
|
|
3905
|
+
f"Unsupported type encountered in workflow " f"traversal: {type(cmdline_tool)}"
|
|
3906
|
+
)
|
|
3243
3907
|
|
|
3244
3908
|
|
|
3245
3909
|
def rm_unprocessed_secondary_files(job_params: Any) -> None:
|
|
@@ -3252,7 +3916,7 @@ def rm_unprocessed_secondary_files(job_params: Any) -> None:
|
|
|
3252
3916
|
|
|
3253
3917
|
def filtered_secondary_files(
|
|
3254
3918
|
unfiltered_secondary_files: CWLObjectType,
|
|
3255
|
-
) ->
|
|
3919
|
+
) -> list[CWLObjectType]:
|
|
3256
3920
|
"""
|
|
3257
3921
|
Remove unprocessed secondary files.
|
|
3258
3922
|
|
|
@@ -3263,9 +3927,8 @@ def filtered_secondary_files(
|
|
|
3263
3927
|
but add the resolved fields to the list of unresolved fields so we remove
|
|
3264
3928
|
them here after the fact.
|
|
3265
3929
|
|
|
3266
|
-
We keep secondary files
|
|
3267
|
-
|
|
3268
|
-
exist. The 'required' logic seems to be handled deeper in
|
|
3930
|
+
We keep secondary files with anything other than MISSING_FILE as their
|
|
3931
|
+
location. The 'required' logic seems to be handled deeper in
|
|
3269
3932
|
cwltool.builder.Builder(), and correctly determines which files should be
|
|
3270
3933
|
imported. Therefore we remove the files here and if this file is SUPPOSED
|
|
3271
3934
|
to exist, it will still give the appropriate file does not exist error, but
|
|
@@ -3274,30 +3937,33 @@ def filtered_secondary_files(
|
|
|
3274
3937
|
intermediate_secondary_files = []
|
|
3275
3938
|
final_secondary_files = []
|
|
3276
3939
|
# remove secondary files still containing interpolated strings
|
|
3277
|
-
for sf in cast(
|
|
3940
|
+
for sf in cast(list[CWLObjectType], unfiltered_secondary_files["secondaryFiles"]):
|
|
3278
3941
|
sf_bn = cast(str, sf.get("basename", ""))
|
|
3279
3942
|
sf_loc = cast(str, sf.get("location", ""))
|
|
3280
3943
|
if ("$(" not in sf_bn) and ("${" not in sf_bn):
|
|
3281
3944
|
if ("$(" not in sf_loc) and ("${" not in sf_loc):
|
|
3282
3945
|
intermediate_secondary_files.append(sf)
|
|
3946
|
+
else:
|
|
3947
|
+
logger.debug(
|
|
3948
|
+
"Secondary file %s is dropped because it has an uninterpolated location",
|
|
3949
|
+
sf,
|
|
3950
|
+
)
|
|
3951
|
+
else:
|
|
3952
|
+
logger.debug(
|
|
3953
|
+
"Secondary file %s is dropped because it has an uninterpolated basename",
|
|
3954
|
+
sf,
|
|
3955
|
+
)
|
|
3283
3956
|
# remove secondary files that are not present in the filestore or pointing
|
|
3284
3957
|
# to existant things on disk
|
|
3285
3958
|
for sf in intermediate_secondary_files:
|
|
3286
3959
|
sf_loc = cast(str, sf.get("location", ""))
|
|
3287
|
-
if (
|
|
3288
|
-
sf_loc.startswith("toilfile:")
|
|
3289
|
-
or sf_loc.startswith("toildir:")
|
|
3290
|
-
or sf_loc.startswith("_:")
|
|
3291
|
-
or sf.get("class", "") == "Directory"
|
|
3292
|
-
):
|
|
3960
|
+
if sf_loc != MISSING_FILE or sf.get("class", "") == "Directory":
|
|
3293
3961
|
# Pass imported files, and all Directories
|
|
3294
3962
|
final_secondary_files.append(sf)
|
|
3295
|
-
|
|
3296
|
-
|
|
3297
|
-
|
|
3298
|
-
|
|
3299
|
-
# import because we aren't using the file store)
|
|
3300
|
-
final_secondary_files.append(sf)
|
|
3963
|
+
else:
|
|
3964
|
+
logger.debug(
|
|
3965
|
+
"Secondary file %s is dropped because it is known to be missing", sf
|
|
3966
|
+
)
|
|
3301
3967
|
return final_secondary_files
|
|
3302
3968
|
|
|
3303
3969
|
|
|
@@ -3352,12 +4018,12 @@ def determine_load_listing(
|
|
|
3352
4018
|
|
|
3353
4019
|
1. no_listing: DIRECTORY_NAME.listing will be undefined.
|
|
3354
4020
|
e.g.
|
|
3355
|
-
|
|
4021
|
+
|
|
3356
4022
|
inputs.DIRECTORY_NAME.listing == unspecified
|
|
3357
4023
|
|
|
3358
4024
|
2. shallow_listing: DIRECTORY_NAME.listing will return a list one level
|
|
3359
4025
|
deep of DIRECTORY_NAME's contents.
|
|
3360
|
-
e.g.
|
|
4026
|
+
e.g.
|
|
3361
4027
|
|
|
3362
4028
|
inputs.DIRECTORY_NAME.listing == [items in directory]
|
|
3363
4029
|
inputs.DIRECTORY_NAME.listing[0].listing == undefined
|
|
@@ -3402,8 +4068,6 @@ def determine_load_listing(
|
|
|
3402
4068
|
class NoAvailableJobStoreException(Exception):
|
|
3403
4069
|
"""Indicates that no job store name is available."""
|
|
3404
4070
|
|
|
3405
|
-
pass
|
|
3406
|
-
|
|
3407
4071
|
|
|
3408
4072
|
def generate_default_job_store(
|
|
3409
4073
|
batch_system_name: Optional[str],
|
|
@@ -3471,37 +4135,64 @@ def generate_default_job_store(
|
|
|
3471
4135
|
|
|
3472
4136
|
usage_message = "\n\n" + textwrap.dedent(
|
|
3473
4137
|
"""
|
|
3474
|
-
|
|
3475
|
-
|
|
3476
|
-
|
|
3477
|
-
|
|
3478
|
-
|
|
3479
|
-
|
|
3480
|
-
|
|
3481
|
-
|
|
3482
|
-
|
|
3483
|
-
|
|
3484
|
-
|
|
4138
|
+
NOTE: If you're trying to specify a jobstore, you must use --jobStore, not a positional argument.
|
|
4139
|
+
|
|
4140
|
+
Usage: toil-cwl-runner [options] <workflow> [<input file>] [workflow options]
|
|
4141
|
+
|
|
4142
|
+
Example: toil-cwl-runner \\
|
|
4143
|
+
--jobStore aws:us-west-2:jobstore \\
|
|
4144
|
+
--realTimeLogging \\
|
|
4145
|
+
--logInfo \\
|
|
4146
|
+
example.cwl \\
|
|
4147
|
+
example-job.yaml \\
|
|
4148
|
+
--wf_input="hello world"
|
|
4149
|
+
"""[
|
|
3485
4150
|
1:
|
|
3486
4151
|
]
|
|
3487
4152
|
)
|
|
3488
4153
|
|
|
3489
|
-
|
|
4154
|
+
|
|
4155
|
+
def get_options(args: list[str]) -> Namespace:
|
|
3490
4156
|
"""
|
|
3491
4157
|
Parse given args and properly add non-Toil arguments into the cwljob of the Namespace.
|
|
3492
4158
|
:param args: List of args from command line
|
|
3493
4159
|
:return: options namespace
|
|
3494
4160
|
"""
|
|
3495
|
-
|
|
4161
|
+
# We can't allow abbreviations in case the workflow defines an option that
|
|
4162
|
+
# is a prefix of a Toil option.
|
|
4163
|
+
parser = ArgParser(
|
|
4164
|
+
allow_abbrev=False,
|
|
4165
|
+
usage="%(prog)s [options] WORKFLOW [INFILE] [WF_OPTIONS...]",
|
|
4166
|
+
description=textwrap.dedent(
|
|
4167
|
+
"""
|
|
4168
|
+
positional arguments:
|
|
4169
|
+
|
|
4170
|
+
WORKFLOW CWL file to run.
|
|
4171
|
+
|
|
4172
|
+
INFILE YAML or JSON file of workflow inputs.
|
|
4173
|
+
|
|
4174
|
+
WF_OPTIONS Additional inputs to the workflow as command-line
|
|
4175
|
+
flags. If CWL workflow takes an input, the name of the
|
|
4176
|
+
input can be used as an option. For example:
|
|
4177
|
+
|
|
4178
|
+
%(prog)s workflow.cwl --file1 file
|
|
4179
|
+
|
|
4180
|
+
If an input has the same name as a Toil option, pass
|
|
4181
|
+
'--' before it.
|
|
4182
|
+
"""
|
|
4183
|
+
),
|
|
4184
|
+
formatter_class=RawDescriptionHelpFormatter,
|
|
4185
|
+
)
|
|
4186
|
+
|
|
3496
4187
|
addOptions(parser, jobstore_as_flag=True, cwl=True)
|
|
3497
4188
|
options: Namespace
|
|
3498
|
-
options,
|
|
3499
|
-
options.cwljob
|
|
4189
|
+
options, extra = parser.parse_known_args(args)
|
|
4190
|
+
options.cwljob = extra
|
|
3500
4191
|
|
|
3501
4192
|
return options
|
|
3502
4193
|
|
|
3503
4194
|
|
|
3504
|
-
def main(args: Optional[
|
|
4195
|
+
def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
3505
4196
|
"""Run the main loop for toil-cwl-runner."""
|
|
3506
4197
|
# Remove cwltool logger's stream handler so it uses Toil's
|
|
3507
4198
|
cwllogger.removeHandler(defaultStreamHandler)
|
|
@@ -3513,25 +4204,21 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3513
4204
|
|
|
3514
4205
|
# Do cwltool setup
|
|
3515
4206
|
cwltool.main.setup_schema(args=options, custom_schema_callback=None)
|
|
3516
|
-
tmpdir_prefix = options.tmpdir_prefix =
|
|
3517
|
-
|
|
3518
|
-
|
|
3519
|
-
|
|
3520
|
-
|
|
3521
|
-
# workdir and the default job store under it
|
|
3522
|
-
workdir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
|
|
3523
|
-
else:
|
|
3524
|
-
# Use a directory in the default tmpdir
|
|
3525
|
-
workdir = mkdtemp()
|
|
3526
|
-
# Make sure workdir doesn't exist so it can be a job store
|
|
3527
|
-
os.rmdir(workdir)
|
|
4207
|
+
tmpdir_prefix = options.tmpdir_prefix = (
|
|
4208
|
+
options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
|
|
4209
|
+
)
|
|
4210
|
+
tmp_outdir_prefix = options.tmp_outdir_prefix or tmpdir_prefix
|
|
4211
|
+
workdir = options.workDir or tmp_outdir_prefix
|
|
3528
4212
|
|
|
3529
4213
|
if options.jobStore is None:
|
|
4214
|
+
jobstore = cwltool.utils.create_tmp_dir(tmp_outdir_prefix)
|
|
4215
|
+
# Make sure directory doesn't exist so it can be a job store
|
|
4216
|
+
os.rmdir(jobstore)
|
|
3530
4217
|
# Pick a default job store specifier appropriate to our choice of batch
|
|
3531
4218
|
# system and provisioner and installed modules, given this available
|
|
3532
4219
|
# local directory name. Fail if no good default can be used.
|
|
3533
4220
|
options.jobStore = generate_default_job_store(
|
|
3534
|
-
options.batchSystem, options.provisioner,
|
|
4221
|
+
options.batchSystem, options.provisioner, jobstore
|
|
3535
4222
|
)
|
|
3536
4223
|
|
|
3537
4224
|
options.doc_cache = True
|
|
@@ -3539,13 +4226,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3539
4226
|
options.do_validate = True
|
|
3540
4227
|
options.pack = False
|
|
3541
4228
|
options.print_subgraph = False
|
|
3542
|
-
if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.workDir is None:
|
|
3543
|
-
# We need to override workDir because by default Toil will pick
|
|
3544
|
-
# somewhere under the system temp directory if unset, ignoring
|
|
3545
|
-
# --tmpdir-prefix.
|
|
3546
|
-
#
|
|
3547
|
-
# If set, workDir needs to exist, so we directly use the prefix
|
|
3548
|
-
options.workDir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
|
|
3549
4229
|
|
|
3550
4230
|
if options.batchSystem == "kubernetes":
|
|
3551
4231
|
# Containers under Kubernetes can only run in Singularity
|
|
@@ -3563,12 +4243,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3563
4243
|
logger.debug(f"Final job store {options.jobStore} and workDir {options.workDir}")
|
|
3564
4244
|
|
|
3565
4245
|
outdir = os.path.abspath(options.outdir or os.getcwd())
|
|
3566
|
-
tmp_outdir_prefix = os.path.abspath(
|
|
3567
|
-
options.tmp_outdir_prefix or DEFAULT_TMPDIR_PREFIX
|
|
3568
|
-
)
|
|
3569
|
-
|
|
3570
|
-
fileindex: Dict[str, str] = {}
|
|
3571
|
-
existing: Dict[str, str] = {}
|
|
3572
4246
|
conf_file = getattr(options, "beta_dependency_resolvers_configuration", None)
|
|
3573
4247
|
use_conda_dependencies = getattr(options, "beta_conda_dependencies", None)
|
|
3574
4248
|
job_script_provider = None
|
|
@@ -3576,7 +4250,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3576
4250
|
dependencies_configuration = DependenciesConfiguration(options)
|
|
3577
4251
|
job_script_provider = dependencies_configuration
|
|
3578
4252
|
|
|
3579
|
-
options.default_container = None
|
|
3580
4253
|
runtime_context = cwltool.context.RuntimeContext(vars(options))
|
|
3581
4254
|
runtime_context.toplevel = True # enable discovery of secondaryFiles
|
|
3582
4255
|
runtime_context.find_default_container = functools.partial(
|
|
@@ -3584,6 +4257,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3584
4257
|
)
|
|
3585
4258
|
runtime_context.workdir = workdir # type: ignore[attr-defined]
|
|
3586
4259
|
runtime_context.outdir = outdir
|
|
4260
|
+
setattr(runtime_context, "cwl_default_ram", options.cwl_default_ram)
|
|
3587
4261
|
runtime_context.move_outputs = "leave"
|
|
3588
4262
|
runtime_context.rm_tmpdir = False
|
|
3589
4263
|
runtime_context.streaming_allowed = not options.disable_streaming
|
|
@@ -3617,27 +4291,28 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3617
4291
|
)
|
|
3618
4292
|
runtime_context.research_obj = research_obj
|
|
3619
4293
|
|
|
3620
|
-
|
|
3621
|
-
|
|
3622
|
-
|
|
3623
|
-
|
|
3624
|
-
|
|
3625
|
-
|
|
3626
|
-
|
|
3627
|
-
|
|
3628
|
-
|
|
3629
|
-
|
|
3630
|
-
|
|
3631
|
-
|
|
3632
|
-
|
|
3633
|
-
|
|
3634
|
-
|
|
4294
|
+
try:
|
|
4295
|
+
|
|
4296
|
+
if not options.restart:
|
|
4297
|
+
# Make a version of the config based on the initial options, for
|
|
4298
|
+
# setting up CWL option stuff
|
|
4299
|
+
expected_config = Config()
|
|
4300
|
+
expected_config.setOptions(options)
|
|
4301
|
+
|
|
4302
|
+
# Before showing the options to any cwltool stuff that wants to
|
|
4303
|
+
# load the workflow, transform options.cwltool, where our
|
|
4304
|
+
# argument for what to run is, to handle Dockstore workflows.
|
|
4305
|
+
options.cwltool = resolve_workflow(options.cwltool)
|
|
4306
|
+
|
|
4307
|
+
# TODO: why are we doing this? Does this get applied to all
|
|
4308
|
+
# tools as a default or something?
|
|
3635
4309
|
loading_context.hints = [
|
|
3636
4310
|
{
|
|
3637
4311
|
"class": "ResourceRequirement",
|
|
3638
|
-
"coresMin":
|
|
3639
|
-
|
|
3640
|
-
|
|
4312
|
+
"coresMin": expected_config.defaultCores,
|
|
4313
|
+
# Don't include any RAM requirement because we want to
|
|
4314
|
+
# know when tools don't manually ask for RAM.
|
|
4315
|
+
"outdirMin": expected_config.defaultDisk / (2**20),
|
|
3641
4316
|
"tmpdirMin": 0,
|
|
3642
4317
|
}
|
|
3643
4318
|
]
|
|
@@ -3660,6 +4335,10 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3660
4335
|
)
|
|
3661
4336
|
raise
|
|
3662
4337
|
|
|
4338
|
+
# Attempt to prepull the containers
|
|
4339
|
+
if not options.no_prepull and not options.no_container:
|
|
4340
|
+
try_prepull(uri, runtime_context, expected_config.batchSystem)
|
|
4341
|
+
|
|
3663
4342
|
options.tool_help = None
|
|
3664
4343
|
options.debug = options.logLevel == "DEBUG"
|
|
3665
4344
|
job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
|
|
@@ -3724,11 +4403,12 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3724
4403
|
secret_store=runtime_context.secret_store,
|
|
3725
4404
|
input_required=True,
|
|
3726
4405
|
)
|
|
3727
|
-
except SystemExit as
|
|
3728
|
-
if
|
|
4406
|
+
except SystemExit as err:
|
|
4407
|
+
if err.code == 2: # raised by argparse's parse_args() function
|
|
3729
4408
|
print(
|
|
3730
4409
|
"\nIf both a CWL file and an input object (YAML/JSON) file were "
|
|
3731
|
-
"provided,
|
|
4410
|
+
"provided, the problem may be the argument order."
|
|
4411
|
+
+ usage_message,
|
|
3732
4412
|
file=sys.stderr,
|
|
3733
4413
|
)
|
|
3734
4414
|
raise
|
|
@@ -3767,6 +4447,16 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3767
4447
|
# ToilFsAccess needs to be set up if we want to be able to use
|
|
3768
4448
|
# URLs.
|
|
3769
4449
|
builder = tool._init_job(initialized_job_order, runtime_context)
|
|
4450
|
+
if not isinstance(tool, cwltool.workflow.Workflow):
|
|
4451
|
+
# make sure this doesn't add listing items; if shallow_listing is
|
|
4452
|
+
# selected, it will discover dirs one deep and then again later on
|
|
4453
|
+
# (when the cwltool builder gets constructed from the job in the
|
|
4454
|
+
# CommandLineTool's job() method,
|
|
4455
|
+
# see https://github.com/common-workflow-language/cwltool/blob/9cda157cb4380e9d30dec29f0452c56d0c10d064/cwltool/command_line_tool.py#L951),
|
|
4456
|
+
# producing 2+ deep listings instead of only 1.
|
|
4457
|
+
# ExpressionTool also uses a builder, see https://github.com/common-workflow-language/cwltool/blob/9cda157cb4380e9d30dec29f0452c56d0c10d064/cwltool/command_line_tool.py#L207
|
|
4458
|
+
# Workflows don't need this because they don't go through CommandLineTool or ExpressionTool
|
|
4459
|
+
builder.loadListing = "no_listing"
|
|
3770
4460
|
|
|
3771
4461
|
# make sure this doesn't add listing items; if shallow_listing is
|
|
3772
4462
|
# selected, it will discover dirs one deep and then again later on
|
|
@@ -3780,151 +4470,114 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3780
4470
|
discover_secondaryFiles=True,
|
|
3781
4471
|
)
|
|
3782
4472
|
|
|
3783
|
-
# Define something we can call to import a file and get its file
|
|
3784
|
-
# ID.
|
|
3785
|
-
# We cast this because import_file is overloaded depending on if we
|
|
3786
|
-
# pass a shared file name or not, and we know the way we call it we
|
|
3787
|
-
# always get a FileID out.
|
|
3788
|
-
file_import_function = cast(
|
|
3789
|
-
Callable[[str], FileID],
|
|
3790
|
-
functools.partial(toil.import_file, symlink=True),
|
|
3791
|
-
)
|
|
3792
|
-
|
|
3793
|
-
# Import all the input files, some of which may be missing optional
|
|
3794
|
-
# files.
|
|
3795
|
-
logger.info("Importing input files...")
|
|
3796
|
-
fs_access = ToilFsAccess(options.basedir)
|
|
3797
|
-
import_files(
|
|
3798
|
-
file_import_function,
|
|
3799
|
-
fs_access,
|
|
3800
|
-
fileindex,
|
|
3801
|
-
existing,
|
|
3802
|
-
initialized_job_order,
|
|
3803
|
-
skip_broken=True,
|
|
3804
|
-
skip_remote=options.reference_inputs,
|
|
3805
|
-
bypass_file_store=options.bypass_file_store,
|
|
3806
|
-
log_level=logging.INFO,
|
|
3807
|
-
)
|
|
3808
|
-
# Import all the files associated with tools (binaries, etc.).
|
|
3809
|
-
# Not sure why you would have an optional secondary file here, but
|
|
3810
|
-
# the spec probably needs us to support them.
|
|
3811
|
-
logger.info("Importing tool-associated files...")
|
|
3812
|
-
visitSteps(
|
|
3813
|
-
tool,
|
|
3814
|
-
functools.partial(
|
|
3815
|
-
import_files,
|
|
-                file_import_function,
-                fs_access,
-                fileindex,
-                existing,
-                skip_broken=True,
-                skip_remote=options.reference_inputs,
-                bypass_file_store=options.bypass_file_store,
-                log_level=logging.INFO,
-            ),
-        )
-
-        # We always expect to have processed all files that exist
-        for param_name, param_value in initialized_job_order.items():
-            # Loop through all the parameters for the workflow overall.
-            # Drop any files that aren't either imported (for when we use
-            # the file store) or available on disk (for when we don't).
-            # This will properly make them cause an error later if they
-            # were required.
-            rm_unprocessed_secondary_files(param_value)
-
         logger.info("Creating root job")
         logger.debug("Root tool: %s", tool)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                outobj = toil.start(wf1)
-    except FailedJobsException as err:
-        if err.exit_code == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
-            # We figured out that we can't support this workflow.
-            logging.error(err)
-            logging.error(
-                "Your workflow uses a CWL requirement that Toil does not support!"
+        tool = remove_pickle_problems(tool)
+
+        with Toil(options) as toil:
+            if options.restart:
+                outobj = toil.restart()
+            else:
+                try:
+                    wf1 = makeRootJob(
+                        tool=tool,
+                        jobobj={},
+                        runtime_context=runtime_context,
+                        initialized_job_order=initialized_job_order,
+                        options=options,
+                        toil=toil,
                     )
+                except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
+                    logging.error(err)
                     return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
-
-
-
-        # Now the workflow has completed. We need to make sure the outputs (and
-        # inputs) end up where the user wants them to be.
-        logger.info("Collecting workflow outputs...")
-        outobj = resolve_dict_w_promises(outobj)
-
-        # Stage files. Specify destination bucket if specified in CLI
-        # options. If destination bucket not passed in,
-        # options.destBucket's value will be None.
-        toilStageFiles(
-            toil,
-            outobj,
-            outdir,
-            destBucket=options.destBucket,
-            log_level=logging.INFO
-        )
-        logger.info("Stored workflow outputs")
+                logger.info("Starting workflow")
+                outobj = toil.start(wf1)
 
-
-
-            )
+            # Now the workflow has completed. We need to make sure the outputs (and
+            # inputs) end up where the user wants them to be.
+            logger.info("Collecting workflow outputs...")
+            outobj = resolve_dict_w_promises(outobj)
 
-
-
-
-
-                        else:
-                            value = doc[key]
-                            if isinstance(value, MutableMapping):
-                                remove_at_id(value)
-                            if isinstance(value, MutableSequence):
-                                for entry in value:
-                                    if isinstance(value, MutableMapping):
-                                        remove_at_id(entry)
-
-            remove_at_id(outobj)
-            visit_class(
+            # Stage files. Specify destination bucket if specified in CLI
+            # options. If destination bucket not passed in,
+            # options.destBucket's value will be None.
+            toilStageFiles(
+                toil,
                 outobj,
-
-
-
-            if not document_loader:
-                raise RuntimeError("cwltool loader is not set.")
-            prov_dependencies = cwltool.main.prov_deps(
-                workflowobj, document_loader, uri
-            )
-            runtime_context.research_obj.generate_snapshot(prov_dependencies)
-            cwltool.cwlprov.writablebagfile.close_ro(
-                runtime_context.research_obj, options.provenance
+                outdir,
+                destBucket=options.destBucket,
+                log_level=logging.INFO,
             )
+            logger.info("Stored workflow outputs")
 
-
-
-
-
-
-
-
+            if runtime_context.research_obj is not None:
+                cwltool.cwlprov.writablebagfile.create_job(
+                    runtime_context.research_obj, outobj, True
+                )
+
+                def remove_at_id(doc: Any) -> None:
+                    if isinstance(doc, MutableMapping):
+                        for key in list(doc.keys()):
+                            if key == "@id":
+                                del doc[key]
+                            else:
+                                value = doc[key]
+                                if isinstance(value, MutableMapping):
+                                    remove_at_id(value)
+                                if isinstance(value, MutableSequence):
+                                    for entry in value:
+                                        if isinstance(value, MutableMapping):
+                                            remove_at_id(entry)
+
+                remove_at_id(outobj)
+                visit_class(
+                    outobj,
+                    ("File",),
+                    functools.partial(add_sizes, runtime_context.make_fs_access("")),
+                )
+                if not document_loader:
+                    raise RuntimeError("cwltool loader is not set.")
+                prov_dependencies = cwltool.main.prov_deps(
+                    workflowobj, document_loader, uri
+                )
+                runtime_context.research_obj.generate_snapshot(prov_dependencies)
+                cwltool.cwlprov.writablebagfile.close_ro(
+                    runtime_context.research_obj, options.provenance
+                )
 
-
-
-
-
+            if not options.destBucket and options.compute_checksum:
+                logger.info("Computing output file checksums...")
+                visit_class(
+                    outobj,
+                    ("File",),
+                    functools.partial(compute_checksums, StdFsAccess("")),
+                )
+
+            visit_class(outobj, ("File",), MutationManager().unset_generation)
+            stdout.write(json.dumps(outobj, indent=4, default=str))
+            stdout.write("\n")
+            logger.info("CWL run complete!")
+    # Don't expose tracebacks to the user for exceptions that may be expected
+    except FailedJobsException as err:
+        if err.exit_code == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
+            # We figured out that we can't support this workflow.
+            logging.error(err)
+            logging.error(
+                "Your workflow uses a CWL requirement that Toil does not support!"
+            )
+            return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
+        else:
+            logging.error(err)
+            return 1
+    except (
+        InsufficientSystemResources,
+        LocatorException,
+        InvalidImportExportUrlException,
+        UnimplementedURLException,
+        JobTooBigError,
+    ) as err:
+        logging.error(err)
+        return 1
 
     return 0
 
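A note on the remove_at_id helper re-added in this hunk (the same shape appears in the removed code): inside the `for entry in value:` loop, the guard tests `isinstance(value, MutableMapping)` — the enclosing sequence — rather than `entry`, so at that point `value` is a MutableSequence and the branch appears never to fire, leaving "@id" keys on mappings nested inside lists. A minimal corrected sketch, under the editorial assumption that the intent is to strip "@id" keys recursively (this is not code from either release):

    from collections.abc import MutableMapping, MutableSequence
    from typing import Any

    def remove_at_id(doc: Any) -> None:
        """Recursively delete "@id" keys from nested dicts and lists."""
        if isinstance(doc, MutableMapping):
            for key in list(doc.keys()):
                if key == "@id":
                    del doc[key]
                else:
                    value = doc[key]
                    if isinstance(value, MutableMapping):
                        remove_at_id(value)
                    if isinstance(value, MutableSequence):
                        for entry in value:
                            # Test the element, not the enclosing list.
                            if isinstance(entry, MutableMapping):
                                remove_at_id(entry)

With this one-token change, an output object such as {"@id": "a", "files": [{"@id": "b"}]} reduces to {"files": [{}]}; with the shipped version the nested "@id" would survive.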