toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +39 -13
- toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/lsf.py +7 -7
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +87 -16
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +44 -8
- toil/common.py +544 -753
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +595 -574
- toil/cwl/utils.py +55 -10
- toil/exceptions.py +1 -1
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +88 -14
- toil/fileStores/cachingFileStore.py +610 -549
- toil/fileStores/nonCachingFileStore.py +46 -22
- toil/job.py +182 -101
- toil/jobStores/abstractJobStore.py +161 -95
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +6 -6
- toil/jobStores/fileJobStore.py +116 -18
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +87 -56
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +26 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +49 -2
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +9 -2
- toil/lib/threading.py +101 -38
- toil/options/common.py +736 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +37 -0
- toil/provisioners/abstractProvisioner.py +9 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +41 -9
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +24 -8
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +438 -223
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +2 -3
- toil/test/jobStores/jobStoreTest.py +34 -21
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +12 -17
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
- toil/test/options/options.py +37 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +23 -11
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +23 -3
- toil/test/wdl/wdltoil_test.py +223 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +445 -305
- toil/utils/toilStatus.py +2 -5
- toil/version.py +10 -10
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +1257 -492
- toil/worker.py +55 -46
- toil-6.1.0.dist-info/METADATA +124 -0
- toil-6.1.0.dist-info/RECORD +241 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/METADATA +0 -118
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py
CHANGED
|
@@ -17,43 +17,43 @@
|
|
|
17
17
|
|
|
18
18
|
# For an overview of how this all works, see discussion in
|
|
19
19
|
# docs/architecture.rst
|
|
20
|
-
import argparse
|
|
21
20
|
import base64
|
|
22
21
|
import copy
|
|
23
22
|
import datetime
|
|
24
23
|
import errno
|
|
25
24
|
import functools
|
|
25
|
+
import glob
|
|
26
|
+
import io
|
|
26
27
|
import json
|
|
27
28
|
import logging
|
|
28
29
|
import os
|
|
30
|
+
import pprint
|
|
29
31
|
import shutil
|
|
30
32
|
import socket
|
|
31
33
|
import stat
|
|
32
34
|
import sys
|
|
33
|
-
import tempfile
|
|
34
35
|
import textwrap
|
|
35
|
-
import urllib
|
|
36
36
|
import uuid
|
|
37
|
+
from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
|
|
37
38
|
from threading import Thread
|
|
38
|
-
from typing import (
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
from urllib.parse import ParseResult, quote, unquote, urlparse, urlsplit
|
|
39
|
+
from typing import (IO,
|
|
40
|
+
Any,
|
|
41
|
+
Callable,
|
|
42
|
+
Dict,
|
|
43
|
+
Iterator,
|
|
44
|
+
List,
|
|
45
|
+
Mapping,
|
|
46
|
+
MutableMapping,
|
|
47
|
+
MutableSequence,
|
|
48
|
+
Optional,
|
|
49
|
+
Sequence,
|
|
50
|
+
TextIO,
|
|
51
|
+
Tuple,
|
|
52
|
+
Type,
|
|
53
|
+
TypeVar,
|
|
54
|
+
Union,
|
|
55
|
+
cast)
|
|
56
|
+
from urllib.parse import quote, unquote, urlparse, urlsplit
|
|
57
57
|
|
|
58
58
|
import cwl_utils.errors
|
|
59
59
|
import cwl_utils.expression
|
|
@@ -66,35 +66,30 @@ import cwltool.load_tool
|
|
|
66
66
|
import cwltool.main
|
|
67
67
|
import cwltool.resolver
|
|
68
68
|
import schema_salad.ref_resolver
|
|
69
|
+
from configargparse import SUPPRESS, ArgParser, Namespace
|
|
69
70
|
from cwltool.loghandler import _logger as cwllogger
|
|
70
71
|
from cwltool.loghandler import defaultStreamHandler
|
|
71
72
|
from cwltool.mpi import MpiConfig
|
|
72
73
|
from cwltool.mutation import MutationManager
|
|
73
74
|
from cwltool.pathmapper import MapperEnt, PathMapper
|
|
74
|
-
from cwltool.process import (
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
shortname,
|
|
80
|
-
)
|
|
75
|
+
from cwltool.process import (Process,
|
|
76
|
+
add_sizes,
|
|
77
|
+
compute_checksums,
|
|
78
|
+
fill_in_defaults,
|
|
79
|
+
shortname)
|
|
81
80
|
from cwltool.secrets import SecretStore
|
|
82
|
-
from cwltool.software_requirements import (
|
|
83
|
-
|
|
84
|
-
get_container_from_software_requirements,
|
|
85
|
-
)
|
|
81
|
+
from cwltool.software_requirements import (DependenciesConfiguration,
|
|
82
|
+
get_container_from_software_requirements)
|
|
86
83
|
from cwltool.stdfsaccess import StdFsAccess, abspath
|
|
87
|
-
from cwltool.utils import (
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
visit_class,
|
|
97
|
-
)
|
|
84
|
+
from cwltool.utils import (CWLObjectType,
|
|
85
|
+
CWLOutputType,
|
|
86
|
+
DirectoryType,
|
|
87
|
+
adjustDirObjs,
|
|
88
|
+
aslist,
|
|
89
|
+
downloadHttpFile,
|
|
90
|
+
get_listing,
|
|
91
|
+
normalizeFilesDirs,
|
|
92
|
+
visit_class)
|
|
98
93
|
from ruamel.yaml.comments import CommentedMap, CommentedSeq
|
|
99
94
|
from schema_salad.avro.schema import Names
|
|
100
95
|
from schema_salad.exceptions import ValidationException
|
|
@@ -103,28 +98,31 @@ from schema_salad.sourceline import SourceLine
|
|
|
103
98
|
from typing_extensions import Literal
|
|
104
99
|
|
|
105
100
|
from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
|
|
106
|
-
from toil.common import
|
|
107
|
-
from toil.cwl
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
101
|
+
from toil.common import Toil, addOptions
|
|
102
|
+
from toil.cwl import check_cwltool_version
|
|
103
|
+
|
|
104
|
+
check_cwltool_version()
|
|
105
|
+
from toil.cwl.utils import (CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
|
|
106
|
+
CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
|
|
107
|
+
download_structure,
|
|
108
|
+
get_from_structure,
|
|
109
|
+
visit_cwl_class_and_reduce)
|
|
113
110
|
from toil.exceptions import FailedJobsException
|
|
114
111
|
from toil.fileStores import FileID
|
|
115
112
|
from toil.fileStores.abstractFileStore import AbstractFileStore
|
|
116
113
|
from toil.job import AcceleratorRequirement, Job, Promise, Promised, unwrap
|
|
117
|
-
from toil.jobStores.abstractJobStore import AbstractJobStore,
|
|
114
|
+
from toil.jobStores.abstractJobStore import (AbstractJobStore,
|
|
115
|
+
NoSuchFileException)
|
|
118
116
|
from toil.jobStores.fileJobStore import FileJobStore
|
|
119
117
|
from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
|
|
118
|
+
from toil.lib.io import mkdtemp
|
|
120
119
|
from toil.lib.threading import ExceptionalThread
|
|
121
120
|
from toil.statsAndLogging import DEFAULT_LOGLEVEL
|
|
122
|
-
from toil.version import baseVersion
|
|
123
121
|
|
|
124
122
|
logger = logging.getLogger(__name__)
|
|
125
123
|
|
|
126
124
|
# Find the default temporary directory
|
|
127
|
-
DEFAULT_TMPDIR =
|
|
125
|
+
DEFAULT_TMPDIR = gettempdir()
|
|
128
126
|
# And compose a CWL-style default prefix inside it.
|
|
129
127
|
# We used to not put this inside anything and we would drop loads of temp
|
|
130
128
|
# directories in the current directory and leave them there.
|
|
@@ -349,16 +347,24 @@ class ResolveSource:
|
|
|
349
347
|
|
|
350
348
|
def __repr__(self) -> str:
|
|
351
349
|
"""Allow for debug printing."""
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
350
|
+
|
|
351
|
+
parts = [f"source key {self.source_key}"]
|
|
352
|
+
|
|
353
|
+
if "pickValue" in self.input:
|
|
354
|
+
parts.append(f"pick value {self.input['pickValue']} from")
|
|
355
|
+
|
|
356
|
+
if isinstance(self.promise_tuples, list):
|
|
357
|
+
names = [n for n, _ in self.promise_tuples]
|
|
358
|
+
parts.append(f"names {names} in promises")
|
|
359
|
+
else:
|
|
360
|
+
name, _ = self.promise_tuples
|
|
361
|
+
parts.append(f"name {name} in promise")
|
|
362
|
+
|
|
363
|
+
return f"ResolveSource({', '.join(parts)})"
|
|
359
364
|
|
|
360
365
|
def resolve(self) -> Any:
|
|
361
366
|
"""First apply linkMerge then pickValue if either present."""
|
|
367
|
+
|
|
362
368
|
result: Optional[Any] = None
|
|
363
369
|
if isinstance(self.promise_tuples, list):
|
|
364
370
|
result = self.link_merge(
|
|
@@ -382,6 +388,7 @@ class ResolveSource:
|
|
|
382
388
|
|
|
383
389
|
:param values: result of step
|
|
384
390
|
"""
|
|
391
|
+
|
|
385
392
|
link_merge_type = self.input.get("linkMerge", "merge_nested")
|
|
386
393
|
|
|
387
394
|
if link_merge_type == "merge_nested":
|
|
@@ -409,6 +416,7 @@ class ResolveSource:
|
|
|
409
416
|
without modification.
|
|
410
417
|
:return:
|
|
411
418
|
"""
|
|
419
|
+
|
|
412
420
|
pick_value_type = cast(str, self.input.get("pickValue"))
|
|
413
421
|
|
|
414
422
|
if pick_value_type is None:
|
|
@@ -425,6 +433,11 @@ class ResolveSource:
|
|
|
425
433
|
|
|
426
434
|
if pick_value_type == "first_non_null":
|
|
427
435
|
if len(result) < 1:
|
|
436
|
+
logger.error(
|
|
437
|
+
"Could not find non-null entry for %s:\n%s",
|
|
438
|
+
self.name,
|
|
439
|
+
pprint.pformat(self.promise_tuples),
|
|
440
|
+
)
|
|
428
441
|
raise cwl_utils.errors.WorkflowException(
|
|
429
442
|
"%s: first_non_null operator found no non-null values" % self.name
|
|
430
443
|
)
|
|
@@ -479,6 +492,11 @@ class StepValueFrom:
|
|
|
479
492
|
self.req = req
|
|
480
493
|
self.container_engine = container_engine
|
|
481
494
|
|
|
495
|
+
def __repr__(self) -> str:
|
|
496
|
+
"""Allow for debug printing."""
|
|
497
|
+
|
|
498
|
+
return f"StepValueFrom({self.expr}, {self.source}, {self.req}, {self.container_engine})"
|
|
499
|
+
|
|
482
500
|
def eval_prep(
|
|
483
501
|
self, step_inputs: CWLObjectType, file_store: AbstractFileStore
|
|
484
502
|
) -> None:
|
|
@@ -551,6 +569,11 @@ class DefaultWithSource:
|
|
|
551
569
|
self.default = default
|
|
552
570
|
self.source = source
|
|
553
571
|
|
|
572
|
+
def __repr__(self) -> str:
|
|
573
|
+
"""Allow for debug printing."""
|
|
574
|
+
|
|
575
|
+
return f"DefaultWithSource({self.default}, {self.source})"
|
|
576
|
+
|
|
554
577
|
def resolve(self) -> Any:
|
|
555
578
|
"""
|
|
556
579
|
Determine the final input value when the time is right.
|
|
@@ -573,6 +596,11 @@ class JustAValue:
|
|
|
573
596
|
"""Store the value."""
|
|
574
597
|
self.val = val
|
|
575
598
|
|
|
599
|
+
def __repr__(self) -> str:
|
|
600
|
+
"""Allow for debug printing."""
|
|
601
|
+
|
|
602
|
+
return f"JustAValue({self.val})"
|
|
603
|
+
|
|
576
604
|
def resolve(self) -> Any:
|
|
577
605
|
"""Return the value."""
|
|
578
606
|
return self.val
|
|
@@ -654,6 +682,8 @@ class ToilPathMapper(PathMapper):
|
|
|
654
682
|
streaming on, and returns a file: URI to where the file or
|
|
655
683
|
directory has been downloaded to. Meant to be a partially-bound
|
|
656
684
|
version of toil_get_file().
|
|
685
|
+
:param referenced_files: List of CWL File and Directory objects, which can have their locations set as both
|
|
686
|
+
virtualized and absolute local paths
|
|
657
687
|
"""
|
|
658
688
|
self.get_file = get_file
|
|
659
689
|
self.stage_listing = stage_listing
|
|
@@ -675,28 +705,29 @@ class ToilPathMapper(PathMapper):
|
|
|
675
705
|
This is called on each File or Directory CWL object. The Files and
|
|
676
706
|
Directories all have "location" fields. For the Files, these are from
|
|
677
707
|
upload_file(), and for the Directories, these are from
|
|
678
|
-
upload_directory(),
|
|
679
|
-
locations based on listing the Directories using ToilFsAccess.
|
|
708
|
+
upload_directory() or cwltool internally. With upload_directory(), they and their children will be assigned
|
|
709
|
+
locations based on listing the Directories using ToilFsAccess. With cwltool, locations will be set as absolute
|
|
710
|
+
paths.
|
|
680
711
|
|
|
681
712
|
:param obj: The CWL File or Directory to process
|
|
682
713
|
|
|
683
714
|
:param stagedir: The base path for target paths to be generated under,
|
|
684
|
-
|
|
685
|
-
|
|
715
|
+
except when a File or Directory has an overriding parent directory in
|
|
716
|
+
dirname
|
|
686
717
|
|
|
687
718
|
:param basedir: The directory from which relative paths should be
|
|
688
|
-
|
|
689
|
-
|
|
719
|
+
resolved; used as the base directory for the StdFsAccess that generated
|
|
720
|
+
the listing being processed.
|
|
690
721
|
|
|
691
722
|
:param copy: If set, use writable types for Files and Directories.
|
|
692
723
|
|
|
693
724
|
:param staged: Starts as True at the top of the recursion. Set to False
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
725
|
+
when entering a directory that we can actually download, so we don't
|
|
726
|
+
stage files and subdirectories separately from the directory as a
|
|
727
|
+
whole. Controls the staged flag on generated mappings, and therefore
|
|
728
|
+
whether files and directories are actually placed at their mapped-to
|
|
729
|
+
target locations. If stage_listing is True, we will leave this True
|
|
730
|
+
throughout and stage everything.
|
|
700
731
|
|
|
701
732
|
Produces one MapperEnt for every unique location for a File or
|
|
702
733
|
Directory. These MapperEnt objects are instructions to cwltool's
|
|
@@ -807,6 +838,14 @@ class ToilPathMapper(PathMapper):
|
|
|
807
838
|
# We can't really make the directory. Maybe we are
|
|
808
839
|
# exporting from the leader and it doesn't matter.
|
|
809
840
|
resolved = location
|
|
841
|
+
elif location.startswith("/"):
|
|
842
|
+
# Test if path is an absolute local path
|
|
843
|
+
# Does not check if the path is relative
|
|
844
|
+
# While Toil encodes paths into a URL with ToilPathMapper,
|
|
845
|
+
# something called internally in cwltool may return an absolute path
|
|
846
|
+
# ex: if cwltool calls itself internally in command_line_tool.py,
|
|
847
|
+
# it collects outputs with collect_output, and revmap_file will use its own internal pathmapper
|
|
848
|
+
resolved = location
|
|
810
849
|
else:
|
|
811
850
|
raise RuntimeError("Unsupported location: " + location)
|
|
812
851
|
|
|
@@ -883,7 +922,6 @@ class ToilPathMapper(PathMapper):
|
|
|
883
922
|
)
|
|
884
923
|
else:
|
|
885
924
|
deref = ab
|
|
886
|
-
|
|
887
925
|
if deref.startswith("file:"):
|
|
888
926
|
deref = schema_salad.ref_resolver.uri_file_path(deref)
|
|
889
927
|
if urlsplit(deref).scheme in ["http", "https"]:
|
|
@@ -1027,8 +1065,6 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
|
|
|
1027
1065
|
class ToilExpressionTool(ToilTool, cwltool.command_line_tool.ExpressionTool):
|
|
1028
1066
|
"""Subclass the cwltool expression tool to provide the custom ToilPathMapper."""
|
|
1029
1067
|
|
|
1030
|
-
pass
|
|
1031
|
-
|
|
1032
1068
|
|
|
1033
1069
|
def toil_make_tool(
|
|
1034
1070
|
toolpath_object: CommentedMap,
|
|
@@ -1047,10 +1083,7 @@ def toil_make_tool(
|
|
|
1047
1083
|
return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)
|
|
1048
1084
|
|
|
1049
1085
|
|
|
1050
|
-
|
|
1051
|
-
# can't say that until https://github.com/python/mypy/issues/731 is fixed
|
|
1052
|
-
# because it's recursive.
|
|
1053
|
-
DirectoryContents = Dict[str, Union[str, Dict[str, Any]]]
|
|
1086
|
+
DirectoryContents = Dict[str, Union[str, "DirectoryContents"]]
|
|
1054
1087
|
|
|
1055
1088
|
|
|
1056
1089
|
def check_directory_dict_invariants(contents: DirectoryContents) -> None:
|
|
@@ -1080,9 +1113,8 @@ def decode_directory(
|
|
|
1080
1113
|
None), and the deduplication key string that uniquely identifies the
|
|
1081
1114
|
directory.
|
|
1082
1115
|
"""
|
|
1083
|
-
|
|
1084
|
-
"
|
|
1085
|
-
), f"Cannot decode non-directory path: {dir_path}"
|
|
1116
|
+
if not dir_path.startswith("toildir:"):
|
|
1117
|
+
raise RuntimeError(f"Cannot decode non-directory path: {dir_path}")
|
|
1086
1118
|
|
|
1087
1119
|
# We will decode the directory and then look inside it
|
|
1088
1120
|
|
|
@@ -1203,7 +1235,8 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1203
1235
|
|
|
1204
1236
|
logger.debug("ToilFsAccess downloading %s to %s", cache_key, temp_dir)
|
|
1205
1237
|
|
|
1206
|
-
# Save it all into this new temp directory
|
|
1238
|
+
# Save it all into this new temp directory.
|
|
1239
|
+
# Guaranteed to fill it with real files and not symlinks.
|
|
1207
1240
|
download_structure(self.file_store, {}, {}, contents, temp_dir)
|
|
1208
1241
|
|
|
1209
1242
|
# Make sure we use the same temp directory if we go traversing
|
|
@@ -1233,7 +1266,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1233
1266
|
logger.debug(
|
|
1234
1267
|
"ToilFsAccess fetching directory %s from a JobStore", path
|
|
1235
1268
|
)
|
|
1236
|
-
dest_dir =
|
|
1269
|
+
dest_dir = mkdtemp()
|
|
1237
1270
|
|
|
1238
1271
|
# Recursively fetch all the files in the directory.
|
|
1239
1272
|
def download_to(url: str, dest: str) -> None:
|
|
@@ -1256,7 +1289,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1256
1289
|
logger.debug("ToilFsAccess fetching file %s from a JobStore", path)
|
|
1257
1290
|
# Try to grab it with a jobstore implementation, and save it
|
|
1258
1291
|
# somewhere arbitrary.
|
|
1259
|
-
dest_file =
|
|
1292
|
+
dest_file = NamedTemporaryFile(delete=False)
|
|
1260
1293
|
AbstractJobStore.read_from_url(path, dest_file)
|
|
1261
1294
|
dest_file.close()
|
|
1262
1295
|
self.dir_to_download[path] = dest_file.name
|
|
@@ -1271,72 +1304,160 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1271
1304
|
return destination
|
|
1272
1305
|
|
|
1273
1306
|
def glob(self, pattern: str) -> List[str]:
|
|
1274
|
-
|
|
1275
|
-
|
|
1307
|
+
parse = urlparse(pattern)
|
|
1308
|
+
if parse.scheme == "file":
|
|
1309
|
+
pattern = os.path.abspath(unquote(parse.path))
|
|
1310
|
+
elif parse.scheme == "":
|
|
1311
|
+
pattern = os.path.abspath(pattern)
|
|
1312
|
+
else:
|
|
1313
|
+
raise RuntimeError(f"Cannot efficiently support globbing on {parse.scheme} URIs")
|
|
1314
|
+
|
|
1315
|
+
# Actually do the glob
|
|
1316
|
+
return [schema_salad.ref_resolver.file_uri(f) for f in glob.glob(pattern)]
|
|
1276
1317
|
|
|
1277
1318
|
def open(self, fn: str, mode: str) -> IO[Any]:
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1319
|
+
if "w" in mode or "x" in mode or "+" in mode or "a" in mode:
|
|
1320
|
+
raise RuntimeError(f"Mode {mode} for opening {fn} involves writing")
|
|
1321
|
+
|
|
1322
|
+
parse = urlparse(fn)
|
|
1323
|
+
if parse.scheme in ["", "file"]:
|
|
1324
|
+
# Handle local files
|
|
1325
|
+
return open(self._abs(fn), mode)
|
|
1326
|
+
elif parse.scheme == "toildir":
|
|
1327
|
+
contents, subpath, cache_key = decode_directory(fn)
|
|
1328
|
+
if cache_key in self.dir_to_download:
|
|
1329
|
+
# This is already available locally, so fall back on the local copy
|
|
1330
|
+
return open(self._abs(fn), mode)
|
|
1331
|
+
else:
|
|
1332
|
+
# We need to get the URI out of the virtual directory
|
|
1333
|
+
if subpath is None:
|
|
1334
|
+
raise RuntimeError(f"{fn} is a toildir directory")
|
|
1335
|
+
uri = get_from_structure(contents, subpath)
|
|
1336
|
+
if not isinstance(uri, str):
|
|
1337
|
+
raise RuntimeError(f"{fn} does not point to a file")
|
|
1338
|
+
# Recurse on that URI
|
|
1339
|
+
return self.open(uri, mode)
|
|
1340
|
+
elif parse.scheme == "toilfile":
|
|
1341
|
+
if self.file_store is None:
|
|
1342
|
+
raise RuntimeError("URL requires a file store: " + fn)
|
|
1343
|
+
# Streaming access to Toil file store files requires being inside a
|
|
1344
|
+
# context manager, which we can't require. So we need to download
|
|
1345
|
+
# the file.
|
|
1346
|
+
return open(self._abs(fn), mode)
|
|
1347
|
+
else:
|
|
1348
|
+
# This should be supported by a job store.
|
|
1349
|
+
byte_stream = AbstractJobStore.open_url(fn)
|
|
1350
|
+
if 'b' in mode:
|
|
1351
|
+
# Pass stream along in binary
|
|
1352
|
+
return byte_stream
|
|
1353
|
+
else:
|
|
1354
|
+
# Wrap it in a text decoder
|
|
1355
|
+
return io.TextIOWrapper(byte_stream, encoding='utf-8')
|
|
1281
1356
|
|
|
1282
1357
|
def exists(self, path: str) -> bool:
|
|
1283
1358
|
"""Test for file existence."""
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
#
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1359
|
+
parse = urlparse(path)
|
|
1360
|
+
if parse.scheme in ["", "file"]:
|
|
1361
|
+
# Handle local files
|
|
1362
|
+
# toil's _abs() throws errors when files are not found and cwltool's _abs() does not
|
|
1363
|
+
try:
|
|
1364
|
+
return os.path.exists(self._abs(path))
|
|
1365
|
+
except NoSuchFileException:
|
|
1366
|
+
return False
|
|
1367
|
+
elif parse.scheme == "toildir":
|
|
1368
|
+
contents, subpath, cache_key = decode_directory(path)
|
|
1369
|
+
if subpath is None:
|
|
1370
|
+
# The toildir directory itself exists
|
|
1371
|
+
return True
|
|
1372
|
+
uri = get_from_structure(contents, subpath)
|
|
1373
|
+
if uri is None:
|
|
1374
|
+
# It's not in the virtual directory, so it doesn't exist
|
|
1375
|
+
return False
|
|
1376
|
+
if isinstance(uri, dict):
|
|
1377
|
+
# Actually it's a subdirectory, so it exists.
|
|
1378
|
+
return True
|
|
1379
|
+
# We recurse and poll the URI directly to make sure it really exists
|
|
1380
|
+
return self.exists(uri)
|
|
1381
|
+
elif parse.scheme == "toilfile":
|
|
1382
|
+
# TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
|
|
1383
|
+
return True
|
|
1384
|
+
else:
|
|
1385
|
+
# This should be supported by a job store.
|
|
1386
|
+
return AbstractJobStore.url_exists(path)
|
|
1290
1387
|
|
|
1291
1388
|
def size(self, path: str) -> int:
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
if self.file_store is None:
|
|
1297
|
-
raise RuntimeError("URL requires a file store: " + path)
|
|
1298
|
-
return self.file_store.getGlobalFileSize(
|
|
1299
|
-
FileID.unpack(path[len("toilfile:") :])
|
|
1300
|
-
)
|
|
1301
|
-
elif path.startswith("toildir:"):
|
|
1389
|
+
parse = urlparse(path)
|
|
1390
|
+
if parse.scheme in ["", "file"]:
|
|
1391
|
+
return os.stat(self._abs(path)).st_size
|
|
1392
|
+
elif parse.scheme == "toildir":
|
|
1302
1393
|
# Decode its contents, the path inside it to the file (if any), and
|
|
1303
1394
|
# the key to use for caching the directory.
|
|
1304
|
-
|
|
1395
|
+
contents, subpath, cache_key = decode_directory(path)
|
|
1305
1396
|
|
|
1306
1397
|
# We can't get the size of just a directory.
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
for part in subpath.split("/"):
|
|
1310
|
-
# Follow the path inside the directory contents.
|
|
1311
|
-
here = cast(DirectoryContents, here[part])
|
|
1398
|
+
if subpath is None:
|
|
1399
|
+
raise RuntimeError(f"Attempted to check size of directory {path}")
|
|
1312
1400
|
|
|
1313
|
-
|
|
1314
|
-
assert isinstance(here, str), f"Did not find a file at {path}"
|
|
1315
|
-
assert here.startswith(
|
|
1316
|
-
"toilfile:"
|
|
1317
|
-
), f"Did not find a filestore file at {path}"
|
|
1401
|
+
uri = get_from_structure(contents, subpath)
|
|
1318
1402
|
|
|
1319
|
-
|
|
1403
|
+
# We ought to end up with a URI.
|
|
1404
|
+
if not isinstance(uri, str):
|
|
1405
|
+
raise RuntimeError(f"Did not find a file at {path}")
|
|
1406
|
+
return self.size(uri)
|
|
1407
|
+
elif parse.scheme == "toilfile":
|
|
1408
|
+
if self.file_store is None:
|
|
1409
|
+
raise RuntimeError("URL requires a file store: " + path)
|
|
1410
|
+
return self.file_store.getGlobalFileSize(
|
|
1411
|
+
FileID.unpack(path[len("toilfile:") :])
|
|
1412
|
+
)
|
|
1320
1413
|
else:
|
|
1321
|
-
#
|
|
1322
|
-
|
|
1323
|
-
|
|
1414
|
+
# This should be supported by a job store.
|
|
1415
|
+
size = AbstractJobStore.get_size(path)
|
|
1416
|
+
if size is None:
|
|
1417
|
+
# get_size can be unimplemented or unavailable
|
|
1418
|
+
raise RuntimeError(f"Could not get size of {path}")
|
|
1419
|
+
return size
|
|
1324
1420
|
|
|
1325
1421
|
def isfile(self, fn: str) -> bool:
|
|
1326
1422
|
parse = urlparse(fn)
|
|
1327
|
-
if parse.scheme in ["
|
|
1328
|
-
|
|
1329
|
-
|
|
1423
|
+
if parse.scheme in ["file", ""]:
|
|
1424
|
+
return os.path.isfile(self._abs(fn))
|
|
1425
|
+
elif parse.scheme == "toilfile":
|
|
1426
|
+
# TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
|
|
1427
|
+
return True
|
|
1428
|
+
elif parse.scheme == "toildir":
|
|
1429
|
+
contents, subpath, cache_key = decode_directory(fn)
|
|
1430
|
+
if subpath is None:
|
|
1431
|
+
# This is the toildir directory itself
|
|
1432
|
+
return False
|
|
1433
|
+
found = get_from_structure(contents, subpath)
|
|
1434
|
+
# If we find a string, that's a file
|
|
1435
|
+
# TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
|
|
1436
|
+
return isinstance(found, str)
|
|
1330
1437
|
else:
|
|
1331
|
-
return not AbstractJobStore.get_is_directory(fn)
|
|
1438
|
+
return self.exists(fn) and not AbstractJobStore.get_is_directory(fn)
|
|
1332
1439
|
|
|
1333
1440
|
def isdir(self, fn: str) -> bool:
|
|
1441
|
+
logger.debug("ToilFsAccess checking type of %s", fn)
|
|
1334
1442
|
parse = urlparse(fn)
|
|
1335
|
-
if parse.scheme in ["
|
|
1336
|
-
|
|
1337
|
-
|
|
1443
|
+
if parse.scheme in ["file", ""]:
|
|
1444
|
+
return os.path.isdir(self._abs(fn))
|
|
1445
|
+
elif parse.scheme == "toilfile":
|
|
1446
|
+
return False
|
|
1447
|
+
elif parse.scheme == "toildir":
|
|
1448
|
+
contents, subpath, cache_key = decode_directory(fn)
|
|
1449
|
+
if subpath is None:
|
|
1450
|
+
# This is the toildir directory itself.
|
|
1451
|
+
# TODO: We assume directories can't be deleted.
|
|
1452
|
+
return True
|
|
1453
|
+
found = get_from_structure(contents, subpath)
|
|
1454
|
+
# If we find a dict, that's a directory.
|
|
1455
|
+
# TODO: We assume directories can't be deleted.
|
|
1456
|
+
return isinstance(found, dict)
|
|
1338
1457
|
else:
|
|
1339
|
-
|
|
1458
|
+
status = AbstractJobStore.get_is_directory(fn)
|
|
1459
|
+
logger.debug("AbstractJobStore said: %s", status)
|
|
1460
|
+
return status
|
|
1340
1461
|
|
|
1341
1462
|
def listdir(self, fn: str) -> List[str]:
|
|
1342
1463
|
# This needs to return full URLs for everything in the directory.
|
|
@@ -1344,12 +1465,25 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1344
1465
|
logger.debug("ToilFsAccess listing %s", fn)
|
|
1345
1466
|
|
|
1346
1467
|
parse = urlparse(fn)
|
|
1347
|
-
if parse.scheme in ["
|
|
1348
|
-
#
|
|
1468
|
+
if parse.scheme in ["file", ""]:
|
|
1469
|
+
# Find the local path
|
|
1349
1470
|
directory = self._abs(fn)
|
|
1350
|
-
|
|
1351
1471
|
# Now list it (it is probably a directory)
|
|
1352
1472
|
return [abspath(quote(entry), fn) for entry in os.listdir(directory)]
|
|
1473
|
+
elif parse.scheme == "toilfile":
|
|
1474
|
+
raise RuntimeError(f"Cannot list a file: {fn}")
|
|
1475
|
+
elif parse.scheme == "toildir":
|
|
1476
|
+
contents, subpath, cache_key = decode_directory(fn)
|
|
1477
|
+
here = contents
|
|
1478
|
+
if subpath is not None:
|
|
1479
|
+
got = get_from_structure(contents, subpath)
|
|
1480
|
+
if got is None:
|
|
1481
|
+
raise RuntimeError(f"Cannot list nonexistent directory: {fn}")
|
|
1482
|
+
if isinstance(got, str):
|
|
1483
|
+
raise RuntimeError(f"Cannot list file or dubdirectory of a file: {fn}")
|
|
1484
|
+
here = got
|
|
1485
|
+
# List all the things in here and make full URIs to them
|
|
1486
|
+
return [os.path.join(fn, k) for k in here.keys()]
|
|
1353
1487
|
else:
|
|
1354
1488
|
return [
|
|
1355
1489
|
os.path.join(fn, entry.rstrip("/"))
|
|
@@ -1371,7 +1505,7 @@ def toil_get_file(
|
|
|
1371
1505
|
file_store: AbstractFileStore,
|
|
1372
1506
|
index: Dict[str, str],
|
|
1373
1507
|
existing: Dict[str, str],
|
|
1374
|
-
|
|
1508
|
+
uri: str,
|
|
1375
1509
|
streamable: bool = False,
|
|
1376
1510
|
streaming_allowed: bool = True,
|
|
1377
1511
|
pipe_threads: Optional[List[Tuple[Thread, int]]] = None,
|
|
@@ -1388,28 +1522,28 @@ def toil_get_file(
|
|
|
1388
1522
|
|
|
1389
1523
|
:param index: Maps from downloaded file path back to input Toil URI.
|
|
1390
1524
|
|
|
1391
|
-
:param existing: Maps from
|
|
1525
|
+
:param existing: Maps from URI to downloaded file path.
|
|
1392
1526
|
|
|
1393
|
-
:param
|
|
1527
|
+
:param uri: The URI for the file to download.
|
|
1394
1528
|
|
|
1395
1529
|
:param streamable: If the file is has 'streamable' flag set
|
|
1396
1530
|
|
|
1397
1531
|
:param streaming_allowed: If streaming is allowed
|
|
1398
1532
|
|
|
1399
1533
|
:param pipe_threads: List of threads responsible for streaming the data
|
|
1400
|
-
|
|
1401
|
-
|
|
1534
|
+
and open file descriptors corresponding to those files. Caller is responsible
|
|
1535
|
+
to close the file descriptors (to break the pipes) and join the threads
|
|
1402
1536
|
"""
|
|
1403
1537
|
pipe_threads_real = pipe_threads or []
|
|
1404
1538
|
# We can't use urlparse here because we need to handle the '_:' scheme and
|
|
1405
1539
|
# urlparse sees that as a path and not a URI scheme.
|
|
1406
|
-
if
|
|
1540
|
+
if uri.startswith("toildir:"):
|
|
1407
1541
|
# This is a file in a directory, or maybe a directory itself.
|
|
1408
1542
|
# See ToilFsAccess and upload_directory.
|
|
1409
1543
|
# We will go look for the actual file in the encoded directory
|
|
1410
1544
|
# structure which will tell us where the toilfile: name for the file is.
|
|
1411
1545
|
|
|
1412
|
-
parts =
|
|
1546
|
+
parts = uri[len("toildir:") :].split("/")
|
|
1413
1547
|
contents = json.loads(
|
|
1414
1548
|
base64.urlsafe_b64decode(parts[0].encode("utf-8")).decode("utf-8")
|
|
1415
1549
|
)
|
|
@@ -1429,21 +1563,41 @@ def toil_get_file(
|
|
|
1429
1563
|
download_structure(file_store, index, existing, contents, dest_path)
|
|
1430
1564
|
# Return where we put it, but as a file:// URI
|
|
1431
1565
|
return schema_salad.ref_resolver.file_uri(dest_path)
|
|
1432
|
-
elif
|
|
1433
|
-
#
|
|
1566
|
+
elif uri.startswith("_:"):
|
|
1567
|
+
# Someone is asking us for an empty temp directory.
|
|
1568
|
+
# We need to check this before the file path case because urlsplit()
|
|
1569
|
+
# will call this a path with no scheme.
|
|
1570
|
+
dest_path = file_store.getLocalTempDir()
|
|
1571
|
+
return schema_salad.ref_resolver.file_uri(dest_path)
|
|
1572
|
+
elif uri.startswith("file:") or urlsplit(uri).scheme == "":
|
|
1573
|
+
# There's a file: scheme or no scheme, and we know this isn't a _: URL.
|
|
1574
|
+
|
|
1575
|
+
# We need to support file: URIs and local paths, because we might be
|
|
1576
|
+
# involved in moving files around on the local disk when uploading
|
|
1577
|
+
# things after a job. We might want to catch cases where a leader
|
|
1578
|
+
# filesystem file URI leaks in here, but we can't, so we just rely on
|
|
1579
|
+
# the rest of the code to be correct.
|
|
1580
|
+
return uri
|
|
1581
|
+
else:
|
|
1582
|
+
# This is a toilfile: uri or other remote URI
|
|
1434
1583
|
def write_to_pipe(
|
|
1435
|
-
file_store: AbstractFileStore, pipe_name: str,
|
|
1584
|
+
file_store: AbstractFileStore, pipe_name: str, uri: str
|
|
1436
1585
|
) -> None:
|
|
1437
1586
|
try:
|
|
1438
1587
|
with open(pipe_name, "wb") as pipe:
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1588
|
+
if uri.startswith("toilfile:"):
|
|
1589
|
+
# Stream from the file store
|
|
1590
|
+
file_store_id = FileID.unpack(uri[len("toilfile:") :])
|
|
1591
|
+
with file_store.readGlobalFileStream(file_store_id) as fi:
|
|
1592
|
+
chunk_sz = 1024
|
|
1593
|
+
while True:
|
|
1594
|
+
data = fi.read(chunk_sz)
|
|
1595
|
+
if not data:
|
|
1596
|
+
break
|
|
1597
|
+
pipe.write(data)
|
|
1598
|
+
else:
|
|
1599
|
+
# Stream from some other URI
|
|
1600
|
+
AbstractJobStore.read_from_url(uri, pipe)
|
|
1447
1601
|
except OSError as e:
|
|
1448
1602
|
# The other side of the pipe may have been closed by the
|
|
1449
1603
|
# reading thread, which is OK.
|
|
@@ -1456,7 +1610,7 @@ def toil_get_file(
|
|
|
1456
1610
|
and not isinstance(file_store.jobStore, FileJobStore)
|
|
1457
1611
|
):
|
|
1458
1612
|
logger.debug(
|
|
1459
|
-
"Streaming file %s",
|
|
1613
|
+
"Streaming file %s", uri
|
|
1460
1614
|
)
|
|
1461
1615
|
src_path = file_store.getLocalTempFileName()
|
|
1462
1616
|
os.mkfifo(src_path)
|
|
@@ -1465,42 +1619,39 @@ def toil_get_file(
|
|
|
1465
1619
|
args=(
|
|
1466
1620
|
file_store,
|
|
1467
1621
|
src_path,
|
|
1468
|
-
|
|
1622
|
+
uri,
|
|
1469
1623
|
),
|
|
1470
1624
|
)
|
|
1471
1625
|
th.start()
|
|
1472
1626
|
pipe_threads_real.append((th, os.open(src_path, os.O_RDONLY)))
|
|
1473
1627
|
else:
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1628
|
+
# We need to do a real file
|
|
1629
|
+
if uri in existing:
|
|
1630
|
+
# Already did it
|
|
1631
|
+
src_path = existing[uri]
|
|
1632
|
+
else:
|
|
1633
|
+
if uri.startswith("toilfile:"):
|
|
1634
|
+
# Download from the file store
|
|
1635
|
+
file_store_id = FileID.unpack(uri[len("toilfile:") :])
|
|
1636
|
+
src_path = file_store.readGlobalFile(
|
|
1637
|
+
file_store_id, symlink=True
|
|
1638
|
+
)
|
|
1639
|
+
else:
|
|
1640
|
+
# Download from the URI via the job store.
|
|
1641
|
+
|
|
1642
|
+
# Figure out where it goes.
|
|
1643
|
+
src_path = file_store.getLocalTempFileName()
|
|
1644
|
+
# Open that path exclusively to make sure we created it
|
|
1645
|
+
with open(src_path, 'xb') as fh:
|
|
1646
|
+
# Download into the file
|
|
1647
|
+
size, executable = AbstractJobStore.read_from_url(uri, fh)
|
|
1648
|
+
if executable:
|
|
1649
|
+
# Set the execute bit in the file's permissions
|
|
1650
|
+
os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
|
|
1651
|
+
|
|
1652
|
+
index[src_path] = uri
|
|
1653
|
+
existing[uri] = src_path
|
|
1481
1654
|
return schema_salad.ref_resolver.file_uri(src_path)
|
|
1482
|
-
elif file_store_id.startswith("_:"):
|
|
1483
|
-
# Someone is asking us for an empty temp directory.
|
|
1484
|
-
# We need to check this before the file path case because urlsplit()
|
|
1485
|
-
# will call this a path with no scheme.
|
|
1486
|
-
dest_path = file_store.getLocalTempDir()
|
|
1487
|
-
return schema_salad.ref_resolver.file_uri(dest_path)
|
|
1488
|
-
elif file_store_id.startswith("file:") or urlsplit(file_store_id).scheme == "":
|
|
1489
|
-
# There's a file: scheme or no scheme, and we know this isn't a _: URL.
|
|
1490
|
-
|
|
1491
|
-
# We need to support file: URIs and local paths, because we might be
|
|
1492
|
-
# involved in moving files around on the local disk when uploading
|
|
1493
|
-
# things after a job. We might want to catch cases where a leader
|
|
1494
|
-
# filesystem file URI leaks in here, but we can't, so we just rely on
|
|
1495
|
-
# the rest of the code to be correct.
|
|
1496
|
-
return file_store_id
|
|
1497
|
-
else:
|
|
1498
|
-
raise RuntimeError(
|
|
1499
|
-
f"Cannot obtain file {file_store_id} while on host "
|
|
1500
|
-
f"{socket.gethostname()}; all imports must happen on the "
|
|
1501
|
-
f"leader!"
|
|
1502
|
-
)
|
|
1503
|
-
|
|
1504
1655
|
|
|
1505
1656
|
def write_file(
|
|
1506
1657
|
writeFunc: Callable[[str], FileID],
|
|
@@ -1557,7 +1708,9 @@ def import_files(
|
|
|
1557
1708
|
existing: Dict[str, str],
|
|
1558
1709
|
cwl_object: Optional[CWLObjectType],
|
|
1559
1710
|
skip_broken: bool = False,
|
|
1711
|
+
skip_remote: bool = False,
|
|
1560
1712
|
bypass_file_store: bool = False,
|
|
1713
|
+
log_level: int = logging.DEBUG
|
|
1561
1714
|
) -> None:
|
|
1562
1715
|
"""
|
|
1563
1716
|
Prepare all files and directories.
|
|
@@ -1579,28 +1732,41 @@ def import_files(
|
|
|
1579
1732
|
Also does some miscelaneous normalization.
|
|
1580
1733
|
|
|
1581
1734
|
:param import_function: The function used to upload a URI and get a
|
|
1582
|
-
|
|
1735
|
+
Toil FileID for it.
|
|
1583
1736
|
|
|
1584
1737
|
:param fs_access: the CWL FS access object we use to access the filesystem
|
|
1585
|
-
|
|
1738
|
+
to find files to import. Needs to support the URI schemes used.
|
|
1586
1739
|
|
|
1587
1740
|
:param fileindex: Forward map to fill in from file URI to Toil storage
|
|
1588
|
-
|
|
1741
|
+
location, used by write_file to deduplicate writes.
|
|
1589
1742
|
|
|
1590
1743
|
:param existing: Reverse map to fill in from Toil storage location to file
|
|
1591
|
-
|
|
1744
|
+
URI. Not read from.
|
|
1592
1745
|
|
|
1593
1746
|
:param cwl_object: CWL tool (or workflow order) we are importing files for
|
|
1594
1747
|
|
|
1595
1748
|
:param skip_broken: If True, when files can't be imported because they e.g.
|
|
1596
|
-
|
|
1749
|
+
don't exist, leave their locations alone rather than failing with an error.
|
|
1750
|
+
|
|
1751
|
+
:param skp_remote: If True, leave remote URIs in place instead of importing
|
|
1752
|
+
files.
|
|
1597
1753
|
|
|
1598
1754
|
:param bypass_file_store: If True, leave file:// URIs in place instead of
|
|
1599
|
-
|
|
1755
|
+
importing files and directories.
|
|
1756
|
+
|
|
1757
|
+
:param log_level: Log imported files at the given level.
|
|
1600
1758
|
"""
|
|
1601
1759
|
tool_id = cwl_object.get("id", str(cwl_object)) if cwl_object else ""
|
|
1602
1760
|
|
|
1603
1761
|
logger.debug("Importing files for %s", tool_id)
|
|
1762
|
+
logger.debug("Importing files in %s", cwl_object)
|
|
1763
|
+
|
|
1764
|
+
def import_and_log(url: str) -> FileID:
|
|
1765
|
+
"""
|
|
1766
|
+
Upload a file and log that we are doing so.
|
|
1767
|
+
"""
|
|
1768
|
+
logger.log(log_level, "Loading %s...", url)
|
|
1769
|
+
return import_function(url)
|
|
1604
1770
|
|
|
1605
1771
|
# We need to upload all files to the Toil filestore, and encode structure
|
|
1606
1772
|
# recursively into all Directories' locations. But we cannot safely alter
|
|
@@ -1700,7 +1866,7 @@ def import_files(
|
|
|
1700
1866
|
|
|
1701
1867
|
# Upload the file itself, which will adjust its location.
|
|
1702
1868
|
upload_file(
|
|
1703
|
-
|
|
1869
|
+
import_and_log, fileindex, existing, rec, skip_broken=skip_broken, skip_remote=skip_remote
|
|
1704
1870
|
)
|
|
1705
1871
|
|
|
1706
1872
|
# Make a record for this file under its name
|
|
@@ -1805,11 +1971,16 @@ def upload_file(
|
|
|
1805
1971
|
existing: Dict[str, str],
|
|
1806
1972
|
file_metadata: CWLObjectType,
|
|
1807
1973
|
skip_broken: bool = False,
|
|
1974
|
+
skip_remote: bool = False
|
|
1808
1975
|
) -> None:
|
|
1809
1976
|
"""
|
|
1810
|
-
Update a file object so that the
|
|
1977
|
+
Update a file object so that the file will be accessible from another machine.
|
|
1811
1978
|
|
|
1812
|
-
|
|
1979
|
+
Uploads local files to the Toil file store, and sets their location to a
|
|
1980
|
+
reference to the toil file store.
|
|
1981
|
+
|
|
1982
|
+
Unless skip_remote is set, downloads remote files into the file store and
|
|
1983
|
+
sets their locations to references into the file store as well.
|
|
1813
1984
|
"""
|
|
1814
1985
|
location = cast(str, file_metadata["location"])
|
|
1815
1986
|
if (
|
|
@@ -1832,7 +2003,10 @@ def upload_file(
|
|
|
1832
2003
|
return
|
|
1833
2004
|
else:
|
|
1834
2005
|
raise cwl_utils.errors.WorkflowException("File is missing: %s" % location)
|
|
1835
|
-
|
|
2006
|
+
|
|
2007
|
+
if location.startswith("file://") or not skip_remote:
|
|
2008
|
+
# This is a local file, or we also need to download and re-upload remote files
|
|
2009
|
+
file_metadata["location"] = write_file(uploadfunc, fileindex, existing, location)
|
|
1836
2010
|
|
|
1837
2011
|
logger.debug("Sending file at: %s", file_metadata["location"])
|
|
1838
2012
|
|
|
@@ -1866,6 +2040,7 @@ class CWLNamedJob(Job):
|
|
|
1866
2040
|
memory: Union[int, str, None] = "1GiB",
|
|
1867
2041
|
disk: Union[int, str, None] = "1MiB",
|
|
1868
2042
|
accelerators: Optional[List[AcceleratorRequirement]] = None,
|
|
2043
|
+
preemptible: Optional[bool] = None,
|
|
1869
2044
|
tool_id: Optional[str] = None,
|
|
1870
2045
|
parent_name: Optional[str] = None,
|
|
1871
2046
|
subjob_name: Optional[str] = None,
|
|
@@ -1910,6 +2085,7 @@ class CWLNamedJob(Job):
|
|
|
1910
2085
|
memory=memory,
|
|
1911
2086
|
disk=disk,
|
|
1912
2087
|
accelerators=accelerators,
|
|
2088
|
+
preemptible=preemptible,
|
|
1913
2089
|
unitName=unit_name,
|
|
1914
2090
|
displayName=display_name,
|
|
1915
2091
|
local=local,
|
|
@@ -1941,12 +2117,15 @@ def toilStageFiles(
|
|
|
1941
2117
|
cwljob: Union[CWLObjectType, List[CWLObjectType]],
|
|
1942
2118
|
outdir: str,
|
|
1943
2119
|
destBucket: Union[str, None] = None,
|
|
2120
|
+
log_level: int = logging.DEBUG
|
|
1944
2121
|
) -> None:
|
|
1945
2122
|
"""
|
|
1946
2123
|
Copy input files out of the global file store and update location and path.
|
|
1947
2124
|
|
|
1948
2125
|
:param destBucket: If set, export to this base URL instead of to the local
|
|
1949
2126
|
filesystem.
|
|
2127
|
+
|
|
2128
|
+
:param log_level: Log each file transfered at the given level.
|
|
1950
2129
|
"""
|
|
1951
2130
|
|
|
1952
2131
|
def _collectDirEntries(
|
|
@@ -1986,7 +2165,6 @@ def toilStageFiles(
|
|
|
1986
2165
|
stage_listing=True,
|
|
1987
2166
|
)
|
|
1988
2167
|
for _, p in pm.items():
|
|
1989
|
-
logger.debug("Staging output: %s", p)
|
|
1990
2168
|
if p.staged:
|
|
1991
2169
|
# We're supposed to copy/expose something.
|
|
1992
2170
|
# Note that we have to handle writable versions of everything
|
|
@@ -2008,7 +2186,7 @@ def toilStageFiles(
|
|
|
2008
2186
|
"CreateFile",
|
|
2009
2187
|
"CreateWritableFile",
|
|
2010
2188
|
]: # TODO: CreateFile for buckets is not under testing
|
|
2011
|
-
with
|
|
2189
|
+
with NamedTemporaryFile() as f:
|
|
2012
2190
|
# Make a file with the right contents
|
|
2013
2191
|
f.write(file_id_or_contents.encode("utf-8"))
|
|
2014
2192
|
f.close()
|
|
@@ -2027,39 +2205,63 @@ def toilStageFiles(
|
|
|
2027
2205
|
# At the end we should get a direct toilfile: URI
|
|
2028
2206
|
file_id_or_contents = cast(str, here)
|
|
2029
2207
|
|
|
2208
|
+
# This might be an e.g. S3 URI now
|
|
2209
|
+
if not file_id_or_contents.startswith("toilfile:"):
|
|
2210
|
+
# We need to import it so we can export it.
|
|
2211
|
+
# TODO: Use direct S3 to S3 copy on exports as well
|
|
2212
|
+
file_id_or_contents = (
|
|
2213
|
+
"toilfile:"
|
|
2214
|
+
+ toil.import_file(file_id_or_contents, symlink=False).pack()
|
|
2215
|
+
)
|
|
2216
|
+
|
|
2030
2217
|
if file_id_or_contents.startswith("toilfile:"):
|
|
2031
2218
|
# This is something we can export
|
|
2032
|
-
|
|
2033
|
-
|
|
2219
|
+
# TODO: Do we need to urlencode the parts before sending them to S3?
|
|
2220
|
+
dest_url = "/".join(s.strip("/") for s in [destBucket, baseName])
|
|
2221
|
+
logger.log(log_level, "Saving %s...", dest_url)
|
|
2222
|
+
toil.export_file(
|
|
2034
2223
|
FileID.unpack(file_id_or_contents[len("toilfile:") :]),
|
|
2035
|
-
|
|
2224
|
+
dest_url,
|
|
2036
2225
|
)
|
|
2037
2226
|
# TODO: can a toildir: "file" get here?
|
|
2038
2227
|
else:
|
|
2039
|
-
# We are saving to the filesystem
|
|
2228
|
+
# We are saving to the filesystem.
|
|
2229
|
+
dest_url = "file://" + quote(p.target)
|
|
2230
|
+
|
|
2231
|
+
# We only really need export_file for actual files.
|
|
2040
2232
|
if not os.path.exists(p.target) and p.type in [
|
|
2041
2233
|
"Directory",
|
|
2042
2234
|
"WritableDirectory",
|
|
2043
2235
|
]:
|
|
2044
2236
|
os.makedirs(p.target)
|
|
2045
|
-
if
|
|
2046
|
-
if p.resolved.startswith("
|
|
2047
|
-
# We can actually export this
|
|
2048
|
-
os.makedirs(os.path.dirname(p.target), exist_ok=True)
|
|
2049
|
-
toil.exportFile(
|
|
2050
|
-
FileID.unpack(p.resolved[len("toilfile:") :]),
|
|
2051
|
-
"file://" + p.target,
|
|
2052
|
-
)
|
|
2053
|
-
elif p.resolved.startswith("/"):
|
|
2237
|
+
if p.type in ["File", "WritableFile"]:
|
|
2238
|
+
if p.resolved.startswith("/"):
|
|
2054
2239
|
# Probably staging and bypassing file store. Just copy.
|
|
2240
|
+
logger.log(log_level, "Saving %s...", dest_url)
|
|
2055
2241
|
os.makedirs(os.path.dirname(p.target), exist_ok=True)
|
|
2056
2242
|
shutil.copyfile(p.resolved, p.target)
|
|
2057
|
-
|
|
2058
|
-
|
|
2243
|
+
else:
|
|
2244
|
+
uri = p.resolved
|
|
2245
|
+
if not uri.startswith("toilfile:"):
|
|
2246
|
+
# We need to import so we can export
|
|
2247
|
+
uri = (
|
|
2248
|
+
"toilfile:"
|
|
2249
|
+
+ toil.import_file(uri, symlink=False).pack()
|
|
2250
|
+
)
|
|
2251
|
+
|
|
2252
|
+
# Actually export from the file store
|
|
2253
|
+
logger.log(log_level, "Saving %s...", dest_url)
|
|
2254
|
+
os.makedirs(os.path.dirname(p.target), exist_ok=True)
|
|
2255
|
+
toil.export_file(
|
|
2256
|
+
FileID.unpack(uri[len("toilfile:") :]),
|
|
2257
|
+
dest_url,
|
|
2258
|
+
)
|
|
2259
|
+
if p.type in [
|
|
2059
2260
|
"CreateFile",
|
|
2060
2261
|
"CreateWritableFile",
|
|
2061
2262
|
]:
|
|
2062
2263
|
# We just need to make a file with particular contents
|
|
2264
|
+
logger.log(log_level, "Saving %s...", dest_url)
|
|
2063
2265
|
os.makedirs(os.path.dirname(p.target), exist_ok=True)
|
|
2064
2266
|
with open(p.target, "wb") as n:
|
|
2065
2267
|
n.write(p.resolved.encode("utf-8"))
|
|
@@ -2078,6 +2280,7 @@ def toilStageFiles(
|
|
|
2078
2280
|
# Make the location point to the place we put this thing on the
|
|
2079
2281
|
# local filesystem.
|
|
2080
2282
|
f["location"] = schema_salad.ref_resolver.file_uri(mapped_location.target)
|
|
2283
|
+
f["path"] = mapped_location.target
|
|
2081
2284
|
|
|
2082
2285
|
if "contents" in f:
|
|
2083
2286
|
del f["contents"]
|
|
@@ -2182,7 +2385,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2182
2385
|
|
|
2183
2386
|
accelerators: Optional[List[AcceleratorRequirement]] = None
|
|
2184
2387
|
if req.get("cudaDeviceCount", 0) > 0:
|
|
2185
|
-
# There's a CUDARequirement
|
|
2388
|
+
# There's a CUDARequirement, which cwltool processed for us
|
|
2186
2389
|
# TODO: How is cwltool deciding what value to use between min and max?
|
|
2187
2390
|
accelerators = [
|
|
2188
2391
|
{
|
|
@@ -2192,14 +2395,62 @@ class CWLJob(CWLNamedJob):
|
|
|
2192
2395
|
}
|
|
2193
2396
|
]
|
|
2194
2397
|
|
|
2398
|
+
# cwltool doesn't handle http://arvados.org/cwl#UsePreemptible as part
|
|
2399
|
+
# of its resource logic so we have to do it manually.
|
|
2400
|
+
#
|
|
2401
|
+
# Note that according to
|
|
2402
|
+
# https://github.com/arvados/arvados/blob/48a0d575e6de34bcda91c489e4aa98df291a8cca/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml#L345
|
|
2403
|
+
# this can only be a literal boolean! cwltool doesn't want to evaluate
|
|
2404
|
+
# expressions in the value for us like it does for CUDARequirement
|
|
2405
|
+
# which has a schema which allows for CWL expressions:
|
|
2406
|
+
# https://github.com/common-workflow-language/cwltool/blob/1573509eea2faa3cd1dc959224e52ff1d796d3eb/cwltool/extensions.yml#L221
|
|
2407
|
+
#
|
|
2408
|
+
# By default we have default preemptibility.
|
|
2409
|
+
preemptible: Optional[bool] = None
|
|
2410
|
+
preemptible_req, _ = tool.get_requirement(
|
|
2411
|
+
"http://arvados.org/cwl#UsePreemptible"
|
|
2412
|
+
)
|
|
2413
|
+
if preemptible_req:
|
|
2414
|
+
if "usePreemptible" not in preemptible_req:
|
|
2415
|
+
# If we have a requirement it has to have the value
|
|
2416
|
+
raise ValidationException(
|
|
2417
|
+
f"Unacceptable syntax for http://arvados.org/cwl#UsePreemptible: "
|
|
2418
|
+
f"expected key usePreemptible but got: {preemptible_req}"
|
|
2419
|
+
)
|
|
2420
|
+
parsed_value = preemptible_req["usePreemptible"]
|
|
2421
|
+
if isinstance(parsed_value, str) and (
|
|
2422
|
+
"$(" in parsed_value or "${" in parsed_value
|
|
2423
|
+
):
|
|
2424
|
+
# Looks like they tried to use an expression
|
|
2425
|
+
raise ValidationException(
|
|
2426
|
+
f"Unacceptable value for usePreemptible in http://arvados.org/cwl#UsePreemptible: "
|
|
2427
|
+
f"expected true or false but got what appears to be an expression: {repr(parsed_value)}. "
|
|
2428
|
+
f"Note that expressions are not allowed here by Arvados's schema."
|
|
2429
|
+
)
|
|
2430
|
+
if not isinstance(parsed_value, bool):
|
|
2431
|
+
# If we have a value it has to be a bool flag
|
|
2432
|
+
raise ValidationException(
|
|
2433
|
+
f"Unacceptable value for usePreemptible in http://arvados.org/cwl#UsePreemptible: "
|
|
2434
|
+
f"expected true or false but got: {repr(parsed_value)}"
|
|
2435
|
+
)
|
|
2436
|
+
preemptible = parsed_value
|
|
2437
|
+
|
|
2438
|
+
# We always need space for the temporary files for the job
|
|
2439
|
+
total_disk = cast(int, req["tmpdirSize"]) * (2**20)
|
|
2440
|
+
if not getattr(runtime_context, "bypass_file_store", False):
|
|
2441
|
+
# If using the Toil file store, we also need space for the output
|
|
2442
|
+
# files, which may need to be stored locally and copied off the
|
|
2443
|
+
# node.
|
|
2444
|
+
total_disk += cast(int, req["outdirSize"]) * (2**20)
|
|
2445
|
+
# If not using the Toil file store, output files just go directly to
|
|
2446
|
+
# their final homes their space doesn't need to be accounted per-job.
|
|
2447
|
+
|
|
2195
2448
|
super().__init__(
|
|
2196
2449
|
cores=req["cores"],
|
|
2197
2450
|
memory=int(req["ram"] * (2**20)),
|
|
2198
|
-
disk=int(
|
|
2199
|
-
(cast(int, req["tmpdirSize"]) * (2**20))
|
|
2200
|
-
+ (cast(int, req["outdirSize"]) * (2**20))
|
|
2201
|
-
),
|
|
2451
|
+
disk=int(total_disk),
|
|
2202
2452
|
accelerators=accelerators,
|
|
2453
|
+
preemptible=preemptible,
|
|
2203
2454
|
tool_id=self.cwltool.tool["id"],
|
|
2204
2455
|
parent_name=parent_name,
|
|
2205
2456
|
local=isinstance(tool, cwltool.command_line_tool.ExpressionTool),
|
|
@@ -2265,7 +2516,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2265
2516
|
cwllogger.removeHandler(defaultStreamHandler)
|
|
2266
2517
|
cwllogger.setLevel(logger.getEffectiveLevel())
|
|
2267
2518
|
|
|
2268
|
-
logger.debug("Loaded order
|
|
2519
|
+
logger.debug("Loaded order:\n%s", self.cwljob)
|
|
2269
2520
|
|
|
2270
2521
|
cwljob = resolve_dict_w_promises(self.cwljob, file_store)
|
|
2271
2522
|
|
|
@@ -2354,6 +2605,13 @@ class CWLJob(CWLNamedJob):
|
|
|
2354
2605
|
streaming_allowed=runtime_context.streaming_allowed,
|
|
2355
2606
|
)
|
|
2356
2607
|
|
|
2608
|
+
# Collect standard output and standard error somewhere if they don't go to files.
|
|
2609
|
+
# We need to keep two FDs to these because cwltool will close what we give it.
|
|
2610
|
+
default_stdout = TemporaryFile()
|
|
2611
|
+
runtime_context.default_stdout = os.fdopen(os.dup(default_stdout.fileno()), 'wb')
|
|
2612
|
+
default_stderr = TemporaryFile()
|
|
2613
|
+
runtime_context.default_stderr = os.fdopen(os.dup(default_stderr.fileno()), 'wb')
|
|
2614
|
+
|
|
2357
2615
|
process_uuid = uuid.uuid4() # noqa F841
|
|
2358
2616
|
started_at = datetime.datetime.now() # noqa F841
|
|
2359
2617
|
|
|
@@ -2362,13 +2620,34 @@ class CWLJob(CWLNamedJob):
|
|
|
2362
2620
|
logger.debug("Running tool %s with order: %s", self.cwltool, self.cwljob)
|
|
2363
2621
|
|
|
2364
2622
|
runtime_context.name = self.description.unitName
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
|
|
2370
|
-
|
|
2371
|
-
|
|
2623
|
+
|
|
2624
|
+
status = "did_not_run"
|
|
2625
|
+
try:
|
|
2626
|
+
output, status = ToilSingleJobExecutor().execute(
|
|
2627
|
+
process=self.cwltool,
|
|
2628
|
+
job_order_object=cwljob,
|
|
2629
|
+
runtime_context=runtime_context,
|
|
2630
|
+
logger=cwllogger,
|
|
2631
|
+
)
|
|
2632
|
+
finally:
|
|
2633
|
+
ended_at = datetime.datetime.now() # noqa F841
|
|
2634
|
+
|
|
2635
|
+
# Log any output/error data
|
|
2636
|
+
default_stdout.seek(0, os.SEEK_END)
|
|
2637
|
+
if default_stdout.tell() > 0:
|
|
2638
|
+
default_stdout.seek(0)
|
|
2639
|
+
file_store.log_user_stream(self.description.unitName + '.stdout', default_stdout)
|
|
2640
|
+
if status != "success":
|
|
2641
|
+
default_stdout.seek(0)
|
|
2642
|
+
logger.error("Failed command standard output:\n%s", default_stdout.read().decode("utf-8", errors="replace"))
|
|
2643
|
+
default_stderr.seek(0, os.SEEK_END)
|
|
2644
|
+
if default_stderr.tell():
|
|
2645
|
+
default_stderr.seek(0)
|
|
2646
|
+
file_store.log_user_stream(self.description.unitName + '.stderr', default_stderr)
|
|
2647
|
+
if status != "success":
|
|
2648
|
+
default_stderr.seek(0)
|
|
2649
|
+
logger.error("Failed command standard error:\n%s", default_stderr.read().decode("utf-8", errors="replace"))
|
|
2650
|
+
|
|
2372
2651
|
if status != "success":
|
|
2373
2652
|
raise cwl_utils.errors.WorkflowException(status)
|
|
2374
2653
|
|
|
@@ -2395,6 +2674,8 @@ class CWLJob(CWLNamedJob):
|
|
|
2395
2674
|
|
|
2396
2675
|
logger.debug("Emitting output: %s", output)
|
|
2397
2676
|
|
|
2677
|
+
file_store.log_to_leader(f"CWL step complete: {runtime_context.name}")
|
|
2678
|
+
|
|
2398
2679
|
# metadata[process_uuid] = {
|
|
2399
2680
|
# 'started_at': started_at,
|
|
2400
2681
|
# 'ended_at': ended_at,
|
|
@@ -2782,6 +3063,10 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
2782
3063
|
if self.conditional.is_false(cwljob):
|
|
2783
3064
|
return self.conditional.skipped_outputs()
|
|
2784
3065
|
|
|
3066
|
+
# Apply default values set in the workflow
|
|
3067
|
+
fs_access = ToilFsAccess(self.runtime_context.basedir, file_store=file_store)
|
|
3068
|
+
fill_in_defaults(self.cwlwf.tool["inputs"], cwljob, fs_access)
|
|
3069
|
+
|
|
2785
3070
|
# `promises` dict
|
|
2786
3071
|
# from: each parameter (workflow input or step output)
|
|
2787
3072
|
# that may be used as a "source" for a step input workflow output
|
|
@@ -2844,6 +3129,10 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
2844
3129
|
get_container_engine(self.runtime_context),
|
|
2845
3130
|
)
|
|
2846
3131
|
|
|
3132
|
+
logger.debug(
|
|
3133
|
+
"Value will come from %s", jobobj.get(key, None)
|
|
3134
|
+
)
|
|
3135
|
+
|
|
2847
3136
|
conditional = Conditional(
|
|
2848
3137
|
expression=step.tool.get("when"),
|
|
2849
3138
|
outputs=step.tool["out"],
|
|
@@ -3042,8 +3331,8 @@ def scan_for_unsupported_requirements(
|
|
|
3042
3331
|
:param tool: The CWL tool to check for unsupported requirements.
|
|
3043
3332
|
|
|
3044
3333
|
:param bypass_file_store: True if the Toil file store is not being used to
|
|
3045
|
-
|
|
3046
|
-
|
|
3334
|
+
transport files between nodes, and raw origin node file:// URIs are exposed
|
|
3335
|
+
to tools instead.
|
|
3047
3336
|
|
|
3048
3337
|
"""
|
|
3049
3338
|
|
|
@@ -3080,24 +3369,31 @@ def determine_load_listing(
|
|
|
3080
3369
|
DIRECTORY_NAME is any variable name) set to one of the following three
|
|
3081
3370
|
options:
|
|
3082
3371
|
|
|
3083
|
-
|
|
3084
|
-
|
|
3372
|
+
1. no_listing: DIRECTORY_NAME.listing will be undefined.
|
|
3373
|
+
e.g.
|
|
3374
|
+
|
|
3375
|
+
inputs.DIRECTORY_NAME.listing == unspecified
|
|
3376
|
+
|
|
3377
|
+
2. shallow_listing: DIRECTORY_NAME.listing will return a list one level
|
|
3378
|
+
deep of DIRECTORY_NAME's contents.
|
|
3379
|
+
e.g.
|
|
3380
|
+
|
|
3381
|
+
inputs.DIRECTORY_NAME.listing == [items in directory]
|
|
3382
|
+
inputs.DIRECTORY_NAME.listing[0].listing == undefined
|
|
3383
|
+
inputs.DIRECTORY_NAME.listing.length == # of items in directory
|
|
3085
3384
|
|
|
3086
|
-
|
|
3087
|
-
|
|
3088
|
-
|
|
3089
|
-
inputs.DIRECTORY_NAME.listing[0].listing == undefined
|
|
3090
|
-
inputs.DIRECTORY_NAME.listing.length == # of items in directory
|
|
3385
|
+
3. deep_listing: DIRECTORY_NAME.listing will return a list of the entire
|
|
3386
|
+
contents of DIRECTORY_NAME.
|
|
3387
|
+
e.g.
|
|
3091
3388
|
|
|
3092
|
-
|
|
3093
|
-
|
|
3094
|
-
|
|
3095
|
-
|
|
3096
|
-
in subdirectory if it exists and is the first item listed]
|
|
3097
|
-
inputs.DIRECTORY_NAME.listing.length == # of items in directory
|
|
3389
|
+
inputs.DIRECTORY_NAME.listing == [items in directory]
|
|
3390
|
+
inputs.DIRECTORY_NAME.listing[0].listing == [items in subdirectory
|
|
3391
|
+
if it exists and is the first item listed]
|
|
3392
|
+
inputs.DIRECTORY_NAME.listing.length == # of items in directory
|
|
3098
3393
|
|
|
3099
|
-
See
|
|
3100
|
-
|
|
3394
|
+
See
|
|
3395
|
+
https://www.commonwl.org/v1.1/CommandLineTool.html#LoadListingRequirement
|
|
3396
|
+
and https://www.commonwl.org/v1.1/CommandLineTool.html#LoadListingEnum
|
|
3101
3397
|
|
|
3102
3398
|
DIRECTORY_NAME.listing should be determined first from loadListing.
|
|
3103
3399
|
If that's not specified, from LoadListingRequirement.
|
|
@@ -3209,6 +3505,20 @@ usage_message = "\n\n" + textwrap.dedent(
|
|
|
3209
3505
|
]
|
|
3210
3506
|
)
|
|
3211
3507
|
|
|
3508
|
+
def get_options(args: List[str]) -> Namespace:
|
|
3509
|
+
"""
|
|
3510
|
+
Parse given args and properly add non-Toil arguments into the cwljob of the Namespace.
|
|
3511
|
+
:param args: List of args from command line
|
|
3512
|
+
:return: options namespace
|
|
3513
|
+
"""
|
|
3514
|
+
parser = ArgParser()
|
|
3515
|
+
addOptions(parser, jobstore_as_flag=True, cwl=True)
|
|
3516
|
+
options: Namespace
|
|
3517
|
+
options, cwl_options = parser.parse_known_args(args)
|
|
3518
|
+
options.cwljob.extend(cwl_options)
|
|
3519
|
+
|
|
3520
|
+
return options
|
|
3521
|
+
|
|
3212
3522
|
|
|
3213
3523
|
def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
3214
3524
|
"""Run the main loop for toil-cwl-runner."""
|
|
@@ -3218,334 +3528,20 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3218
3528
|
if args is None:
|
|
3219
3529
|
args = sys.argv[1:]
|
|
3220
3530
|
|
|
3221
|
-
|
|
3222
|
-
config.disableChaining = True
|
|
3223
|
-
config.cwl = True
|
|
3224
|
-
parser = argparse.ArgumentParser()
|
|
3225
|
-
addOptions(parser, config, jobstore_as_flag=True)
|
|
3226
|
-
parser.add_argument("cwltool", type=str)
|
|
3227
|
-
parser.add_argument("cwljob", nargs=argparse.REMAINDER)
|
|
3228
|
-
|
|
3229
|
-
parser.add_argument("--not-strict", action="store_true")
|
|
3230
|
-
parser.add_argument(
|
|
3231
|
-
"--enable-dev",
|
|
3232
|
-
action="store_true",
|
|
3233
|
-
help="Enable loading and running development versions of CWL",
|
|
3234
|
-
)
|
|
3235
|
-
parser.add_argument(
|
|
3236
|
-
"--enable-ext",
|
|
3237
|
-
action="store_true",
|
|
3238
|
-
help="Enable loading and running 'cwltool:' extensions to the CWL standards.",
|
|
3239
|
-
default=False,
|
|
3240
|
-
)
|
|
3241
|
-
parser.add_argument("--quiet", dest="quiet", action="store_true", default=False)
|
|
3242
|
-
parser.add_argument("--basedir", type=str) # TODO: Might be hard-coded?
|
|
3243
|
-
parser.add_argument("--outdir", type=str, default=os.getcwd())
|
|
3244
|
-
parser.add_argument("--version", action="version", version=baseVersion)
|
|
3245
|
-
parser.add_argument(
|
|
3246
|
-
"--log-dir",
|
|
3247
|
-
type=str,
|
|
3248
|
-
default="",
|
|
3249
|
-
help="Log your tools stdout/stderr to this location outside of container",
|
|
3250
|
-
)
|
|
3251
|
-
dockergroup = parser.add_mutually_exclusive_group()
|
|
3252
|
-
dockergroup.add_argument(
|
|
3253
|
-
"--user-space-docker-cmd",
|
|
3254
|
-
help="(Linux/OS X only) Specify a user space docker command (like "
|
|
3255
|
-
"udocker or dx-docker) that will be used to call 'pull' and 'run'",
|
|
3256
|
-
)
|
|
3257
|
-
dockergroup.add_argument(
|
|
3258
|
-
"--singularity",
|
|
3259
|
-
action="store_true",
|
|
3260
|
-
default=False,
|
|
3261
|
-
help="Use Singularity runtime for running containers. "
|
|
3262
|
-
"Requires Singularity v2.6.1+ and Linux with kernel version v3.18+ or "
|
|
3263
|
-
"with overlayfs support backported.",
|
|
3264
|
-
)
|
|
3265
|
-
dockergroup.add_argument(
|
|
3266
|
-
"--podman",
|
|
3267
|
-
action="store_true",
|
|
3268
|
-
default=False,
|
|
3269
|
-
help="Use Podman runtime for running containers. ",
|
|
3270
|
-
)
|
|
3271
|
-
dockergroup.add_argument(
|
|
3272
|
-
"--no-container",
|
|
3273
|
-
action="store_true",
|
|
3274
|
-
help="Do not execute jobs in a "
|
|
3275
|
-
"Docker container, even when `DockerRequirement` "
|
|
3276
|
-
"is specified under `hints`.",
|
|
3277
|
-
)
|
|
3278
|
-
dockergroup.add_argument(
|
|
3279
|
-
"--leave-container",
|
|
3280
|
-
action="store_false",
|
|
3281
|
-
default=True,
|
|
3282
|
-
help="Do not delete Docker container used by jobs after they exit",
|
|
3283
|
-
dest="rm_container",
|
|
3284
|
-
)
|
|
3285
|
-
extra_dockergroup = parser.add_argument_group()
|
|
3286
|
-
extra_dockergroup.add_argument(
|
|
3287
|
-
"--custom-net",
|
|
3288
|
-
help="Specify docker network name to pass to docker run command",
|
|
3289
|
-
)
|
|
3290
|
-
cidgroup = parser.add_argument_group(
|
|
3291
|
-
"Options for recording the Docker container identifier into a file."
|
|
3292
|
-
)
|
|
3293
|
-
cidgroup.add_argument(
|
|
3294
|
-
# Disabled as containerid is now saved by default
|
|
3295
|
-
"--record-container-id",
|
|
3296
|
-
action="store_true",
|
|
3297
|
-
default=False,
|
|
3298
|
-
help=argparse.SUPPRESS,
|
|
3299
|
-
dest="record_container_id",
|
|
3300
|
-
)
|
|
3301
|
-
|
|
3302
|
-
cidgroup.add_argument(
|
|
3303
|
-
"--cidfile-dir",
|
|
3304
|
-
type=str,
|
|
3305
|
-
help="Store the Docker container ID into a file in the specified directory.",
|
|
3306
|
-
default=None,
|
|
3307
|
-
dest="cidfile_dir",
|
|
3308
|
-
)
|
|
3309
|
-
|
|
3310
|
-
cidgroup.add_argument(
|
|
3311
|
-
"--cidfile-prefix",
|
|
3312
|
-
type=str,
|
|
3313
|
-
help="Specify a prefix to the container ID filename. "
|
|
3314
|
-
"Final file name will be followed by a timestamp. "
|
|
3315
|
-
"The default is no prefix.",
|
|
3316
|
-
default=None,
|
|
3317
|
-
dest="cidfile_prefix",
|
|
3318
|
-
)
|
|
3319
|
-
|
|
3320
|
-
parser.add_argument(
|
|
3321
|
-
"--preserve-environment",
|
|
3322
|
-
type=str,
|
|
3323
|
-
nargs="+",
|
|
3324
|
-
help="Preserve specified environment variables when running"
|
|
3325
|
-
" CommandLineTools",
|
|
3326
|
-
metavar=("VAR1 VAR2"),
|
|
3327
|
-
default=("PATH",),
|
|
3328
|
-
dest="preserve_environment",
|
|
3329
|
-
)
|
|
3330
|
-
parser.add_argument(
|
|
3331
|
-
"--preserve-entire-environment",
|
|
3332
|
-
action="store_true",
|
|
3333
|
-
help="Preserve all environment variable when running CommandLineTools.",
|
|
3334
|
-
default=False,
|
|
3335
|
-
dest="preserve_entire_environment",
|
|
3336
|
-
)
|
|
3337
|
-
parser.add_argument(
|
|
3338
|
-
"--destBucket",
|
|
3339
|
-
type=str,
|
|
3340
|
-
help="Specify a cloud bucket endpoint for output files.",
|
|
3341
|
-
)
|
|
3342
|
-
parser.add_argument("--beta-dependency-resolvers-configuration", default=None)
|
|
3343
|
-
parser.add_argument("--beta-dependencies-directory", default=None)
|
|
3344
|
-
parser.add_argument("--beta-use-biocontainers", default=None, action="store_true")
|
|
3345
|
-
parser.add_argument("--beta-conda-dependencies", default=None, action="store_true")
|
|
3346
|
-
parser.add_argument(
|
|
3347
|
-
"--tmpdir-prefix",
|
|
3348
|
-
type=str,
|
|
3349
|
-
help="Path prefix for temporary directories",
|
|
3350
|
-
default=DEFAULT_TMPDIR_PREFIX,
|
|
3351
|
-
)
|
|
3352
|
-
parser.add_argument(
|
|
3353
|
-
"--tmp-outdir-prefix",
|
|
3354
|
-
type=str,
|
|
3355
|
-
help="Path prefix for intermediate output directories",
|
|
3356
|
-
default=DEFAULT_TMPDIR_PREFIX,
|
|
3357
|
-
)
|
|
3358
|
-
parser.add_argument(
|
|
3359
|
-
"--force-docker-pull",
|
|
3360
|
-
action="store_true",
|
|
3361
|
-
default=False,
|
|
3362
|
-
dest="force_docker_pull",
|
|
3363
|
-
help="Pull latest docker image even if it is locally present",
|
|
3364
|
-
)
|
|
3365
|
-
parser.add_argument(
|
|
3366
|
-
"--no-match-user",
|
|
3367
|
-
action="store_true",
|
|
3368
|
-
default=False,
|
|
3369
|
-
help="Disable passing the current uid to `docker run --user`",
|
|
3370
|
-
)
|
|
3371
|
-
parser.add_argument(
|
|
3372
|
-
"--no-read-only",
|
|
3373
|
-
action="store_true",
|
|
3374
|
-
default=False,
|
|
3375
|
-
help="Do not set root directory in the container as read-only",
|
|
3376
|
-
)
|
|
3377
|
-
parser.add_argument(
|
|
3378
|
-
"--strict-memory-limit",
|
|
3379
|
-
action="store_true",
|
|
3380
|
-
help="When running with "
|
|
3381
|
-
"software containers and the Docker engine, pass either the "
|
|
3382
|
-
"calculated memory allocation from ResourceRequirements or the "
|
|
3383
|
-
"default of 1 gigabyte to Docker's --memory option.",
|
|
3384
|
-
)
|
|
3385
|
-
parser.add_argument(
|
|
3386
|
-
"--strict-cpu-limit",
|
|
3387
|
-
action="store_true",
|
|
3388
|
-
help="When running with "
|
|
3389
|
-
"software containers and the Docker engine, pass either the "
|
|
3390
|
-
"calculated cpu allocation from ResourceRequirements or the "
|
|
3391
|
-
"default of 1 core to Docker's --cpu option. "
|
|
3392
|
-
"Requires docker version >= v1.13.",
|
|
3393
|
-
)
|
|
3394
|
-
parser.add_argument(
|
|
3395
|
-
"--relax-path-checks",
|
|
3396
|
-
action="store_true",
|
|
3397
|
-
default=False,
|
|
3398
|
-
help="Relax requirements on path names to permit "
|
|
3399
|
-
"spaces and hash characters.",
|
|
3400
|
-
dest="relax_path_checks",
|
|
3401
|
-
)
|
|
3402
|
-
parser.add_argument(
|
|
3403
|
-
"--default-container",
|
|
3404
|
-
help="Specify a default docker container that will be "
|
|
3405
|
-
"used if the workflow fails to specify one.",
|
|
3406
|
-
)
|
|
3407
|
-
parser.add_argument(
|
|
3408
|
-
"--disable-validate",
|
|
3409
|
-
dest="do_validate",
|
|
3410
|
-
action="store_false",
|
|
3411
|
-
default=True,
|
|
3412
|
-
help=argparse.SUPPRESS,
|
|
3413
|
-
)
|
|
3414
|
-
parser.add_argument(
|
|
3415
|
-
"--fast-parser",
|
|
3416
|
-
dest="fast_parser",
|
|
3417
|
-
action="store_true",
|
|
3418
|
-
default=False,
|
|
3419
|
-
help=argparse.SUPPRESS,
|
|
3420
|
-
)
|
|
3421
|
-
checkgroup = parser.add_mutually_exclusive_group()
|
|
3422
|
-
checkgroup.add_argument(
|
|
3423
|
-
"--compute-checksum",
|
|
3424
|
-
action="store_true",
|
|
3425
|
-
default=True,
|
|
3426
|
-
help="Compute checksum of contents while collecting outputs",
|
|
3427
|
-
dest="compute_checksum",
|
|
3428
|
-
)
|
|
3429
|
-
checkgroup.add_argument(
|
|
3430
|
-
"--no-compute-checksum",
|
|
3431
|
-
action="store_false",
|
|
3432
|
-
help="Do not compute checksum of contents while collecting outputs",
|
|
3433
|
-
dest="compute_checksum",
|
|
3434
|
-
)
|
|
3435
|
-
|
|
3436
|
-
parser.add_argument(
|
|
3437
|
-
"--eval-timeout",
|
|
3438
|
-
help="Time to wait for a Javascript expression to evaluate before giving "
|
|
3439
|
-
"an error, default 20s.",
|
|
3440
|
-
type=float,
|
|
3441
|
-
default=20,
|
|
3442
|
-
)
|
|
3443
|
-
parser.add_argument(
|
|
3444
|
-
"--overrides",
|
|
3445
|
-
type=str,
|
|
3446
|
-
default=None,
|
|
3447
|
-
help="Read process requirement overrides from file.",
|
|
3448
|
-
)
|
|
3449
|
-
|
|
3450
|
-
parser.add_argument(
|
|
3451
|
-
"--mpi-config-file",
|
|
3452
|
-
type=str,
|
|
3453
|
-
default=None,
|
|
3454
|
-
help="Platform specific configuration for MPI (parallel "
|
|
3455
|
-
"launcher, its flag etc). See the cwltool README "
|
|
3456
|
-
"section 'Running MPI-based tools' for details of the format: "
|
|
3457
|
-
"https://github.com/common-workflow-language/cwltool#running-mpi-based-tools-that-need-to-be-launched",
|
|
3458
|
-
)
|
|
3459
|
-
parser.add_argument(
|
|
3460
|
-
"--bypass-file-store",
|
|
3461
|
-
action="store_true",
|
|
3462
|
-
default=False,
|
|
3463
|
-
help="Do not use Toil's file store and assume all "
|
|
3464
|
-
"paths are accessible in place from all nodes.",
|
|
3465
|
-
dest="bypass_file_store",
|
|
3466
|
-
)
|
|
3467
|
-
parser.add_argument(
|
|
3468
|
-
"--disable-streaming",
|
|
3469
|
-
action="store_true",
|
|
3470
|
-
default=False,
|
|
3471
|
-
help="Disable file streaming for files that have 'streamable' flag True",
|
|
3472
|
-
dest="disable_streaming",
|
|
3473
|
-
)
|
|
3474
|
-
|
|
3475
|
-
provgroup = parser.add_argument_group(
|
|
3476
|
-
"Options for recording provenance information of the execution"
|
|
3477
|
-
)
|
|
3478
|
-
provgroup.add_argument(
|
|
3479
|
-
"--provenance",
|
|
3480
|
-
help="Save provenance to specified folder as a "
|
|
3481
|
-
"Research Object that captures and aggregates "
|
|
3482
|
-
"workflow execution and data products.",
|
|
3483
|
-
type=str,
|
|
3484
|
-
)
|
|
3485
|
-
|
|
3486
|
-
provgroup.add_argument(
|
|
3487
|
-
"--enable-user-provenance",
|
|
3488
|
-
default=False,
|
|
3489
|
-
action="store_true",
|
|
3490
|
-
help="Record user account info as part of provenance.",
|
|
3491
|
-
dest="user_provenance",
|
|
3492
|
-
)
|
|
3493
|
-
provgroup.add_argument(
|
|
3494
|
-
"--disable-user-provenance",
|
|
3495
|
-
default=False,
|
|
3496
|
-
action="store_false",
|
|
3497
|
-
help="Do not record user account info in provenance.",
|
|
3498
|
-
dest="user_provenance",
|
|
3499
|
-
)
|
|
3500
|
-
provgroup.add_argument(
|
|
3501
|
-
"--enable-host-provenance",
|
|
3502
|
-
default=False,
|
|
3503
|
-
action="store_true",
|
|
3504
|
-
help="Record host info as part of provenance.",
|
|
3505
|
-
dest="host_provenance",
|
|
3506
|
-
)
|
|
3507
|
-
provgroup.add_argument(
|
|
3508
|
-
"--disable-host-provenance",
|
|
3509
|
-
default=False,
|
|
3510
|
-
action="store_false",
|
|
3511
|
-
help="Do not record host info in provenance.",
|
|
3512
|
-
dest="host_provenance",
|
|
3513
|
-
)
|
|
3514
|
-
provgroup.add_argument(
|
|
3515
|
-
"--orcid",
|
|
3516
|
-
help="Record user ORCID identifier as part of "
|
|
3517
|
-
"provenance, e.g. https://orcid.org/0000-0002-1825-0097 "
|
|
3518
|
-
"or 0000-0002-1825-0097. Alternatively the environment variable "
|
|
3519
|
-
"ORCID may be set.",
|
|
3520
|
-
dest="orcid",
|
|
3521
|
-
default=os.environ.get("ORCID", ""),
|
|
3522
|
-
type=str,
|
|
3523
|
-
)
|
|
3524
|
-
provgroup.add_argument(
|
|
3525
|
-
"--full-name",
|
|
3526
|
-
help="Record full name of user as part of provenance, "
|
|
3527
|
-
"e.g. Josiah Carberry. You may need to use shell quotes to preserve "
|
|
3528
|
-
"spaces. Alternatively the environment variable CWL_FULL_NAME may "
|
|
3529
|
-
"be set.",
|
|
3530
|
-
dest="cwl_full_name",
|
|
3531
|
-
default=os.environ.get("CWL_FULL_NAME", ""),
|
|
3532
|
-
type=str,
|
|
3533
|
-
)
|
|
3534
|
-
|
|
3535
|
-
# Parse all the options once.
|
|
3536
|
-
options = parser.parse_args(args)
|
|
3531
|
+
options = get_options(args)
|
|
3537
3532
|
|
|
3538
3533
|
# Do cwltool setup
|
|
3539
3534
|
cwltool.main.setup_schema(args=options, custom_schema_callback=None)
|
|
3535
|
+
tmpdir_prefix = options.tmpdir_prefix = options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
|
|
3540
3536
|
|
|
3541
3537
|
# We need a workdir for the CWL runtime contexts.
|
|
3542
|
-
if
|
|
3538
|
+
if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX:
|
|
3543
3539
|
# if tmpdir_prefix is not the default value, move
|
|
3544
3540
|
# workdir and the default job store under it
|
|
3545
|
-
workdir = cwltool.utils.create_tmp_dir(
|
|
3541
|
+
workdir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
|
|
3546
3542
|
else:
|
|
3547
3543
|
# Use a directory in the default tmpdir
|
|
3548
|
-
workdir =
|
|
3544
|
+
workdir = mkdtemp()
|
|
3549
3545
|
# Make sure workdir doesn't exist so it can be a job store
|
|
3550
3546
|
os.rmdir(workdir)
|
|
3551
3547
|
|
|
@@ -3562,13 +3558,13 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3562
3558
|
options.do_validate = True
|
|
3563
3559
|
options.pack = False
|
|
3564
3560
|
options.print_subgraph = False
|
|
3565
|
-
if
|
|
3561
|
+
if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.workDir is None:
|
|
3566
3562
|
# We need to override workDir because by default Toil will pick
|
|
3567
3563
|
# somewhere under the system temp directory if unset, ignoring
|
|
3568
3564
|
# --tmpdir-prefix.
|
|
3569
3565
|
#
|
|
3570
3566
|
# If set, workDir needs to exist, so we directly use the prefix
|
|
3571
|
-
options.workDir = cwltool.utils.create_tmp_dir(
|
|
3567
|
+
options.workDir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
|
|
3572
3568
|
|
|
3573
3569
|
if options.batchSystem == "kubernetes":
|
|
3574
3570
|
# Containers under Kubernetes can only run in Singularity
|
|
@@ -3585,8 +3581,10 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3585
3581
|
|
|
3586
3582
|
logger.debug(f"Final job store {options.jobStore} and workDir {options.workDir}")
|
|
3587
3583
|
|
|
3588
|
-
outdir = os.path.abspath(options.outdir)
|
|
3589
|
-
tmp_outdir_prefix = os.path.abspath(
|
|
3584
|
+
outdir = os.path.abspath(options.outdir or os.getcwd())
|
|
3585
|
+
tmp_outdir_prefix = os.path.abspath(
|
|
3586
|
+
options.tmp_outdir_prefix or DEFAULT_TMPDIR_PREFIX
|
|
3587
|
+
)
|
|
3590
3588
|
|
|
3591
3589
|
fileindex: Dict[str, str] = {}
|
|
3592
3590
|
existing: Dict[str, str] = {}
|
|
@@ -3597,13 +3595,13 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3597
3595
|
dependencies_configuration = DependenciesConfiguration(options)
|
|
3598
3596
|
job_script_provider = dependencies_configuration
|
|
3599
3597
|
|
|
3600
|
-
options.default_container = None
|
|
3601
3598
|
runtime_context = cwltool.context.RuntimeContext(vars(options))
|
|
3602
3599
|
runtime_context.toplevel = True # enable discovery of secondaryFiles
|
|
3603
3600
|
runtime_context.find_default_container = functools.partial(
|
|
3604
3601
|
find_default_container, options
|
|
3605
3602
|
)
|
|
3606
3603
|
runtime_context.workdir = workdir # type: ignore[attr-defined]
|
|
3604
|
+
runtime_context.outdir = outdir
|
|
3607
3605
|
runtime_context.move_outputs = "leave"
|
|
3608
3606
|
runtime_context.rm_tmpdir = False
|
|
3609
3607
|
runtime_context.streaming_allowed = not options.disable_streaming
|
|
@@ -3621,12 +3619,16 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3621
3619
|
# Otherwise, if it takes a File with loadContents from a URL, we won't
|
|
3622
3620
|
# be able to load the contents when we need to.
|
|
3623
3621
|
runtime_context.make_fs_access = ToilFsAccess
|
|
3622
|
+
if options.reference_inputs and options.bypass_file_store:
|
|
3623
|
+
# We can't do both of these at the same time.
|
|
3624
|
+
logger.error("Cannot reference inputs when bypassing the file store")
|
|
3625
|
+
return 1
|
|
3624
3626
|
|
|
3625
3627
|
loading_context = cwltool.main.setup_loadingContext(None, runtime_context, options)
|
|
3626
3628
|
|
|
3627
3629
|
if options.provenance:
|
|
3628
3630
|
research_obj = cwltool.cwlprov.ro.ResearchObject(
|
|
3629
|
-
temp_prefix_ro=
|
|
3631
|
+
temp_prefix_ro=tmp_outdir_prefix,
|
|
3630
3632
|
orcid=options.orcid,
|
|
3631
3633
|
full_name=options.cwl_full_name,
|
|
3632
3634
|
fsaccess=runtime_context.make_fs_access(""),
|
|
@@ -3701,7 +3703,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3701
3703
|
loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
|
|
3702
3704
|
loading_context, workflowobj, uri
|
|
3703
3705
|
)
|
|
3704
|
-
|
|
3706
|
+
if not loading_context.loader:
|
|
3707
|
+
raise RuntimeError("cwltool loader is not set.")
|
|
3705
3708
|
processobj, metadata = loading_context.loader.resolve_ref(uri)
|
|
3706
3709
|
processobj = cast(Union[CommentedMap, CommentedSeq], processobj)
|
|
3707
3710
|
|
|
@@ -3748,10 +3751,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3748
3751
|
)
|
|
3749
3752
|
raise
|
|
3750
3753
|
|
|
3751
|
-
#
|
|
3752
|
-
#
|
|
3753
|
-
fs_access = ToilFsAccess(options.basedir)
|
|
3754
|
-
fill_in_defaults(tool.tool["inputs"], initialized_job_order, fs_access)
|
|
3754
|
+
# Leave the defaults un-filled in the top-level order. The tool or
|
|
3755
|
+
# workflow will fill them when it runs
|
|
3755
3756
|
|
|
3756
3757
|
for inp in tool.tool["inputs"]:
|
|
3757
3758
|
if (
|
|
@@ -3809,6 +3810,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3809
3810
|
|
|
3810
3811
|
# Import all the input files, some of which may be missing optional
|
|
3811
3812
|
# files.
|
|
3813
|
+
logger.info("Importing input files...")
|
|
3814
|
+
fs_access = ToilFsAccess(options.basedir)
|
|
3812
3815
|
import_files(
|
|
3813
3816
|
file_import_function,
|
|
3814
3817
|
fs_access,
|
|
@@ -3816,11 +3819,14 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3816
3819
|
existing,
|
|
3817
3820
|
initialized_job_order,
|
|
3818
3821
|
skip_broken=True,
|
|
3822
|
+
skip_remote=options.reference_inputs,
|
|
3819
3823
|
bypass_file_store=options.bypass_file_store,
|
|
3824
|
+
log_level=logging.INFO,
|
|
3820
3825
|
)
|
|
3821
3826
|
# Import all the files associated with tools (binaries, etc.).
|
|
3822
3827
|
# Not sure why you would have an optional secondary file here, but
|
|
3823
3828
|
# the spec probably needs us to support them.
|
|
3829
|
+
logger.info("Importing tool-associated files...")
|
|
3824
3830
|
visitSteps(
|
|
3825
3831
|
tool,
|
|
3826
3832
|
functools.partial(
|
|
@@ -3830,7 +3836,9 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3830
3836
|
fileindex,
|
|
3831
3837
|
existing,
|
|
3832
3838
|
skip_broken=True,
|
|
3839
|
+
skip_remote=options.reference_inputs,
|
|
3833
3840
|
bypass_file_store=options.bypass_file_store,
|
|
3841
|
+
log_level=logging.INFO,
|
|
3834
3842
|
),
|
|
3835
3843
|
)
|
|
3836
3844
|
|
|
@@ -3843,7 +3851,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3843
3851
|
# were required.
|
|
3844
3852
|
rm_unprocessed_secondary_files(param_value)
|
|
3845
3853
|
|
|
3846
|
-
logger.
|
|
3854
|
+
logger.info("Creating root job")
|
|
3855
|
+
logger.debug("Root tool: %s", tool)
|
|
3847
3856
|
try:
|
|
3848
3857
|
wf1, _ = makeJob(
|
|
3849
3858
|
tool=tool,
|
|
@@ -3856,6 +3865,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3856
3865
|
logging.error(err)
|
|
3857
3866
|
return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
|
|
3858
3867
|
wf1.cwljob = initialized_job_order
|
|
3868
|
+
logger.info("Starting workflow")
|
|
3859
3869
|
try:
|
|
3860
3870
|
outobj = toil.start(wf1)
|
|
3861
3871
|
except FailedJobsException as err:
|
|
@@ -3871,13 +3881,20 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3871
3881
|
|
|
3872
3882
|
# Now the workflow has completed. We need to make sure the outputs (and
|
|
3873
3883
|
# inputs) end up where the user wants them to be.
|
|
3874
|
-
|
|
3884
|
+
logger.info("Collecting workflow outputs...")
|
|
3875
3885
|
outobj = resolve_dict_w_promises(outobj)
|
|
3876
3886
|
|
|
3877
3887
|
# Stage files. Specify destination bucket if specified in CLI
|
|
3878
3888
|
# options. If destination bucket not passed in,
|
|
3879
3889
|
# options.destBucket's value will be None.
|
|
3880
|
-
toilStageFiles(
|
|
3890
|
+
toilStageFiles(
|
|
3891
|
+
toil,
|
|
3892
|
+
outobj,
|
|
3893
|
+
outdir,
|
|
3894
|
+
destBucket=options.destBucket,
|
|
3895
|
+
log_level=logging.INFO
|
|
3896
|
+
)
|
|
3897
|
+
logger.info("Stored workflow outputs")
|
|
3881
3898
|
|
|
3882
3899
|
if runtime_context.research_obj is not None:
|
|
3883
3900
|
cwltool.cwlprov.writablebagfile.create_job(
|
|
@@ -3904,7 +3921,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3904
3921
|
("File",),
|
|
3905
3922
|
functools.partial(add_sizes, runtime_context.make_fs_access("")),
|
|
3906
3923
|
)
|
|
3907
|
-
|
|
3924
|
+
if not document_loader:
|
|
3925
|
+
raise RuntimeError("cwltool loader is not set.")
|
|
3908
3926
|
prov_dependencies = cwltool.main.prov_deps(
|
|
3909
3927
|
workflowobj, document_loader, uri
|
|
3910
3928
|
)
|
|
@@ -3914,6 +3932,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3914
3932
|
)
|
|
3915
3933
|
|
|
3916
3934
|
if not options.destBucket and options.compute_checksum:
|
|
3935
|
+
logger.info("Computing output file checksums...")
|
|
3917
3936
|
visit_class(
|
|
3918
3937
|
outobj,
|
|
3919
3938
|
("File",),
|
|
@@ -3922,12 +3941,14 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
3922
3941
|
|
|
3923
3942
|
visit_class(outobj, ("File",), MutationManager().unset_generation)
|
|
3924
3943
|
stdout.write(json.dumps(outobj, indent=4, default=str))
|
|
3944
|
+
stdout.write("\n")
|
|
3945
|
+
logger.info("CWL run complete!")
|
|
3925
3946
|
|
|
3926
3947
|
return 0
|
|
3927
3948
|
|
|
3928
3949
|
|
|
3929
3950
|
def find_default_container(
|
|
3930
|
-
args:
|
|
3951
|
+
args: Namespace, builder: cwltool.builder.Builder
|
|
3931
3952
|
) -> Optional[str]:
|
|
3932
3953
|
"""Find the default constructor by consulting a Toil.options object."""
|
|
3933
3954
|
if args.default_container:
|