toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +21 -10
- toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +3 -3
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +6 -8
- toil/common.py +532 -743
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +523 -520
- toil/cwl/utils.py +55 -10
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +36 -11
- toil/fileStores/cachingFileStore.py +607 -530
- toil/fileStores/nonCachingFileStore.py +43 -10
- toil/job.py +140 -75
- toil/jobStores/abstractJobStore.py +147 -79
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +1 -2
- toil/jobStores/fileJobStore.py +117 -19
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +71 -43
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +7 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +21 -0
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +1 -1
- toil/lib/threading.py +74 -26
- toil/options/common.py +738 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +32 -0
- toil/provisioners/abstractProvisioner.py +1 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +5 -1
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +3 -2
- toil/test/cwl/cwlTest.py +213 -90
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +0 -1
- toil/test/jobStores/jobStoreTest.py +27 -16
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +11 -16
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +14 -3
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +20 -0
- toil/test/wdl/wdltoil_test.py +148 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +240 -143
- toil/utils/toilStatus.py +1 -4
- toil/version.py +11 -11
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +999 -386
- toil/worker.py +25 -31
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
- toil-6.1.0a1.dist-info/RECORD +237 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/conftest.py +0 -23
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py
CHANGED
|
@@ -17,23 +17,24 @@
|
|
|
17
17
|
|
|
18
18
|
# For an overview of how this all works, see discussion in
|
|
19
19
|
# docs/architecture.rst
|
|
20
|
-
import argparse
|
|
21
20
|
import base64
|
|
22
21
|
import copy
|
|
23
22
|
import datetime
|
|
24
23
|
import errno
|
|
25
24
|
import functools
|
|
25
|
+
import glob
|
|
26
|
+
import io
|
|
26
27
|
import json
|
|
27
28
|
import logging
|
|
28
29
|
import os
|
|
30
|
+
import pprint
|
|
29
31
|
import shutil
|
|
30
32
|
import socket
|
|
31
33
|
import stat
|
|
32
34
|
import sys
|
|
33
|
-
import tempfile
|
|
34
35
|
import textwrap
|
|
35
|
-
import urllib
|
|
36
36
|
import uuid
|
|
37
|
+
from tempfile import NamedTemporaryFile, gettempdir
|
|
37
38
|
from threading import Thread
|
|
38
39
|
from typing import (
|
|
39
40
|
IO,
|
|
@@ -52,8 +53,9 @@ from typing import (
|
|
|
52
53
|
TypeVar,
|
|
53
54
|
Union,
|
|
54
55
|
cast,
|
|
56
|
+
Sequence,
|
|
55
57
|
)
|
|
56
|
-
from urllib.parse import
|
|
58
|
+
from urllib.parse import quote, unquote, urlparse, urlsplit
|
|
57
59
|
|
|
58
60
|
import cwl_utils.errors
|
|
59
61
|
import cwl_utils.expression
|
|
@@ -66,6 +68,7 @@ import cwltool.load_tool
|
|
|
66
68
|
import cwltool.main
|
|
67
69
|
import cwltool.resolver
|
|
68
70
|
import schema_salad.ref_resolver
|
|
71
|
+
from configargparse import ArgParser, SUPPRESS, Namespace
|
|
69
72
|
from cwltool.loghandler import _logger as cwllogger
|
|
70
73
|
from cwltool.loghandler import defaultStreamHandler
|
|
71
74
|
from cwltool.mpi import MpiConfig
|
|
@@ -103,11 +106,15 @@ from schema_salad.sourceline import SourceLine
|
|
|
103
106
|
from typing_extensions import Literal
|
|
104
107
|
|
|
105
108
|
from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
|
|
106
|
-
from toil.common import
|
|
109
|
+
from toil.common import Toil, addOptions
|
|
110
|
+
from toil.cwl import check_cwltool_version
|
|
111
|
+
|
|
112
|
+
check_cwltool_version()
|
|
107
113
|
from toil.cwl.utils import (
|
|
108
114
|
CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
|
|
109
115
|
CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
|
|
110
116
|
download_structure,
|
|
117
|
+
get_from_structure,
|
|
111
118
|
visit_cwl_class_and_reduce,
|
|
112
119
|
)
|
|
113
120
|
from toil.exceptions import FailedJobsException
|
|
@@ -117,14 +124,14 @@ from toil.job import AcceleratorRequirement, Job, Promise, Promised, unwrap
|
|
|
117
124
|
from toil.jobStores.abstractJobStore import AbstractJobStore, NoSuchFileException
|
|
118
125
|
from toil.jobStores.fileJobStore import FileJobStore
|
|
119
126
|
from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
|
|
127
|
+
from toil.lib.io import mkdtemp
|
|
120
128
|
from toil.lib.threading import ExceptionalThread
|
|
121
129
|
from toil.statsAndLogging import DEFAULT_LOGLEVEL
|
|
122
|
-
from toil.version import baseVersion
|
|
123
130
|
|
|
124
131
|
logger = logging.getLogger(__name__)
|
|
125
132
|
|
|
126
133
|
# Find the default temporary directory
|
|
127
|
-
DEFAULT_TMPDIR =
|
|
134
|
+
DEFAULT_TMPDIR = gettempdir()
|
|
128
135
|
# And compose a CWL-style default prefix inside it.
|
|
129
136
|
# We used to not put this inside anything and we would drop loads of temp
|
|
130
137
|
# directories in the current directory and leave them there.
|
|
@@ -349,16 +356,24 @@ class ResolveSource:
|
|
|
349
356
|
|
|
350
357
|
def __repr__(self) -> str:
|
|
351
358
|
"""Allow for debug printing."""
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
+
|
|
360
|
+
parts = [f"source key {self.source_key}"]
|
|
361
|
+
|
|
362
|
+
if "pickValue" in self.input:
|
|
363
|
+
parts.append(f"pick value {self.input['pickValue']} from")
|
|
364
|
+
|
|
365
|
+
if isinstance(self.promise_tuples, list):
|
|
366
|
+
names = [n for n, _ in self.promise_tuples]
|
|
367
|
+
parts.append(f"names {names} in promises")
|
|
368
|
+
else:
|
|
369
|
+
name, _ = self.promise_tuples
|
|
370
|
+
parts.append(f"name {name} in promise")
|
|
371
|
+
|
|
372
|
+
return f"ResolveSource({', '.join(parts)})"
|
|
359
373
|
|
|
360
374
|
def resolve(self) -> Any:
|
|
361
375
|
"""First apply linkMerge then pickValue if either present."""
|
|
376
|
+
|
|
362
377
|
result: Optional[Any] = None
|
|
363
378
|
if isinstance(self.promise_tuples, list):
|
|
364
379
|
result = self.link_merge(
|
|
@@ -382,6 +397,7 @@ class ResolveSource:
|
|
|
382
397
|
|
|
383
398
|
:param values: result of step
|
|
384
399
|
"""
|
|
400
|
+
|
|
385
401
|
link_merge_type = self.input.get("linkMerge", "merge_nested")
|
|
386
402
|
|
|
387
403
|
if link_merge_type == "merge_nested":
|
|
@@ -409,6 +425,7 @@ class ResolveSource:
|
|
|
409
425
|
without modification.
|
|
410
426
|
:return:
|
|
411
427
|
"""
|
|
428
|
+
|
|
412
429
|
pick_value_type = cast(str, self.input.get("pickValue"))
|
|
413
430
|
|
|
414
431
|
if pick_value_type is None:
|
|
@@ -425,6 +442,11 @@ class ResolveSource:
|
|
|
425
442
|
|
|
426
443
|
if pick_value_type == "first_non_null":
|
|
427
444
|
if len(result) < 1:
|
|
445
|
+
logger.error(
|
|
446
|
+
"Could not find non-null entry for %s:\n%s",
|
|
447
|
+
self.name,
|
|
448
|
+
pprint.pformat(self.promise_tuples),
|
|
449
|
+
)
|
|
428
450
|
raise cwl_utils.errors.WorkflowException(
|
|
429
451
|
"%s: first_non_null operator found no non-null values" % self.name
|
|
430
452
|
)
|
|
@@ -479,6 +501,11 @@ class StepValueFrom:
|
|
|
479
501
|
self.req = req
|
|
480
502
|
self.container_engine = container_engine
|
|
481
503
|
|
|
504
|
+
def __repr__(self) -> str:
|
|
505
|
+
"""Allow for debug printing."""
|
|
506
|
+
|
|
507
|
+
return f"StepValueFrom({self.expr}, {self.source}, {self.req}, {self.container_engine})"
|
|
508
|
+
|
|
482
509
|
def eval_prep(
|
|
483
510
|
self, step_inputs: CWLObjectType, file_store: AbstractFileStore
|
|
484
511
|
) -> None:
|
|
@@ -551,6 +578,11 @@ class DefaultWithSource:
|
|
|
551
578
|
self.default = default
|
|
552
579
|
self.source = source
|
|
553
580
|
|
|
581
|
+
def __repr__(self) -> str:
|
|
582
|
+
"""Allow for debug printing."""
|
|
583
|
+
|
|
584
|
+
return f"DefaultWithSource({self.default}, {self.source})"
|
|
585
|
+
|
|
554
586
|
def resolve(self) -> Any:
|
|
555
587
|
"""
|
|
556
588
|
Determine the final input value when the time is right.
|
|
@@ -573,6 +605,11 @@ class JustAValue:
|
|
|
573
605
|
"""Store the value."""
|
|
574
606
|
self.val = val
|
|
575
607
|
|
|
608
|
+
def __repr__(self) -> str:
|
|
609
|
+
"""Allow for debug printing."""
|
|
610
|
+
|
|
611
|
+
return f"JustAValue({self.val})"
|
|
612
|
+
|
|
576
613
|
def resolve(self) -> Any:
|
|
577
614
|
"""Return the value."""
|
|
578
615
|
return self.val
|
|
@@ -654,6 +691,8 @@ class ToilPathMapper(PathMapper):
|
|
|
654
691
|
streaming on, and returns a file: URI to where the file or
|
|
655
692
|
directory has been downloaded to. Meant to be a partially-bound
|
|
656
693
|
version of toil_get_file().
|
|
694
|
+
:param referenced_files: List of CWL File and Directory objects, which can have their locations set as both
|
|
695
|
+
virtualized and absolute local paths
|
|
657
696
|
"""
|
|
658
697
|
self.get_file = get_file
|
|
659
698
|
self.stage_listing = stage_listing
|
|
@@ -675,28 +714,29 @@ class ToilPathMapper(PathMapper):
|
|
|
675
714
|
This is called on each File or Directory CWL object. The Files and
|
|
676
715
|
Directories all have "location" fields. For the Files, these are from
|
|
677
716
|
upload_file(), and for the Directories, these are from
|
|
678
|
-
upload_directory(),
|
|
679
|
-
locations based on listing the Directories using ToilFsAccess.
|
|
717
|
+
upload_directory() or cwltool internally. With upload_directory(), they and their children will be assigned
|
|
718
|
+
locations based on listing the Directories using ToilFsAccess. With cwltool, locations will be set as absolute
|
|
719
|
+
paths.
|
|
680
720
|
|
|
681
721
|
:param obj: The CWL File or Directory to process
|
|
682
722
|
|
|
683
723
|
:param stagedir: The base path for target paths to be generated under,
|
|
684
|
-
|
|
685
|
-
|
|
724
|
+
except when a File or Directory has an overriding parent directory in
|
|
725
|
+
dirname
|
|
686
726
|
|
|
687
727
|
:param basedir: The directory from which relative paths should be
|
|
688
|
-
|
|
689
|
-
|
|
728
|
+
resolved; used as the base directory for the StdFsAccess that generated
|
|
729
|
+
the listing being processed.
|
|
690
730
|
|
|
691
731
|
:param copy: If set, use writable types for Files and Directories.
|
|
692
732
|
|
|
693
733
|
:param staged: Starts as True at the top of the recursion. Set to False
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
734
|
+
when entering a directory that we can actually download, so we don't
|
|
735
|
+
stage files and subdirectories separately from the directory as a
|
|
736
|
+
whole. Controls the staged flag on generated mappings, and therefore
|
|
737
|
+
whether files and directories are actually placed at their mapped-to
|
|
738
|
+
target locations. If stage_listing is True, we will leave this True
|
|
739
|
+
throughout and stage everything.
|
|
700
740
|
|
|
701
741
|
Produces one MapperEnt for every unique location for a File or
|
|
702
742
|
Directory. These MapperEnt objects are instructions to cwltool's
|
|
@@ -807,6 +847,14 @@ class ToilPathMapper(PathMapper):
|
|
|
807
847
|
# We can't really make the directory. Maybe we are
|
|
808
848
|
# exporting from the leader and it doesn't matter.
|
|
809
849
|
resolved = location
|
|
850
|
+
elif location.startswith("/"):
|
|
851
|
+
# Test if path is an absolute local path
|
|
852
|
+
# Does not check if the path is relative
|
|
853
|
+
# While Toil encodes paths into a URL with ToilPathMapper,
|
|
854
|
+
# something called internally in cwltool may return an absolute path
|
|
855
|
+
# ex: if cwltool calls itself internally in command_line_tool.py,
|
|
856
|
+
# it collects outputs with collect_output, and revmap_file will use its own internal pathmapper
|
|
857
|
+
resolved = location
|
|
810
858
|
else:
|
|
811
859
|
raise RuntimeError("Unsupported location: " + location)
|
|
812
860
|
|
|
@@ -883,7 +931,6 @@ class ToilPathMapper(PathMapper):
|
|
|
883
931
|
)
|
|
884
932
|
else:
|
|
885
933
|
deref = ab
|
|
886
|
-
|
|
887
934
|
if deref.startswith("file:"):
|
|
888
935
|
deref = schema_salad.ref_resolver.uri_file_path(deref)
|
|
889
936
|
if urlsplit(deref).scheme in ["http", "https"]:
|
|
@@ -1027,8 +1074,6 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
|
|
|
1027
1074
|
class ToilExpressionTool(ToilTool, cwltool.command_line_tool.ExpressionTool):
|
|
1028
1075
|
"""Subclass the cwltool expression tool to provide the custom ToilPathMapper."""
|
|
1029
1076
|
|
|
1030
|
-
pass
|
|
1031
|
-
|
|
1032
1077
|
|
|
1033
1078
|
def toil_make_tool(
|
|
1034
1079
|
toolpath_object: CommentedMap,
|
|
@@ -1047,10 +1092,7 @@ def toil_make_tool(
|
|
|
1047
1092
|
return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)
|
|
1048
1093
|
|
|
1049
1094
|
|
|
1050
|
-
|
|
1051
|
-
# can't say that until https://github.com/python/mypy/issues/731 is fixed
|
|
1052
|
-
# because it's recursive.
|
|
1053
|
-
DirectoryContents = Dict[str, Union[str, Dict[str, Any]]]
|
|
1095
|
+
DirectoryContents = Dict[str, Union[str, "DirectoryContents"]]
|
|
1054
1096
|
|
|
1055
1097
|
|
|
1056
1098
|
def check_directory_dict_invariants(contents: DirectoryContents) -> None:
|
|
@@ -1080,9 +1122,8 @@ def decode_directory(
|
|
|
1080
1122
|
None), and the deduplication key string that uniquely identifies the
|
|
1081
1123
|
directory.
|
|
1082
1124
|
"""
|
|
1083
|
-
|
|
1084
|
-
"
|
|
1085
|
-
), f"Cannot decode non-directory path: {dir_path}"
|
|
1125
|
+
if not dir_path.startswith("toildir:"):
|
|
1126
|
+
raise RuntimeError(f"Cannot decode non-directory path: {dir_path}")
|
|
1086
1127
|
|
|
1087
1128
|
# We will decode the directory and then look inside it
|
|
1088
1129
|
|
|
@@ -1203,7 +1244,8 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1203
1244
|
|
|
1204
1245
|
logger.debug("ToilFsAccess downloading %s to %s", cache_key, temp_dir)
|
|
1205
1246
|
|
|
1206
|
-
# Save it all into this new temp directory
|
|
1247
|
+
# Save it all into this new temp directory.
|
|
1248
|
+
# Guaranteed to fill it with real files and not symlinks.
|
|
1207
1249
|
download_structure(self.file_store, {}, {}, contents, temp_dir)
|
|
1208
1250
|
|
|
1209
1251
|
# Make sure we use the same temp directory if we go traversing
|
|
@@ -1233,7 +1275,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1233
1275
|
logger.debug(
|
|
1234
1276
|
"ToilFsAccess fetching directory %s from a JobStore", path
|
|
1235
1277
|
)
|
|
1236
|
-
dest_dir =
|
|
1278
|
+
dest_dir = mkdtemp()
|
|
1237
1279
|
|
|
1238
1280
|
# Recursively fetch all the files in the directory.
|
|
1239
1281
|
def download_to(url: str, dest: str) -> None:
|
|
@@ -1256,7 +1298,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1256
1298
|
logger.debug("ToilFsAccess fetching file %s from a JobStore", path)
|
|
1257
1299
|
# Try to grab it with a jobstore implementation, and save it
|
|
1258
1300
|
# somewhere arbitrary.
|
|
1259
|
-
dest_file =
|
|
1301
|
+
dest_file = NamedTemporaryFile(delete=False)
|
|
1260
1302
|
AbstractJobStore.read_from_url(path, dest_file)
|
|
1261
1303
|
dest_file.close()
|
|
1262
1304
|
self.dir_to_download[path] = dest_file.name
|
|
@@ -1271,72 +1313,160 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1271
1313
|
return destination
|
|
1272
1314
|
|
|
1273
1315
|
def glob(self, pattern: str) -> List[str]:
|
|
1274
|
-
|
|
1275
|
-
|
|
1316
|
+
parse = urlparse(pattern)
|
|
1317
|
+
if parse.scheme == "file":
|
|
1318
|
+
pattern = os.path.abspath(unquote(parse.path))
|
|
1319
|
+
elif parse.scheme == "":
|
|
1320
|
+
pattern = os.path.abspath(pattern)
|
|
1321
|
+
else:
|
|
1322
|
+
raise RuntimeError(f"Cannot efficiently support globbing on {parse.scheme} URIs")
|
|
1323
|
+
|
|
1324
|
+
# Actually do the glob
|
|
1325
|
+
return [schema_salad.ref_resolver.file_uri(f) for f in glob.glob(pattern)]
|
|
1276
1326
|
|
|
1277
1327
|
def open(self, fn: str, mode: str) -> IO[Any]:
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1328
|
+
if "w" in mode or "x" in mode or "+" in mode or "a" in mode:
|
|
1329
|
+
raise RuntimeError(f"Mode {mode} for opening {fn} involves writing")
|
|
1330
|
+
|
|
1331
|
+
parse = urlparse(fn)
|
|
1332
|
+
if parse.scheme in ["", "file"]:
|
|
1333
|
+
# Handle local files
|
|
1334
|
+
return open(self._abs(fn), mode)
|
|
1335
|
+
elif parse.scheme == "toildir":
|
|
1336
|
+
contents, subpath, cache_key = decode_directory(fn)
|
|
1337
|
+
if cache_key in self.dir_to_download:
|
|
1338
|
+
# This is already available locally, so fall back on the local copy
|
|
1339
|
+
return open(self._abs(fn), mode)
|
|
1340
|
+
else:
|
|
1341
|
+
# We need to get the URI out of the virtual directory
|
|
1342
|
+
if subpath is None:
|
|
1343
|
+
raise RuntimeError(f"{fn} is a toildir directory")
|
|
1344
|
+
uri = get_from_structure(contents, subpath)
|
|
1345
|
+
if not isinstance(uri, str):
|
|
1346
|
+
raise RuntimeError(f"{fn} does not point to a file")
|
|
1347
|
+
# Recurse on that URI
|
|
1348
|
+
return self.open(uri, mode)
|
|
1349
|
+
elif parse.scheme == "toilfile":
|
|
1350
|
+
if self.file_store is None:
|
|
1351
|
+
raise RuntimeError("URL requires a file store: " + fn)
|
|
1352
|
+
# Streaming access to Toil file store files requires being inside a
|
|
1353
|
+
# context manager, which we can't require. So we need to download
|
|
1354
|
+
# the file.
|
|
1355
|
+
return open(self._abs(fn), mode)
|
|
1356
|
+
else:
|
|
1357
|
+
# This should be supported by a job store.
|
|
1358
|
+
byte_stream = AbstractJobStore.open_url(fn)
|
|
1359
|
+
if 'b' in mode:
|
|
1360
|
+
# Pass stream along in binary
|
|
1361
|
+
return byte_stream
|
|
1362
|
+
else:
|
|
1363
|
+
# Wrap it in a text decoder
|
|
1364
|
+
return io.TextIOWrapper(byte_stream, encoding='utf-8')
|
|
1281
1365
|
|
|
1282
1366
|
def exists(self, path: str) -> bool:
|
|
1283
1367
|
"""Test for file existence."""
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
#
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1368
|
+
parse = urlparse(path)
|
|
1369
|
+
if parse.scheme in ["", "file"]:
|
|
1370
|
+
# Handle local files
|
|
1371
|
+
# toil's _abs() throws errors when files are not found and cwltool's _abs() does not
|
|
1372
|
+
try:
|
|
1373
|
+
return os.path.exists(self._abs(path))
|
|
1374
|
+
except NoSuchFileException:
|
|
1375
|
+
return False
|
|
1376
|
+
elif parse.scheme == "toildir":
|
|
1377
|
+
contents, subpath, cache_key = decode_directory(path)
|
|
1378
|
+
if subpath is None:
|
|
1379
|
+
# The toildir directory itself exists
|
|
1380
|
+
return True
|
|
1381
|
+
uri = get_from_structure(contents, subpath)
|
|
1382
|
+
if uri is None:
|
|
1383
|
+
# It's not in the virtual directory, so it doesn't exist
|
|
1384
|
+
return False
|
|
1385
|
+
if isinstance(uri, dict):
|
|
1386
|
+
# Actually it's a subdirectory, so it exists.
|
|
1387
|
+
return True
|
|
1388
|
+
# We recurse and poll the URI directly to make sure it really exists
|
|
1389
|
+
return self.exists(uri)
|
|
1390
|
+
elif parse.scheme == "toilfile":
|
|
1391
|
+
# TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
|
|
1392
|
+
return True
|
|
1393
|
+
else:
|
|
1394
|
+
# This should be supported by a job store.
|
|
1395
|
+
return AbstractJobStore.url_exists(path)
|
|
1290
1396
|
|
|
1291
1397
|
def size(self, path: str) -> int:
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
if self.file_store is None:
|
|
1297
|
-
raise RuntimeError("URL requires a file store: " + path)
|
|
1298
|
-
return self.file_store.getGlobalFileSize(
|
|
1299
|
-
FileID.unpack(path[len("toilfile:") :])
|
|
1300
|
-
)
|
|
1301
|
-
elif path.startswith("toildir:"):
|
|
1398
|
+
parse = urlparse(path)
|
|
1399
|
+
if parse.scheme in ["", "file"]:
|
|
1400
|
+
return os.stat(self._abs(path)).st_size
|
|
1401
|
+
elif parse.scheme == "toildir":
|
|
1302
1402
|
# Decode its contents, the path inside it to the file (if any), and
|
|
1303
1403
|
# the key to use for caching the directory.
|
|
1304
|
-
|
|
1404
|
+
contents, subpath, cache_key = decode_directory(path)
|
|
1305
1405
|
|
|
1306
1406
|
# We can't get the size of just a directory.
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
for part in subpath.split("/"):
|
|
1310
|
-
# Follow the path inside the directory contents.
|
|
1311
|
-
here = cast(DirectoryContents, here[part])
|
|
1407
|
+
if subpath is None:
|
|
1408
|
+
raise RuntimeError(f"Attempted to check size of directory {path}")
|
|
1312
1409
|
|
|
1313
|
-
|
|
1314
|
-
assert isinstance(here, str), f"Did not find a file at {path}"
|
|
1315
|
-
assert here.startswith(
|
|
1316
|
-
"toilfile:"
|
|
1317
|
-
), f"Did not find a filestore file at {path}"
|
|
1410
|
+
uri = get_from_structure(contents, subpath)
|
|
1318
1411
|
|
|
1319
|
-
|
|
1412
|
+
# We ought to end up with a URI.
|
|
1413
|
+
if not isinstance(uri, str):
|
|
1414
|
+
raise RuntimeError(f"Did not find a file at {path}")
|
|
1415
|
+
return self.size(uri)
|
|
1416
|
+
elif parse.scheme == "toilfile":
|
|
1417
|
+
if self.file_store is None:
|
|
1418
|
+
raise RuntimeError("URL requires a file store: " + path)
|
|
1419
|
+
return self.file_store.getGlobalFileSize(
|
|
1420
|
+
FileID.unpack(path[len("toilfile:") :])
|
|
1421
|
+
)
|
|
1320
1422
|
else:
|
|
1321
|
-
#
|
|
1322
|
-
|
|
1323
|
-
|
|
1423
|
+
# This should be supported by a job store.
|
|
1424
|
+
size = AbstractJobStore.get_size(path)
|
|
1425
|
+
if size is None:
|
|
1426
|
+
# get_size can be unimplemented or unavailable
|
|
1427
|
+
raise RuntimeError(f"Could not get size of {path}")
|
|
1428
|
+
return size
|
|
1324
1429
|
|
|
1325
1430
|
def isfile(self, fn: str) -> bool:
|
|
1326
1431
|
parse = urlparse(fn)
|
|
1327
|
-
if parse.scheme in ["
|
|
1328
|
-
|
|
1329
|
-
|
|
1432
|
+
if parse.scheme in ["file", ""]:
|
|
1433
|
+
return os.path.isfile(self._abs(fn))
|
|
1434
|
+
elif parse.scheme == "toilfile":
|
|
1435
|
+
# TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
|
|
1436
|
+
return True
|
|
1437
|
+
elif parse.scheme == "toildir":
|
|
1438
|
+
contents, subpath, cache_key = decode_directory(fn)
|
|
1439
|
+
if subpath is None:
|
|
1440
|
+
# This is the toildir directory itself
|
|
1441
|
+
return False
|
|
1442
|
+
found = get_from_structure(contents, subpath)
|
|
1443
|
+
# If we find a string, that's a file
|
|
1444
|
+
# TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
|
|
1445
|
+
return isinstance(found, str)
|
|
1330
1446
|
else:
|
|
1331
|
-
return not AbstractJobStore.get_is_directory(fn)
|
|
1447
|
+
return self.exists(fn) and not AbstractJobStore.get_is_directory(fn)
|
|
1332
1448
|
|
|
1333
1449
|
def isdir(self, fn: str) -> bool:
|
|
1450
|
+
logger.debug("ToilFsAccess checking type of %s", fn)
|
|
1334
1451
|
parse = urlparse(fn)
|
|
1335
|
-
if parse.scheme in ["
|
|
1336
|
-
|
|
1337
|
-
|
|
1452
|
+
if parse.scheme in ["file", ""]:
|
|
1453
|
+
return os.path.isdir(self._abs(fn))
|
|
1454
|
+
elif parse.scheme == "toilfile":
|
|
1455
|
+
return False
|
|
1456
|
+
elif parse.scheme == "toildir":
|
|
1457
|
+
contents, subpath, cache_key = decode_directory(fn)
|
|
1458
|
+
if subpath is None:
|
|
1459
|
+
# This is the toildir directory itself.
|
|
1460
|
+
# TODO: We assume directories can't be deleted.
|
|
1461
|
+
return True
|
|
1462
|
+
found = get_from_structure(contents, subpath)
|
|
1463
|
+
# If we find a dict, that's a directory.
|
|
1464
|
+
# TODO: We assume directories can't be deleted.
|
|
1465
|
+
return isinstance(found, dict)
|
|
1338
1466
|
else:
|
|
1339
|
-
|
|
1467
|
+
status = AbstractJobStore.get_is_directory(fn)
|
|
1468
|
+
logger.debug("AbstractJobStore said: %s", status)
|
|
1469
|
+
return status
|
|
1340
1470
|
|
|
1341
1471
|
def listdir(self, fn: str) -> List[str]:
|
|
1342
1472
|
# This needs to return full URLs for everything in the directory.
|
|
@@ -1344,12 +1474,25 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1344
1474
|
logger.debug("ToilFsAccess listing %s", fn)
|
|
1345
1475
|
|
|
1346
1476
|
parse = urlparse(fn)
|
|
1347
|
-
if parse.scheme in ["
|
|
1348
|
-
#
|
|
1477
|
+
if parse.scheme in ["file", ""]:
|
|
1478
|
+
# Find the local path
|
|
1349
1479
|
directory = self._abs(fn)
|
|
1350
|
-
|
|
1351
1480
|
# Now list it (it is probably a directory)
|
|
1352
1481
|
return [abspath(quote(entry), fn) for entry in os.listdir(directory)]
|
|
1482
|
+
elif parse.scheme == "toilfile":
|
|
1483
|
+
raise RuntimeError(f"Cannot list a file: {fn}")
|
|
1484
|
+
elif parse.scheme == "toildir":
|
|
1485
|
+
contents, subpath, cache_key = decode_directory(fn)
|
|
1486
|
+
here = contents
|
|
1487
|
+
if subpath is not None:
|
|
1488
|
+
got = get_from_structure(contents, subpath)
|
|
1489
|
+
if got is None:
|
|
1490
|
+
raise RuntimeError(f"Cannot list nonexistent directory: {fn}")
|
|
1491
|
+
if isinstance(got, str):
|
|
1492
|
+
raise RuntimeError(f"Cannot list file or dubdirectory of a file: {fn}")
|
|
1493
|
+
here = got
|
|
1494
|
+
# List all the things in here and make full URIs to them
|
|
1495
|
+
return [os.path.join(fn, k) for k in here.keys()]
|
|
1353
1496
|
else:
|
|
1354
1497
|
return [
|
|
1355
1498
|
os.path.join(fn, entry.rstrip("/"))
|
|
@@ -1371,7 +1514,7 @@ def toil_get_file(
|
|
|
1371
1514
|
file_store: AbstractFileStore,
|
|
1372
1515
|
index: Dict[str, str],
|
|
1373
1516
|
existing: Dict[str, str],
|
|
1374
|
-
|
|
1517
|
+
uri: str,
|
|
1375
1518
|
streamable: bool = False,
|
|
1376
1519
|
streaming_allowed: bool = True,
|
|
1377
1520
|
pipe_threads: Optional[List[Tuple[Thread, int]]] = None,
|
|
@@ -1388,28 +1531,28 @@ def toil_get_file(
|
|
|
1388
1531
|
|
|
1389
1532
|
:param index: Maps from downloaded file path back to input Toil URI.
|
|
1390
1533
|
|
|
1391
|
-
:param existing: Maps from
|
|
1534
|
+
:param existing: Maps from URI to downloaded file path.
|
|
1392
1535
|
|
|
1393
|
-
:param
|
|
1536
|
+
:param uri: The URI for the file to download.
|
|
1394
1537
|
|
|
1395
1538
|
:param streamable: If the file is has 'streamable' flag set
|
|
1396
1539
|
|
|
1397
1540
|
:param streaming_allowed: If streaming is allowed
|
|
1398
1541
|
|
|
1399
1542
|
:param pipe_threads: List of threads responsible for streaming the data
|
|
1400
|
-
|
|
1401
|
-
|
|
1543
|
+
and open file descriptors corresponding to those files. Caller is responsible
|
|
1544
|
+
to close the file descriptors (to break the pipes) and join the threads
|
|
1402
1545
|
"""
|
|
1403
1546
|
pipe_threads_real = pipe_threads or []
|
|
1404
1547
|
# We can't use urlparse here because we need to handle the '_:' scheme and
|
|
1405
1548
|
# urlparse sees that as a path and not a URI scheme.
|
|
1406
|
-
if
|
|
1549
|
+
if uri.startswith("toildir:"):
|
|
1407
1550
|
# This is a file in a directory, or maybe a directory itself.
|
|
1408
1551
|
# See ToilFsAccess and upload_directory.
|
|
1409
1552
|
# We will go look for the actual file in the encoded directory
|
|
1410
1553
|
# structure which will tell us where the toilfile: name for the file is.
|
|
1411
1554
|
|
|
1412
|
-
parts =
|
|
1555
|
+
parts = uri[len("toildir:") :].split("/")
|
|
1413
1556
|
contents = json.loads(
|
|
1414
1557
|
base64.urlsafe_b64decode(parts[0].encode("utf-8")).decode("utf-8")
|
|
1415
1558
|
)
|
|
@@ -1429,21 +1572,41 @@ def toil_get_file(
|
|
|
1429
1572
|
download_structure(file_store, index, existing, contents, dest_path)
|
|
1430
1573
|
# Return where we put it, but as a file:// URI
|
|
1431
1574
|
return schema_salad.ref_resolver.file_uri(dest_path)
|
|
1432
|
-
elif
|
|
1433
|
-
#
|
|
1575
|
+
elif uri.startswith("_:"):
|
|
1576
|
+
# Someone is asking us for an empty temp directory.
|
|
1577
|
+
# We need to check this before the file path case because urlsplit()
|
|
1578
|
+
# will call this a path with no scheme.
|
|
1579
|
+
dest_path = file_store.getLocalTempDir()
|
|
1580
|
+
return schema_salad.ref_resolver.file_uri(dest_path)
|
|
1581
|
+
elif uri.startswith("file:") or urlsplit(uri).scheme == "":
|
|
1582
|
+
# There's a file: scheme or no scheme, and we know this isn't a _: URL.
|
|
1583
|
+
|
|
1584
|
+
# We need to support file: URIs and local paths, because we might be
|
|
1585
|
+
# involved in moving files around on the local disk when uploading
|
|
1586
|
+
# things after a job. We might want to catch cases where a leader
|
|
1587
|
+
# filesystem file URI leaks in here, but we can't, so we just rely on
|
|
1588
|
+
# the rest of the code to be correct.
|
|
1589
|
+
return uri
|
|
1590
|
+
else:
|
|
1591
|
+
# This is a toilfile: uri or other remote URI
|
|
1434
1592
|
def write_to_pipe(
|
|
1435
|
-
file_store: AbstractFileStore, pipe_name: str,
|
|
1593
|
+
file_store: AbstractFileStore, pipe_name: str, uri: str
|
|
1436
1594
|
) -> None:
|
|
1437
1595
|
try:
|
|
1438
1596
|
with open(pipe_name, "wb") as pipe:
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1597
|
+
if uri.startswith("toilfile:"):
|
|
1598
|
+
# Stream from the file store
|
|
1599
|
+
file_store_id = FileID.unpack(uri[len("toilfile:") :])
|
|
1600
|
+
with file_store.readGlobalFileStream(file_store_id) as fi:
|
|
1601
|
+
chunk_sz = 1024
|
|
1602
|
+
while True:
|
|
1603
|
+
data = fi.read(chunk_sz)
|
|
1604
|
+
if not data:
|
|
1605
|
+
break
|
|
1606
|
+
pipe.write(data)
|
|
1607
|
+
else:
|
|
1608
|
+
# Stream from some other URI
|
|
1609
|
+
AbstractJobStore.read_from_url(uri, pipe)
|
|
1447
1610
|
except OSError as e:
|
|
1448
1611
|
# The other side of the pipe may have been closed by the
|
|
1449
1612
|
# reading thread, which is OK.
|
|
@@ -1456,7 +1619,7 @@ def toil_get_file(
|
|
|
1456
1619
|
and not isinstance(file_store.jobStore, FileJobStore)
|
|
1457
1620
|
):
|
|
1458
1621
|
logger.debug(
|
|
1459
|
-
"Streaming file %s",
|
|
1622
|
+
"Streaming file %s", uri
|
|
1460
1623
|
)
|
|
1461
1624
|
src_path = file_store.getLocalTempFileName()
|
|
1462
1625
|
os.mkfifo(src_path)
|
|
@@ -1465,42 +1628,39 @@ def toil_get_file(
|
|
|
1465
1628
|
args=(
|
|
1466
1629
|
file_store,
|
|
1467
1630
|
src_path,
|
|
1468
|
-
|
|
1631
|
+
uri,
|
|
1469
1632
|
),
|
|
1470
1633
|
)
|
|
1471
1634
|
th.start()
|
|
1472
1635
|
pipe_threads_real.append((th, os.open(src_path, os.O_RDONLY)))
|
|
1473
1636
|
else:
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1637
|
+
# We need to do a real file
|
|
1638
|
+
if uri in existing:
|
|
1639
|
+
# Already did it
|
|
1640
|
+
src_path = existing[uri]
|
|
1641
|
+
else:
|
|
1642
|
+
if uri.startswith("toilfile:"):
|
|
1643
|
+
# Download from the file store
|
|
1644
|
+
file_store_id = FileID.unpack(uri[len("toilfile:") :])
|
|
1645
|
+
src_path = file_store.readGlobalFile(
|
|
1646
|
+
file_store_id, symlink=True
|
|
1647
|
+
)
|
|
1648
|
+
else:
|
|
1649
|
+
# Download from the URI via the job store.
|
|
1650
|
+
|
|
1651
|
+
# Figure out where it goes.
|
|
1652
|
+
src_path = file_store.getLocalTempFileName()
|
|
1653
|
+
# Open that path exclusively to make sure we created it
|
|
1654
|
+
with open(src_path, 'xb') as fh:
|
|
1655
|
+
# Download into the file
|
|
1656
|
+
size, executable = AbstractJobStore.read_from_url(uri, fh)
|
|
1657
|
+
if executable:
|
|
1658
|
+
# Set the execute bit in the file's permissions
|
|
1659
|
+
os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
|
|
1660
|
+
|
|
1661
|
+
index[src_path] = uri
|
|
1662
|
+
existing[uri] = src_path
|
|
1481
1663
|
return schema_salad.ref_resolver.file_uri(src_path)
|
|
1482
|
-
elif file_store_id.startswith("_:"):
|
|
1483
|
-
# Someone is asking us for an empty temp directory.
|
|
1484
|
-
# We need to check this before the file path case because urlsplit()
|
|
1485
|
-
# will call this a path with no scheme.
|
|
1486
|
-
dest_path = file_store.getLocalTempDir()
|
|
1487
|
-
return schema_salad.ref_resolver.file_uri(dest_path)
|
|
1488
|
-
elif file_store_id.startswith("file:") or urlsplit(file_store_id).scheme == "":
|
|
1489
|
-
# There's a file: scheme or no scheme, and we know this isn't a _: URL.
|
|
1490
|
-
|
|
1491
|
-
# We need to support file: URIs and local paths, because we might be
|
|
1492
|
-
# involved in moving files around on the local disk when uploading
|
|
1493
|
-
# things after a job. We might want to catch cases where a leader
|
|
1494
|
-
# filesystem file URI leaks in here, but we can't, so we just rely on
|
|
1495
|
-
# the rest of the code to be correct.
|
|
1496
|
-
return file_store_id
|
|
1497
|
-
else:
|
|
1498
|
-
raise RuntimeError(
|
|
1499
|
-
f"Cannot obtain file {file_store_id} while on host "
|
|
1500
|
-
f"{socket.gethostname()}; all imports must happen on the "
|
|
1501
|
-
f"leader!"
|
|
1502
|
-
)
|
|
1503
|
-
|
|
1504
1664
|
|
|
1505
1665
|
def write_file(
|
|
1506
1666
|
writeFunc: Callable[[str], FileID],
|
|
@@ -1557,7 +1717,9 @@ def import_files(
|
|
|
1557
1717
|
existing: Dict[str, str],
|
|
1558
1718
|
cwl_object: Optional[CWLObjectType],
|
|
1559
1719
|
skip_broken: bool = False,
|
|
1720
|
+
skip_remote: bool = False,
|
|
1560
1721
|
bypass_file_store: bool = False,
|
|
1722
|
+
log_level: int = logging.DEBUG
|
|
1561
1723
|
) -> None:
|
|
1562
1724
|
"""
|
|
1563
1725
|
Prepare all files and directories.
|
|
@@ -1579,28 +1741,41 @@ def import_files(
|
|
|
1579
1741
|
Also does some miscelaneous normalization.
|
|
1580
1742
|
|
|
1581
1743
|
:param import_function: The function used to upload a URI and get a
|
|
1582
|
-
|
|
1744
|
+
Toil FileID for it.
|
|
1583
1745
|
|
|
1584
1746
|
:param fs_access: the CWL FS access object we use to access the filesystem
|
|
1585
|
-
|
|
1747
|
+
to find files to import. Needs to support the URI schemes used.
|
|
1586
1748
|
|
|
1587
1749
|
:param fileindex: Forward map to fill in from file URI to Toil storage
|
|
1588
|
-
|
|
1750
|
+
location, used by write_file to deduplicate writes.
|
|
1589
1751
|
|
|
1590
1752
|
:param existing: Reverse map to fill in from Toil storage location to file
|
|
1591
|
-
|
|
1753
|
+
URI. Not read from.
|
|
1592
1754
|
|
|
1593
1755
|
:param cwl_object: CWL tool (or workflow order) we are importing files for
|
|
1594
1756
|
|
|
1595
1757
|
:param skip_broken: If True, when files can't be imported because they e.g.
|
|
1596
|
-
|
|
1758
|
+
don't exist, leave their locations alone rather than failing with an error.
|
|
1759
|
+
|
|
1760
|
+
:param skp_remote: If True, leave remote URIs in place instead of importing
|
|
1761
|
+
files.
|
|
1597
1762
|
|
|
1598
1763
|
:param bypass_file_store: If True, leave file:// URIs in place instead of
|
|
1599
|
-
|
|
1764
|
+
importing files and directories.
|
|
1765
|
+
|
|
1766
|
+
:param log_level: Log imported files at the given level.
|
|
1600
1767
|
"""
|
|
1601
1768
|
tool_id = cwl_object.get("id", str(cwl_object)) if cwl_object else ""
|
|
1602
1769
|
|
|
1603
1770
|
logger.debug("Importing files for %s", tool_id)
|
|
1771
|
+
logger.debug("Importing files in %s", cwl_object)
|
|
1772
|
+
|
|
1773
|
+
def import_and_log(url: str) -> FileID:
|
|
1774
|
+
"""
|
|
1775
|
+
Upload a file and log that we are doing so.
|
|
1776
|
+
"""
|
|
1777
|
+
logger.log(log_level, "Loading %s...", url)
|
|
1778
|
+
return import_function(url)
|
|
1604
1779
|
|
|
1605
1780
|
# We need to upload all files to the Toil filestore, and encode structure
|
|
1606
1781
|
# recursively into all Directories' locations. But we cannot safely alter
|
|
@@ -1700,7 +1875,7 @@ def import_files(
|
|
|
1700
1875
|
|
|
1701
1876
|
# Upload the file itself, which will adjust its location.
|
|
1702
1877
|
upload_file(
|
|
1703
|
-
|
|
1878
|
+
import_and_log, fileindex, existing, rec, skip_broken=skip_broken, skip_remote=skip_remote
|
|
1704
1879
|
)
|
|
1705
1880
|
|
|
1706
1881
|
# Make a record for this file under its name
|
|
@@ -1805,11 +1980,16 @@ def upload_file(
|
|
|
1805
1980
|
existing: Dict[str, str],
|
|
1806
1981
|
file_metadata: CWLObjectType,
|
|
1807
1982
|
skip_broken: bool = False,
|
|
1983
|
+
skip_remote: bool = False
|
|
1808
1984
|
) -> None:
|
|
1809
1985
|
"""
|
|
1810
|
-
Update a file object so that the
|
|
1986
|
+
Update a file object so that the file will be accessible from another machine.
|
|
1811
1987
|
|
|
1812
|
-
|
|
1988
|
+
Uploads local files to the Toil file store, and sets their location to a
|
|
1989
|
+
reference to the toil file store.
|
|
1990
|
+
|
|
1991
|
+
Unless skip_remote is set, downloads remote files into the file store and
|
|
1992
|
+
sets their locations to references into the file store as well.
|
|
1813
1993
|
"""
|
|
1814
1994
|
location = cast(str, file_metadata["location"])
|
|
1815
1995
|
if (
|
|
@@ -1832,7 +2012,10 @@ def upload_file(
|
|
|
1832
2012
|
return
|
|
1833
2013
|
else:
|
|
1834
2014
|
raise cwl_utils.errors.WorkflowException("File is missing: %s" % location)
|
|
1835
|
-
|
|
2015
|
+
|
|
2016
|
+
if location.startswith("file://") or not skip_remote:
|
|
2017
|
+
# This is a local file, or we also need to download and re-upload remote files
|
|
2018
|
+
file_metadata["location"] = write_file(uploadfunc, fileindex, existing, location)
|
|
1836
2019
|
|
|
1837
2020
|
logger.debug("Sending file at: %s", file_metadata["location"])
|
|
1838
2021
|
|
|
@@ -1866,6 +2049,7 @@ class CWLNamedJob(Job):
|
|
|
1866
2049
|
memory: Union[int, str, None] = "1GiB",
|
|
1867
2050
|
disk: Union[int, str, None] = "1MiB",
|
|
1868
2051
|
accelerators: Optional[List[AcceleratorRequirement]] = None,
|
|
2052
|
+
preemptible: Optional[bool] = None,
|
|
1869
2053
|
tool_id: Optional[str] = None,
|
|
1870
2054
|
parent_name: Optional[str] = None,
|
|
1871
2055
|
subjob_name: Optional[str] = None,
|
|
@@ -1910,6 +2094,7 @@ class CWLNamedJob(Job):
|
|
|
1910
2094
|
memory=memory,
|
|
1911
2095
|
disk=disk,
|
|
1912
2096
|
accelerators=accelerators,
|
|
2097
|
+
preemptible=preemptible,
|
|
1913
2098
|
unitName=unit_name,
|
|
1914
2099
|
displayName=display_name,
|
|
1915
2100
|
local=local,
|
|
@@ -1941,12 +2126,15 @@ def toilStageFiles(
|
|
|
1941
2126
|
cwljob: Union[CWLObjectType, List[CWLObjectType]],
|
|
1942
2127
|
outdir: str,
|
|
1943
2128
|
destBucket: Union[str, None] = None,
|
|
2129
|
+
log_level: int = logging.DEBUG
|
|
1944
2130
|
) -> None:
|
|
1945
2131
|
"""
|
|
1946
2132
|
Copy input files out of the global file store and update location and path.
|
|
1947
2133
|
|
|
1948
2134
|
:param destBucket: If set, export to this base URL instead of to the local
|
|
1949
2135
|
filesystem.
|
|
2136
|
+
|
|
2137
|
+
:param log_level: Log each file transfered at the given level.
|
|
1950
2138
|
"""
|
|
1951
2139
|
|
|
1952
2140
|
def _collectDirEntries(
|
|
@@ -1986,7 +2174,6 @@ def toilStageFiles(
|
|
|
1986
2174
|
stage_listing=True,
|
|
1987
2175
|
)
|
|
1988
2176
|
for _, p in pm.items():
|
|
1989
|
-
logger.debug("Staging output: %s", p)
|
|
1990
2177
|
if p.staged:
|
|
1991
2178
|
# We're supposed to copy/expose something.
|
|
1992
2179
|
# Note that we have to handle writable versions of everything
|
|
@@ -2008,7 +2195,7 @@ def toilStageFiles(
|
|
|
2008
2195
|
"CreateFile",
|
|
2009
2196
|
"CreateWritableFile",
|
|
2010
2197
|
]: # TODO: CreateFile for buckets is not under testing
|
|
2011
|
-
with
|
|
2198
|
+
with NamedTemporaryFile() as f:
|
|
2012
2199
|
# Make a file with the right contents
|
|
2013
2200
|
f.write(file_id_or_contents.encode("utf-8"))
|
|
2014
2201
|
f.close()
|
|
@@ -2027,39 +2214,63 @@ def toilStageFiles(
|
|
|
2027
2214
|
# At the end we should get a direct toilfile: URI
|
|
2028
2215
|
file_id_or_contents = cast(str, here)
|
|
2029
2216
|
|
|
2217
|
+
# This might be an e.g. S3 URI now
|
|
2218
|
+
if not file_id_or_contents.startswith("toilfile:"):
|
|
2219
|
+
# We need to import it so we can export it.
|
|
2220
|
+
# TODO: Use direct S3 to S3 copy on exports as well
|
|
2221
|
+
file_id_or_contents = (
|
|
2222
|
+
"toilfile:"
|
|
2223
|
+
+ toil.import_file(file_id_or_contents, symlink=False).pack()
|
|
2224
|
+
)
|
|
2225
|
+
|
|
2030
2226
|
if file_id_or_contents.startswith("toilfile:"):
|
|
2031
2227
|
# This is something we can export
|
|
2032
|
-
|
|
2033
|
-
|
|
2228
|
+
# TODO: Do we need to urlencode the parts before sending them to S3?
|
|
2229
|
+
dest_url = "/".join(s.strip("/") for s in [destBucket, baseName])
|
|
2230
|
+
logger.log(log_level, "Saving %s...", dest_url)
|
|
2231
|
+
toil.export_file(
|
|
2034
2232
|
FileID.unpack(file_id_or_contents[len("toilfile:") :]),
|
|
2035
|
-
|
|
2233
|
+
dest_url,
|
|
2036
2234
|
)
|
|
2037
2235
|
# TODO: can a toildir: "file" get here?
|
|
2038
2236
|
else:
|
|
2039
|
-
# We are saving to the filesystem
|
|
2237
|
+
# We are saving to the filesystem.
|
|
2238
|
+
dest_url = "file://" + quote(p.target)
|
|
2239
|
+
|
|
2240
|
+
# We only really need export_file for actual files.
|
|
2040
2241
|
if not os.path.exists(p.target) and p.type in [
|
|
2041
2242
|
"Directory",
|
|
2042
2243
|
"WritableDirectory",
|
|
2043
2244
|
]:
|
|
2044
2245
|
os.makedirs(p.target)
|
|
2045
|
-
if
|
|
2046
|
-
if p.resolved.startswith("
|
|
2047
|
-
# We can actually export this
|
|
2048
|
-
os.makedirs(os.path.dirname(p.target), exist_ok=True)
|
|
2049
|
-
toil.exportFile(
|
|
2050
|
-
FileID.unpack(p.resolved[len("toilfile:") :]),
|
|
2051
|
-
"file://" + p.target,
|
|
2052
|
-
)
|
|
2053
|
-
elif p.resolved.startswith("/"):
|
|
2246
|
+
if p.type in ["File", "WritableFile"]:
|
|
2247
|
+
if p.resolved.startswith("/"):
|
|
2054
2248
|
# Probably staging and bypassing file store. Just copy.
|
|
2249
|
+
logger.log(log_level, "Saving %s...", dest_url)
|
|
2055
2250
|
os.makedirs(os.path.dirname(p.target), exist_ok=True)
|
|
2056
2251
|
shutil.copyfile(p.resolved, p.target)
|
|
2057
|
-
|
|
2058
|
-
|
|
2252
|
+
else:
|
|
2253
|
+
uri = p.resolved
|
|
2254
|
+
if not uri.startswith("toilfile:"):
|
|
2255
|
+
# We need to import so we can export
|
|
2256
|
+
uri = (
|
|
2257
|
+
"toilfile:"
|
|
2258
|
+
+ toil.import_file(uri, symlink=False).pack()
|
|
2259
|
+
)
|
|
2260
|
+
|
|
2261
|
+
# Actually export from the file store
|
|
2262
|
+
logger.log(log_level, "Saving %s...", dest_url)
|
|
2263
|
+
os.makedirs(os.path.dirname(p.target), exist_ok=True)
|
|
2264
|
+
toil.export_file(
|
|
2265
|
+
FileID.unpack(uri[len("toilfile:") :]),
|
|
2266
|
+
dest_url,
|
|
2267
|
+
)
|
|
2268
|
+
if p.type in [
|
|
2059
2269
|
"CreateFile",
|
|
2060
2270
|
"CreateWritableFile",
|
|
2061
2271
|
]:
|
|
2062
2272
|
# We just need to make a file with particular contents
|
|
2273
|
+
logger.log(log_level, "Saving %s...", dest_url)
|
|
2063
2274
|
os.makedirs(os.path.dirname(p.target), exist_ok=True)
|
|
2064
2275
|
with open(p.target, "wb") as n:
|
|
2065
2276
|
n.write(p.resolved.encode("utf-8"))
|
|
@@ -2078,6 +2289,7 @@ def toilStageFiles(
|
|
|
2078
2289
|
# Make the location point to the place we put this thing on the
|
|
2079
2290
|
# local filesystem.
|
|
2080
2291
|
f["location"] = schema_salad.ref_resolver.file_uri(mapped_location.target)
|
|
2292
|
+
f["path"] = mapped_location.target
|
|
2081
2293
|
|
|
2082
2294
|
if "contents" in f:
|
|
2083
2295
|
del f["contents"]
|
|
@@ -2182,7 +2394,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2182
2394
|
|
|
2183
2395
|
accelerators: Optional[List[AcceleratorRequirement]] = None
|
|
2184
2396
|
if req.get("cudaDeviceCount", 0) > 0:
|
|
2185
|
-
# There's a CUDARequirement
|
|
2397
|
+
# There's a CUDARequirement, which cwltool processed for us
|
|
2186
2398
|
# TODO: How is cwltool deciding what value to use between min and max?
|
|
2187
2399
|
accelerators = [
|
|
2188
2400
|
{
|
|
@@ -2192,14 +2404,62 @@ class CWLJob(CWLNamedJob):
|
|
|
2192
2404
|
}
|
|
2193
2405
|
]
|
|
2194
2406
|
|
|
2407
|
+
# cwltool doesn't handle http://arvados.org/cwl#UsePreemptible as part
|
|
2408
|
+
# of its resource logic so we have to do it manually.
|
|
2409
|
+
#
|
|
2410
|
+
# Note that according to
|
|
2411
|
+
# https://github.com/arvados/arvados/blob/48a0d575e6de34bcda91c489e4aa98df291a8cca/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml#L345
|
|
2412
|
+
# this can only be a literal boolean! cwltool doesn't want to evaluate
|
|
2413
|
+
# expressions in the value for us like it does for CUDARequirement
|
|
2414
|
+
# which has a schema which allows for CWL expressions:
|
|
2415
|
+
# https://github.com/common-workflow-language/cwltool/blob/1573509eea2faa3cd1dc959224e52ff1d796d3eb/cwltool/extensions.yml#L221
|
|
2416
|
+
#
|
|
2417
|
+
# By default we have default preemptibility.
|
|
2418
|
+
preemptible: Optional[bool] = None
|
|
2419
|
+
preemptible_req, _ = tool.get_requirement(
|
|
2420
|
+
"http://arvados.org/cwl#UsePreemptible"
|
|
2421
|
+
)
|
|
2422
|
+
if preemptible_req:
|
|
2423
|
+
if "usePreemptible" not in preemptible_req:
|
|
2424
|
+
# If we have a requirement it has to have the value
|
|
2425
|
+
raise ValidationException(
|
|
2426
|
+
f"Unacceptable syntax for http://arvados.org/cwl#UsePreemptible: "
|
|
2427
|
+
f"expected key usePreemptible but got: {preemptible_req}"
|
|
2428
|
+
)
|
|
2429
|
+
parsed_value = preemptible_req["usePreemptible"]
|
|
2430
|
+
if isinstance(parsed_value, str) and (
|
|
2431
|
+
"$(" in parsed_value or "${" in parsed_value
|
|
2432
|
+
):
|
|
2433
|
+
# Looks like they tried to use an expression
|
|
2434
|
+
raise ValidationException(
|
|
2435
|
+
f"Unacceptable value for usePreemptible in http://arvados.org/cwl#UsePreemptible: "
|
|
2436
|
+
f"expected true or false but got what appears to be an expression: {repr(parsed_value)}. "
|
|
2437
|
+
f"Note that expressions are not allowed here by Arvados's schema."
|
|
2438
|
+
)
|
|
2439
|
+
if not isinstance(parsed_value, bool):
|
|
2440
|
+
# If we have a value it has to be a bool flag
|
|
2441
|
+
raise ValidationException(
|
|
2442
|
+
f"Unacceptable value for usePreemptible in http://arvados.org/cwl#UsePreemptible: "
|
|
2443
|
+
f"expected true or false but got: {repr(parsed_value)}"
|
|
2444
|
+
)
|
|
2445
|
+
preemptible = parsed_value
|
|
2446
|
+
|
|
2447
|
+
# We always need space for the temporary files for the job
|
|
2448
|
+
total_disk = cast(int, req["tmpdirSize"]) * (2**20)
|
|
2449
|
+
if not getattr(runtime_context, "bypass_file_store", False):
|
|
2450
|
+
# If using the Toil file store, we also need space for the output
|
|
2451
|
+
# files, which may need to be stored locally and copied off the
|
|
2452
|
+
# node.
|
|
2453
|
+
total_disk += cast(int, req["outdirSize"]) * (2**20)
|
|
2454
|
+
# If not using the Toil file store, output files just go directly to
|
|
2455
|
+
# their final homes their space doesn't need to be accounted per-job.
|
|
2456
|
+
|
|
2195
2457
|
super().__init__(
|
|
2196
2458
|
cores=req["cores"],
|
|
2197
2459
|
memory=int(req["ram"] * (2**20)),
|
|
2198
|
-
disk=int(
|
|
2199
|
-
(cast(int, req["tmpdirSize"]) * (2**20))
|
|
2200
|
-
+ (cast(int, req["outdirSize"]) * (2**20))
|
|
2201
|
-
),
|
|
2460
|
+
disk=int(total_disk),
|
|
2202
2461
|
accelerators=accelerators,
|
|
2462
|
+
preemptible=preemptible,
|
|
2203
2463
|
tool_id=self.cwltool.tool["id"],
|
|
2204
2464
|
parent_name=parent_name,
|
|
2205
2465
|
local=isinstance(tool, cwltool.command_line_tool.ExpressionTool),
|
|
@@ -2265,7 +2525,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2265
2525
|
cwllogger.removeHandler(defaultStreamHandler)
|
|
2266
2526
|
cwllogger.setLevel(logger.getEffectiveLevel())
|
|
2267
2527
|
|
|
2268
|
-
logger.debug("Loaded order
|
|
2528
|
+
logger.debug("Loaded order:\n%s", self.cwljob)
|
|
2269
2529
|
|
|
2270
2530
|
cwljob = resolve_dict_w_promises(self.cwljob, file_store)
|
|
2271
2531
|
|
|
@@ -2395,6 +2655,8 @@ class CWLJob(CWLNamedJob):
|
|
|
2395
2655
|
|
|
2396
2656
|
logger.debug("Emitting output: %s", output)
|
|
2397
2657
|
|
|
2658
|
+
file_store.log_to_leader(f"CWL step complete: {runtime_context.name}")
|
|
2659
|
+
|
|
2398
2660
|
# metadata[process_uuid] = {
|
|
2399
2661
|
# 'started_at': started_at,
|
|
2400
2662
|
# 'ended_at': ended_at,
|
|
@@ -2782,6 +3044,10 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
2782
3044
|
if self.conditional.is_false(cwljob):
|
|
2783
3045
|
return self.conditional.skipped_outputs()
|
|
2784
3046
|
|
|
3047
|
+
# Apply default values set in the workflow
|
|
3048
|
+
fs_access = ToilFsAccess(self.runtime_context.basedir, file_store=file_store)
|
|
3049
|
+
fill_in_defaults(self.cwlwf.tool["inputs"], cwljob, fs_access)
|
|
3050
|
+
|
|
2785
3051
|
# `promises` dict
|
|
2786
3052
|
# from: each parameter (workflow input or step output)
|
|
2787
3053
|
# that may be used as a "source" for a step input workflow output
|
|
@@ -2844,6 +3110,10 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
2844
3110
|
get_container_engine(self.runtime_context),
|
|
2845
3111
|
)
|
|
2846
3112
|
|
|
3113
|
+
logger.debug(
|
|
3114
|
+
"Value will come from %s", jobobj.get(key, None)
|
|
3115
|
+
)
|
|
3116
|
+
|
|
2847
3117
|
conditional = Conditional(
|
|
2848
3118
|
expression=step.tool.get("when"),
|
|
2849
3119
|
outputs=step.tool["out"],
|
|
@@ -3042,8 +3312,8 @@ def scan_for_unsupported_requirements(
|
|
|
3042
3312
|
:param tool: The CWL tool to check for unsupported requirements.
|
|
3043
3313
|
|
|
3044
3314
|
:param bypass_file_store: True if the Toil file store is not being used to
|
|
3045
|
-
|
|
3046
|
-
|
|
3315
|
+
transport files between nodes, and raw origin node file:// URIs are exposed
|
|
3316
|
+
to tools instead.
|
|
3047
3317
|
|
|
3048
3318
|
"""
|
|
3049
3319
|
|
|
@@ -3080,24 +3350,31 @@ def determine_load_listing(
|
|
|
3080
3350
|
DIRECTORY_NAME is any variable name) set to one of the following three
|
|
3081
3351
|
options:
|
|
3082
3352
|
|
|
3083
|
-
|
|
3084
|
-
|
|
3353
|
+
1. no_listing: DIRECTORY_NAME.listing will be undefined.
|
|
3354
|
+
e.g.
|
|
3355
|
+
|
|
3356
|
+
inputs.DIRECTORY_NAME.listing == unspecified
|
|
3085
3357
|
|
|
3086
|
-
|
|
3087
|
-
|
|
3088
|
-
|
|
3089
|
-
inputs.DIRECTORY_NAME.listing[0].listing == undefined
|
|
3090
|
-
inputs.DIRECTORY_NAME.listing.length == # of items in directory
|
|
3358
|
+
2. shallow_listing: DIRECTORY_NAME.listing will return a list one level
|
|
3359
|
+
deep of DIRECTORY_NAME's contents.
|
|
3360
|
+
e.g.
|
|
3091
3361
|
|
|
3092
|
-
|
|
3093
|
-
|
|
3094
|
-
|
|
3095
|
-
inputs.DIRECTORY_NAME.listing[0].listing == [items
|
|
3096
|
-
in subdirectory if it exists and is the first item listed]
|
|
3097
|
-
inputs.DIRECTORY_NAME.listing.length == # of items in directory
|
|
3362
|
+
inputs.DIRECTORY_NAME.listing == [items in directory]
|
|
3363
|
+
inputs.DIRECTORY_NAME.listing[0].listing == undefined
|
|
3364
|
+
inputs.DIRECTORY_NAME.listing.length == # of items in directory
|
|
3098
3365
|
|
|
3099
|
-
|
|
3100
|
-
|
|
3366
|
+
3. deep_listing: DIRECTORY_NAME.listing will return a list of the entire
|
|
3367
|
+
contents of DIRECTORY_NAME.
|
|
3368
|
+
e.g.
|
|
3369
|
+
|
|
3370
|
+
inputs.DIRECTORY_NAME.listing == [items in directory]
|
|
3371
|
+
inputs.DIRECTORY_NAME.listing[0].listing == [items in subdirectory
|
|
3372
|
+
if it exists and is the first item listed]
|
|
3373
|
+
inputs.DIRECTORY_NAME.listing.length == # of items in directory
|
|
3374
|
+
|
|
3375
|
+
See
|
|
3376
|
+
https://www.commonwl.org/v1.1/CommandLineTool.html#LoadListingRequirement
|
|
3377
|
+
and https://www.commonwl.org/v1.1/CommandLineTool.html#LoadListingEnum
|
|
3101
3378
|
|
|
3102
3379
|
DIRECTORY_NAME.listing should be determined first from loadListing.
|
|
3103
3380
|
If that's not specified, from LoadListingRequirement.
|
|
@@ -3209,6 +3486,20 @@ usage_message = "\n\n" + textwrap.dedent(
|
|
|
3209
3486
|
]
|
|
3210
3487
|
)
|
|
3211
3488
|
|
|
3489
|
+
def get_options(args: List[str]) -> Namespace:
|
|
3490
|
+
"""
|
|
3491
|
+
Parse given args and properly add non-Toil arguments into the cwljob of the Namespace.
|
|
3492
|
+
:param args: List of args from command line
|
|
3493
|
+
:return: options namespace
|
|
3494
|
+
"""
|
|
3495
|
+
parser = ArgParser()
|
|
3496
|
+
addOptions(parser, jobstore_as_flag=True, cwl=True)
|
|
3497
|
+
options: Namespace
|
|
3498
|
+
options, cwl_options = parser.parse_known_args(args)
|
|
3499
|
+
options.cwljob.extend(cwl_options)
|
|
3500
|
+
|
|
3501
|
+
return options
|
|
3502
|
+
|
|
3212
3503
|
|
|
3213
3504
|
def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
3214
3505
|
"""Run the main loop for toil-cwl-runner."""
|
|
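A minimal usage sketch of the new `get_options` helper (assuming it is imported from `toil.cwl.cwltoil`); the job store path, workflow name, and tool flag below are placeholders. Arguments the combined Toil/CWL parser does not recognise are returned by `parse_known_args` and appended to `options.cwljob`, so they still reach the CWL job object.

```python
from toil.cwl.cwltoil import get_options  # assumed import location

# Placeholder command line: a Toil flag, the workflow, and a tool-specific
# flag that the Toil parser does not define.
options = get_options(
    ["--jobStore", "./example-jobstore", "workflow.cwl", "--reads", "reads.fastq"]
)

# Flags Toil knows about become normal attributes on the namespace...
print(options.jobStore)
# ...while everything else ends up in the cwljob list.
print(options.cwljob)
```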
```diff
@@ -3218,334 +3509,20 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
     if args is None:
         args = sys.argv[1:]

-
-    config.disableChaining = True
-    config.cwl = True
-    parser = argparse.ArgumentParser()
-    addOptions(parser, config, jobstore_as_flag=True)
-    parser.add_argument("cwltool", type=str)
-    parser.add_argument("cwljob", nargs=argparse.REMAINDER)
-
-    parser.add_argument("--not-strict", action="store_true")
-    parser.add_argument(
-        "--enable-dev",
-        action="store_true",
-        help="Enable loading and running development versions of CWL",
-    )
-    parser.add_argument(
-        "--enable-ext",
-        action="store_true",
-        help="Enable loading and running 'cwltool:' extensions to the CWL standards.",
-        default=False,
-    )
-    parser.add_argument("--quiet", dest="quiet", action="store_true", default=False)
-    parser.add_argument("--basedir", type=str)  # TODO: Might be hard-coded?
-    parser.add_argument("--outdir", type=str, default=os.getcwd())
-    parser.add_argument("--version", action="version", version=baseVersion)
-    parser.add_argument(
-        "--log-dir",
-        type=str,
-        default="",
-        help="Log your tools stdout/stderr to this location outside of container",
-    )
-    dockergroup = parser.add_mutually_exclusive_group()
-    dockergroup.add_argument(
-        "--user-space-docker-cmd",
-        help="(Linux/OS X only) Specify a user space docker command (like "
-        "udocker or dx-docker) that will be used to call 'pull' and 'run'",
-    )
-    dockergroup.add_argument(
-        "--singularity",
-        action="store_true",
-        default=False,
-        help="Use Singularity runtime for running containers. "
-        "Requires Singularity v2.6.1+ and Linux with kernel version v3.18+ or "
-        "with overlayfs support backported.",
-    )
-    dockergroup.add_argument(
-        "--podman",
-        action="store_true",
-        default=False,
-        help="Use Podman runtime for running containers. ",
-    )
-    dockergroup.add_argument(
-        "--no-container",
-        action="store_true",
-        help="Do not execute jobs in a "
-        "Docker container, even when `DockerRequirement` "
-        "is specified under `hints`.",
-    )
-    dockergroup.add_argument(
-        "--leave-container",
-        action="store_false",
-        default=True,
-        help="Do not delete Docker container used by jobs after they exit",
-        dest="rm_container",
-    )
-    extra_dockergroup = parser.add_argument_group()
-    extra_dockergroup.add_argument(
-        "--custom-net",
-        help="Specify docker network name to pass to docker run command",
-    )
-    cidgroup = parser.add_argument_group(
-        "Options for recording the Docker container identifier into a file."
-    )
-    cidgroup.add_argument(
-        # Disabled as containerid is now saved by default
-        "--record-container-id",
-        action="store_true",
-        default=False,
-        help=argparse.SUPPRESS,
-        dest="record_container_id",
-    )
-
-    cidgroup.add_argument(
-        "--cidfile-dir",
-        type=str,
-        help="Store the Docker container ID into a file in the specified directory.",
-        default=None,
-        dest="cidfile_dir",
-    )
-
-    cidgroup.add_argument(
-        "--cidfile-prefix",
-        type=str,
-        help="Specify a prefix to the container ID filename. "
-        "Final file name will be followed by a timestamp. "
-        "The default is no prefix.",
-        default=None,
-        dest="cidfile_prefix",
-    )
-
-    parser.add_argument(
-        "--preserve-environment",
-        type=str,
-        nargs="+",
-        help="Preserve specified environment variables when running"
-        " CommandLineTools",
-        metavar=("VAR1 VAR2"),
-        default=("PATH",),
-        dest="preserve_environment",
-    )
-    parser.add_argument(
-        "--preserve-entire-environment",
-        action="store_true",
-        help="Preserve all environment variable when running CommandLineTools.",
-        default=False,
-        dest="preserve_entire_environment",
-    )
-    parser.add_argument(
-        "--destBucket",
-        type=str,
-        help="Specify a cloud bucket endpoint for output files.",
-    )
-    parser.add_argument("--beta-dependency-resolvers-configuration", default=None)
-    parser.add_argument("--beta-dependencies-directory", default=None)
-    parser.add_argument("--beta-use-biocontainers", default=None, action="store_true")
-    parser.add_argument("--beta-conda-dependencies", default=None, action="store_true")
-    parser.add_argument(
-        "--tmpdir-prefix",
-        type=str,
-        help="Path prefix for temporary directories",
-        default=DEFAULT_TMPDIR_PREFIX,
-    )
-    parser.add_argument(
-        "--tmp-outdir-prefix",
-        type=str,
-        help="Path prefix for intermediate output directories",
-        default=DEFAULT_TMPDIR_PREFIX,
-    )
-    parser.add_argument(
-        "--force-docker-pull",
-        action="store_true",
-        default=False,
-        dest="force_docker_pull",
-        help="Pull latest docker image even if it is locally present",
-    )
-    parser.add_argument(
-        "--no-match-user",
-        action="store_true",
-        default=False,
-        help="Disable passing the current uid to `docker run --user`",
-    )
-    parser.add_argument(
-        "--no-read-only",
-        action="store_true",
-        default=False,
-        help="Do not set root directory in the container as read-only",
-    )
-    parser.add_argument(
-        "--strict-memory-limit",
-        action="store_true",
-        help="When running with "
-        "software containers and the Docker engine, pass either the "
-        "calculated memory allocation from ResourceRequirements or the "
-        "default of 1 gigabyte to Docker's --memory option.",
-    )
-    parser.add_argument(
-        "--strict-cpu-limit",
-        action="store_true",
-        help="When running with "
-        "software containers and the Docker engine, pass either the "
-        "calculated cpu allocation from ResourceRequirements or the "
-        "default of 1 core to Docker's --cpu option. "
-        "Requires docker version >= v1.13.",
-    )
-    parser.add_argument(
-        "--relax-path-checks",
-        action="store_true",
-        default=False,
-        help="Relax requirements on path names to permit "
-        "spaces and hash characters.",
-        dest="relax_path_checks",
-    )
-    parser.add_argument(
-        "--default-container",
-        help="Specify a default docker container that will be "
-        "used if the workflow fails to specify one.",
-    )
-    parser.add_argument(
-        "--disable-validate",
-        dest="do_validate",
-        action="store_false",
-        default=True,
-        help=argparse.SUPPRESS,
-    )
-    parser.add_argument(
-        "--fast-parser",
-        dest="fast_parser",
-        action="store_true",
-        default=False,
-        help=argparse.SUPPRESS,
-    )
-    checkgroup = parser.add_mutually_exclusive_group()
-    checkgroup.add_argument(
-        "--compute-checksum",
-        action="store_true",
-        default=True,
-        help="Compute checksum of contents while collecting outputs",
-        dest="compute_checksum",
-    )
-    checkgroup.add_argument(
-        "--no-compute-checksum",
-        action="store_false",
-        help="Do not compute checksum of contents while collecting outputs",
-        dest="compute_checksum",
-    )
-
-    parser.add_argument(
-        "--eval-timeout",
-        help="Time to wait for a Javascript expression to evaluate before giving "
-        "an error, default 20s.",
-        type=float,
-        default=20,
-    )
-    parser.add_argument(
-        "--overrides",
-        type=str,
-        default=None,
-        help="Read process requirement overrides from file.",
-    )
-
-    parser.add_argument(
-        "--mpi-config-file",
-        type=str,
-        default=None,
-        help="Platform specific configuration for MPI (parallel "
-        "launcher, its flag etc). See the cwltool README "
-        "section 'Running MPI-based tools' for details of the format: "
-        "https://github.com/common-workflow-language/cwltool#running-mpi-based-tools-that-need-to-be-launched",
-    )
-    parser.add_argument(
-        "--bypass-file-store",
-        action="store_true",
-        default=False,
-        help="Do not use Toil's file store and assume all "
-        "paths are accessible in place from all nodes.",
-        dest="bypass_file_store",
-    )
-    parser.add_argument(
-        "--disable-streaming",
-        action="store_true",
-        default=False,
-        help="Disable file streaming for files that have 'streamable' flag True",
-        dest="disable_streaming",
-    )
-
-    provgroup = parser.add_argument_group(
-        "Options for recording provenance information of the execution"
-    )
-    provgroup.add_argument(
-        "--provenance",
-        help="Save provenance to specified folder as a "
-        "Research Object that captures and aggregates "
-        "workflow execution and data products.",
-        type=str,
-    )
-
-    provgroup.add_argument(
-        "--enable-user-provenance",
-        default=False,
-        action="store_true",
-        help="Record user account info as part of provenance.",
-        dest="user_provenance",
-    )
-    provgroup.add_argument(
-        "--disable-user-provenance",
-        default=False,
-        action="store_false",
-        help="Do not record user account info in provenance.",
-        dest="user_provenance",
-    )
-    provgroup.add_argument(
-        "--enable-host-provenance",
-        default=False,
-        action="store_true",
-        help="Record host info as part of provenance.",
-        dest="host_provenance",
-    )
-    provgroup.add_argument(
-        "--disable-host-provenance",
-        default=False,
-        action="store_false",
-        help="Do not record host info in provenance.",
-        dest="host_provenance",
-    )
-    provgroup.add_argument(
-        "--orcid",
-        help="Record user ORCID identifier as part of "
-        "provenance, e.g. https://orcid.org/0000-0002-1825-0097 "
-        "or 0000-0002-1825-0097. Alternatively the environment variable "
-        "ORCID may be set.",
-        dest="orcid",
-        default=os.environ.get("ORCID", ""),
-        type=str,
-    )
-    provgroup.add_argument(
-        "--full-name",
-        help="Record full name of user as part of provenance, "
-        "e.g. Josiah Carberry. You may need to use shell quotes to preserve "
-        "spaces. Alternatively the environment variable CWL_FULL_NAME may "
-        "be set.",
-        dest="cwl_full_name",
-        default=os.environ.get("CWL_FULL_NAME", ""),
-        type=str,
-    )
-
-    # Parse all the options once.
-    options = parser.parse_args(args)
+    options = get_options(args)

     # Do cwltool setup
     cwltool.main.setup_schema(args=options, custom_schema_callback=None)
+    tmpdir_prefix = options.tmpdir_prefix = options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX

     # We need a workdir for the CWL runtime contexts.
-    if
+    if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX:
         # if tmpdir_prefix is not the default value, move
         # workdir and the default job store under it
-        workdir = cwltool.utils.create_tmp_dir(
+        workdir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
     else:
         # Use a directory in the default tmpdir
-        workdir =
+        workdir = mkdtemp()
         # Make sure workdir doesn't exist so it can be a job store
         os.rmdir(workdir)

```
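For reference (not part of the diff), a condensed sketch of the workdir selection logic introduced above; the `pick_workdir` name is invented for illustration, while `create_tmp_dir` and `DEFAULT_TMPDIR_PREFIX` are the cwltool utilities the new code relies on.

```python
from tempfile import mkdtemp

import cwltool.utils
from cwltool.utils import DEFAULT_TMPDIR_PREFIX


def pick_workdir(tmpdir_prefix: str) -> str:
    """Choose a scratch directory the same way the rewritten main() does."""
    if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX:
        # An explicit --tmpdir-prefix moves the workdir (and the default job
        # store) under the requested location.
        return cwltool.utils.create_tmp_dir(tmpdir_prefix)
    # Otherwise use a fresh directory under the system default tmpdir.
    return mkdtemp()
```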
```diff
@@ -3562,13 +3539,13 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
     options.do_validate = True
     options.pack = False
     options.print_subgraph = False
-    if
+    if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.workDir is None:
         # We need to override workDir because by default Toil will pick
         # somewhere under the system temp directory if unset, ignoring
         # --tmpdir-prefix.
         #
         # If set, workDir needs to exist, so we directly use the prefix
-        options.workDir = cwltool.utils.create_tmp_dir(
+        options.workDir = cwltool.utils.create_tmp_dir(tmpdir_prefix)

     if options.batchSystem == "kubernetes":
         # Containers under Kubernetes can only run in Singularity
@@ -3585,8 +3562,10 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:

     logger.debug(f"Final job store {options.jobStore} and workDir {options.workDir}")

-    outdir = os.path.abspath(options.outdir)
-    tmp_outdir_prefix = os.path.abspath(
+    outdir = os.path.abspath(options.outdir or os.getcwd())
+    tmp_outdir_prefix = os.path.abspath(
+        options.tmp_outdir_prefix or DEFAULT_TMPDIR_PREFIX
+    )

     fileindex: Dict[str, str] = {}
     existing: Dict[str, str] = {}
@@ -3604,6 +3583,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
         find_default_container, options
     )
     runtime_context.workdir = workdir  # type: ignore[attr-defined]
+    runtime_context.outdir = outdir
     runtime_context.move_outputs = "leave"
     runtime_context.rm_tmpdir = False
     runtime_context.streaming_allowed = not options.disable_streaming
@@ -3621,12 +3601,16 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
     # Otherwise, if it takes a File with loadContents from a URL, we won't
     # be able to load the contents when we need to.
     runtime_context.make_fs_access = ToilFsAccess
+    if options.reference_inputs and options.bypass_file_store:
+        # We can't do both of these at the same time.
+        logger.error("Cannot reference inputs when bypassing the file store")
+        return 1

     loading_context = cwltool.main.setup_loadingContext(None, runtime_context, options)

     if options.provenance:
         research_obj = cwltool.cwlprov.ro.ResearchObject(
-            temp_prefix_ro=
+            temp_prefix_ro=tmp_outdir_prefix,
             orcid=options.orcid,
             full_name=options.cwl_full_name,
             fsaccess=runtime_context.make_fs_access(""),
@@ -3701,7 +3685,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
     loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
         loading_context, workflowobj, uri
     )
-
+    if not loading_context.loader:
+        raise RuntimeError("cwltool loader is not set.")
     processobj, metadata = loading_context.loader.resolve_ref(uri)
     processobj = cast(Union[CommentedMap, CommentedSeq], processobj)

@@ -3748,10 +3733,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
         )
         raise

-    #
-    #
-    fs_access = ToilFsAccess(options.basedir)
-    fill_in_defaults(tool.tool["inputs"], initialized_job_order, fs_access)
+    # Leave the defaults un-filled in the top-level order. The tool or
+    # workflow will fill them when it runs

     for inp in tool.tool["inputs"]:
         if (
@@ -3806,9 +3789,11 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
         Callable[[str], FileID],
         functools.partial(toil.import_file, symlink=True),
     )
-
+
     # Import all the input files, some of which may be missing optional
     # files.
+    logger.info("Importing input files...")
+    fs_access = ToilFsAccess(options.basedir)
     import_files(
         file_import_function,
         fs_access,
@@ -3816,11 +3801,14 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
         existing,
         initialized_job_order,
         skip_broken=True,
+        skip_remote=options.reference_inputs,
         bypass_file_store=options.bypass_file_store,
+        log_level=logging.INFO,
     )
     # Import all the files associated with tools (binaries, etc.).
     # Not sure why you would have an optional secondary file here, but
     # the spec probably needs us to support them.
+    logger.info("Importing tool-associated files...")
     visitSteps(
         tool,
         functools.partial(
@@ -3830,7 +3818,9 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
             fileindex,
             existing,
             skip_broken=True,
+            skip_remote=options.reference_inputs,
             bypass_file_store=options.bypass_file_store,
+            log_level=logging.INFO,
         ),
     )

@@ -3843,7 +3833,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
     # were required.
     rm_unprocessed_secondary_files(param_value)

-    logger.
+    logger.info("Creating root job")
+    logger.debug("Root tool: %s", tool)
     try:
         wf1, _ = makeJob(
             tool=tool,
@@ -3856,6 +3847,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
             logging.error(err)
             return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
         wf1.cwljob = initialized_job_order
+        logger.info("Starting workflow")
         try:
             outobj = toil.start(wf1)
         except FailedJobsException as err:
@@ -3871,13 +3863,20 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:

         # Now the workflow has completed. We need to make sure the outputs (and
         # inputs) end up where the user wants them to be.
-
+        logger.info("Collecting workflow outputs...")
         outobj = resolve_dict_w_promises(outobj)

         # Stage files. Specify destination bucket if specified in CLI
         # options. If destination bucket not passed in,
         # options.destBucket's value will be None.
-        toilStageFiles(
+        toilStageFiles(
+            toil,
+            outobj,
+            outdir,
+            destBucket=options.destBucket,
+            log_level=logging.INFO
+        )
+        logger.info("Stored workflow outputs")

         if runtime_context.research_obj is not None:
             cwltool.cwlprov.writablebagfile.create_job(
@@ -3904,7 +3903,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
                 ("File",),
                 functools.partial(add_sizes, runtime_context.make_fs_access("")),
             )
-
+            if not document_loader:
+                raise RuntimeError("cwltool loader is not set.")
             prov_dependencies = cwltool.main.prov_deps(
                 workflowobj, document_loader, uri
             )
@@ -3914,6 +3914,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
     )

     if not options.destBucket and options.compute_checksum:
+        logger.info("Computing output file checksums...")
         visit_class(
             outobj,
             ("File",),
@@ -3922,12 +3923,14 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:

     visit_class(outobj, ("File",), MutationManager().unset_generation)
     stdout.write(json.dumps(outobj, indent=4, default=str))
+    stdout.write("\n")
+    logger.info("CWL run complete!")

     return 0


 def find_default_container(
-    args:
+    args: Namespace, builder: cwltool.builder.Builder
 ) -> Optional[str]:
     """Find the default constructor by consulting a Toil.options object."""
     if args.default_container:
```