toil 6.1.0a1__py3-none-any.whl → 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. toil/__init__.py +1 -232
  2. toil/batchSystems/abstractBatchSystem.py +41 -17
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +79 -65
  4. toil/batchSystems/awsBatch.py +8 -8
  5. toil/batchSystems/cleanup_support.py +7 -3
  6. toil/batchSystems/contained_executor.py +4 -5
  7. toil/batchSystems/gridengine.py +1 -1
  8. toil/batchSystems/htcondor.py +5 -5
  9. toil/batchSystems/kubernetes.py +25 -11
  10. toil/batchSystems/local_support.py +3 -3
  11. toil/batchSystems/lsf.py +9 -9
  12. toil/batchSystems/mesos/batchSystem.py +4 -4
  13. toil/batchSystems/mesos/executor.py +3 -2
  14. toil/batchSystems/options.py +9 -0
  15. toil/batchSystems/singleMachine.py +11 -10
  16. toil/batchSystems/slurm.py +129 -16
  17. toil/batchSystems/torque.py +1 -1
  18. toil/bus.py +45 -3
  19. toil/common.py +56 -31
  20. toil/cwl/cwltoil.py +442 -371
  21. toil/deferred.py +1 -1
  22. toil/exceptions.py +1 -1
  23. toil/fileStores/abstractFileStore.py +69 -20
  24. toil/fileStores/cachingFileStore.py +6 -22
  25. toil/fileStores/nonCachingFileStore.py +6 -15
  26. toil/job.py +270 -86
  27. toil/jobStores/abstractJobStore.py +37 -31
  28. toil/jobStores/aws/jobStore.py +280 -218
  29. toil/jobStores/aws/utils.py +60 -31
  30. toil/jobStores/conftest.py +2 -2
  31. toil/jobStores/fileJobStore.py +3 -3
  32. toil/jobStores/googleJobStore.py +3 -4
  33. toil/leader.py +89 -38
  34. toil/lib/aws/__init__.py +26 -10
  35. toil/lib/aws/iam.py +2 -2
  36. toil/lib/aws/session.py +62 -22
  37. toil/lib/aws/utils.py +73 -37
  38. toil/lib/conversions.py +24 -1
  39. toil/lib/ec2.py +118 -69
  40. toil/lib/expando.py +1 -1
  41. toil/lib/generatedEC2Lists.py +8 -8
  42. toil/lib/io.py +42 -4
  43. toil/lib/misc.py +1 -3
  44. toil/lib/resources.py +57 -16
  45. toil/lib/retry.py +12 -5
  46. toil/lib/threading.py +29 -14
  47. toil/lib/throttle.py +1 -1
  48. toil/options/common.py +31 -30
  49. toil/options/wdl.py +5 -0
  50. toil/provisioners/__init__.py +9 -3
  51. toil/provisioners/abstractProvisioner.py +12 -2
  52. toil/provisioners/aws/__init__.py +20 -15
  53. toil/provisioners/aws/awsProvisioner.py +406 -329
  54. toil/provisioners/gceProvisioner.py +2 -2
  55. toil/provisioners/node.py +13 -5
  56. toil/server/app.py +1 -1
  57. toil/statsAndLogging.py +93 -23
  58. toil/test/__init__.py +27 -12
  59. toil/test/batchSystems/batchSystemTest.py +40 -33
  60. toil/test/batchSystems/batch_system_plugin_test.py +79 -0
  61. toil/test/batchSystems/test_slurm.py +22 -7
  62. toil/test/cactus/__init__.py +0 -0
  63. toil/test/cactus/test_cactus_integration.py +58 -0
  64. toil/test/cwl/cwlTest.py +245 -236
  65. toil/test/cwl/seqtk_seq.cwl +1 -1
  66. toil/test/docs/scriptsTest.py +11 -14
  67. toil/test/jobStores/jobStoreTest.py +40 -54
  68. toil/test/lib/aws/test_iam.py +2 -2
  69. toil/test/lib/test_ec2.py +1 -1
  70. toil/test/options/__init__.py +13 -0
  71. toil/test/options/options.py +37 -0
  72. toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
  73. toil/test/provisioners/clusterTest.py +99 -16
  74. toil/test/server/serverTest.py +2 -2
  75. toil/test/src/autoDeploymentTest.py +1 -1
  76. toil/test/src/dockerCheckTest.py +2 -1
  77. toil/test/src/environmentTest.py +125 -0
  78. toil/test/src/fileStoreTest.py +1 -1
  79. toil/test/src/jobDescriptionTest.py +18 -8
  80. toil/test/src/jobTest.py +1 -1
  81. toil/test/src/realtimeLoggerTest.py +4 -0
  82. toil/test/src/workerTest.py +52 -19
  83. toil/test/utils/toilDebugTest.py +62 -4
  84. toil/test/utils/utilsTest.py +23 -21
  85. toil/test/wdl/wdltoil_test.py +49 -21
  86. toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
  87. toil/toilState.py +68 -9
  88. toil/utils/toilDebugFile.py +1 -1
  89. toil/utils/toilDebugJob.py +153 -26
  90. toil/utils/toilLaunchCluster.py +12 -2
  91. toil/utils/toilRsyncCluster.py +7 -2
  92. toil/utils/toilSshCluster.py +7 -3
  93. toil/utils/toilStats.py +310 -266
  94. toil/utils/toilStatus.py +98 -52
  95. toil/version.py +11 -11
  96. toil/wdl/wdltoil.py +644 -225
  97. toil/worker.py +125 -83
  98. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
  99. toil-7.0.0.dist-info/METADATA +158 -0
  100. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/RECORD +103 -96
  101. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
  102. toil-6.1.0a1.dist-info/METADATA +0 -125
  103. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
  104. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py CHANGED
@@ -29,32 +29,29 @@ import logging
29
29
  import os
30
30
  import pprint
31
31
  import shutil
32
- import socket
33
32
  import stat
34
33
  import sys
35
34
  import textwrap
36
35
  import uuid
37
- from tempfile import NamedTemporaryFile, gettempdir
36
+ from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
38
37
  from threading import Thread
39
- from typing import (
40
- IO,
41
- Any,
42
- Callable,
43
- Dict,
44
- Iterator,
45
- List,
46
- Mapping,
47
- MutableMapping,
48
- MutableSequence,
49
- Optional,
50
- TextIO,
51
- Tuple,
52
- Type,
53
- TypeVar,
54
- Union,
55
- cast,
56
- Sequence,
57
- )
38
+ from typing import (IO,
39
+ Any,
40
+ Callable,
41
+ Dict,
42
+ Iterator,
43
+ List,
44
+ Mapping,
45
+ MutableMapping,
46
+ MutableSequence,
47
+ Optional,
48
+ Sequence,
49
+ TextIO,
50
+ Tuple,
51
+ Type,
52
+ TypeVar,
53
+ Union,
54
+ cast)
58
55
  from urllib.parse import quote, unquote, urlparse, urlsplit
59
56
 
60
57
  import cwl_utils.errors
@@ -68,36 +65,30 @@ import cwltool.load_tool
68
65
  import cwltool.main
69
66
  import cwltool.resolver
70
67
  import schema_salad.ref_resolver
71
- from configargparse import ArgParser, SUPPRESS, Namespace
68
+ from configargparse import ArgParser, Namespace
72
69
  from cwltool.loghandler import _logger as cwllogger
73
70
  from cwltool.loghandler import defaultStreamHandler
74
71
  from cwltool.mpi import MpiConfig
75
72
  from cwltool.mutation import MutationManager
76
73
  from cwltool.pathmapper import MapperEnt, PathMapper
77
- from cwltool.process import (
78
- Process,
79
- add_sizes,
80
- compute_checksums,
81
- fill_in_defaults,
82
- shortname,
83
- )
74
+ from cwltool.process import (Process,
75
+ add_sizes,
76
+ compute_checksums,
77
+ fill_in_defaults,
78
+ shortname)
84
79
  from cwltool.secrets import SecretStore
85
- from cwltool.software_requirements import (
86
- DependenciesConfiguration,
87
- get_container_from_software_requirements,
88
- )
80
+ from cwltool.software_requirements import (DependenciesConfiguration,
81
+ get_container_from_software_requirements)
89
82
  from cwltool.stdfsaccess import StdFsAccess, abspath
90
- from cwltool.utils import (
91
- CWLObjectType,
92
- CWLOutputType,
93
- DirectoryType,
94
- adjustDirObjs,
95
- aslist,
96
- downloadHttpFile,
97
- get_listing,
98
- normalizeFilesDirs,
99
- visit_class,
100
- )
83
+ from cwltool.utils import (CWLObjectType,
84
+ CWLOutputType,
85
+ DirectoryType,
86
+ adjustDirObjs,
87
+ aslist,
88
+ downloadHttpFile,
89
+ get_listing,
90
+ normalizeFilesDirs,
91
+ visit_class)
101
92
  from ruamel.yaml.comments import CommentedMap, CommentedSeq
102
93
  from schema_salad.avro.schema import Names
103
94
  from schema_salad.exceptions import ValidationException
@@ -105,23 +96,24 @@ from schema_salad.ref_resolver import file_uri, uri_file_path
105
96
  from schema_salad.sourceline import SourceLine
106
97
  from typing_extensions import Literal
107
98
 
99
+ from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
108
100
  from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
109
101
  from toil.common import Toil, addOptions
110
102
  from toil.cwl import check_cwltool_version
103
+ from toil.provisioners.clusterScaler import JobTooBigError
111
104
 
112
105
  check_cwltool_version()
113
- from toil.cwl.utils import (
114
- CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
115
- CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
116
- download_structure,
117
- get_from_structure,
118
- visit_cwl_class_and_reduce,
119
- )
106
+ from toil.cwl.utils import (CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
107
+ CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
108
+ download_structure,
109
+ get_from_structure,
110
+ visit_cwl_class_and_reduce)
120
111
  from toil.exceptions import FailedJobsException
121
112
  from toil.fileStores import FileID
122
113
  from toil.fileStores.abstractFileStore import AbstractFileStore
123
114
  from toil.job import AcceleratorRequirement, Job, Promise, Promised, unwrap
124
- from toil.jobStores.abstractJobStore import AbstractJobStore, NoSuchFileException
115
+ from toil.jobStores.abstractJobStore import (AbstractJobStore, NoSuchFileException, LocatorException,
116
+ InvalidImportExportUrlException, UnimplementedURLException)
125
117
  from toil.jobStores.fileJobStore import FileJobStore
126
118
  from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
127
119
  from toil.lib.io import mkdtemp
@@ -1019,6 +1011,24 @@ class ToilSingleJobExecutor(cwltool.executors.SingleJobExecutor):
1019
1011
  class ToilTool:
1020
1012
  """Mixin to hook Toil into a cwltool tool type."""
1021
1013
 
1014
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
1015
+ """
1016
+ Init hook to set up member variables.
1017
+ """
1018
+ super().__init__(*args, **kwargs)
1019
+ # Reserve a spot for the Toil job that ends up executing this tool.
1020
+ self._toil_job: Optional[Job] = None
1021
+ # Remember path mappers we have used so we can interrogate them later to find out what the job mapped.
1022
+ self._path_mappers: List[cwltool.pathmapper.PathMapper] = []
1023
+
1024
+ def connect_toil_job(self, job: Job) -> None:
1025
+ """
1026
+ Attach the Toil tool to the Toil job that is executing it. This allows
1027
+ it to use the Toil job to stop at certain points if debugging flags are
1028
+ set.
1029
+ """
1030
+ self._toil_job = job
1031
+
1022
1032
  def make_path_mapper(
1023
1033
  self,
1024
1034
  reffiles: List[Any],
@@ -1029,12 +1039,12 @@ class ToilTool:
1029
1039
  """Create the appropriate PathMapper for the situation."""
1030
1040
  if getattr(runtimeContext, "bypass_file_store", False):
1031
1041
  # We only need to understand cwltool's supported URIs
1032
- return PathMapper(
1042
+ mapper = PathMapper(
1033
1043
  reffiles, runtimeContext.basedir, stagedir, separateDirs=separateDirs
1034
1044
  )
1035
1045
  else:
1036
1046
  # We need to be able to read from Toil-provided URIs
1037
- return ToilPathMapper(
1047
+ mapper = ToilPathMapper(
1038
1048
  reffiles,
1039
1049
  runtimeContext.basedir,
1040
1050
  stagedir,
@@ -1043,6 +1053,10 @@ class ToilTool:
1043
1053
  streaming_allowed=runtimeContext.streaming_allowed,
1044
1054
  )
1045
1055
 
1056
+ # Remember the path mappers
1057
+ self._path_mappers.append(mapper)
1058
+ return mapper
1059
+
1046
1060
  def __str__(self) -> str:
1047
1061
  """Return string representation of this tool type."""
1048
1062
  return f'{self.__class__.__name__}({repr(getattr(self, "tool", {}).get("id", "???"))})'
@@ -1059,17 +1073,34 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
1059
1073
  name conflicts at the top level of the work directory.
1060
1074
  """
1061
1075
 
1076
+ # Set up the initial work dir with all its files
1062
1077
  super()._initialworkdir(j, builder)
1063
1078
 
1064
1079
  # The initial work dir listing is now in j.generatefiles["listing"]
1065
- # Also j.generatrfiles is a CWL Directory.
1080
+ # Also j.generatefiles is a CWL Directory.
1066
1081
  # So check the initial working directory.
1067
- logger.info("Initial work dir: %s", j.generatefiles)
1082
+ logger.debug("Initial work dir: %s", j.generatefiles)
1068
1083
  ensure_no_collisions(
1069
1084
  j.generatefiles,
1070
1085
  "the job's working directory as specified by the InitialWorkDirRequirement",
1071
1086
  )
1072
1087
 
1088
+ if self._toil_job is not None:
1089
+ # Make a table of all the places we mapped files to when downloading the inputs.
1090
+
1091
+ # We want to hint which host paths and container (if any) paths correspond
1092
+ host_and_job_paths: List[Tuple[str, str]] = []
1093
+
1094
+ for pm in self._path_mappers:
1095
+ for _, mapper_entry in pm.items_exclude_children():
1096
+ # We know that mapper_entry.target as seen by the task is
1097
+ # mapper_entry.resolved on the host.
1098
+ host_and_job_paths.append((mapper_entry.resolved, mapper_entry.target))
1099
+
1100
+ # Notice that we have downloaded our inputs. Explain which files
1101
+ # those are here and what the task will expect to call them.
1102
+ self._toil_job.files_downloaded_hook(host_and_job_paths)
1103
+
1073
1104
 
1074
1105
  class ToilExpressionTool(ToilTool, cwltool.command_line_tool.ExpressionTool):
1075
1106
  """Subclass the cwltool expression tool to provide the custom ToilPathMapper."""
@@ -1092,6 +1123,10 @@ def toil_make_tool(
1092
1123
  return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)
1093
1124
 
1094
1125
 
1126
+ # When a file we want to have is missing, we can give it this sentinel location
1127
+ # URI instead of raising an error right away, in case it is optional.
1128
+ MISSING_FILE = "missing://"
1129
+
1095
1130
  DirectoryContents = Dict[str, Union[str, "DirectoryContents"]]
1096
1131
 
1097
1132
 
@@ -1716,7 +1751,7 @@ def import_files(
1716
1751
  fileindex: Dict[str, str],
1717
1752
  existing: Dict[str, str],
1718
1753
  cwl_object: Optional[CWLObjectType],
1719
- skip_broken: bool = False,
1754
+ mark_broken: bool = False,
1720
1755
  skip_remote: bool = False,
1721
1756
  bypass_file_store: bool = False,
1722
1757
  log_level: int = logging.DEBUG
@@ -1735,10 +1770,10 @@ def import_files(
1735
1770
  Preserves any listing fields.
1736
1771
 
1737
1772
  If a file cannot be found (like if it is an optional secondary file that
1738
- doesn't exist), fails, unless skip_broken is set, in which case it leaves
1739
- the location it was supposed to have been at.
1773
+ doesn't exist), fails, unless mark_broken is set, in which case it applies
1774
+ a sentinel location.
1740
1775
 
1741
- Also does some miscelaneous normalization.
1776
+ Also does some miscellaneous normalization.
1742
1777
 
1743
1778
  :param import_function: The function used to upload a URI and get a
1744
1779
  Toil FileID for it.
@@ -1754,8 +1789,9 @@ def import_files(
1754
1789
 
1755
1790
  :param cwl_object: CWL tool (or workflow order) we are importing files for
1756
1791
 
1757
- :param skip_broken: If True, when files can't be imported because they e.g.
1758
- don't exist, leave their locations alone rather than failing with an error.
1792
+ :param mark_broken: If True, when files can't be imported because they e.g.
1793
+ don't exist, set their locations to MISSING_FILE rather than failing
1794
+ with an error.
1759
1795
 
1760
1796
  :param skip_remote: If True, leave remote URIs in place instead of importing
1761
1797
  files.
@@ -1875,7 +1911,7 @@ def import_files(
1875
1911
 
1876
1912
  # Upload the file itself, which will adjust its location.
1877
1913
  upload_file(
1878
- import_and_log, fileindex, existing, rec, skip_broken=skip_broken, skip_remote=skip_remote
1914
+ import_and_log, fileindex, existing, rec, mark_broken=mark_broken, skip_remote=skip_remote
1879
1915
  )
1880
1916
 
1881
1917
  # Make a record for this file under its name
@@ -1904,7 +1940,7 @@ def import_files(
1904
1940
  contents.update(child_result)
1905
1941
 
1906
1942
  # Upload the directory itself, which will adjust its location.
1907
- upload_directory(rec, contents, skip_broken=skip_broken)
1943
+ upload_directory(rec, contents, mark_broken=mark_broken)
1908
1944
 
1909
1945
  # Show those contents as being under our name in our parent.
1910
1946
  return {cast(str, rec["basename"]): contents}
@@ -1924,7 +1960,7 @@ def import_files(
1924
1960
  def upload_directory(
1925
1961
  directory_metadata: CWLObjectType,
1926
1962
  directory_contents: DirectoryContents,
1927
- skip_broken: bool = False,
1963
+ mark_broken: bool = False,
1928
1964
  ) -> None:
1929
1965
  """
1930
1966
  Upload a Directory object.
@@ -1936,6 +1972,9 @@ def upload_directory(
1936
1972
  Makes sure the directory actually exists, and rewrites its location to be
1937
1973
  something we can use on another machine.
1938
1974
 
1975
+ If mark_broken is set, ignores missing directories and replaces them with
1976
+ directories containing the given (possibly empty) contents.
1977
+
1939
1978
  We can't rely on the directory's listing as visible to the next tool as a
1940
1979
  complete recursive description of the files we will need to present to the
1941
1980
  tool, since some tools require it to be cleared or single-level but still
@@ -1956,8 +1995,8 @@ def upload_directory(
1956
1995
  if location.startswith("file://") and not os.path.isdir(
1957
1996
  schema_salad.ref_resolver.uri_file_path(location)
1958
1997
  ):
1959
- if skip_broken:
1960
- return
1998
+ if mark_broken:
1999
+ logger.debug("Directory %s is missing as a whole", directory_metadata)
1961
2000
  else:
1962
2001
  raise cwl_utils.errors.WorkflowException(
1963
2002
  "Directory is missing: %s" % directory_metadata["location"]
@@ -1979,7 +2018,7 @@ def upload_file(
1979
2018
  fileindex: Dict[str, str],
1980
2019
  existing: Dict[str, str],
1981
2020
  file_metadata: CWLObjectType,
1982
- skip_broken: bool = False,
2021
+ mark_broken: bool = False,
1983
2022
  skip_remote: bool = False
1984
2023
  ) -> None:
1985
2024
  """
@@ -1987,7 +2026,10 @@ def upload_file(
1987
2026
 
1988
2027
  Uploads local files to the Toil file store, and sets their location to a
1989
2028
  reference to the toil file store.
1990
-
2029
+
2030
+ If a file doesn't exist, fails with an error, unless mark_broken is set, in
2031
+ which case the missing file is given a special sentinel location.
2032
+
1991
2033
  Unless skip_remote is set, downloads remote files into the file store and
1992
2034
  sets their locations to references into the file store as well.
1993
2035
  """
@@ -2008,10 +2050,11 @@ def upload_file(
2008
2050
  if location.startswith("file://") and not os.path.isfile(
2009
2051
  schema_salad.ref_resolver.uri_file_path(location)
2010
2052
  ):
2011
- if skip_broken:
2012
- return
2053
+ if mark_broken:
2054
+ logger.debug("File %s is missing", file_metadata)
2055
+ file_metadata["location"] = location = MISSING_FILE
2013
2056
  else:
2014
- raise cwl_utils.errors.WorkflowException("File is missing: %s" % location)
2057
+ raise cwl_utils.errors.WorkflowException("File is missing: %s" % file_metadata)
2015
2058
 
2016
2059
  if location.startswith("file://") or not skip_remote:
2017
2060
  # This is a local file, or we also need to download and re-upload remote files
@@ -2134,7 +2177,7 @@ def toilStageFiles(
2134
2177
  :param destBucket: If set, export to this base URL instead of to the local
2135
2178
  filesystem.
2136
2179
 
2137
- :param log_level: Log each file transfered at the given level.
2180
+ :param log_level: Log each file transferred at the given level.
2138
2181
  """
2139
2182
 
2140
2183
  def _collectDirEntries(
@@ -2614,6 +2657,13 @@ class CWLJob(CWLNamedJob):
2614
2657
  streaming_allowed=runtime_context.streaming_allowed,
2615
2658
  )
2616
2659
 
2660
+ # Collect standard output and standard error somewhere if they don't go to files.
2661
+ # We need to keep two FDs to these because cwltool will close what we give it.
2662
+ default_stdout = TemporaryFile()
2663
+ runtime_context.default_stdout = os.fdopen(os.dup(default_stdout.fileno()), 'wb')
2664
+ default_stderr = TemporaryFile()
2665
+ runtime_context.default_stderr = os.fdopen(os.dup(default_stderr.fileno()), 'wb')
2666
+
2617
2667
  process_uuid = uuid.uuid4() # noqa F841
2618
2668
  started_at = datetime.datetime.now() # noqa F841
2619
2669
 
@@ -2622,13 +2672,39 @@ class CWLJob(CWLNamedJob):
2622
2672
  logger.debug("Running tool %s with order: %s", self.cwltool, self.cwljob)
2623
2673
 
2624
2674
  runtime_context.name = self.description.unitName
2625
- output, status = ToilSingleJobExecutor().execute(
2626
- process=self.cwltool,
2627
- job_order_object=cwljob,
2628
- runtime_context=runtime_context,
2629
- logger=cwllogger,
2630
- )
2631
- ended_at = datetime.datetime.now() # noqa F841
2675
+
2676
+ if isinstance(self.cwltool, ToilTool):
2677
+ # Connect the CWL tool to us so it can call into the Toil job when
2678
+ # it reaches points where we might need to debug it.
2679
+ self.cwltool.connect_toil_job(self)
2680
+
2681
+ status = "did_not_run"
2682
+ try:
2683
+ output, status = ToilSingleJobExecutor().execute(
2684
+ process=self.cwltool,
2685
+ job_order_object=cwljob,
2686
+ runtime_context=runtime_context,
2687
+ logger=cwllogger,
2688
+ )
2689
+ finally:
2690
+ ended_at = datetime.datetime.now() # noqa F841
2691
+
2692
+ # Log any output/error data
2693
+ default_stdout.seek(0, os.SEEK_END)
2694
+ if default_stdout.tell() > 0:
2695
+ default_stdout.seek(0)
2696
+ file_store.log_user_stream(self.description.unitName + '.stdout', default_stdout)
2697
+ if status != "success":
2698
+ default_stdout.seek(0)
2699
+ logger.error("Failed command standard output:\n%s", default_stdout.read().decode("utf-8", errors="replace"))
2700
+ default_stderr.seek(0, os.SEEK_END)
2701
+ if default_stderr.tell():
2702
+ default_stderr.seek(0)
2703
+ file_store.log_user_stream(self.description.unitName + '.stderr', default_stderr)
2704
+ if status != "success":
2705
+ default_stderr.seek(0)
2706
+ logger.error("Failed command standard error:\n%s", default_stderr.read().decode("utf-8", errors="replace"))
2707
+
2632
2708
  if status != "success":
2633
2709
  raise cwl_utils.errors.WorkflowException(status)
2634
2710
 
@@ -3263,9 +3339,8 @@ def filtered_secondary_files(
3263
3339
  but add the resolved fields to the list of unresolved fields so we remove
3264
3340
  them here after the fact.
3265
3341
 
3266
- We keep secondary files using the 'toildir:', or '_:' protocols, or using
3267
- the 'file:' protocol and indicating files or directories that actually
3268
- exist. The 'required' logic seems to be handled deeper in
3342
+ We keep secondary files with anything other than MISSING_FILE as their
3343
+ location. The 'required' logic seems to be handled deeper in
3269
3344
  cwltool.builder.Builder(), and correctly determines which files should be
3270
3345
  imported. Therefore we remove the files here and if this file is SUPPOSED
3271
3346
  to exist, it will still give the appropriate file does not exist error, but
@@ -3280,24 +3355,22 @@ def filtered_secondary_files(
3280
3355
  if ("$(" not in sf_bn) and ("${" not in sf_bn):
3281
3356
  if ("$(" not in sf_loc) and ("${" not in sf_loc):
3282
3357
  intermediate_secondary_files.append(sf)
3358
+ else:
3359
+ logger.debug("Secondary file %s is dropped because it has an uninterpolated location", sf)
3360
+ else:
3361
+ logger.debug("Secondary file %s is dropped because it has an uninterpolated basename", sf)
3283
3362
  # remove secondary files that are not present in the filestore or pointing
3284
3363
  # to existent things on disk
3285
3364
  for sf in intermediate_secondary_files:
3286
3365
  sf_loc = cast(str, sf.get("location", ""))
3287
3366
  if (
3288
- sf_loc.startswith("toilfile:")
3289
- or sf_loc.startswith("toildir:")
3290
- or sf_loc.startswith("_:")
3367
+ sf_loc != MISSING_FILE
3291
3368
  or sf.get("class", "") == "Directory"
3292
3369
  ):
3293
3370
  # Pass imported files, and all Directories
3294
3371
  final_secondary_files.append(sf)
3295
- elif sf_loc.startswith("file:") and os.path.exists(
3296
- schema_salad.ref_resolver.uri_file_path(sf_loc)
3297
- ):
3298
- # Pass things that exist on disk (which we presumably declined to
3299
- # import because we aren't using the file store)
3300
- final_secondary_files.append(sf)
3372
+ else:
3373
+ logger.debug("Secondary file %s is dropped because it is known to be missing", sf)
3301
3374
  return final_secondary_files
3302
3375
 
3303
3376
 
@@ -3352,12 +3425,12 @@ def determine_load_listing(
3352
3425
 
3353
3426
  1. no_listing: DIRECTORY_NAME.listing will be undefined.
3354
3427
  e.g.
3355
-
3428
+
3356
3429
  inputs.DIRECTORY_NAME.listing == unspecified
3357
3430
 
3358
3431
  2. shallow_listing: DIRECTORY_NAME.listing will return a list one level
3359
3432
  deep of DIRECTORY_NAME's contents.
3360
- e.g.
3433
+ e.g.
3361
3434
 
3362
3435
  inputs.DIRECTORY_NAME.listing == [items in directory]
3363
3436
  inputs.DIRECTORY_NAME.listing[0].listing == undefined
@@ -3546,6 +3619,10 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3546
3619
  #
3547
3620
  # If set, workDir needs to exist, so we directly use the prefix
3548
3621
  options.workDir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3622
+ if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.coordination_dir is None:
3623
+ # override coordination_dir as default Toil will pick somewhere else
3624
+ # ignoring --tmpdir_prefix
3625
+ options.coordination_dir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3549
3626
 
3550
3627
  if options.batchSystem == "kubernetes":
3551
3628
  # Containers under Kubernetes can only run in Singularity
@@ -3576,7 +3653,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3576
3653
  dependencies_configuration = DependenciesConfiguration(options)
3577
3654
  job_script_provider = dependencies_configuration
3578
3655
 
3579
- options.default_container = None
3580
3656
  runtime_context = cwltool.context.RuntimeContext(vars(options))
3581
3657
  runtime_context.toplevel = True # enable discovery of secondaryFiles
3582
3658
  runtime_context.find_default_container = functools.partial(
@@ -3617,314 +3693,309 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3617
3693
  )
3618
3694
  runtime_context.research_obj = research_obj
3619
3695
 
3620
- with Toil(options) as toil:
3621
- if options.restart:
3622
- try:
3696
+ try:
3697
+ with Toil(options) as toil:
3698
+ if options.restart:
3623
3699
  outobj = toil.restart()
3624
- except FailedJobsException as err:
3625
- if err.exit_code == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
3626
- # We figured out that we can't support this workflow.
3627
- logging.error(err)
3628
- logging.error(
3629
- "Your workflow uses a CWL requirement that Toil does not support!"
3700
+ else:
3701
+ loading_context.hints = [
3702
+ {
3703
+ "class": "ResourceRequirement",
3704
+ "coresMin": toil.config.defaultCores,
3705
+ "ramMin": toil.config.defaultMemory / (2**20),
3706
+ "outdirMin": toil.config.defaultDisk / (2**20),
3707
+ "tmpdirMin": 0,
3708
+ }
3709
+ ]
3710
+ loading_context.construct_tool_object = toil_make_tool
3711
+ loading_context.strict = not options.not_strict
3712
+ options.workflow = options.cwltool
3713
+ options.job_order = options.cwljob
3714
+
3715
+ try:
3716
+ uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
3717
+ options.cwltool,
3718
+ loading_context.resolver,
3719
+ loading_context.fetcher_constructor,
3720
+ )
3721
+ except ValidationException:
3722
+ print(
3723
+ "\nYou may be getting this error because your arguments are incorrect or out of order."
3724
+ + usage_message,
3725
+ file=sys.stderr,
3630
3726
  )
3631
- return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3632
- else:
3633
3727
  raise
3634
- else:
3635
- loading_context.hints = [
3636
- {
3637
- "class": "ResourceRequirement",
3638
- "coresMin": toil.config.defaultCores,
3639
- "ramMin": toil.config.defaultMemory / (2**20),
3640
- "outdirMin": toil.config.defaultDisk / (2**20),
3641
- "tmpdirMin": 0,
3642
- }
3643
- ]
3644
- loading_context.construct_tool_object = toil_make_tool
3645
- loading_context.strict = not options.not_strict
3646
- options.workflow = options.cwltool
3647
- options.job_order = options.cwljob
3648
3728
 
3649
- try:
3650
- uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
3651
- options.cwltool,
3652
- loading_context.resolver,
3729
+ options.tool_help = None
3730
+ options.debug = options.logLevel == "DEBUG"
3731
+ job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
3732
+ options,
3733
+ sys.stdin,
3653
3734
  loading_context.fetcher_constructor,
3735
+ loading_context.overrides_list,
3736
+ tool_file_uri,
3654
3737
  )
3655
- except ValidationException:
3656
- print(
3657
- "\nYou may be getting this error because your arguments are incorrect or out of order."
3658
- + usage_message,
3659
- file=sys.stderr,
3660
- )
3661
- raise
3662
-
3663
- options.tool_help = None
3664
- options.debug = options.logLevel == "DEBUG"
3665
- job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
3666
- options,
3667
- sys.stdin,
3668
- loading_context.fetcher_constructor,
3669
- loading_context.overrides_list,
3670
- tool_file_uri,
3671
- )
3672
- if options.overrides:
3673
- loading_context.overrides_list.extend(
3674
- cwltool.load_tool.load_overrides(
3675
- schema_salad.ref_resolver.file_uri(
3676
- os.path.abspath(options.overrides)
3677
- ),
3678
- tool_file_uri,
3738
+ if options.overrides:
3739
+ loading_context.overrides_list.extend(
3740
+ cwltool.load_tool.load_overrides(
3741
+ schema_salad.ref_resolver.file_uri(
3742
+ os.path.abspath(options.overrides)
3743
+ ),
3744
+ tool_file_uri,
3745
+ )
3679
3746
  )
3747
+
3748
+ loading_context, workflowobj, uri = cwltool.load_tool.fetch_document(
3749
+ uri, loading_context
3750
+ )
3751
+ loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
3752
+ loading_context, workflowobj, uri
3680
3753
  )
3754
+ if not loading_context.loader:
3755
+ raise RuntimeError("cwltool loader is not set.")
3756
+ processobj, metadata = loading_context.loader.resolve_ref(uri)
3757
+ processobj = cast(Union[CommentedMap, CommentedSeq], processobj)
3681
3758
 
3682
- loading_context, workflowobj, uri = cwltool.load_tool.fetch_document(
3683
- uri, loading_context
3684
- )
3685
- loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
3686
- loading_context, workflowobj, uri
3687
- )
3688
- if not loading_context.loader:
3689
- raise RuntimeError("cwltool loader is not set.")
3690
- processobj, metadata = loading_context.loader.resolve_ref(uri)
3691
- processobj = cast(Union[CommentedMap, CommentedSeq], processobj)
3759
+ document_loader = loading_context.loader
3692
3760
 
3693
- document_loader = loading_context.loader
3761
+ if options.provenance and runtime_context.research_obj:
3762
+ cwltool.cwlprov.writablebagfile.packed_workflow(
3763
+ runtime_context.research_obj,
3764
+ cwltool.main.print_pack(loading_context, uri),
3765
+ )
3694
3766
 
3695
- if options.provenance and runtime_context.research_obj:
3696
- cwltool.cwlprov.writablebagfile.packed_workflow(
3697
- runtime_context.research_obj,
3698
- cwltool.main.print_pack(loading_context, uri),
3699
- )
3767
+ try:
3768
+ tool = cwltool.load_tool.make_tool(uri, loading_context)
3769
+ scan_for_unsupported_requirements(
3770
+ tool, bypass_file_store=options.bypass_file_store
3771
+ )
3772
+ except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
3773
+ logging.error(err)
3774
+ return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3775
+ runtime_context.secret_store = SecretStore()
3776
+
3777
+ try:
3778
+ # Get the "order" for the execution of the root job. CWLTool
3779
+ # doesn't document this much, but this is an "order" in the
3780
+ # sense of a "specification" for running a single job. It
3781
+ # describes the inputs to the workflow.
3782
+ initialized_job_order = cwltool.main.init_job_order(
3783
+ job_order_object,
3784
+ options,
3785
+ tool,
3786
+ jobloader,
3787
+ sys.stdout,
3788
+ make_fs_access=runtime_context.make_fs_access,
3789
+ input_basedir=options.basedir,
3790
+ secret_store=runtime_context.secret_store,
3791
+ input_required=True,
3792
+ )
3793
+ except SystemExit as err:
3794
+ if err.code == 2: # raised by argparse's parse_args() function
3795
+ print(
3796
+ "\nIf both a CWL file and an input object (YAML/JSON) file were "
3797
+ "provided, this may be the argument order." + usage_message,
3798
+ file=sys.stderr,
3799
+ )
3800
+ raise
3700
3801
 
3701
- try:
3702
- tool = cwltool.load_tool.make_tool(uri, loading_context)
3703
- scan_for_unsupported_requirements(
3704
- tool, bypass_file_store=options.bypass_file_store
3802
+ # Leave the defaults un-filled in the top-level order. The tool or
3803
+ # workflow will fill them when it runs
3804
+
3805
+ for inp in tool.tool["inputs"]:
3806
+ if (
3807
+ shortname(inp["id"]) in initialized_job_order
3808
+ and inp["type"] == "File"
3809
+ ):
3810
+ cast(CWLObjectType, initialized_job_order[shortname(inp["id"])])[
3811
+ "streamable"
3812
+ ] = inp.get("streamable", False)
3813
+ # TODO also for nested types that contain streamable Files
3814
+
3815
+ runtime_context.use_container = not options.no_container
3816
+ runtime_context.tmp_outdir_prefix = os.path.realpath(tmp_outdir_prefix)
3817
+ runtime_context.job_script_provider = job_script_provider
3818
+ runtime_context.force_docker_pull = options.force_docker_pull
3819
+ runtime_context.no_match_user = options.no_match_user
3820
+ runtime_context.no_read_only = options.no_read_only
3821
+ runtime_context.basedir = options.basedir
3822
+ if not options.bypass_file_store:
3823
+ # If we're using the file store we need to start moving output
3824
+ # files now.
3825
+ runtime_context.move_outputs = "move"
3826
+
3827
+ # We instantiate an early builder object here to populate indirect
3828
+ # secondaryFile references using cwltool's library because we need
3829
+ # to resolve them before toil imports them into the filestore.
3830
+ # A second builder will be built in the job's run method when toil
3831
+ # actually starts the cwl job.
3832
+ # Note that this accesses input files for tools, so the
3833
+ # ToilFsAccess needs to be set up if we want to be able to use
3834
+ # URLs.
3835
+ builder = tool._init_job(initialized_job_order, runtime_context)
3836
+
3837
+ # make sure this doesn't add listing items; if shallow_listing is
3838
+ # selected, it will discover dirs one deep and then again later on
3839
+ # (probably when the cwltool builder gets ahold of the job in the
3840
+ # CWL job's run()), producing 2+ deep listings instead of only 1.
3841
+ builder.loadListing = "no_listing"
3842
+
3843
+ builder.bind_input(
3844
+ tool.inputs_record_schema,
3845
+ initialized_job_order,
3846
+ discover_secondaryFiles=True,
3705
3847
  )
3706
- except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
3707
- logging.error(err)
3708
- return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3709
- runtime_context.secret_store = SecretStore()
3710
3848
 
3711
- try:
3712
- # Get the "order" for the execution of the root job. CWLTool
3713
- # doesn't document this much, but this is an "order" in the
3714
- # sense of a "specification" for running a single job. It
3715
- # describes the inputs to the workflow.
3716
- initialized_job_order = cwltool.main.init_job_order(
3717
- job_order_object,
3718
- options,
3719
- tool,
3720
- jobloader,
3721
- sys.stdout,
3722
- make_fs_access=runtime_context.make_fs_access,
3723
- input_basedir=options.basedir,
3724
- secret_store=runtime_context.secret_store,
3725
- input_required=True,
3849
+ # Define something we can call to import a file and get its file
3850
+ # ID.
3851
+ # We cast this because import_file is overloaded depending on if we
3852
+ # pass a shared file name or not, and we know the way we call it we
3853
+ # always get a FileID out.
3854
+ file_import_function = cast(
3855
+ Callable[[str], FileID],
3856
+ functools.partial(toil.import_file, symlink=True),
3726
3857
  )
3727
- except SystemExit as e:
3728
- if e.code == 2: # raised by argparse's parse_args() function
3729
- print(
3730
- "\nIf both a CWL file and an input object (YAML/JSON) file were "
3731
- "provided, this may be the argument order." + usage_message,
3732
- file=sys.stderr,
3733
- )
3734
- raise
3735
-
3736
- # Leave the defaults un-filled in the top-level order. The tool or
3737
- # workflow will fill them when it runs
3738
-
3739
- for inp in tool.tool["inputs"]:
3740
- if (
3741
- shortname(inp["id"]) in initialized_job_order
3742
- and inp["type"] == "File"
3743
- ):
3744
- cast(CWLObjectType, initialized_job_order[shortname(inp["id"])])[
3745
- "streamable"
3746
- ] = inp.get("streamable", False)
3747
- # TODO also for nested types that contain streamable Files
3748
-
3749
- runtime_context.use_container = not options.no_container
3750
- runtime_context.tmp_outdir_prefix = os.path.realpath(tmp_outdir_prefix)
3751
- runtime_context.job_script_provider = job_script_provider
3752
- runtime_context.force_docker_pull = options.force_docker_pull
3753
- runtime_context.no_match_user = options.no_match_user
3754
- runtime_context.no_read_only = options.no_read_only
3755
- runtime_context.basedir = options.basedir
3756
- if not options.bypass_file_store:
3757
- # If we're using the file store we need to start moving output
3758
- # files now.
3759
- runtime_context.move_outputs = "move"
3760
-
3761
- # We instantiate an early builder object here to populate indirect
3762
- # secondaryFile references using cwltool's library because we need
3763
- # to resolve them before toil imports them into the filestore.
3764
- # A second builder will be built in the job's run method when toil
3765
- # actually starts the cwl job.
3766
- # Note that this accesses input files for tools, so the
3767
- # ToilFsAccess needs to be set up if we want to be able to use
3768
- # URLs.
3769
- builder = tool._init_job(initialized_job_order, runtime_context)
3770
-
3771
- # make sure this doesn't add listing items; if shallow_listing is
3772
- # selected, it will discover dirs one deep and then again later on
3773
- # (probably when the cwltool builder gets ahold of the job in the
3774
- # CWL job's run()), producing 2+ deep listings instead of only 1.
3775
- builder.loadListing = "no_listing"
3776
-
3777
- builder.bind_input(
3778
- tool.inputs_record_schema,
3779
- initialized_job_order,
3780
- discover_secondaryFiles=True,
3781
- )
3782
3858
 
3783
- # Define something we can call to import a file and get its file
3784
- # ID.
3785
- # We cast this because import_file is overloaded depending on if we
3786
- # pass a shared file name or not, and we know the way we call it we
3787
- # always get a FileID out.
3788
- file_import_function = cast(
3789
- Callable[[str], FileID],
3790
- functools.partial(toil.import_file, symlink=True),
3791
- )
3792
-
3793
- # Import all the input files, some of which may be missing optional
3794
- # files.
3795
- logger.info("Importing input files...")
3796
- fs_access = ToilFsAccess(options.basedir)
3797
- import_files(
3798
- file_import_function,
3799
- fs_access,
3800
- fileindex,
3801
- existing,
3802
- initialized_job_order,
3803
- skip_broken=True,
3804
- skip_remote=options.reference_inputs,
3805
- bypass_file_store=options.bypass_file_store,
3806
- log_level=logging.INFO,
3807
- )
3808
- # Import all the files associated with tools (binaries, etc.).
3809
- # Not sure why you would have an optional secondary file here, but
3810
- # the spec probably needs us to support them.
3811
- logger.info("Importing tool-associated files...")
3812
- visitSteps(
3813
- tool,
3814
- functools.partial(
3815
- import_files,
3859
+ # Import all the input files, some of which may be missing optional
3860
+ # files.
3861
+ logger.info("Importing input files...")
3862
+ fs_access = ToilFsAccess(options.basedir)
3863
+ import_files(
3816
3864
  file_import_function,
3817
3865
  fs_access,
3818
3866
  fileindex,
3819
3867
  existing,
3820
- skip_broken=True,
3868
+ initialized_job_order,
3869
+ mark_broken=True,
3821
3870
  skip_remote=options.reference_inputs,
3822
3871
  bypass_file_store=options.bypass_file_store,
3823
3872
  log_level=logging.INFO,
3824
- ),
3825
- )
3826
-
3827
- # We always expect to have processed all files that exist
3828
- for param_name, param_value in initialized_job_order.items():
3829
- # Loop through all the parameters for the workflow overall.
3830
- # Drop any files that aren't either imported (for when we use
3831
- # the file store) or available on disk (for when we don't).
3832
- # This will properly make them cause an error later if they
3833
- # were required.
3834
- rm_unprocessed_secondary_files(param_value)
3835
-
3836
- logger.info("Creating root job")
3837
- logger.debug("Root tool: %s", tool)
3838
- try:
3839
- wf1, _ = makeJob(
3840
- tool=tool,
3841
- jobobj={},
3842
- runtime_context=runtime_context,
3843
- parent_name=None, # toplevel, no name needed
3844
- conditional=None,
3845
3873
  )
3846
- except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
3847
- logging.error(err)
3848
- return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3849
- wf1.cwljob = initialized_job_order
3850
- logger.info("Starting workflow")
3851
- try:
3852
- outobj = toil.start(wf1)
3853
- except FailedJobsException as err:
3854
- if err.exit_code == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
3855
- # We figured out that we can't support this workflow.
3856
- logging.error(err)
3857
- logging.error(
3858
- "Your workflow uses a CWL requirement that Toil does not support!"
3874
+ # Import all the files associated with tools (binaries, etc.).
3875
+ # Not sure why you would have an optional secondary file here, but
3876
+ # the spec probably needs us to support them.
3877
+ logger.info("Importing tool-associated files...")
3878
+ visitSteps(
3879
+ tool,
3880
+ functools.partial(
3881
+ import_files,
3882
+ file_import_function,
3883
+ fs_access,
3884
+ fileindex,
3885
+ existing,
3886
+ mark_broken=True,
3887
+ skip_remote=options.reference_inputs,
3888
+ bypass_file_store=options.bypass_file_store,
3889
+ log_level=logging.INFO,
3890
+ ),
3891
+ )
3892
+
3893
+ # We always expect to have processed all files that exist
3894
+ for param_name, param_value in initialized_job_order.items():
3895
+ # Loop through all the parameters for the workflow overall.
3896
+ # Drop any files that aren't either imported (for when we use
3897
+ # the file store) or available on disk (for when we don't).
3898
+ # This will properly make them cause an error later if they
3899
+ # were required.
3900
+ rm_unprocessed_secondary_files(param_value)
3901
+
3902
+ logger.info("Creating root job")
3903
+ logger.debug("Root tool: %s", tool)
3904
+ try:
3905
+ wf1, _ = makeJob(
3906
+ tool=tool,
3907
+ jobobj={},
3908
+ runtime_context=runtime_context,
3909
+ parent_name=None, # toplevel, no name needed
3910
+ conditional=None,
3859
3911
  )
3912
+ except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
3913
+ logging.error(err)
3860
3914
  return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3861
- else:
3862
- raise
3863
-
3864
- # Now the workflow has completed. We need to make sure the outputs (and
3865
- # inputs) end up where the user wants them to be.
3866
- logger.info("Collecting workflow outputs...")
3867
- outobj = resolve_dict_w_promises(outobj)
3868
-
3869
- # Stage files. Specify destination bucket if specified in CLI
3870
- # options. If destination bucket not passed in,
3871
- # options.destBucket's value will be None.
3872
- toilStageFiles(
3873
- toil,
3874
- outobj,
3875
- outdir,
3876
- destBucket=options.destBucket,
3877
- log_level=logging.INFO
3878
- )
3879
- logger.info("Stored workflow outputs")
3915
+ wf1.cwljob = initialized_job_order
3916
+ logger.info("Starting workflow")
3917
+ outobj = toil.start(wf1)
3880
3918
 
3881
- if runtime_context.research_obj is not None:
3882
- cwltool.cwlprov.writablebagfile.create_job(
3883
- runtime_context.research_obj, outobj, True
3884
- )
3919
+ # Now the workflow has completed. We need to make sure the outputs (and
3920
+ # inputs) end up where the user wants them to be.
3921
+ logger.info("Collecting workflow outputs...")
3922
+ outobj = resolve_dict_w_promises(outobj)
3885
3923
 
3886
- def remove_at_id(doc: Any) -> None:
3887
- if isinstance(doc, MutableMapping):
3888
- for key in list(doc.keys()):
3889
- if key == "@id":
3890
- del doc[key]
3891
- else:
3892
- value = doc[key]
3893
- if isinstance(value, MutableMapping):
3894
- remove_at_id(value)
3895
- if isinstance(value, MutableSequence):
3896
- for entry in value:
3897
- if isinstance(value, MutableMapping):
3898
- remove_at_id(entry)
3899
-
3900
- remove_at_id(outobj)
3901
- visit_class(
3924
+ # Stage files. Specify destination bucket if specified in CLI
3925
+ # options. If destination bucket not passed in,
3926
+ # options.destBucket's value will be None.
3927
+ toilStageFiles(
3928
+ toil,
3902
3929
  outobj,
3903
- ("File",),
3904
- functools.partial(add_sizes, runtime_context.make_fs_access("")),
3905
- )
3906
- if not document_loader:
3907
- raise RuntimeError("cwltool loader is not set.")
3908
- prov_dependencies = cwltool.main.prov_deps(
3909
- workflowobj, document_loader, uri
3910
- )
3911
- runtime_context.research_obj.generate_snapshot(prov_dependencies)
3912
- cwltool.cwlprov.writablebagfile.close_ro(
3913
- runtime_context.research_obj, options.provenance
3930
+ outdir,
3931
+ destBucket=options.destBucket,
3932
+ log_level=logging.INFO
3914
3933
  )
3934
+ logger.info("Stored workflow outputs")
3915
3935
 
3916
- if not options.destBucket and options.compute_checksum:
3917
- logger.info("Computing output file checksums...")
3918
- visit_class(
3919
- outobj,
3920
- ("File",),
3921
- functools.partial(compute_checksums, StdFsAccess("")),
3922
- )
3936
+ if runtime_context.research_obj is not None:
3937
+ cwltool.cwlprov.writablebagfile.create_job(
3938
+ runtime_context.research_obj, outobj, True
3939
+ )
3940
+
3941
+ def remove_at_id(doc: Any) -> None:
3942
+ if isinstance(doc, MutableMapping):
3943
+ for key in list(doc.keys()):
3944
+ if key == "@id":
3945
+ del doc[key]
3946
+ else:
3947
+ value = doc[key]
3948
+ if isinstance(value, MutableMapping):
3949
+ remove_at_id(value)
3950
+ if isinstance(value, MutableSequence):
3951
+ for entry in value:
3952
+ if isinstance(value, MutableMapping):
3953
+ remove_at_id(entry)
3954
+
3955
+ remove_at_id(outobj)
3956
+ visit_class(
3957
+ outobj,
3958
+ ("File",),
3959
+ functools.partial(add_sizes, runtime_context.make_fs_access("")),
3960
+ )
3961
+ if not document_loader:
3962
+ raise RuntimeError("cwltool loader is not set.")
3963
+ prov_dependencies = cwltool.main.prov_deps(
3964
+ workflowobj, document_loader, uri
3965
+ )
3966
+ runtime_context.research_obj.generate_snapshot(prov_dependencies)
3967
+ cwltool.cwlprov.writablebagfile.close_ro(
3968
+ runtime_context.research_obj, options.provenance
3969
+ )
3970
+
3971
+ if not options.destBucket and options.compute_checksum:
3972
+ logger.info("Computing output file checksums...")
3973
+ visit_class(
3974
+ outobj,
3975
+ ("File",),
3976
+ functools.partial(compute_checksums, StdFsAccess("")),
3977
+ )
3923
3978
 
3924
- visit_class(outobj, ("File",), MutationManager().unset_generation)
3925
- stdout.write(json.dumps(outobj, indent=4, default=str))
3926
- stdout.write("\n")
3927
- logger.info("CWL run complete!")
3979
+ visit_class(outobj, ("File",), MutationManager().unset_generation)
3980
+ stdout.write(json.dumps(outobj, indent=4, default=str))
3981
+ stdout.write("\n")
3982
+ logger.info("CWL run complete!")
3983
+ # Don't expose tracebacks to the user for exceptions that may be expected
3984
+ except FailedJobsException as err:
3985
+ if err.exit_code == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
3986
+ # We figured out that we can't support this workflow.
3987
+ logging.error(err)
3988
+ logging.error(
3989
+ "Your workflow uses a CWL requirement that Toil does not support!"
3990
+ )
3991
+ return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3992
+ else:
3993
+ logging.error(err)
3994
+ return 1
3995
+ except (InsufficientSystemResources, LocatorException, InvalidImportExportUrlException, UnimplementedURLException,
3996
+ JobTooBigError) as err:
3997
+ logging.error(err)
3998
+ return 1
3928
3999
 
3929
4000
  return 0
3930
4001