toil 6.1.0__py3-none-any.whl → 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. toil/__init__.py +1 -232
  2. toil/batchSystems/abstractBatchSystem.py +22 -13
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +59 -45
  4. toil/batchSystems/awsBatch.py +8 -8
  5. toil/batchSystems/contained_executor.py +4 -5
  6. toil/batchSystems/gridengine.py +1 -1
  7. toil/batchSystems/htcondor.py +5 -5
  8. toil/batchSystems/kubernetes.py +25 -11
  9. toil/batchSystems/local_support.py +3 -3
  10. toil/batchSystems/lsf.py +2 -2
  11. toil/batchSystems/mesos/batchSystem.py +4 -4
  12. toil/batchSystems/mesos/executor.py +3 -2
  13. toil/batchSystems/options.py +9 -0
  14. toil/batchSystems/singleMachine.py +11 -10
  15. toil/batchSystems/slurm.py +64 -22
  16. toil/batchSystems/torque.py +1 -1
  17. toil/bus.py +7 -3
  18. toil/common.py +36 -13
  19. toil/cwl/cwltoil.py +365 -312
  20. toil/deferred.py +1 -1
  21. toil/fileStores/abstractFileStore.py +17 -17
  22. toil/fileStores/cachingFileStore.py +2 -2
  23. toil/fileStores/nonCachingFileStore.py +1 -1
  24. toil/job.py +228 -60
  25. toil/jobStores/abstractJobStore.py +18 -10
  26. toil/jobStores/aws/jobStore.py +280 -218
  27. toil/jobStores/aws/utils.py +57 -29
  28. toil/jobStores/conftest.py +2 -2
  29. toil/jobStores/fileJobStore.py +2 -2
  30. toil/jobStores/googleJobStore.py +3 -4
  31. toil/leader.py +72 -24
  32. toil/lib/aws/__init__.py +26 -10
  33. toil/lib/aws/iam.py +2 -2
  34. toil/lib/aws/session.py +62 -22
  35. toil/lib/aws/utils.py +73 -37
  36. toil/lib/conversions.py +5 -1
  37. toil/lib/ec2.py +118 -69
  38. toil/lib/expando.py +1 -1
  39. toil/lib/io.py +14 -2
  40. toil/lib/misc.py +1 -3
  41. toil/lib/resources.py +55 -21
  42. toil/lib/retry.py +12 -5
  43. toil/lib/threading.py +2 -2
  44. toil/lib/throttle.py +1 -1
  45. toil/options/common.py +27 -24
  46. toil/provisioners/__init__.py +9 -3
  47. toil/provisioners/abstractProvisioner.py +9 -7
  48. toil/provisioners/aws/__init__.py +20 -15
  49. toil/provisioners/aws/awsProvisioner.py +406 -329
  50. toil/provisioners/gceProvisioner.py +2 -2
  51. toil/provisioners/node.py +13 -5
  52. toil/server/app.py +1 -1
  53. toil/statsAndLogging.py +58 -16
  54. toil/test/__init__.py +27 -12
  55. toil/test/batchSystems/batchSystemTest.py +40 -33
  56. toil/test/batchSystems/batch_system_plugin_test.py +79 -0
  57. toil/test/batchSystems/test_slurm.py +1 -1
  58. toil/test/cwl/cwlTest.py +8 -91
  59. toil/test/cwl/seqtk_seq.cwl +1 -1
  60. toil/test/docs/scriptsTest.py +10 -13
  61. toil/test/jobStores/jobStoreTest.py +33 -49
  62. toil/test/lib/aws/test_iam.py +2 -2
  63. toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
  64. toil/test/provisioners/clusterTest.py +90 -8
  65. toil/test/server/serverTest.py +2 -2
  66. toil/test/src/autoDeploymentTest.py +1 -1
  67. toil/test/src/dockerCheckTest.py +2 -1
  68. toil/test/src/environmentTest.py +125 -0
  69. toil/test/src/fileStoreTest.py +1 -1
  70. toil/test/src/jobDescriptionTest.py +18 -8
  71. toil/test/src/jobTest.py +1 -1
  72. toil/test/src/realtimeLoggerTest.py +4 -0
  73. toil/test/src/workerTest.py +52 -19
  74. toil/test/utils/toilDebugTest.py +61 -3
  75. toil/test/utils/utilsTest.py +20 -18
  76. toil/test/wdl/wdltoil_test.py +24 -71
  77. toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
  78. toil/toilState.py +68 -9
  79. toil/utils/toilDebugJob.py +153 -26
  80. toil/utils/toilLaunchCluster.py +12 -2
  81. toil/utils/toilRsyncCluster.py +7 -2
  82. toil/utils/toilSshCluster.py +7 -3
  83. toil/utils/toilStats.py +2 -1
  84. toil/utils/toilStatus.py +97 -51
  85. toil/version.py +10 -10
  86. toil/wdl/wdltoil.py +318 -51
  87. toil/worker.py +96 -69
  88. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
  89. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/METADATA +55 -21
  90. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/RECORD +93 -90
  91. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
  92. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
  93. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py CHANGED
@@ -29,7 +29,6 @@ import logging
29
29
  import os
30
30
  import pprint
31
31
  import shutil
32
- import socket
33
32
  import stat
34
33
  import sys
35
34
  import textwrap
@@ -66,7 +65,7 @@ import cwltool.load_tool
66
65
  import cwltool.main
67
66
  import cwltool.resolver
68
67
  import schema_salad.ref_resolver
69
- from configargparse import SUPPRESS, ArgParser, Namespace
68
+ from configargparse import ArgParser, Namespace
70
69
  from cwltool.loghandler import _logger as cwllogger
71
70
  from cwltool.loghandler import defaultStreamHandler
72
71
  from cwltool.mpi import MpiConfig
@@ -97,9 +96,11 @@ from schema_salad.ref_resolver import file_uri, uri_file_path
97
96
  from schema_salad.sourceline import SourceLine
98
97
  from typing_extensions import Literal
99
98
 
99
+ from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
100
100
  from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
101
101
  from toil.common import Toil, addOptions
102
102
  from toil.cwl import check_cwltool_version
103
+ from toil.provisioners.clusterScaler import JobTooBigError
103
104
 
104
105
  check_cwltool_version()
105
106
  from toil.cwl.utils import (CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
@@ -111,8 +112,8 @@ from toil.exceptions import FailedJobsException
111
112
  from toil.fileStores import FileID
112
113
  from toil.fileStores.abstractFileStore import AbstractFileStore
113
114
  from toil.job import AcceleratorRequirement, Job, Promise, Promised, unwrap
114
- from toil.jobStores.abstractJobStore import (AbstractJobStore,
115
- NoSuchFileException)
115
+ from toil.jobStores.abstractJobStore import (AbstractJobStore, NoSuchFileException, LocatorException,
116
+ InvalidImportExportUrlException, UnimplementedURLException)
116
117
  from toil.jobStores.fileJobStore import FileJobStore
117
118
  from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
118
119
  from toil.lib.io import mkdtemp
@@ -1010,6 +1011,24 @@ class ToilSingleJobExecutor(cwltool.executors.SingleJobExecutor):
1010
1011
  class ToilTool:
1011
1012
  """Mixin to hook Toil into a cwltool tool type."""
1012
1013
 
1014
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
1015
+ """
1016
+ Init hook to set up member variables.
1017
+ """
1018
+ super().__init__(*args, **kwargs)
1019
+ # Reserve a spot for the Toil job that ends up executing this tool.
1020
+ self._toil_job: Optional[Job] = None
1021
+ # Remember path mappers we have used so we can interrogate them later to find out what the job mapped.
1022
+ self._path_mappers: List[cwltool.pathmapper.PathMapper] = []
1023
+
1024
+ def connect_toil_job(self, job: Job) -> None:
1025
+ """
1026
+ Attach the Toil tool to the Toil job that is executing it. This allows
1027
+ it to use the Toil job to stop at certain points if debugging flags are
1028
+ set.
1029
+ """
1030
+ self._toil_job = job
1031
+
1013
1032
  def make_path_mapper(
1014
1033
  self,
1015
1034
  reffiles: List[Any],
@@ -1020,12 +1039,12 @@ class ToilTool:
1020
1039
  """Create the appropriate PathMapper for the situation."""
1021
1040
  if getattr(runtimeContext, "bypass_file_store", False):
1022
1041
  # We only need to understand cwltool's supported URIs
1023
- return PathMapper(
1042
+ mapper = PathMapper(
1024
1043
  reffiles, runtimeContext.basedir, stagedir, separateDirs=separateDirs
1025
1044
  )
1026
1045
  else:
1027
1046
  # We need to be able to read from Toil-provided URIs
1028
- return ToilPathMapper(
1047
+ mapper = ToilPathMapper(
1029
1048
  reffiles,
1030
1049
  runtimeContext.basedir,
1031
1050
  stagedir,
@@ -1034,6 +1053,10 @@ class ToilTool:
1034
1053
  streaming_allowed=runtimeContext.streaming_allowed,
1035
1054
  )
1036
1055
 
1056
+ # Remember the path mappers
1057
+ self._path_mappers.append(mapper)
1058
+ return mapper
1059
+
1037
1060
  def __str__(self) -> str:
1038
1061
  """Return string representation of this tool type."""
1039
1062
  return f'{self.__class__.__name__}({repr(getattr(self, "tool", {}).get("id", "???"))})'
@@ -1050,17 +1073,34 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
1050
1073
  name conflicts at the top level of the work directory.
1051
1074
  """
1052
1075
 
1076
+ # Set up the initial work dir with all its files
1053
1077
  super()._initialworkdir(j, builder)
1054
1078
 
1055
1079
  # The initial work dir listing is now in j.generatefiles["listing"]
1056
- # Also j.generatrfiles is a CWL Directory.
1080
+ # Also j.generatefiles is a CWL Directory.
1057
1081
  # So check the initial working directory.
1058
- logger.info("Initial work dir: %s", j.generatefiles)
1082
+ logger.debug("Initial work dir: %s", j.generatefiles)
1059
1083
  ensure_no_collisions(
1060
1084
  j.generatefiles,
1061
1085
  "the job's working directory as specified by the InitialWorkDirRequirement",
1062
1086
  )
1063
1087
 
1088
+ if self._toil_job is not None:
1089
+ # Make a table of all the places we mapped files to when downloading the inputs.
1090
+
1091
+ # We want to hint which host paths and container (if any) paths correspond
1092
+ host_and_job_paths: List[Tuple[str, str]] = []
1093
+
1094
+ for pm in self._path_mappers:
1095
+ for _, mapper_entry in pm.items_exclude_children():
1096
+ # We know that mapper_entry.target as seen by the task is
1097
+ # mapper_entry.resolved on the host.
1098
+ host_and_job_paths.append((mapper_entry.resolved, mapper_entry.target))
1099
+
1100
+ # Notice that we have downloaded our inputs. Explain which files
1101
+ # those are here and what the task will expect to call them.
1102
+ self._toil_job.files_downloaded_hook(host_and_job_paths)
1103
+
1064
1104
 
1065
1105
  class ToilExpressionTool(ToilTool, cwltool.command_line_tool.ExpressionTool):
1066
1106
  """Subclass the cwltool expression tool to provide the custom ToilPathMapper."""
@@ -1083,6 +1123,10 @@ def toil_make_tool(
1083
1123
  return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)
1084
1124
 
1085
1125
 
1126
+ # When a file we want to have is missing, we can give it this sentinal location
1127
+ # URI instead of raising an error right away, in case it is optional.
1128
+ MISSING_FILE = "missing://"
1129
+
1086
1130
  DirectoryContents = Dict[str, Union[str, "DirectoryContents"]]
1087
1131
 
1088
1132
 
@@ -1707,7 +1751,7 @@ def import_files(
1707
1751
  fileindex: Dict[str, str],
1708
1752
  existing: Dict[str, str],
1709
1753
  cwl_object: Optional[CWLObjectType],
1710
- skip_broken: bool = False,
1754
+ mark_broken: bool = False,
1711
1755
  skip_remote: bool = False,
1712
1756
  bypass_file_store: bool = False,
1713
1757
  log_level: int = logging.DEBUG
@@ -1726,10 +1770,10 @@ def import_files(
1726
1770
  Preserves any listing fields.
1727
1771
 
1728
1772
  If a file cannot be found (like if it is an optional secondary file that
1729
- doesn't exist), fails, unless skip_broken is set, in which case it leaves
1730
- the location it was supposed to have been at.
1773
+ doesn't exist), fails, unless mark_broken is set, in which case it applies
1774
+ a sentinel location.
1731
1775
 
1732
- Also does some miscelaneous normalization.
1776
+ Also does some miscellaneous normalization.
1733
1777
 
1734
1778
  :param import_function: The function used to upload a URI and get a
1735
1779
  Toil FileID for it.
@@ -1745,8 +1789,9 @@ def import_files(
1745
1789
 
1746
1790
  :param cwl_object: CWL tool (or workflow order) we are importing files for
1747
1791
 
1748
- :param skip_broken: If True, when files can't be imported because they e.g.
1749
- don't exist, leave their locations alone rather than failing with an error.
1792
+ :param mark_broken: If True, when files can't be imported because they e.g.
1793
+ don't exist, set their locations to MISSING_FILE rather than failing
1794
+ with an error.
1750
1795
 
1751
1796
  :param skp_remote: If True, leave remote URIs in place instead of importing
1752
1797
  files.
@@ -1866,7 +1911,7 @@ def import_files(
1866
1911
 
1867
1912
  # Upload the file itself, which will adjust its location.
1868
1913
  upload_file(
1869
- import_and_log, fileindex, existing, rec, skip_broken=skip_broken, skip_remote=skip_remote
1914
+ import_and_log, fileindex, existing, rec, mark_broken=mark_broken, skip_remote=skip_remote
1870
1915
  )
1871
1916
 
1872
1917
  # Make a record for this file under its name
@@ -1895,7 +1940,7 @@ def import_files(
1895
1940
  contents.update(child_result)
1896
1941
 
1897
1942
  # Upload the directory itself, which will adjust its location.
1898
- upload_directory(rec, contents, skip_broken=skip_broken)
1943
+ upload_directory(rec, contents, mark_broken=mark_broken)
1899
1944
 
1900
1945
  # Show those contents as being under our name in our parent.
1901
1946
  return {cast(str, rec["basename"]): contents}
@@ -1915,7 +1960,7 @@ def import_files(
1915
1960
  def upload_directory(
1916
1961
  directory_metadata: CWLObjectType,
1917
1962
  directory_contents: DirectoryContents,
1918
- skip_broken: bool = False,
1963
+ mark_broken: bool = False,
1919
1964
  ) -> None:
1920
1965
  """
1921
1966
  Upload a Directory object.
@@ -1927,6 +1972,9 @@ def upload_directory(
1927
1972
  Makes sure the directory actually exists, and rewrites its location to be
1928
1973
  something we can use on another machine.
1929
1974
 
1975
+ If mark_broken is set, ignores missing directories and replaces them with
1976
+ directories containing the given (possibly empty) contents.
1977
+
1930
1978
  We can't rely on the directory's listing as visible to the next tool as a
1931
1979
  complete recursive description of the files we will need to present to the
1932
1980
  tool, since some tools require it to be cleared or single-level but still
@@ -1947,8 +1995,8 @@ def upload_directory(
1947
1995
  if location.startswith("file://") and not os.path.isdir(
1948
1996
  schema_salad.ref_resolver.uri_file_path(location)
1949
1997
  ):
1950
- if skip_broken:
1951
- return
1998
+ if mark_broken:
1999
+ logger.debug("Directory %s is missing as a whole", directory_metadata)
1952
2000
  else:
1953
2001
  raise cwl_utils.errors.WorkflowException(
1954
2002
  "Directory is missing: %s" % directory_metadata["location"]
@@ -1970,7 +2018,7 @@ def upload_file(
1970
2018
  fileindex: Dict[str, str],
1971
2019
  existing: Dict[str, str],
1972
2020
  file_metadata: CWLObjectType,
1973
- skip_broken: bool = False,
2021
+ mark_broken: bool = False,
1974
2022
  skip_remote: bool = False
1975
2023
  ) -> None:
1976
2024
  """
@@ -1979,6 +2027,9 @@ def upload_file(
1979
2027
  Uploads local files to the Toil file store, and sets their location to a
1980
2028
  reference to the toil file store.
1981
2029
 
2030
+ If a file doesn't exist, fails with an error, unless mark_broken is set, in
2031
+ which case the missing file is given a special sentinel location.
2032
+
1982
2033
  Unless skip_remote is set, downloads remote files into the file store and
1983
2034
  sets their locations to references into the file store as well.
1984
2035
  """
@@ -1999,10 +2050,11 @@ def upload_file(
1999
2050
  if location.startswith("file://") and not os.path.isfile(
2000
2051
  schema_salad.ref_resolver.uri_file_path(location)
2001
2052
  ):
2002
- if skip_broken:
2003
- return
2053
+ if mark_broken:
2054
+ logger.debug("File %s is missing", file_metadata)
2055
+ file_metadata["location"] = location = MISSING_FILE
2004
2056
  else:
2005
- raise cwl_utils.errors.WorkflowException("File is missing: %s" % location)
2057
+ raise cwl_utils.errors.WorkflowException("File is missing: %s" % file_metadata)
2006
2058
 
2007
2059
  if location.startswith("file://") or not skip_remote:
2008
2060
  # This is a local file, or we also need to download and re-upload remote files
@@ -2125,7 +2177,7 @@ def toilStageFiles(
2125
2177
  :param destBucket: If set, export to this base URL instead of to the local
2126
2178
  filesystem.
2127
2179
 
2128
- :param log_level: Log each file transfered at the given level.
2180
+ :param log_level: Log each file transferred at the given level.
2129
2181
  """
2130
2182
 
2131
2183
  def _collectDirEntries(
@@ -2621,6 +2673,11 @@ class CWLJob(CWLNamedJob):
2621
2673
 
2622
2674
  runtime_context.name = self.description.unitName
2623
2675
 
2676
+ if isinstance(self.cwltool, ToilTool):
2677
+ # Connect the CWL tool to us so it can call into the Toil job when
2678
+ # it reaches points where we might need to debug it.
2679
+ self.cwltool.connect_toil_job(self)
2680
+
2624
2681
  status = "did_not_run"
2625
2682
  try:
2626
2683
  output, status = ToilSingleJobExecutor().execute(
@@ -3282,9 +3339,8 @@ def filtered_secondary_files(
3282
3339
  but add the resolved fields to the list of unresolved fields so we remove
3283
3340
  them here after the fact.
3284
3341
 
3285
- We keep secondary files using the 'toildir:', or '_:' protocols, or using
3286
- the 'file:' protocol and indicating files or directories that actually
3287
- exist. The 'required' logic seems to be handled deeper in
3342
+ We keep secondary files with anything other than MISSING_FILE as their
3343
+ location. The 'required' logic seems to be handled deeper in
3288
3344
  cwltool.builder.Builder(), and correctly determines which files should be
3289
3345
  imported. Therefore we remove the files here and if this file is SUPPOSED
3290
3346
  to exist, it will still give the appropriate file does not exist error, but
@@ -3299,24 +3355,22 @@ def filtered_secondary_files(
3299
3355
  if ("$(" not in sf_bn) and ("${" not in sf_bn):
3300
3356
  if ("$(" not in sf_loc) and ("${" not in sf_loc):
3301
3357
  intermediate_secondary_files.append(sf)
3358
+ else:
3359
+ logger.debug("Secondary file %s is dropped because it has an uninterpolated location", sf)
3360
+ else:
3361
+ logger.debug("Secondary file %s is dropped because it has an uninterpolated basename", sf)
3302
3362
  # remove secondary files that are not present in the filestore or pointing
3303
3363
  # to existant things on disk
3304
3364
  for sf in intermediate_secondary_files:
3305
3365
  sf_loc = cast(str, sf.get("location", ""))
3306
3366
  if (
3307
- sf_loc.startswith("toilfile:")
3308
- or sf_loc.startswith("toildir:")
3309
- or sf_loc.startswith("_:")
3367
+ sf_loc != MISSING_FILE
3310
3368
  or sf.get("class", "") == "Directory"
3311
3369
  ):
3312
3370
  # Pass imported files, and all Directories
3313
3371
  final_secondary_files.append(sf)
3314
- elif sf_loc.startswith("file:") and os.path.exists(
3315
- schema_salad.ref_resolver.uri_file_path(sf_loc)
3316
- ):
3317
- # Pass things that exist on disk (which we presumably declined to
3318
- # import because we aren't using the file store)
3319
- final_secondary_files.append(sf)
3372
+ else:
3373
+ logger.debug("Secondary file %s is dropped because it is known to be missing", sf)
3320
3374
  return final_secondary_files
3321
3375
 
3322
3376
 
@@ -3565,6 +3619,10 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3565
3619
  #
3566
3620
  # If set, workDir needs to exist, so we directly use the prefix
3567
3621
  options.workDir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3622
+ if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.coordination_dir is None:
3623
+ # override coordination_dir as default Toil will pick somewhere else
3624
+ # ignoring --tmpdir_prefix
3625
+ options.coordination_dir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3568
3626
 
3569
3627
  if options.batchSystem == "kubernetes":
3570
3628
  # Containers under Kubernetes can only run in Singularity
@@ -3635,314 +3693,309 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3635
3693
  )
3636
3694
  runtime_context.research_obj = research_obj
3637
3695
 
3638
- with Toil(options) as toil:
3639
- if options.restart:
3640
- try:
3696
+ try:
3697
+ with Toil(options) as toil:
3698
+ if options.restart:
3641
3699
  outobj = toil.restart()
3642
- except FailedJobsException as err:
3643
- if err.exit_code == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
3644
- # We figured out that we can't support this workflow.
3645
- logging.error(err)
3646
- logging.error(
3647
- "Your workflow uses a CWL requirement that Toil does not support!"
3700
+ else:
3701
+ loading_context.hints = [
3702
+ {
3703
+ "class": "ResourceRequirement",
3704
+ "coresMin": toil.config.defaultCores,
3705
+ "ramMin": toil.config.defaultMemory / (2**20),
3706
+ "outdirMin": toil.config.defaultDisk / (2**20),
3707
+ "tmpdirMin": 0,
3708
+ }
3709
+ ]
3710
+ loading_context.construct_tool_object = toil_make_tool
3711
+ loading_context.strict = not options.not_strict
3712
+ options.workflow = options.cwltool
3713
+ options.job_order = options.cwljob
3714
+
3715
+ try:
3716
+ uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
3717
+ options.cwltool,
3718
+ loading_context.resolver,
3719
+ loading_context.fetcher_constructor,
3720
+ )
3721
+ except ValidationException:
3722
+ print(
3723
+ "\nYou may be getting this error because your arguments are incorrect or out of order."
3724
+ + usage_message,
3725
+ file=sys.stderr,
3648
3726
  )
3649
- return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3650
- else:
3651
3727
  raise
3652
- else:
3653
- loading_context.hints = [
3654
- {
3655
- "class": "ResourceRequirement",
3656
- "coresMin": toil.config.defaultCores,
3657
- "ramMin": toil.config.defaultMemory / (2**20),
3658
- "outdirMin": toil.config.defaultDisk / (2**20),
3659
- "tmpdirMin": 0,
3660
- }
3661
- ]
3662
- loading_context.construct_tool_object = toil_make_tool
3663
- loading_context.strict = not options.not_strict
3664
- options.workflow = options.cwltool
3665
- options.job_order = options.cwljob
3666
3728
 
3667
- try:
3668
- uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
3669
- options.cwltool,
3670
- loading_context.resolver,
3729
+ options.tool_help = None
3730
+ options.debug = options.logLevel == "DEBUG"
3731
+ job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
3732
+ options,
3733
+ sys.stdin,
3671
3734
  loading_context.fetcher_constructor,
3735
+ loading_context.overrides_list,
3736
+ tool_file_uri,
3672
3737
  )
3673
- except ValidationException:
3674
- print(
3675
- "\nYou may be getting this error because your arguments are incorrect or out of order."
3676
- + usage_message,
3677
- file=sys.stderr,
3678
- )
3679
- raise
3680
-
3681
- options.tool_help = None
3682
- options.debug = options.logLevel == "DEBUG"
3683
- job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
3684
- options,
3685
- sys.stdin,
3686
- loading_context.fetcher_constructor,
3687
- loading_context.overrides_list,
3688
- tool_file_uri,
3689
- )
3690
- if options.overrides:
3691
- loading_context.overrides_list.extend(
3692
- cwltool.load_tool.load_overrides(
3693
- schema_salad.ref_resolver.file_uri(
3694
- os.path.abspath(options.overrides)
3695
- ),
3696
- tool_file_uri,
3738
+ if options.overrides:
3739
+ loading_context.overrides_list.extend(
3740
+ cwltool.load_tool.load_overrides(
3741
+ schema_salad.ref_resolver.file_uri(
3742
+ os.path.abspath(options.overrides)
3743
+ ),
3744
+ tool_file_uri,
3745
+ )
3697
3746
  )
3698
- )
3699
-
3700
- loading_context, workflowobj, uri = cwltool.load_tool.fetch_document(
3701
- uri, loading_context
3702
- )
3703
- loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
3704
- loading_context, workflowobj, uri
3705
- )
3706
- if not loading_context.loader:
3707
- raise RuntimeError("cwltool loader is not set.")
3708
- processobj, metadata = loading_context.loader.resolve_ref(uri)
3709
- processobj = cast(Union[CommentedMap, CommentedSeq], processobj)
3710
3747
 
3711
- document_loader = loading_context.loader
3712
-
3713
- if options.provenance and runtime_context.research_obj:
3714
- cwltool.cwlprov.writablebagfile.packed_workflow(
3715
- runtime_context.research_obj,
3716
- cwltool.main.print_pack(loading_context, uri),
3748
+ loading_context, workflowobj, uri = cwltool.load_tool.fetch_document(
3749
+ uri, loading_context
3717
3750
  )
3718
-
3719
- try:
3720
- tool = cwltool.load_tool.make_tool(uri, loading_context)
3721
- scan_for_unsupported_requirements(
3722
- tool, bypass_file_store=options.bypass_file_store
3751
+ loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
3752
+ loading_context, workflowobj, uri
3723
3753
  )
3724
- except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
3725
- logging.error(err)
3726
- return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3727
- runtime_context.secret_store = SecretStore()
3754
+ if not loading_context.loader:
3755
+ raise RuntimeError("cwltool loader is not set.")
3756
+ processobj, metadata = loading_context.loader.resolve_ref(uri)
3757
+ processobj = cast(Union[CommentedMap, CommentedSeq], processobj)
3728
3758
 
3729
- try:
3730
- # Get the "order" for the execution of the root job. CWLTool
3731
- # doesn't document this much, but this is an "order" in the
3732
- # sense of a "specification" for running a single job. It
3733
- # describes the inputs to the workflow.
3734
- initialized_job_order = cwltool.main.init_job_order(
3735
- job_order_object,
3736
- options,
3737
- tool,
3738
- jobloader,
3739
- sys.stdout,
3740
- make_fs_access=runtime_context.make_fs_access,
3741
- input_basedir=options.basedir,
3742
- secret_store=runtime_context.secret_store,
3743
- input_required=True,
3744
- )
3745
- except SystemExit as e:
3746
- if e.code == 2: # raised by argparse's parse_args() function
3747
- print(
3748
- "\nIf both a CWL file and an input object (YAML/JSON) file were "
3749
- "provided, this may be the argument order." + usage_message,
3750
- file=sys.stderr,
3759
+ document_loader = loading_context.loader
3760
+
3761
+ if options.provenance and runtime_context.research_obj:
3762
+ cwltool.cwlprov.writablebagfile.packed_workflow(
3763
+ runtime_context.research_obj,
3764
+ cwltool.main.print_pack(loading_context, uri),
3751
3765
  )
3752
- raise
3753
3766
 
3754
- # Leave the defaults un-filled in the top-level order. The tool or
3755
- # workflow will fill them when it runs
3767
+ try:
3768
+ tool = cwltool.load_tool.make_tool(uri, loading_context)
3769
+ scan_for_unsupported_requirements(
3770
+ tool, bypass_file_store=options.bypass_file_store
3771
+ )
3772
+ except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
3773
+ logging.error(err)
3774
+ return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3775
+ runtime_context.secret_store = SecretStore()
3776
+
3777
+ try:
3778
+ # Get the "order" for the execution of the root job. CWLTool
3779
+ # doesn't document this much, but this is an "order" in the
3780
+ # sense of a "specification" for running a single job. It
3781
+ # describes the inputs to the workflow.
3782
+ initialized_job_order = cwltool.main.init_job_order(
3783
+ job_order_object,
3784
+ options,
3785
+ tool,
3786
+ jobloader,
3787
+ sys.stdout,
3788
+ make_fs_access=runtime_context.make_fs_access,
3789
+ input_basedir=options.basedir,
3790
+ secret_store=runtime_context.secret_store,
3791
+ input_required=True,
3792
+ )
3793
+ except SystemExit as err:
3794
+ if err.code == 2: # raised by argparse's parse_args() function
3795
+ print(
3796
+ "\nIf both a CWL file and an input object (YAML/JSON) file were "
3797
+ "provided, this may be the argument order." + usage_message,
3798
+ file=sys.stderr,
3799
+ )
3800
+ raise
3756
3801
 
3757
- for inp in tool.tool["inputs"]:
3758
- if (
3759
- shortname(inp["id"]) in initialized_job_order
3760
- and inp["type"] == "File"
3761
- ):
3762
- cast(CWLObjectType, initialized_job_order[shortname(inp["id"])])[
3763
- "streamable"
3764
- ] = inp.get("streamable", False)
3765
- # TODO also for nested types that contain streamable Files
3766
-
3767
- runtime_context.use_container = not options.no_container
3768
- runtime_context.tmp_outdir_prefix = os.path.realpath(tmp_outdir_prefix)
3769
- runtime_context.job_script_provider = job_script_provider
3770
- runtime_context.force_docker_pull = options.force_docker_pull
3771
- runtime_context.no_match_user = options.no_match_user
3772
- runtime_context.no_read_only = options.no_read_only
3773
- runtime_context.basedir = options.basedir
3774
- if not options.bypass_file_store:
3775
- # If we're using the file store we need to start moving output
3776
- # files now.
3777
- runtime_context.move_outputs = "move"
3778
-
3779
- # We instantiate an early builder object here to populate indirect
3780
- # secondaryFile references using cwltool's library because we need
3781
- # to resolve them before toil imports them into the filestore.
3782
- # A second builder will be built in the job's run method when toil
3783
- # actually starts the cwl job.
3784
- # Note that this accesses input files for tools, so the
3785
- # ToilFsAccess needs to be set up if we want to be able to use
3786
- # URLs.
3787
- builder = tool._init_job(initialized_job_order, runtime_context)
3788
-
3789
- # make sure this doesn't add listing items; if shallow_listing is
3790
- # selected, it will discover dirs one deep and then again later on
3791
- # (probably when the cwltool builder gets ahold of the job in the
3792
- # CWL job's run()), producing 2+ deep listings instead of only 1.
3793
- builder.loadListing = "no_listing"
3794
-
3795
- builder.bind_input(
3796
- tool.inputs_record_schema,
3797
- initialized_job_order,
3798
- discover_secondaryFiles=True,
3799
- )
3802
+ # Leave the defaults un-filled in the top-level order. The tool or
3803
+ # workflow will fill them when it runs
3804
+
3805
+ for inp in tool.tool["inputs"]:
3806
+ if (
3807
+ shortname(inp["id"]) in initialized_job_order
3808
+ and inp["type"] == "File"
3809
+ ):
3810
+ cast(CWLObjectType, initialized_job_order[shortname(inp["id"])])[
3811
+ "streamable"
3812
+ ] = inp.get("streamable", False)
3813
+ # TODO also for nested types that contain streamable Files
3814
+
3815
+ runtime_context.use_container = not options.no_container
3816
+ runtime_context.tmp_outdir_prefix = os.path.realpath(tmp_outdir_prefix)
3817
+ runtime_context.job_script_provider = job_script_provider
3818
+ runtime_context.force_docker_pull = options.force_docker_pull
3819
+ runtime_context.no_match_user = options.no_match_user
3820
+ runtime_context.no_read_only = options.no_read_only
3821
+ runtime_context.basedir = options.basedir
3822
+ if not options.bypass_file_store:
3823
+ # If we're using the file store we need to start moving output
3824
+ # files now.
3825
+ runtime_context.move_outputs = "move"
3826
+
3827
+ # We instantiate an early builder object here to populate indirect
3828
+ # secondaryFile references using cwltool's library because we need
3829
+ # to resolve them before toil imports them into the filestore.
3830
+ # A second builder will be built in the job's run method when toil
3831
+ # actually starts the cwl job.
3832
+ # Note that this accesses input files for tools, so the
3833
+ # ToilFsAccess needs to be set up if we want to be able to use
3834
+ # URLs.
3835
+ builder = tool._init_job(initialized_job_order, runtime_context)
3836
+
3837
+ # make sure this doesn't add listing items; if shallow_listing is
3838
+ # selected, it will discover dirs one deep and then again later on
3839
+ # (probably when the cwltool builder gets ahold of the job in the
3840
+ # CWL job's run()), producing 2+ deep listings instead of only 1.
3841
+ builder.loadListing = "no_listing"
3842
+
3843
+ builder.bind_input(
3844
+ tool.inputs_record_schema,
3845
+ initialized_job_order,
3846
+ discover_secondaryFiles=True,
3847
+ )
3800
3848
 
3801
- # Define something we can call to import a file and get its file
3802
- # ID.
3803
- # We cast this because import_file is overloaded depending on if we
3804
- # pass a shared file name or not, and we know the way we call it we
3805
- # always get a FileID out.
3806
- file_import_function = cast(
3807
- Callable[[str], FileID],
3808
- functools.partial(toil.import_file, symlink=True),
3809
- )
3849
+ # Define something we can call to import a file and get its file
3850
+ # ID.
3851
+ # We cast this because import_file is overloaded depending on if we
3852
+ # pass a shared file name or not, and we know the way we call it we
3853
+ # always get a FileID out.
3854
+ file_import_function = cast(
3855
+ Callable[[str], FileID],
3856
+ functools.partial(toil.import_file, symlink=True),
3857
+ )
3810
3858
 
3811
- # Import all the input files, some of which may be missing optional
3812
- # files.
3813
- logger.info("Importing input files...")
3814
- fs_access = ToilFsAccess(options.basedir)
3815
- import_files(
3816
- file_import_function,
3817
- fs_access,
3818
- fileindex,
3819
- existing,
3820
- initialized_job_order,
3821
- skip_broken=True,
3822
- skip_remote=options.reference_inputs,
3823
- bypass_file_store=options.bypass_file_store,
3824
- log_level=logging.INFO,
3825
- )
3826
- # Import all the files associated with tools (binaries, etc.).
3827
- # Not sure why you would have an optional secondary file here, but
3828
- # the spec probably needs us to support them.
3829
- logger.info("Importing tool-associated files...")
3830
- visitSteps(
3831
- tool,
3832
- functools.partial(
3833
- import_files,
3859
+ # Import all the input files, some of which may be missing optional
3860
+ # files.
3861
+ logger.info("Importing input files...")
3862
+ fs_access = ToilFsAccess(options.basedir)
3863
+ import_files(
3834
3864
  file_import_function,
3835
3865
  fs_access,
3836
3866
  fileindex,
3837
3867
  existing,
3838
- skip_broken=True,
3868
+ initialized_job_order,
3869
+ mark_broken=True,
3839
3870
  skip_remote=options.reference_inputs,
3840
3871
  bypass_file_store=options.bypass_file_store,
3841
3872
  log_level=logging.INFO,
3842
- ),
3843
- )
3844
-
3845
- # We always expect to have processed all files that exist
3846
- for param_name, param_value in initialized_job_order.items():
3847
- # Loop through all the parameters for the workflow overall.
3848
- # Drop any files that aren't either imported (for when we use
3849
- # the file store) or available on disk (for when we don't).
3850
- # This will properly make them cause an error later if they
3851
- # were required.
3852
- rm_unprocessed_secondary_files(param_value)
3853
-
3854
- logger.info("Creating root job")
3855
- logger.debug("Root tool: %s", tool)
3856
- try:
3857
- wf1, _ = makeJob(
3858
- tool=tool,
3859
- jobobj={},
3860
- runtime_context=runtime_context,
3861
- parent_name=None, # toplevel, no name needed
3862
- conditional=None,
3863
3873
  )
3864
- except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
3865
- logging.error(err)
3866
- return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3867
- wf1.cwljob = initialized_job_order
3868
- logger.info("Starting workflow")
3869
- try:
3870
- outobj = toil.start(wf1)
3871
- except FailedJobsException as err:
3872
- if err.exit_code == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
3873
- # We figured out that we can't support this workflow.
3874
- logging.error(err)
3875
- logging.error(
3876
- "Your workflow uses a CWL requirement that Toil does not support!"
3874
+ # Import all the files associated with tools (binaries, etc.).
3875
+ # Not sure why you would have an optional secondary file here, but
3876
+ # the spec probably needs us to support them.
3877
+ logger.info("Importing tool-associated files...")
3878
+ visitSteps(
3879
+ tool,
3880
+ functools.partial(
3881
+ import_files,
3882
+ file_import_function,
3883
+ fs_access,
3884
+ fileindex,
3885
+ existing,
3886
+ mark_broken=True,
3887
+ skip_remote=options.reference_inputs,
3888
+ bypass_file_store=options.bypass_file_store,
3889
+ log_level=logging.INFO,
3890
+ ),
3891
+ )
3892
+
3893
+ # We always expect to have processed all files that exist
3894
+ for param_name, param_value in initialized_job_order.items():
3895
+ # Loop through all the parameters for the workflow overall.
3896
+ # Drop any files that aren't either imported (for when we use
3897
+ # the file store) or available on disk (for when we don't).
3898
+ # This will properly make them cause an error later if they
3899
+ # were required.
3900
+ rm_unprocessed_secondary_files(param_value)
3901
+
3902
+ logger.info("Creating root job")
3903
+ logger.debug("Root tool: %s", tool)
3904
+ try:
3905
+ wf1, _ = makeJob(
3906
+ tool=tool,
3907
+ jobobj={},
3908
+ runtime_context=runtime_context,
3909
+ parent_name=None, # toplevel, no name needed
3910
+ conditional=None,
3877
3911
  )
3912
+ except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
3913
+ logging.error(err)
3878
3914
  return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3879
- else:
3880
- raise
3881
-
3882
- # Now the workflow has completed. We need to make sure the outputs (and
3883
- # inputs) end up where the user wants them to be.
3884
- logger.info("Collecting workflow outputs...")
3885
- outobj = resolve_dict_w_promises(outobj)
3886
-
3887
- # Stage files. Specify destination bucket if specified in CLI
3888
- # options. If destination bucket not passed in,
3889
- # options.destBucket's value will be None.
3890
- toilStageFiles(
3891
- toil,
3892
- outobj,
3893
- outdir,
3894
- destBucket=options.destBucket,
3895
- log_level=logging.INFO
3896
- )
3897
- logger.info("Stored workflow outputs")
3915
+ wf1.cwljob = initialized_job_order
3916
+ logger.info("Starting workflow")
3917
+ outobj = toil.start(wf1)
3898
3918
 
3899
- if runtime_context.research_obj is not None:
3900
- cwltool.cwlprov.writablebagfile.create_job(
3901
- runtime_context.research_obj, outobj, True
3902
- )
3919
+ # Now the workflow has completed. We need to make sure the outputs (and
3920
+ # inputs) end up where the user wants them to be.
3921
+ logger.info("Collecting workflow outputs...")
3922
+ outobj = resolve_dict_w_promises(outobj)
3903
3923
 
3904
- def remove_at_id(doc: Any) -> None:
3905
- if isinstance(doc, MutableMapping):
3906
- for key in list(doc.keys()):
3907
- if key == "@id":
3908
- del doc[key]
3909
- else:
3910
- value = doc[key]
3911
- if isinstance(value, MutableMapping):
3912
- remove_at_id(value)
3913
- if isinstance(value, MutableSequence):
3914
- for entry in value:
3915
- if isinstance(value, MutableMapping):
3916
- remove_at_id(entry)
3917
-
3918
- remove_at_id(outobj)
3919
- visit_class(
3924
+ # Stage files. Specify destination bucket if specified in CLI
3925
+ # options. If destination bucket not passed in,
3926
+ # options.destBucket's value will be None.
3927
+ toilStageFiles(
3928
+ toil,
3920
3929
  outobj,
3921
- ("File",),
3922
- functools.partial(add_sizes, runtime_context.make_fs_access("")),
3923
- )
3924
- if not document_loader:
3925
- raise RuntimeError("cwltool loader is not set.")
3926
- prov_dependencies = cwltool.main.prov_deps(
3927
- workflowobj, document_loader, uri
3928
- )
3929
- runtime_context.research_obj.generate_snapshot(prov_dependencies)
3930
- cwltool.cwlprov.writablebagfile.close_ro(
3931
- runtime_context.research_obj, options.provenance
3930
+ outdir,
3931
+ destBucket=options.destBucket,
3932
+ log_level=logging.INFO
3932
3933
  )
3934
+ logger.info("Stored workflow outputs")
3933
3935
 
3934
- if not options.destBucket and options.compute_checksum:
3935
- logger.info("Computing output file checksums...")
3936
- visit_class(
3937
- outobj,
3938
- ("File",),
3939
- functools.partial(compute_checksums, StdFsAccess("")),
3940
- )
3936
+ if runtime_context.research_obj is not None:
3937
+ cwltool.cwlprov.writablebagfile.create_job(
3938
+ runtime_context.research_obj, outobj, True
3939
+ )
3940
+
3941
+ def remove_at_id(doc: Any) -> None:
3942
+ if isinstance(doc, MutableMapping):
3943
+ for key in list(doc.keys()):
3944
+ if key == "@id":
3945
+ del doc[key]
3946
+ else:
3947
+ value = doc[key]
3948
+ if isinstance(value, MutableMapping):
3949
+ remove_at_id(value)
3950
+ if isinstance(value, MutableSequence):
3951
+ for entry in value:
3952
+ if isinstance(value, MutableMapping):
3953
+ remove_at_id(entry)
3954
+
3955
+ remove_at_id(outobj)
3956
+ visit_class(
3957
+ outobj,
3958
+ ("File",),
3959
+ functools.partial(add_sizes, runtime_context.make_fs_access("")),
3960
+ )
3961
+ if not document_loader:
3962
+ raise RuntimeError("cwltool loader is not set.")
3963
+ prov_dependencies = cwltool.main.prov_deps(
3964
+ workflowobj, document_loader, uri
3965
+ )
3966
+ runtime_context.research_obj.generate_snapshot(prov_dependencies)
3967
+ cwltool.cwlprov.writablebagfile.close_ro(
3968
+ runtime_context.research_obj, options.provenance
3969
+ )
3970
+
3971
+ if not options.destBucket and options.compute_checksum:
3972
+ logger.info("Computing output file checksums...")
3973
+ visit_class(
3974
+ outobj,
3975
+ ("File",),
3976
+ functools.partial(compute_checksums, StdFsAccess("")),
3977
+ )
3941
3978
 
3942
- visit_class(outobj, ("File",), MutationManager().unset_generation)
3943
- stdout.write(json.dumps(outobj, indent=4, default=str))
3944
- stdout.write("\n")
3945
- logger.info("CWL run complete!")
3979
+ visit_class(outobj, ("File",), MutationManager().unset_generation)
3980
+ stdout.write(json.dumps(outobj, indent=4, default=str))
3981
+ stdout.write("\n")
3982
+ logger.info("CWL run complete!")
3983
+ # Don't expose tracebacks to the user for exceptions that may be expected
3984
+ except FailedJobsException as err:
3985
+ if err.exit_code == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
3986
+ # We figured out that we can't support this workflow.
3987
+ logging.error(err)
3988
+ logging.error(
3989
+ "Your workflow uses a CWL requirement that Toil does not support!"
3990
+ )
3991
+ return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3992
+ else:
3993
+ logging.error(err)
3994
+ return 1
3995
+ except (InsufficientSystemResources, LocatorException, InvalidImportExportUrlException, UnimplementedURLException,
3996
+ JobTooBigError) as err:
3997
+ logging.error(err)
3998
+ return 1
3946
3999
 
3947
4000
  return 0
3948
4001