toil 6.1.0a1__py3-none-any.whl → 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. toil/__init__.py +1 -232
  2. toil/batchSystems/abstractBatchSystem.py +41 -17
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +79 -65
  4. toil/batchSystems/awsBatch.py +8 -8
  5. toil/batchSystems/cleanup_support.py +7 -3
  6. toil/batchSystems/contained_executor.py +4 -5
  7. toil/batchSystems/gridengine.py +1 -1
  8. toil/batchSystems/htcondor.py +5 -5
  9. toil/batchSystems/kubernetes.py +25 -11
  10. toil/batchSystems/local_support.py +3 -3
  11. toil/batchSystems/lsf.py +9 -9
  12. toil/batchSystems/mesos/batchSystem.py +4 -4
  13. toil/batchSystems/mesos/executor.py +3 -2
  14. toil/batchSystems/options.py +9 -0
  15. toil/batchSystems/singleMachine.py +11 -10
  16. toil/batchSystems/slurm.py +129 -16
  17. toil/batchSystems/torque.py +1 -1
  18. toil/bus.py +45 -3
  19. toil/common.py +56 -31
  20. toil/cwl/cwltoil.py +442 -371
  21. toil/deferred.py +1 -1
  22. toil/exceptions.py +1 -1
  23. toil/fileStores/abstractFileStore.py +69 -20
  24. toil/fileStores/cachingFileStore.py +6 -22
  25. toil/fileStores/nonCachingFileStore.py +6 -15
  26. toil/job.py +270 -86
  27. toil/jobStores/abstractJobStore.py +37 -31
  28. toil/jobStores/aws/jobStore.py +280 -218
  29. toil/jobStores/aws/utils.py +60 -31
  30. toil/jobStores/conftest.py +2 -2
  31. toil/jobStores/fileJobStore.py +3 -3
  32. toil/jobStores/googleJobStore.py +3 -4
  33. toil/leader.py +89 -38
  34. toil/lib/aws/__init__.py +26 -10
  35. toil/lib/aws/iam.py +2 -2
  36. toil/lib/aws/session.py +62 -22
  37. toil/lib/aws/utils.py +73 -37
  38. toil/lib/conversions.py +24 -1
  39. toil/lib/ec2.py +118 -69
  40. toil/lib/expando.py +1 -1
  41. toil/lib/generatedEC2Lists.py +8 -8
  42. toil/lib/io.py +42 -4
  43. toil/lib/misc.py +1 -3
  44. toil/lib/resources.py +57 -16
  45. toil/lib/retry.py +12 -5
  46. toil/lib/threading.py +29 -14
  47. toil/lib/throttle.py +1 -1
  48. toil/options/common.py +31 -30
  49. toil/options/wdl.py +5 -0
  50. toil/provisioners/__init__.py +9 -3
  51. toil/provisioners/abstractProvisioner.py +12 -2
  52. toil/provisioners/aws/__init__.py +20 -15
  53. toil/provisioners/aws/awsProvisioner.py +406 -329
  54. toil/provisioners/gceProvisioner.py +2 -2
  55. toil/provisioners/node.py +13 -5
  56. toil/server/app.py +1 -1
  57. toil/statsAndLogging.py +93 -23
  58. toil/test/__init__.py +27 -12
  59. toil/test/batchSystems/batchSystemTest.py +40 -33
  60. toil/test/batchSystems/batch_system_plugin_test.py +79 -0
  61. toil/test/batchSystems/test_slurm.py +22 -7
  62. toil/test/cactus/__init__.py +0 -0
  63. toil/test/cactus/test_cactus_integration.py +58 -0
  64. toil/test/cwl/cwlTest.py +245 -236
  65. toil/test/cwl/seqtk_seq.cwl +1 -1
  66. toil/test/docs/scriptsTest.py +11 -14
  67. toil/test/jobStores/jobStoreTest.py +40 -54
  68. toil/test/lib/aws/test_iam.py +2 -2
  69. toil/test/lib/test_ec2.py +1 -1
  70. toil/test/options/__init__.py +13 -0
  71. toil/test/options/options.py +37 -0
  72. toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
  73. toil/test/provisioners/clusterTest.py +99 -16
  74. toil/test/server/serverTest.py +2 -2
  75. toil/test/src/autoDeploymentTest.py +1 -1
  76. toil/test/src/dockerCheckTest.py +2 -1
  77. toil/test/src/environmentTest.py +125 -0
  78. toil/test/src/fileStoreTest.py +1 -1
  79. toil/test/src/jobDescriptionTest.py +18 -8
  80. toil/test/src/jobTest.py +1 -1
  81. toil/test/src/realtimeLoggerTest.py +4 -0
  82. toil/test/src/workerTest.py +52 -19
  83. toil/test/utils/toilDebugTest.py +62 -4
  84. toil/test/utils/utilsTest.py +23 -21
  85. toil/test/wdl/wdltoil_test.py +49 -21
  86. toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
  87. toil/toilState.py +68 -9
  88. toil/utils/toilDebugFile.py +1 -1
  89. toil/utils/toilDebugJob.py +153 -26
  90. toil/utils/toilLaunchCluster.py +12 -2
  91. toil/utils/toilRsyncCluster.py +7 -2
  92. toil/utils/toilSshCluster.py +7 -3
  93. toil/utils/toilStats.py +310 -266
  94. toil/utils/toilStatus.py +98 -52
  95. toil/version.py +11 -11
  96. toil/wdl/wdltoil.py +644 -225
  97. toil/worker.py +125 -83
  98. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
  99. toil-7.0.0.dist-info/METADATA +158 -0
  100. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/RECORD +103 -96
  101. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
  102. toil-6.1.0a1.dist-info/METADATA +0 -125
  103. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
  104. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/wdl/wdltoil.py CHANGED
@@ -24,6 +24,7 @@ import shutil
 import stat
 import subprocess
 import sys
+import textwrap
 import uuid
 from contextlib import ExitStack, contextmanager
 from graphlib import TopologicalSorter
@@ -47,14 +48,15 @@ from urllib.parse import quote, unquote, urljoin, urlsplit
 
 import WDL.Error
 import WDL.runtime.config
-from configargparse import ArgParser, SUPPRESS
+from configargparse import ArgParser
 from WDL._util import byte_size_units, strip_leading_whitespace
 from WDL.CLI import print_error
 from WDL.runtime.backend.docker_swarm import SwarmContainer
 from WDL.runtime.backend.singularity import SingularityContainer
 from WDL.runtime.task_container import TaskContainer
 
-from toil.common import Toil, addOptions, check_and_create_default_config_file
+from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
+from toil.common import Toil, addOptions
 from toil.fileStores import FileID
 from toil.fileStores.abstractFileStore import AbstractFileStore
 from toil.job import (AcceleratorRequirement,
@@ -62,17 +64,19 @@ from toil.job import (AcceleratorRequirement,
                       Promise,
                       Promised,
                       TemporaryID,
-                      accelerators_fully_satisfy,
                       parse_accelerator,
                       unwrap,
                       unwrap_all)
-from toil.jobStores.abstractJobStore import (AbstractJobStore,
-                                             UnimplementedURLException)
-from toil.lib.conversions import convert_units, human2bytes
+from toil.jobStores.abstractJobStore import (AbstractJobStore, UnimplementedURLException,
+                                             InvalidImportExportUrlException, LocatorException)
+from toil.lib.conversions import convert_units, human2bytes, strtobool
 from toil.lib.io import mkdtemp
 from toil.lib.memoize import memoize
 from toil.lib.misc import get_user_name
+from toil.lib.resources import ResourceMonitor
 from toil.lib.threading import global_mutex
+from toil.provisioners.clusterScaler import JobTooBigError
+
 
 logger = logging.getLogger(__name__)
 
@@ -86,19 +90,30 @@ def wdl_error_reporter(task: str, exit: bool = False, log: Callable[[str], None]
     try:
         yield
     except (
+        WDL.Error.EvalError,
         WDL.Error.SyntaxError,
         WDL.Error.ImportError,
        WDL.Error.ValidationError,
        WDL.Error.MultipleValidationErrors,
-        FileNotFoundError
+        FileNotFoundError,
+        InsufficientSystemResources,
+        LocatorException,
+        InvalidImportExportUrlException,
+        UnimplementedURLException,
+        JobTooBigError
     ) as e:
-        log("Could not " + task)
+        # Don't expose tracebacks to the user for exceptions that may be expected
+        log("Could not " + task + " because:")
+
         # These are the errors that MiniWDL's parser can raise and its reporter
-        # can report. See
+        # can report (plus some extras). See
         # https://github.com/chanzuckerberg/miniwdl/blob/a780b1bf2db61f18de37616068968b2bb4c2d21c/WDL/CLI.py#L91-L97.
         #
         # We are going to use MiniWDL's pretty printer to print them.
+        # Make the MiniWDL stuff on stderr loud so people see it
+        sys.stderr.write("\n" + "🚨" * 3 + "\n")
        print_error(e)
+        sys.stderr.write("🚨" * 3 + "\n\n")
        if exit:
            # Stop right now
            sys.exit(1)
@@ -110,7 +125,7 @@ F = TypeVar('F', bound=Callable[..., Any])
 def report_wdl_errors(task: str, exit: bool = False, log: Callable[[str], None] = logger.critical) -> Callable[[F], F]:
     """
     Create a decorator to report WDL errors with the given task message.
-
+
     Decorator can then be applied to a function, and if a WDL error happens it
     will say that it could not {task}.
     """
@@ -524,6 +539,8 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
     def __init__(self, file_store: AbstractFileStore, execution_dir: Optional[str] = None):
         """
         Set up the standard library.
+
+        :param execution_dir: Directory to use as the working directory for workflow code.
         """
         # TODO: Just always be the 1.2 standard library.
         wdl_version = "1.2"
@@ -542,14 +559,66 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
         # UUID to differentiate which node files are virtualized from
         self._parent_dir_to_ids: Dict[str, uuid.UUID] = dict()
 
+        # Map forward from virtualized files to absolute devirtualized ones.
+        self._virtualized_to_devirtualized: Dict[str, str] = {}
+        # Allow mapping back from absolute devirtualized files to virtualized
+        # paths, to save re-uploads.
+        self._devirtualized_to_virtualized: Dict[str, str] = {}
+
         self._execution_dir = execution_dir
 
+    def share_files(self, other: "ToilWDLStdLibBase") -> None:
+        """
+        Share caches for devirtualizing and virtualizing files with another instance.
+
+        Files devirtualized by one instance can be re-virtualized back to their
+        original virtualized filenames by the other.
+        """
+
+        if id(self._virtualized_to_devirtualized) != id(other._virtualized_to_devirtualized):
+            # Merge the virtualized to devirtualized mappings
+            self._virtualized_to_devirtualized.update(other._virtualized_to_devirtualized)
+            other._virtualized_to_devirtualized = self._virtualized_to_devirtualized
+
+        if id(self._devirtualized_to_virtualized) != id(other._devirtualized_to_virtualized):
+            # Merge the devirtualized to virtualized mappings
+            self._devirtualized_to_virtualized.update(other._devirtualized_to_virtualized)
+            other._devirtualized_to_virtualized = self._devirtualized_to_virtualized
+
     @memoize
     def _devirtualize_filename(self, filename: str) -> str:
         """
         'devirtualize' filename passed to a read_* function: return a filename that can be open()ed
         on the local host.
         """
+
+        result = self.devirtualize_to(filename, self._file_store.localTempDir, self._file_store, self._execution_dir)
+        # Store the back mapping
+        self._devirtualized_to_virtualized[result] = filename
+        # And the forward
+        self._virtualized_to_devirtualized[filename] = result
+        return result
+
+    @staticmethod
+    def devirtualize_to(filename: str, dest_dir: str, file_source: Union[AbstractFileStore, Toil], execution_dir: Optional[str]) -> str:
+        """
+        Download or export a WDL virtualized filename/URL to the given directory.
+
+        The destination directory must already exist.
+
+        Makes sure sibling files stay siblings and files with the same name
+        don't clobber each other. Called from within this class for tasks, and
+        statically at the end of the workflow for outputs.
+
+        Returns the local path to the file. If it already had a local path
+        elsewhere, it might not actually be put in dest_dir.
+        """
+
+        if not os.path.isdir(dest_dir):
+            # os.mkdir fails saying the directory *being made* caused a
+            # FileNotFoundError. So check the dest_dir before trying to make
+            # directories under it.
+            raise RuntimeError(f"Cannot devirtualize {filename} into nonexistent directory {dest_dir}")
 
         # TODO: Support people doing path operations (join, split, get parent directory) on the virtualized filenames.
         # TODO: For task inputs, we are supposed to make sure to put things in the same directory if they came from the same directory. See <https://github.com/openwdl/wdl/blob/main/versions/1.0/SPEC.md#task-input-localization>
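Note: share_files works by making both standard-library instances alias the same dict objects, so a mapping recorded later by either side is visible to both. A minimal sketch of that aliasing contract with plain dicts (the paths and URI values below are invented):

    def share(mine: dict, theirs: dict) -> dict:
        # Mirror of the merge step above: combine entries, then have both
        # sides reference the single merged dict from now on.
        if id(mine) != id(theirs):
            mine.update(theirs)
        return mine

    a = {"/tmp/x.txt": "toilfile:aaa"}   # invented devirtualized -> virtualized entries
    b = {"/tmp/y.txt": "toilfile:bbb"}
    b = share(a, b)                      # b now aliases a's dict
    a["/tmp/z.txt"] = "toilfile:ccc"
    assert b["/tmp/z.txt"] == "toilfile:ccc"  # later updates are seen by both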
@@ -564,8 +633,8 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
             # Use UUID as folder name rather than a new temp folder to reduce internal clutter.
             # Put the UUID in the destination path in order for tasks to
             # see where to put files depending on their parents.
-            dir_path = os.path.join(self._file_store.localTempDir, parent_id)
-
+            dir_path = os.path.join(dest_dir, parent_id)
+
         else:
             # Parse the URL and extract the basename
             file_basename = os.path.basename(urlsplit(filename).path)
@@ -574,8 +643,8 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
             # in, not relative to the thing.
             parent_url = urljoin(filename, ".")
             # Turn it into a string we can make a directory for
-            dir_path = os.path.join(self._file_store.localTempDir, quote(parent_url, safe=''))
-
+            dir_path = os.path.join(dest_dir, quote(parent_url, safe=''))
+
         if not os.path.exists(dir_path):
             # Make sure the chosen directory exists
             os.mkdir(dir_path)
@@ -584,7 +653,17 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
 
         if filename.startswith(TOIL_URI_SCHEME):
             # Get a local path to the file
-            result = self._file_store.readGlobalFile(file_id, dest_path)
+            if isinstance(file_source, AbstractFileStore):
+                # Read from the file store.
+                # File is not allowed to be modified by the task. See
+                # <https://github.com/openwdl/wdl/issues/495>.
+                # We try to get away with symlinks and hope the task
+                # container can mount the destination file.
+                result = file_source.readGlobalFile(file_id, dest_path, mutable=False, symlink=True)
+            elif isinstance(file_source, Toil):
+                # Read from the Toil context
+                file_source.export_file(file_id, dest_path)
+                result = dest_path
         else:
             # Download to a local file with the right name and execute bit.
             # Open it exclusively
@@ -600,8 +679,8 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
            # This is a local file
            # To support relative paths, join the execution dir and filename
            # if filename is already an abs path, join() will do nothing
-            if self._execution_dir is not None:
-                result = os.path.join(self._execution_dir, filename)
+            if execution_dir is not None:
+                result = os.path.join(execution_dir, filename)
            else:
                result = filename
 
@@ -610,6 +689,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
             raise RuntimeError(f"Virtualized file {filename} looks like a local file but isn't!")
         return result
 
+    @memoize
     def _virtualize_filename(self, filename: str) -> str:
         """
         from a local path in write_dir, 'virtualize' into the filename as it should present in a
@@ -618,21 +698,36 @@
 
         if is_url(filename):
             # Already virtual
-            logger.debug('Already virtualized %s as WDL file %s', filename, filename)
+            logger.debug('Already virtual: %s', filename)
             return filename
 
         # Otherwise this is a local file and we want to fake it as a Toil file store file
 
-        # To support relative paths from execution directory, join the execution dir and filename
-        # If filename is already an abs path, join() will not do anything
+        # Make it an absolute path
         if self._execution_dir is not None:
-            file_id = self._file_store.writeGlobalFile(os.path.join(self._execution_dir, filename))
+            # To support relative paths from execution directory, join the execution dir and filename
+            # If filename is already an abs path, join() will not do anything
+            abs_filename = os.path.join(self._execution_dir, filename)
         else:
-            file_id = self._file_store.writeGlobalFile(filename)
-        dir = os.path.dirname(os.path.abspath(filename))  # is filename always an abspath?
-        parent_id = self._parent_dir_to_ids.setdefault(dir, uuid.uuid4())
-        result = pack_toil_uri(file_id, parent_id, os.path.basename(filename))
+            abs_filename = os.path.abspath(filename)
+
+        if abs_filename in self._devirtualized_to_virtualized:
+            # This is a previously devirtualized thing so we can just use the
+            # virtual version we remembered instead of reuploading it.
+            result = self._devirtualized_to_virtualized[abs_filename]
+            logger.debug("Re-using virtualized WDL file %s for %s", result, filename)
+            return result
+
+        file_id = self._file_store.writeGlobalFile(abs_filename)
+
+        file_dir = os.path.dirname(abs_filename)
+        parent_id = self._parent_dir_to_ids.setdefault(file_dir, uuid.uuid4())
+        result = pack_toil_uri(file_id, parent_id, os.path.basename(abs_filename))
         logger.debug('Virtualized %s as WDL file %s', filename, result)
+        # Remember the upload in case we share a cache
+        self._devirtualized_to_virtualized[abs_filename] = result
+        # And remember the local path in case we want a redownload
+        self._virtualized_to_devirtualized[result] = abs_filename
         return result
 
 class ToilWDLStdLibTaskCommand(ToilWDLStdLibBase):
@@ -677,7 +772,7 @@ class ToilWDLStdLibTaskCommand(ToilWDLStdLibBase):
         logger.debug('Devirtualized %s as out-of-container file %s', filename, result)
         return result
 
-
+    @memoize
     def _virtualize_filename(self, filename: str) -> str:
         """
         From a local path in write_dir, 'virtualize' into the filename as it should present in a
@@ -699,10 +794,11 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
     functions only allowed in task output sections.
     """
 
-    def __init__(self, file_store: AbstractFileStore, stdout_path: str, stderr_path: str, current_directory_override: Optional[str] = None):
+    def __init__(self, file_store: AbstractFileStore, stdout_path: str, stderr_path: str, file_to_mountpoint: Dict[str, str], current_directory_override: Optional[str] = None):
         """
         Set up the standard library for a task output section. Needs to know
-        where standard output and error from the task have been stored.
+        where standard output and error from the task have been stored, and
+        what local paths to pretend are where for resolving symlinks.
 
         If current_directory_override is set, resolves relative paths and globs
         from there instead of from the real current directory.
@@ -712,10 +808,17 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
         # WDL.StdLib.TaskOutputs next.
         super().__init__(file_store)
 
-        # Remember task putput files
+        # Remember task output files
         self._stdout_path = stdout_path
         self._stderr_path = stderr_path
 
+        # Remember that the WDL code has not referenced them yet.
+        self._stdout_used = False
+        self._stderr_used = False
+
+        # Reverse and store the file mount dict
+        self._mountpoint_to_file = {v: k for k, v in file_to_mountpoint.items()}
+
         # Remember current directory
         self._current_directory_override = current_directory_override
 
@@ -741,14 +844,28 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
         """
         Get the standard output of the command that ran, as a WDL File, outside the container.
         """
+        self._stdout_used = True
         return WDL.Value.File(self._stdout_path)
 
+    def stdout_used(self) -> bool:
+        """
+        Return True if the standard output was read by the WDL.
+        """
+        return self._stdout_used
+
     def _stderr(self) -> WDL.Value.File:
         """
         Get the standard error of the command that ran, as a WDL File, outside the container.
         """
+        self._stderr_used = True
         return WDL.Value.File(self._stderr_path)
 
+    def stderr_used(self) -> bool:
+        """
+        Return True if the standard error was read by the WDL.
+        """
+        return self._stderr_used
+
     def _glob(self, pattern: WDL.Value.String) -> WDL.Value.Array:
         """
         Get a WDL Array of WDL Files left behind by the job that ran, matching the given glob pattern, outside the container.
@@ -770,7 +887,7 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
         work_dir = '.' if not self._current_directory_override else self._current_directory_override
 
         # TODO: get this to run in the right container if there is one
-        # Bash (now?) has a compgen builtin for shell completion that can evaluate a glob where the glob is in a quotes string that might have spaces in it. See <https://unix.stackexchange.com/a/616608>.
+        # Bash (now?) has a compgen builtin for shell completion that can evaluate a glob where the glob is in a quoted string that might have spaces in it. See <https://unix.stackexchange.com/a/616608>.
         # This will handle everything except newlines in the filenames.
         # TODO: Newlines in the filenames?
         # Since compgen will return 1 if nothing matches, we need to allow a failing exit code here.
@@ -808,6 +925,7 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
 
         return super()._devirtualize_filename(filename)
 
+    @memoize
     def _virtualize_filename(self, filename: str) -> str:
         """
         Go from a local disk filename to a virtualized WDL-side filename.
@@ -818,11 +936,46 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
         """
 
         if not is_url(filename) and not filename.startswith('/'):
-            # We are getting a bare relative path the supposedly devirtualized side.
+            # We are getting a bare relative path on the supposedly devirtualized side.
             # Find a real path to it relative to the current directory override.
             work_dir = '.' if not self._current_directory_override else self._current_directory_override
             filename = os.path.join(work_dir, filename)
 
+        if filename in self._devirtualized_to_virtualized:
+            result = self._devirtualized_to_virtualized[filename]
+            logger.debug("Re-using virtualized filename %s for %s", result, filename)
+            return result
+
+        if os.path.islink(filename):
+            # Recursively resolve symlinks
+            here = filename
+            # Notice if we have a symlink loop
+            seen = {here}
+            while os.path.islink(here):
+                dest = os.readlink(here)
+                if not dest.startswith('/'):
+                    # Make it absolute
+                    dest = os.path.join(os.path.dirname(here), dest)
+                here = dest
+                if here in self._mountpoint_to_file:
+                    # This points to something mounted into the container, so use that path instead.
+                    here = self._mountpoint_to_file[here]
+                if here in self._devirtualized_to_virtualized:
+                    # Check the virtualized filenames before following symlinks
+                    # all the way back to workflow inputs.
+                    result = self._devirtualized_to_virtualized[here]
+                    logger.debug("Re-using virtualized filename %s for %s linked from %s", result, here, filename)
+                    return result
+                if here in seen:
+                    raise RuntimeError(f"Symlink {filename} leads to symlink loop at {here}")
+                seen.add(here)
+
+            if os.path.exists(here):
+                logger.debug("Handling symlink %s ultimately to %s", filename, here)
+            else:
+                logger.error("Handling broken symlink %s ultimately to %s", filename, here)
+            filename = here
+
         return super()._virtualize_filename(filename)
 
 def evaluate_named_expression(context: Union[WDL.Error.SourceNode, WDL.Error.SourcePosition], name: str, expected_type: Optional[WDL.Type.Base], expression: Optional[WDL.Expr.Base], environment: WDLBindings, stdlib: WDL.StdLib.Base) -> WDL.Value.Base:
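Note: the symlink handling added above reads as a standalone algorithm. A simplified, self-contained sketch (without the mountpoint and virtualized-filename cache lookups the real loop consults at each hop):

    import os

    def resolve_symlink_chain(path: str) -> str:
        # Follow symlinks one hop at a time, failing on a loop.
        here = path
        seen = {here}
        while os.path.islink(here):
            dest = os.readlink(here)
            if not dest.startswith('/'):
                # Relative targets are relative to the link's own directory
                dest = os.path.join(os.path.dirname(here), dest)
            here = dest
            if here in seen:
                raise RuntimeError(f"Symlink {path} leads to symlink loop at {here}")
            seen.add(here)
        return here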
@@ -1009,7 +1162,7 @@ def import_files(environment: WDLBindings, toil: Toil, path: Optional[List[str]]
                 # we have no auth.
                 logger.error("Something went wrong importing %s", candidate_uri)
                 raise
-
+
             if imported is None:
                 # Wasn't found there
                 continue
@@ -1022,7 +1175,7 @@ def import_files(environment: WDLBindings, toil: Toil, path: Optional[List[str]]
                 # We can't have files with no basename because we need to
                 # download them at that basename later.
                 raise RuntimeError(f"File {candidate_uri} has no basename and so cannot be a WDL File")
-
+
             # Was actually found
             if is_url(candidate_uri):
                 # Might be a file URI or other URI.
@@ -1074,8 +1227,10 @@ def drop_missing_files(environment: WDLBindings, current_directory_override: Opt
            logger.warning('File %s with type %s does not actually exist at its URI', filename, value_type)
            return None
        else:
+            # Get the absolute path, not resolving symlinks
            effective_path = os.path.abspath(os.path.join(work_dir, filename))
-            if os.path.exists(effective_path):
+            if os.path.islink(effective_path) or os.path.exists(effective_path):
+                # This is a broken symlink or a working symlink or a file.
                return filename
            else:
                logger.warning('File %s with type %s does not actually exist at %s', filename, value_type, effective_path)
@@ -1089,7 +1244,7 @@ def get_file_paths_in_bindings(environment: WDLBindings) -> List[str]:
     duplicates are removed.
 
     TODO: Duplicative with WDL.runtime.task._fspaths, except that is internal
-    and supports Direcotry objects.
+    and supports Directory objects.
     """
 
     paths = []
@@ -1184,9 +1339,11 @@ class WDLBaseJob(Job):
     null values for things not defined in a section. Post-processing operations
     can be added onto any job before it is saved, and will be applied as long
     as the job's run method calls postprocess().
+
+    Also responsible for remembering the Toil WDL configuration keys and values.
     """
 
-    def __init__(self, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Make a WDL-related job.
 
@@ -1213,7 +1370,9 @@ class WDLBaseJob(Job):
         # jobs returning other jobs' promised RVs.
         self._postprocessing_steps: List[Tuple[str, Union[str, Promised[WDLBindings]]]] = []
 
-        self._execution_dir = execution_dir
+        self._wdl_options = wdl_options if wdl_options is not None else {}
+
+        assert self._wdl_options.get("container") is not None
 
     # TODO: We're not allowed by MyPy to override a method and widen the return
     # type, so this has to be Any.
@@ -1266,7 +1425,7 @@ class WDLBaseJob(Job):
 
         for action, argument in self._postprocessing_steps:
 
-            logger.debug("Apply postprocessing setp: (%s, %s)", action, argument)
+            logger.debug("Apply postprocessing step: (%s, %s)", action, argument)
 
             # Interpret the mini language of postprocessing steps.
             # These are too small to justify being their own separate jobs.
@@ -1307,65 +1466,44 @@
 
         logger.debug("Assigned postprocessing steps from %s to %s", self, other)
 
-
-class WDLTaskJob(WDLBaseJob):
+class WDLTaskWrapperJob(WDLBaseJob):
     """
-    Job that runs a WDL task.
+    Job that determines the resources needed to run a WDL job.
 
     Responsible for evaluating the input declarations for unspecified inputs,
-    evaluating the runtime section, re-scheduling if resources are not
-    available, running any command, and evaluating the outputs.
+    evaluating the runtime section, and scheduling or chaining to the real WDL
+    job.
 
     All bindings are in terms of task-internal names.
     """
 
-    def __init__(self, task: WDL.Tree.Task, prev_node_results: Sequence[Promised[WDLBindings]], task_id: List[str], namespace: str, **kwargs: Any) -> None:
+    def __init__(self, task: WDL.Tree.Task, prev_node_results: Sequence[Promised[WDLBindings]], task_id: List[str], namespace: str, task_path: str, **kwargs: Any) -> None:
         """
-        Make a new job to run a task.
+        Make a new job to determine resources and run a task.
 
         :param namespace: The namespace that the task's *contents* exist in.
               The caller has alredy added the task's own name.
-        """
 
-        # This job should not be local because it represents a real workflow task.
-        # TODO: Instead of re-scheduling with more resources, add a local
-        # "wrapper" job like CWL uses to determine the actual requirements.
-        super().__init__(unitName=namespace, displayName=namespace, local=False, **kwargs)
+        :param task_path: Like the namespace, but including subscript numbers
+               for scatters.
+        """
+        super().__init__(unitName=task_path + ".inputs", displayName=namespace + ".inputs", local=True, **kwargs)
 
-        logger.info("Preparing to run task %s as %s", task.name, namespace)
+        logger.info("Preparing to run task code for %s as %s", task.name, namespace)
 
         self._task = task
         self._prev_node_results = prev_node_results
         self._task_id = task_id
         self._namespace = namespace
+        self._task_path = task_path
 
-    def can_fake_root(self) -> bool:
-        """
-        Determie if --fakeroot is likely to work for Singularity.
-        """
-
-        # We need to have an entry for our user in /etc/subuid to grant us a range of UIDs to use, for fakeroot to work.
-        try:
-            subuid_file = open('/etc/subuid')
-        except OSError as e:
-            logger.warning('Cannot open /etc/subuid due to %s; assuming no subuids available', e)
-            return False
-        username = get_user_name()
-        for line in subuid_file:
-            if line.split(':')[0].strip() == username:
-                # We have a line assigning subuids
-                return True
-        # If there is no line, we have no subuids
-        logger.warning('No subuids are assigned to %s; cannot fake root.', username)
-        return False
-
-    @report_wdl_errors("run task")
+    @report_wdl_errors("evaluate task code", exit=True)
     def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
         """
-        Actually run the task.
+        Evaluate inputs and runtime and schedule the task.
         """
         super().run(file_store)
-        logger.info("Running task %s (%s) called as %s", self._task.name, self._task_id, self._namespace)
+        logger.info("Evaluating inputs and runtime for task %s (%s) called as %s", self._task.name, self._task_id, self._namespace)
 
         # Combine the bindings we get from previous jobs.
         # For a task we are only passed the inside-the-task namespace.
@@ -1375,19 +1513,20 @@ class WDLTaskJob(WDLBaseJob):
         standard_library = ToilWDLStdLibBase(file_store)
 
         if self._task.inputs:
-            logger.debug("Evaluating task inputs")
+            logger.debug("Evaluating task code")
             for input_decl in self._task.inputs:
                 # Evaluate all the inputs that aren't pre-set
                 bindings = bindings.bind(input_decl.name, evaluate_defaultable_decl(input_decl, bindings, standard_library))
         for postinput_decl in self._task.postinputs:
-            # Evaluate all the postinput decls
+            # Evaluate all the postinput decls.
+            # We need these in order to evaluate the runtime.
+            # TODO: What if they wanted resources from the runtime?
             bindings = bindings.bind(postinput_decl.name, evaluate_defaultable_decl(postinput_decl, bindings, standard_library))
 
         # Evaluate the runtime section
         runtime_bindings = evaluate_call_inputs(self._task, self._task.runtime, bindings, standard_library)
 
-        # Fill these in with not-None if we need to bump up our resources from what we have available.
-        # TODO: Can this break out into a function somehow?
+        # Fill these in with not-None if the workflow asks for each resource.
         runtime_memory: Optional[int] = None
         runtime_cores: Optional[float] = None
         runtime_disk: Optional[int] = None
@@ -1395,21 +1534,14 @@ class WDLTaskJob(WDLBaseJob):
 
         if runtime_bindings.has_binding('cpu'):
             cpu_spec: int = runtime_bindings.resolve('cpu').value
-            if cpu_spec > self.cores:
-                # We need to get more cores
-                runtime_cores = float(cpu_spec)
-                logger.info('Need to reschedule to get %s cores; have %s', runtime_cores, self.cores)
+            runtime_cores = float(cpu_spec)
 
         if runtime_bindings.has_binding('memory'):
             # Get the memory requirement and convert to bytes
             memory_spec: Union[int, str] = runtime_bindings.resolve('memory').value
             if isinstance(memory_spec, str):
                 memory_spec = human2bytes(memory_spec)
-
-            if memory_spec > self.memory:
-                # We need to go get more memory
-                runtime_memory = memory_spec
-                logger.info('Need to reschedule to get %s memory; have %s', runtime_memory, self.memory)
+            runtime_memory = memory_spec
 
         if runtime_bindings.has_binding('disks'):
             # Miniwdl doesn't have this, but we need to be able to parse things like:
@@ -1445,9 +1577,7 @@ class WDLTaskJob(WDLBaseJob):
                 if spec_parts[2] == 'LOCAL':
                     logger.warning('Not rounding LOCAL disk to the nearest 375 GB; workflow execution will differ from Cromwell!')
            total_bytes: float = convert_units(total_gb, 'GB')
-            if total_bytes > self.disk:
-                runtime_disk = int(total_bytes)
-                logger.info('Need to reschedule to get %s disk, have %s', runtime_disk, self.disk)
+            runtime_disk = int(total_bytes)
 
        if runtime_bindings.has_binding('gpuType') or runtime_bindings.has_binding('gpuCount') or runtime_bindings.has_binding('nvidiaDriverVersion'):
            # We want to have GPUs
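Note: on the unit math in these hunks — memory strings go through human2bytes and Cromwell-style disks specs through convert_units(total_gb, 'GB'). A hedged sketch of the equivalent arithmetic, assuming decimal GB for disk and binary units for a "GiB" memory string (the values below are invented):

    # Cromwell-style spec: "local-disk <size> <type>"; the size is in GB.
    spec_parts = "local-disk 50 SSD".split(' ')   # invented example spec
    total_gb = float(spec_parts[1])               # 50.0
    runtime_disk = int(total_gb * 10**9)          # 50 GB -> 50_000_000_000 bytes

    # A memory string like "2 GiB" converts with binary units:
    runtime_memory = 2 * 1024**3                  # -> 2_147_483_648 bytes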
@@ -1467,69 +1597,262 @@ class WDLTaskJob(WDLBaseJob):
                 accelerator_spec['brand'] = gpu_brand
 
             accelerator_requirement = parse_accelerator(accelerator_spec)
-            if not accelerators_fully_satisfy(self.accelerators, accelerator_requirement, ignore=['model']):
-                # We don't meet the accelerator requirement.
-                # We are loose on the model here since, really, we *should*
-                # have either no accelerators or the accelerators we asked for.
-                # If the batch system is ignoring the model, we don't want to
-                # loop forever trying for the right model.
-                # TODO: Change models overall to a hint???
-                runtime_accelerators = [accelerator_requirement]
-                logger.info('Need to reschedule to get %s accelerators, have %s', runtime_accelerators, self.accelerators)
-
-        if runtime_cores or runtime_memory or runtime_disk or runtime_accelerators:
-            # We need to reschedule.
-            logger.info('Rescheduling %s with more resources', self)
-            # Make the new copy of this job with more resources.
-            # TODO: We don't pass along the input or runtime bindings, so they
-            # need to get re-evaluated. If we did pass them, we'd have to make
-            # sure to upload local files made by WDL code in the inputs/runtime
-            # sections and pass along that environment. Right now we just
-            # re-evaluate that whole section once we have the requested
-            # resources.
-            # TODO: What if the runtime section says we need a lot of disk to
-            # hold the large files that the inputs section is going to write???
-            rescheduled = WDLTaskJob(self._task, self._prev_node_results, self._task_id, self._namespace, cores=runtime_cores or self.cores, memory=runtime_memory or self.memory, disk=runtime_disk or self.disk, accelerators=runtime_accelerators or self.accelerators)
-            # Run that as a child
-            self.addChild(rescheduled)
-
-            # Give it our postprocessing steps
-            self.defer_postprocessing(rescheduled)
-
-            # And return its result.
-            return rescheduled.rv()
-
-        # If we get here we have all the resources we need, so run the task
-
-        if shutil.which('singularity'):
+            runtime_accelerators = [accelerator_requirement]
+
+        # Schedule to get resources. Pass along the bindings from evaluating all the inputs and decls, and the runtime, with files virtualized.
+        run_job = WDLTaskJob(self._task, virtualize_files(bindings, standard_library), virtualize_files(runtime_bindings, standard_library), self._task_id, self._namespace, self._task_path, cores=runtime_cores or self.cores, memory=runtime_memory or self.memory, disk=runtime_disk or self.disk, accelerators=runtime_accelerators or self.accelerators, wdl_options=self._wdl_options)
+        # Run that as a child
+        self.addChild(run_job)
+
+        # Give it our postprocessing steps
+        self.defer_postprocessing(run_job)
+
+        # And return its result.
+        return run_job.rv()
 
+
+
+class WDLTaskJob(WDLBaseJob):
+    """
+    Job that runs a WDL task.
+
+    Responsible for re-evaluating input declarations for unspecified inputs,
+    evaluating the runtime section, re-scheduling if resources are not
+    available, running any command, and evaluating the outputs.
+
+    All bindings are in terms of task-internal names.
+    """
+
+    def __init__(self, task: WDL.Tree.Task, task_internal_bindings: Promised[WDLBindings], runtime_bindings: Promised[WDLBindings], task_id: List[str], namespace: str, task_path: str, **kwargs: Any) -> None:
+        """
+        Make a new job to run a task.
+
+        :param namespace: The namespace that the task's *contents* exist in.
+               The caller has alredy added the task's own name.
+
+        :param task_path: Like the namespace, but including subscript numbers
+               for scatters.
+        """
+
+        # This job should not be local because it represents a real workflow task.
+        # TODO: Instead of re-scheduling with more resources, add a local
+        # "wrapper" job like CWL uses to determine the actual requirements.
+        super().__init__(unitName=task_path + ".command", displayName=namespace + ".command", local=False, **kwargs)
+
+        logger.info("Preparing to run task %s as %s", task.name, namespace)
+
+        self._task = task
+        self._task_internal_bindings = task_internal_bindings
+        self._runtime_bindings = runtime_bindings
+        self._task_id = task_id
+        self._namespace = namespace
+        self._task_path = task_path
+
+    ###
+    # Runtime code injection system
+    ###
+
+    # WDL runtime code injected in the container communicates back to the rest
+    # of the runtime through files in this directory.
+    INJECTED_MESSAGE_DIR = ".toil_wdl_runtime"
+
+    def add_injections(self, command_string: str, task_container: TaskContainer) -> str:
+        """
+        Inject extra Bash code from the Toil WDL runtime into the command for the container.
+
+        Currently doesn't implement the MiniWDL plugin system, but does add
+        resource usage monitoring to Docker containers.
+        """
+        if isinstance(task_container, SwarmContainer):
+            # We're running on Docker Swarm, so we need to monitor CPU usage
+            # and so on from inside the container, since it won't be attributed
+            # to Toil child processes in the leader's self-monitoring.
+            # TODO: Mount this from a file Toil installs instead or something.
+            script = textwrap.dedent("""\
+                function _toil_resource_monitor () {
+                    # Turn off error checking and echo in here
+                    set +ex
+                    MESSAGE_DIR="${1}"
+                    mkdir -p "${MESSAGE_DIR}"
+
+                    function sample_cpu_usec() {
+                        if [[ -f /sys/fs/cgroup/cpu.stat ]] ; then
+                            awk '{ if ($1 == "usage_usec") {print $2} }' /sys/fs/cgroup/cpu.stat
+                        elif [[ -f /sys/fs/cgroup/cpuacct/cpuacct.stat ]] ; then
+                            echo $(( $(head -n 1 /sys/fs/cgroup/cpuacct/cpuacct.stat | cut -f2 -d' ') * 10000 ))
+                        fi
+                    }
+
+                    function sample_memory_bytes() {
+                        if [[ -f /sys/fs/cgroup/memory.stat ]] ; then
+                            awk '{ if ($1 == "anon") { print $2 } }' /sys/fs/cgroup/memory.stat
+                        elif [[ -f /sys/fs/cgroup/memory/memory.stat ]] ; then
+                            awk '{ if ($1 == "total_rss") { print $2 } }' /sys/fs/cgroup/memory/memory.stat
+                        fi
+                    }
+
+                    while true ; do
+                        printf "CPU\\t" >> ${MESSAGE_DIR}/resources.tsv
+                        sample_cpu_usec >> ${MESSAGE_DIR}/resources.tsv
+                        printf "Memory\\t" >> ${MESSAGE_DIR}/resources.tsv
+                        sample_memory_bytes >> ${MESSAGE_DIR}/resources.tsv
+                        sleep 1
+                    done
+                }
+                """)
+            parts = [script, f"_toil_resource_monitor {self.INJECTED_MESSAGE_DIR} &", command_string]
+            return "\n".join(parts)
+        else:
+            return command_string
+
+    def handle_injection_messages(self, outputs_library: ToilWDLStdLibTaskOutputs) -> None:
+        """
+        Handle any data received from injected runtime code in the container.
+        """
+
+        message_files = outputs_library._glob(WDL.Value.String(os.path.join(self.INJECTED_MESSAGE_DIR, "*")))
+        logger.debug("Handling message files: %s", message_files)
+        for message_file in message_files.value:
+            self.handle_message_file(message_file.value)
+
+    def handle_message_file(self, file_path: str) -> None:
+        """
+        Handle a message file received from in-container injected code.
+
+        Takes the host-side path of the file.
+        """
+        if os.path.basename(file_path) == "resources.tsv":
+            # This is a TSV of resource usage info.
+            first_cpu_usec: Optional[int] = None
+            last_cpu_usec: Optional[int] = None
+            max_memory_bytes: Optional[int] = None
+
+            for line in open(file_path):
+                if not line.endswith("\n"):
+                    # Skip partial lines
+                    continue
+                # For each full line we got
+                parts = line.strip().split("\t")
+                if len(parts) != 2:
+                    # Skip odd-shaped lines
+                    continue
+                if parts[0] == "CPU":
+                    # Parse CPU usage
+                    cpu_usec = int(parts[1])
+                    # Update summary stats
+                    if first_cpu_usec is None:
+                        first_cpu_usec = cpu_usec
+                    last_cpu_usec = cpu_usec
+                elif parts[0] == "Memory":
+                    # Parse memory usage
+                    memory_bytes = int(parts[1])
+                    # Update summary stats
+                    if max_memory_bytes is None or max_memory_bytes < memory_bytes:
+                        max_memory_bytes = memory_bytes
+
+            if max_memory_bytes is not None:
+                logger.info("Container used at about %s bytes of memory at peak", max_memory_bytes)
+                # Treat it as if used by a child process
+                ResourceMonitor.record_extra_memory(max_memory_bytes // 1024)
+            if last_cpu_usec is not None:
+                assert(first_cpu_usec is not None)
+                cpu_seconds = (last_cpu_usec - first_cpu_usec) / 1000000
+                logger.info("Container used about %s seconds of CPU time", cpu_seconds)
+                # Treat it as if used by a child process
+                ResourceMonitor.record_extra_cpu(cpu_seconds)
+
+    ###
+    # Helper functions to work out what containers runtime we can use
+    ###
+
+    def can_fake_root(self) -> bool:
+        """
+        Determine if --fakeroot is likely to work for Singularity.
+        """
+
+        # We need to have an entry for our user in /etc/subuid to grant us a range of UIDs to use, for fakeroot to work.
+        try:
+            subuid_file = open('/etc/subuid')
+        except OSError as e:
+            logger.warning('Cannot open /etc/subuid due to %s; assuming no subuids available', e)
+            return False
+        username = get_user_name()
+        for line in subuid_file:
+            if line.split(':')[0].strip() == username:
+                # We have a line assigning subuids
+                return True
+        # If there is no line, we have no subuids
+        logger.warning('No subuids are assigned to %s; cannot fake root.', username)
+        return False
+
+    def can_mount_proc(self) -> bool:
+        """
+        Determine if --containall will work for Singularity. On Kubernetes, this will result in operation not permitted
+        See: https://github.com/apptainer/singularity/issues/5857
+
+        So if Kubernetes is detected, return False
+        :return: bool
+        """
+        return "KUBERNETES_SERVICE_HOST" not in os.environ
+
+    @report_wdl_errors("run task command", exit=True)
+    def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
+        """
+        Actually run the task.
+        """
+        super().run(file_store)
+        logger.info("Running task command for %s (%s) called as %s", self._task.name, self._task_id, self._namespace)
+
+        # Set up the WDL standard library
+        # UUID to use for virtualizing files
+        standard_library = ToilWDLStdLibBase(file_store)
+
+        # Get the bindings from after the input section
+        bindings = unwrap(self._task_internal_bindings)
+        # And the bindings from evaluating the runtime section
+        runtime_bindings = unwrap(self._runtime_bindings)
+
+        # We have all the resources we need, so run the task
+
+        if shutil.which('singularity') and self._wdl_options.get("container") in ["singularity", "auto"]:
             # Prepare to use Singularity. We will need plenty of space to
             # download images.
-            if 'SINGULARITY_CACHEDIR' not in os.environ:
-                # Cache Singularity's layers somehwere known to have space, not in home
-                os.environ['SINGULARITY_CACHEDIR'] = os.path.join(file_store.workflow_dir, 'singularity_cache')
+            # Default the Singularity and MiniWDL cache directories. This sets the cache to the same place as
+            # Singularity/MiniWDL's default cache directory
+            # With launch-cluster, the singularity and miniwdl cache is set to /var/lib/toil in abstractProvisioner.py
+            # A current limitation with the singularity/miniwdl cache is it cannot check for image updates if the
+            # filename is the same
+            singularity_cache = os.path.join(os.path.expanduser("~"), ".singularity")
+            miniwdl_cache = os.path.join(os.path.expanduser("~"), ".cache/miniwdl")
+
+            # Cache Singularity's layers somewhere known to have space
+            os.environ['SINGULARITY_CACHEDIR'] = os.environ.get("SINGULARITY_CACHEDIR", singularity_cache)
+
             # Make sure it exists.
             os.makedirs(os.environ['SINGULARITY_CACHEDIR'], exist_ok=True)
 
-            if 'MINIWDL__SINGULARITY__IMAGE_CACHE' not in os.environ:
-                # Cache Singularity images for the workflow on this machine.
-                # Since MiniWDL does only within-process synchronization for pulls,
-                # we also will need to pre-pull one image into here at a time.
-                os.environ['MINIWDL__SINGULARITY__IMAGE_CACHE'] = os.path.join(file_store.workflow_dir, 'miniwdl_sif_cache')
+            # Cache Singularity images for the workflow on this machine.
+            # Since MiniWDL does only within-process synchronization for pulls,
+            # we also will need to pre-pull one image into here at a time.
+            os.environ['MINIWDL__SINGULARITY__IMAGE_CACHE'] = os.environ.get("MINIWDL__SINGULARITY__IMAGE_CACHE", miniwdl_cache)
+
             # Make sure it exists.
             os.makedirs(os.environ['MINIWDL__SINGULARITY__IMAGE_CACHE'], exist_ok=True)
 
             # Run containers with Singularity
             TaskContainerImplementation: Type[TaskContainer] = SingularityContainer
-        else:
+        elif self._wdl_options.get("container") in ["docker", "auto"]:
             # Run containers with Docker
+            # TODO: Poll if it is available and don't just try and fail.
             TaskContainerImplementation = SwarmContainer
-            if runtime_accelerators:
+            if runtime_bindings.has_binding('gpuType') or runtime_bindings.has_binding('gpuCount') or runtime_bindings.has_binding('nvidiaDriverVersion'):
                 # Complain to the user that this is unlikely to work.
-                logger.warning("Running job that needs accelerators with Docker, because "
-                               "Singularity is not available. Accelerator and GPU support "
+                logger.warning("Running job that might need accelerators with Docker. "
+                               "Accelerator and GPU support "
                                "is not yet implemented in the MiniWDL Docker "
                                "containerization implementation.")
+        else:
+            raise RuntimeError(f"Could not find a working container engine to use; told to use {self._wdl_options.get('container')}")
 
         # Set up the MiniWDL container running stuff
         miniwdl_logger = logging.getLogger("MiniWDLContainers")
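Note: the resources.tsv bookkeeping in handle_message_file reduces to two conversions; a tiny standalone check with invented sample values:

    # CPU samples from the injected monitor are cumulative microseconds
    # (cgroup v2 usage_usec; the v1 branch pre-scales ticks by 10000).
    first_cpu_usec = 1_200_000
    last_cpu_usec = 61_200_000
    cpu_seconds = (last_cpu_usec - first_cpu_usec) / 1000000
    assert cpu_seconds == 60.0                    # what record_extra_cpu() receives

    # Peak memory arrives in bytes but is recorded in KiB, matching
    # ResourceMonitor.record_extra_memory(max_memory_bytes // 1024) above.
    max_memory_bytes = 2_147_483_648              # invented 2 GiB peak
    assert max_memory_bytes // 1024 == 2_097_152  # KiB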
@@ -1557,9 +1880,20 @@ class WDLTaskJob(WDLBaseJob):
         workdir_in_container: Optional[str] = None
 
         if self._task.command:
-            # When the command string references a File, we need to get a path to the file on a local disk, which the commnad will be able to actually use, accounting for e.g. containers.
-            # TODO: Figure out whan the command template actually uses File values and lazily download them.
-            # For now we just grab all the File values in the inside-the-task environment, since any of them *might* be used.
+            # When the command string references a File, we need to get a path
+            # to the file on a local disk, which the commnad will be able to
+            # actually use, accounting for e.g. containers.
+            #
+            # TODO: Figure out whan the command template actually uses File
+            # values and lazily download them.
+            #
+            # For now we just grab all the File values in the inside-the-task
+            # environment, since any of them *might* be used.
+            #
+            # Some also might be expected to be adjacent to files that are
+            # used, like a BAI that doesn't get referenced in a command line
+            # but must be next to its BAM.
+            #
             # TODO: MiniWDL can parallelize the fetch
             bindings = devirtualize_files(bindings, standard_library)
 
@@ -1597,6 +1931,10 @@ class WDLTaskJob(WDLBaseJob):
                # We can't fake root so don't try.
                command_line.remove('--fakeroot')
 
+            # If on Kubernetes and proc cannot be mounted, get rid of --containall
+            if '--containall' in command_line and not self.can_mount_proc():
+                command_line.remove('--containall')
+
            extra_flags: Set[str] = set()
            accelerators_needed: Optional[List[AcceleratorRequirement]] = self.accelerators
            if accelerators_needed is not None:
@@ -1624,12 +1962,13 @@ class WDLTaskJob(WDLBaseJob):
             task_container._run_invocation = patched_run_invocation  # type: ignore
 
             # Show the runtime info to the container
-            task_container.process_runtime(miniwdl_logger, {binding.name: binding.value for binding in runtime_bindings})
+            task_container.process_runtime(miniwdl_logger, {binding.name: binding.value for binding in devirtualize_files(runtime_bindings, standard_library)})
             # Tell the container to take up all these files. It will assign
             # them all new paths in task_container.input_path_map which we can
             # read. We also get a task_container.host_path() to go the other way.
             add_paths(task_container, get_file_paths_in_bindings(bindings))
+            # This maps from oustide container to inside container
             logger.debug("Using container path map: %s", task_container.input_path_map)
 
             # Replace everything with in-container paths for the command.
@@ -1638,12 +1977,46 @@ class WDLTaskJob(WDLBaseJob):
 
             # Make a new standard library for evaluating the command specifically, which only deals with in-container paths and out-of-container paths.
             command_library = ToilWDLStdLibTaskCommand(file_store, task_container)
-
-            # Work around wrong types from MiniWDL. See <https://github.com/chanzuckerberg/miniwdl/issues/665>
-            dedent = cast(Callable[[str], Tuple[int, str]], strip_leading_whitespace)
+
+            def hacky_dedent(text: str) -> str:
+                """
+                Guess what result we would have gotten if we dedented the
+                command before substituting placeholder expressions, given the
+                command after substituting placeholder expressions. Workaround
+                for mimicking MiniWDL making us also suffer from
+                <https://github.com/chanzuckerberg/miniwdl/issues/674>.
+                """
+
+                # First just run MiniWDL's dedent
+                # Work around wrong types from MiniWDL. See <https://github.com/chanzuckerberg/miniwdl/issues/665>
+                dedent = cast(Callable[[str], Tuple[int, str]], strip_leading_whitespace)
+
+                text = dedent(text)[1]
+
+                # But this can still leave dedenting to do. Find the first
+                # not-all-whitespace line and get its leading whitespace.
+                to_strip: Optional[str] = None
+                for line in text.split("\n"):
+                    if len(line.strip()) > 0:
+                        # This is the first not-all-whitespace line.
+                        # Drop the leading whitespace.
+                        rest = line.lstrip()
+                        # Grab the part that gets removed by lstrip
+                        to_strip = line[0:(len(line) - len(rest))]
+                        break
+                if to_strip is None or len(to_strip) == 0:
+                    # Nothing to cut
+                    return text
+
+                # Cut to_strip off each line that it appears at the start of.
+                return "\n".join((line.removeprefix(to_strip) for line in text.split("\n")))
+
 
             # Work out the command string, and unwrap it
-            command_string: str = dedent(evaluate_named_expression(self._task, "command", WDL.Type.String(), self._task.command, contained_bindings, command_library).coerce(WDL.Type.String()).value)[1]
+            command_string: str = hacky_dedent(evaluate_named_expression(self._task, "command", WDL.Type.String(), self._task.command, contained_bindings, command_library).coerce(WDL.Type.String()).value)
+
+            # Do any command injection we might need to do
+            command_string = self.add_injections(command_string, task_container)
 
             # Grab the standard out and error paths. MyPy complains if we call
             # them because in the current MiniWDL version they are untyped.
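Note: the reason hacky_dedent needs its second pass is that MiniWDL dedents after placeholder substitution (miniwdl issue #674, linked above), which can leave common leading whitespace behind. A sketch of that residual step on invented input:

    # Find the first non-blank line's leading whitespace...
    text = "        echo one\n        echo two\n"
    to_strip = ""
    for line in text.split("\n"):
        if line.strip():
            to_strip = line[:len(line) - len(line.lstrip())]
            break

    # ...and strip exactly that prefix from every line that starts with it.
    dedented = "\n".join(line.removeprefix(to_strip) for line in text.split("\n"))
    assert dedented == "echo one\necho two\n"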
@@ -1664,16 +2037,49 @@ class WDLTaskJob(WDLBaseJob):
             with ExitStack() as cleanup:
                 task_container._pull(miniwdl_logger, cleanup)

-                # Run the command in the container
+                # Log that we are about to run the command in the container
                 logger.info('Executing command in %s: %s', task_container, command_string)
+
+                # Now our inputs are all downloaded. Let debugging break in (after command is logged).
+                # But we need to hint which host paths are meant to be which container paths
+                host_and_job_paths: List[Tuple[str, str]] = [(k, v) for k, v in task_container.input_path_map.items()]
+                self.files_downloaded_hook(host_and_job_paths)
+
+                # TODO: Really we might want to set up a fake container working directory, to actually help the user.
+
                 try:
                     task_container.run(miniwdl_logger, command_string)
-                finally:
+                except Exception:
                     if os.path.exists(host_stderr_txt):
-                        logger.info('Standard error at %s: %s', host_stderr_txt, open(host_stderr_txt).read())
-                    if os.path.exists(host_stdout_txt):
-                        logger.info('Standard output at %s: %s', host_stdout_txt, open(host_stdout_txt).read())
+                        size = os.path.getsize(host_stderr_txt)
+                        logger.error('Failed task left standard error at %s of %d bytes', host_stderr_txt, size)
+                        if size > 0:
+                            # Send the whole error stream.
+                            file_store.log_user_stream(self._task_path + '.stderr', open(host_stderr_txt, 'rb'))
+                            if logger.isEnabledFor(logging.DEBUG):
+                                logger.debug("MiniWDL already logged standard error")
+                            else:
+                                # At debug level, MiniWDL itself logs command error lines.
+                                # But otherwise we just dump into StatsAndLogging;
+                                # we also want the messages in the job log that
+                                # gets printed at the end of the workflow. So log
+                                # the error log ourselves.
+                                logger.error("====TASK ERROR LOG====")
+                                for line in open(host_stderr_txt, 'r', errors="replace"):
+                                    logger.error("> %s", line.rstrip('\n'))
+                                logger.error("====TASK ERROR LOG====")

+                    if os.path.exists(host_stdout_txt):
+                        size = os.path.getsize(host_stdout_txt)
+                        logger.info('Failed task left standard output at %s of %d bytes', host_stdout_txt, size)
+                        if size > 0:
+                            # Save the whole output stream.
+                            # TODO: We can't tell if this was supposed to be
+                            # captured. It might really be huge binary data.
+                            file_store.log_user_stream(self._task_path + '.stdout', open(host_stdout_txt, 'rb'))
+
+                    # Keep crashing
+                    raise
         else:
             # We need to fake stdout and stderr, since nothing ran but the
             # standard library lets you grab them. TODO: Can these be None?
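The failure branch above does two things with a failed task's stderr: it ships the whole stream to the leader via file_store.log_user_stream, and, unless MiniWDL's debug-level logging already echoed the command's error lines, it also dumps them into the job log so they survive into the end-of-workflow report. The dump step, isolated as a sketch (the logger setup and stderr path are assumed):

    import logging

    logger = logging.getLogger(__name__)

    def echo_task_error_log(host_stderr_txt: str) -> None:
        # Bracket the dump so it stands out in the job log that the
        # leader prints when the workflow finishes.
        logger.error("====TASK ERROR LOG====")
        # errors="replace" keeps undecodable bytes from raising mid-dump.
        with open(host_stderr_txt, "r", errors="replace") as stream:
            for line in stream:
                logger.error("> %s", line.rstrip("\n"))
        logger.error("====TASK ERROR LOG====")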
@@ -1687,16 +2093,39 @@ class WDLTaskJob(WDLBaseJob):
         # container-determined strings that are absolute paths to WDL File
         # objects, and like MiniWDL we can say we only support
         # working-directory-based relative paths for globs.
-        outputs_library = ToilWDLStdLibTaskOutputs(file_store, host_stdout_txt, host_stderr_txt, current_directory_override=workdir_in_container)
+        outputs_library = ToilWDLStdLibTaskOutputs(file_store, host_stdout_txt, host_stderr_txt, task_container.input_path_map, current_directory_override=workdir_in_container)
+        # Make sure files downloaded as inputs get re-used if we re-upload them.
+        outputs_library.share_files(standard_library)
         output_bindings = evaluate_output_decls(self._task.outputs, bindings, outputs_library)

+        # Now we know if the standard output and error were sent somewhere by
+        # the workflow. If not, we should report them to the leader.
+
+        if not outputs_library.stderr_used() and os.path.exists(host_stderr_txt):
+            size = os.path.getsize(host_stderr_txt)
+            logger.info('Unused standard error at %s of %d bytes', host_stderr_txt, size)
+            if size > 0:
+                # Save the whole error stream because the workflow didn't capture it.
+                file_store.log_user_stream(self._task_path + '.stderr', open(host_stderr_txt, 'rb'))
+
+        if not outputs_library.stdout_used() and os.path.exists(host_stdout_txt):
+            size = os.path.getsize(host_stdout_txt)
+            logger.info('Unused standard output at %s of %d bytes', host_stdout_txt, size)
+            if size > 0:
+                # Save the whole output stream because the workflow didn't capture it.
+                file_store.log_user_stream(self._task_path + '.stdout', open(host_stdout_txt, 'rb'))
+
+        # Collect output messages from any code Toil injected into the task.
+        self.handle_injection_messages(outputs_library)
+
         # Drop any files from the output which don't actually exist
         output_bindings = drop_missing_files(output_bindings, current_directory_override=workdir_in_container)
-
-        # TODO: Check the output bindings against the types of the decls so we
-        # can tell if we have a null in a value that is supposed to not be
-        # nullable. We can't just look at the types on the values themselves
-        # because those are all the non-nullable versions.
+        for decl in self._task.outputs:
+            if not decl.type.optional and output_bindings[decl.name].value is None:
+                # We have an unacceptable null value. This can happen if a file
+                # is missing but not optional. Don't let it out to annoy the
+                # next task.
+                raise WDL.Error.EvalError(decl, f"non-optional value {decl.name} = {decl.expr} is missing")

         # Upload any files in the outputs if not uploaded already. Accounts for how relative paths may still need to be container-relative.
         output_bindings = virtualize_files(output_bindings, outputs_library)
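The new loop above closes the old TODO by checking the declared (static) types, since runtime values always carry the non-optional versions. A toy re-creation of the rule, with SimpleNamespace standing in for MiniWDL declarations and a plain ValueError in place of WDL.Error.EvalError:

    from types import SimpleNamespace

    def check_outputs(decls, values) -> None:
        for decl in decls:
            # Only the declaration knows whether a missing value is allowed.
            if not decl.type.optional and values[decl.name] is None:
                raise ValueError(f"non-optional value {decl.name} is missing")

    decls = [SimpleNamespace(name="out_file", type=SimpleNamespace(optional=False))]
    try:
        check_outputs(decls, {"out_file": None})
    except ValueError as err:
        print(err)  # non-optional value out_file is missing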
@@ -1711,15 +2140,16 @@ class WDLWorkflowNodeJob(WDLBaseJob):
     Job that evaluates a WDL workflow node.
     """

-    def __init__(self, node: WDL.Tree.WorkflowNode, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, node: WDL.Tree.WorkflowNode, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Make a new job to run a workflow node to completion.
         """
-        super().__init__(unitName=node.workflow_node_id, displayName=node.workflow_node_id, execution_dir=execution_dir, **kwargs)
+        super().__init__(unitName=node.workflow_node_id, displayName=node.workflow_node_id, wdl_options=wdl_options or {}, **kwargs)

         self._node = node
         self._prev_node_results = prev_node_results
         self._namespace = namespace
+        self._task_path = task_path

         if isinstance(self._node, WDL.Tree.Call):
             logger.debug("Preparing job for call node %s", self._node.workflow_node_id)
@@ -1735,7 +2165,7 @@ class WDLWorkflowNodeJob(WDLBaseJob):
         # Combine the bindings we get from previous jobs
         incoming_bindings = combine_bindings(unwrap_all(self._prev_node_results))
         # Set up the WDL standard library
-        standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._execution_dir)
+        standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))
         with monkeypatch_coerce(standard_library):
             if isinstance(self._node, WDL.Tree.Decl):
                 # This is a variable assignment
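This hunk shows the pattern the rest of the diff repeats: the standalone execution_dir argument is folded into a wdl_options string dict carried by every WDL job, read back with .get() so a missing key degrades to None just like the old execution_dir=None default. A self-contained sketch of the idea (class name and keys are illustrative, not Toil's API):

    from typing import Dict, Optional

    class ExampleJob:
        def __init__(self, wdl_options: Optional[Dict[str, str]] = None) -> None:
            # Normalize None to an empty dict once, at construction time.
            self._wdl_options: Dict[str, str] = wdl_options or {}

        @property
        def execution_dir(self) -> Optional[str]:
            # Missing keys fall back to None, like the old default argument.
            return self._wdl_options.get("execution_dir")

    assert ExampleJob({"execution_dir": "/tmp/run"}).execution_dir == "/tmp/run"
    assert ExampleJob().execution_dir is None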
@@ -1763,11 +2193,11 @@ class WDLWorkflowNodeJob(WDLBaseJob):

                 if isinstance(self._node.callee, WDL.Tree.Workflow):
                     # This is a call of a workflow
-                    subjob: WDLBaseJob = WDLWorkflowJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}', self._execution_dir)
+                    subjob: WDLBaseJob = WDLWorkflowJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}', f'{self._task_path}.{self._node.name}', wdl_options=self._wdl_options)
                     self.addChild(subjob)
                 elif isinstance(self._node.callee, WDL.Tree.Task):
                     # This is a call of a task
-                    subjob = WDLTaskJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}')
+                    subjob = WDLTaskWrapperJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}', f'{self._task_path}.{self._node.name}', wdl_options=self._wdl_options)
                     self.addChild(subjob)
                 else:
                     raise WDL.Error.InvalidType(self._node, "Cannot call a " + str(type(self._node.callee)))
@@ -1778,14 +2208,14 @@ class WDLWorkflowNodeJob(WDLBaseJob):
                 self.defer_postprocessing(subjob)
                 return subjob.rv()
             elif isinstance(self._node, WDL.Tree.Scatter):
-                subjob = WDLScatterJob(self._node, [incoming_bindings], self._namespace, self._execution_dir)
+                subjob = WDLScatterJob(self._node, [incoming_bindings], self._namespace, self._task_path, wdl_options=self._wdl_options)
                 self.addChild(subjob)
                 # Scatters don't really make a namespace, just kind of a scope?
                 # TODO: Let stuff leave scope!
                 self.defer_postprocessing(subjob)
                 return subjob.rv()
             elif isinstance(self._node, WDL.Tree.Conditional):
-                subjob = WDLConditionalJob(self._node, [incoming_bindings], self._namespace, self._execution_dir)
+                subjob = WDLConditionalJob(self._node, [incoming_bindings], self._namespace, self._task_path, wdl_options=self._wdl_options)
                 self.addChild(subjob)
                 # Conditionals don't really make a namespace, just kind of a scope?
                 # TODO: Let stuff leave scope!
@@ -1801,11 +2231,11 @@ class WDLWorkflowNodeListJob(WDLBaseJob):
     workflows or tasks or sections.
     """

-    def __init__(self, nodes: List[WDL.Tree.WorkflowNode], prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, nodes: List[WDL.Tree.WorkflowNode], prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Make a new job to run a list of workflow nodes to completion.
         """
-        super().__init__(unitName=nodes[0].workflow_node_id + '+', displayName=nodes[0].workflow_node_id + '+', execution_dir=execution_dir, **kwargs)
+        super().__init__(unitName=nodes[0].workflow_node_id + '+', displayName=nodes[0].workflow_node_id + '+', wdl_options=wdl_options, **kwargs)

         self._nodes = nodes
         self._prev_node_results = prev_node_results
@@ -1825,7 +2255,7 @@ class WDLWorkflowNodeListJob(WDLBaseJob):
         # Combine the bindings we get from previous jobs
         current_bindings = combine_bindings(unwrap_all(self._prev_node_results))
         # Set up the WDL standard library
-        standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._execution_dir)
+        standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))

         with monkeypatch_coerce(standard_library):
             for node in self._nodes:
@@ -2005,13 +2435,14 @@ class WDLSectionJob(WDLBaseJob):
     Job that can create more graph for a section of the workflow.
     """

-    def __init__(self, namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Make a WDLSectionJob where the interior runs in the given namespace,
         starting with the root workflow.
         """
-        super().__init__(execution_dir, **kwargs)
+        super().__init__(wdl_options=wdl_options, **kwargs)
         self._namespace = namespace
+        self._task_path = task_path

     @staticmethod
     def coalesce_nodes(order: List[str], section_graph: WDLWorkflowGraph) -> List[List[str]]:
@@ -2079,7 +2510,7 @@ class WDLSectionJob(WDLBaseJob):



-    def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: Sequence[WDL.Tree.Gather], environment: WDLBindings, local_environment: Optional[WDLBindings] = None) -> WDLBaseJob:
+    def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: Sequence[WDL.Tree.Gather], environment: WDLBindings, local_environment: Optional[WDLBindings] = None, subscript: Optional[int] = None) -> WDLBaseJob:
         """
         Make a Toil job to evaluate a subgraph inside a workflow or workflow
         section.
@@ -2095,8 +2526,16 @@ class WDLSectionJob(WDLBaseJob):
         :param local_environment: Bindings in this environment will be
             used to evaluate the subgraph but will go out of scope
             at the end of the section.
+        :param subscript: If the subgraph is being evaluated multiple times,
+            this should be a disambiguating integer for logging.
         """

+        # Work out what to call what we are working on
+        task_path = self._task_path
+        if subscript is not None:
+            # We need to include a scatter loop number.
+            task_path += f'.{subscript}'
+
         if local_environment is not None:
             # Bring local environment into scope
             environment = combine_bindings([environment, local_environment])
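Together with the enumerate() change in WDLScatterJob further down, the new subscript gives each scatter iteration its own dotted task path for logging and for naming logged output streams. A tiny sketch of the naming rule (the helper name is made up):

    from typing import Optional

    def subgraph_task_path(task_path: str, subscript: Optional[int] = None) -> str:
        # Scatter iterations get a trailing loop number for disambiguation.
        if subscript is not None:
            task_path += f'.{subscript}'
        return task_path

    assert subgraph_task_path("wf.align") == "wf.align"
    assert subgraph_task_path("wf.align", 2) == "wf.align.2"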
@@ -2156,10 +2595,10 @@ class WDLSectionJob(WDLBaseJob):

         if len(node_ids) == 1:
             # Make a one-node job
-            job: WDLBaseJob = WDLWorkflowNodeJob(section_graph.get(node_ids[0]), rvs, self._namespace, self._execution_dir)
+            job: WDLBaseJob = WDLWorkflowNodeJob(section_graph.get(node_ids[0]), rvs, self._namespace, task_path, wdl_options=self._wdl_options)
         else:
             # Make a multi-node job
-            job = WDLWorkflowNodeListJob([section_graph.get(node_id) for node_id in node_ids], rvs, self._namespace, self._execution_dir)
+            job = WDLWorkflowNodeListJob([section_graph.get(node_id) for node_id in node_ids], rvs, self._namespace, wdl_options=self._wdl_options)
         for prev_job in prev_jobs:
             # Connect up the happens-after relationships to make sure the
             # return values are available.
@@ -2189,7 +2628,7 @@ class WDLSectionJob(WDLBaseJob):
             leaf_rvs.append(environment)
         # And to fill in bindings from code not executed in this instantiation
         # with Null, and filter out stuff that should leave scope.
-        sink = WDLCombineBindingsJob(leaf_rvs)
+        sink = WDLCombineBindingsJob(leaf_rvs, wdl_options=self._wdl_options)
         # It runs inside us
         self.addChild(sink)
         for leaf_job in toil_leaves.values():
@@ -2256,11 +2695,11 @@ class WDLScatterJob(WDLSectionJob):
     instance of the body. If an instance of the body doesn't create a binding,
     it gets a null value in the corresponding array.
     """
-    def __init__(self, scatter: WDL.Tree.Scatter, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, scatter: WDL.Tree.Scatter, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Create a subtree that will run a WDL scatter. The scatter itself and the contents live in the given namespace.
         """
-        super().__init__(namespace, **kwargs, unitName=scatter.workflow_node_id, displayName=scatter.workflow_node_id, execution_dir=execution_dir)
+        super().__init__(namespace, task_path, **kwargs, unitName=scatter.workflow_node_id, displayName=scatter.workflow_node_id, wdl_options=wdl_options)

         # Because we need to return the return value of the workflow, we need
         # to return a Toil promise for the last/sink job in the workflow's
@@ -2297,7 +2736,7 @@ class WDLScatterJob(WDLSectionJob):
             raise RuntimeError("The returned value from a scatter is not an Array type.")

         scatter_jobs = []
-        for item in scatter_value.value:
+        for subscript, item in enumerate(scatter_value.value):
             # Make an instantiation of our subgraph for each possible value of
             # the variable. Make sure the variable is bound only for the
             # duration of the body.
@@ -2306,7 +2745,7 @@ class WDLScatterJob(WDLSectionJob):
             # TODO: We need to turn values() into a list because MyPy seems to
             # think a dict_values isn't a Sequence. This is a waste of time to
             # appease MyPy but probably better than a cast?
-            scatter_jobs.append(self.create_subgraph(self._scatter.body, list(self._scatter.gathers.values()), bindings, local_bindings))
+            scatter_jobs.append(self.create_subgraph(self._scatter.body, list(self._scatter.gathers.values()), bindings, local_bindings, subscript=subscript))

         if len(scatter_jobs) == 0:
             # No scattering is needed. We just need to bind all the names.
@@ -2326,7 +2765,7 @@ class WDLScatterJob(WDLSectionJob):
         # of maybe-optional values. Each body execution will define names it
         # doesn't make as nulls, so we don't have to worry about
         # totally-missing names.
-        gather_job = WDLArrayBindingsJob([j.rv() for j in scatter_jobs], bindings)
+        gather_job = WDLArrayBindingsJob([j.rv() for j in scatter_jobs], bindings, wdl_options=self._wdl_options)
         self.addChild(gather_job)
         for j in scatter_jobs:
             j.addFollowOn(gather_job)
@@ -2395,11 +2834,11 @@ class WDLConditionalJob(WDLSectionJob):
     """
     Job that evaluates a conditional in a WDL workflow.
     """
-    def __init__(self, conditional: WDL.Tree.Conditional, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, conditional: WDL.Tree.Conditional, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Create a subtree that will run a WDL conditional. The conditional itself and its contents live in the given namespace.
         """
-        super().__init__(namespace, **kwargs, unitName=conditional.workflow_node_id, displayName=conditional.workflow_node_id, execution_dir=execution_dir)
+        super().__init__(namespace, task_path, **kwargs, unitName=conditional.workflow_node_id, displayName=conditional.workflow_node_id, wdl_options=wdl_options)

         # Once again we need to ship the whole body template to be instantiated
         # into Toil jobs only if it will actually run.
@@ -2447,7 +2886,7 @@ class WDLWorkflowJob(WDLSectionJob):
     Job that evaluates an entire WDL workflow.
     """

-    def __init__(self, workflow: WDL.Tree.Workflow, prev_node_results: Sequence[Promised[WDLBindings]], workflow_id: List[str], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, workflow: WDL.Tree.Workflow, prev_node_results: Sequence[Promised[WDLBindings]], workflow_id: List[str], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Create a subtree that will run a WDL workflow. The job returns the
         return value of the workflow.
@@ -2455,7 +2894,7 @@ class WDLWorkflowJob(WDLSectionJob):
         :param namespace: the namespace that the workflow's *contents* will be
             in. Caller has already added the workflow's own name.
         """
-        super().__init__(namespace, execution_dir, **kwargs)
+        super().__init__(namespace, task_path, wdl_options=wdl_options, **kwargs)

         # Because we need to return the return value of the workflow, we need
         # to return a Toil promise for the last/sink job in the workflow's
@@ -2485,7 +2924,7 @@ class WDLWorkflowJob(WDLSectionJob):
         # For a task we only see the inside-the-task namespace.
         bindings = combine_bindings(unwrap_all(self._prev_node_results))
         # Set up the WDL standard library
-        standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._execution_dir)
+        standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))

         if self._workflow.inputs:
             with monkeypatch_coerce(standard_library):
@@ -2499,7 +2938,7 @@ class WDLWorkflowJob(WDLSectionJob):
         if self._workflow.outputs != []: # Compare against empty list as None means there should be outputs
             # Either the output section is declared and nonempty or it is not declared
             # Add evaluating the outputs after the sink
-            outputs_job = WDLOutputsJob(self._workflow, sink.rv(), self._execution_dir)
+            outputs_job = WDLOutputsJob(self._workflow, sink.rv(), wdl_options=self._wdl_options)
             sink.addFollowOn(outputs_job)
             # Caller is responsible for making sure namespaces are applied
             self.defer_postprocessing(outputs_job)
@@ -2514,11 +2953,11 @@ class WDLOutputsJob(WDLBaseJob):

     Returns an environment with just the outputs bound, in no namespace.
     """
-    def __init__(self, workflow: WDL.Tree.Workflow, bindings: Promised[WDLBindings], execution_dir: Optional[str] = None, **kwargs: Any):
+    def __init__(self, workflow: WDL.Tree.Workflow, bindings: Promised[WDLBindings], wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any):
         """
         Make a new WDLWorkflowOutputsJob for the given workflow, with the given set of bindings after its body runs.
         """
-        super().__init__(execution_dir, **kwargs)
+        super().__init__(wdl_options=wdl_options, **kwargs)

         self._bindings = bindings
         self._workflow = workflow
@@ -2548,7 +2987,7 @@ class WDLOutputsJob(WDLBaseJob):
         else:
             # Output section is declared and is nonempty, so evaluate normally
             # Evaluate all the outputs in the normal, non-task-outputs library context
-            standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._execution_dir)
+            standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))
             # Combine the bindings from the previous job
             output_bindings = evaluate_output_decls(self._workflow.outputs, unwrap(self._bindings), standard_library)
         return self.postprocess(output_bindings)
@@ -2560,13 +2999,13 @@ class WDLRootJob(WDLSectionJob):
     the workflow name; both forms are accepted.
     """

-    def __init__(self, workflow: WDL.Tree.Workflow, inputs: WDLBindings, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, workflow: WDL.Tree.Workflow, inputs: WDLBindings, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Create a subtree to run the workflow and namespace the outputs.
         """

-        # The root workflow names the root namespace
-        super().__init__(workflow.name, execution_dir, **kwargs)
+        # The root workflow names the root namespace and task path.
+        super().__init__(workflow.name, workflow.name, wdl_options=wdl_options, **kwargs)

         self._workflow = workflow
         self._inputs = inputs
@@ -2580,7 +3019,7 @@ class WDLRootJob(WDLSectionJob):

         # Run the workflow. We rely on this to handle entering the input
         # namespace if needed, or handling free-floating inputs.
-        workflow_job = WDLWorkflowJob(self._workflow, [self._inputs], [self._workflow.name], self._namespace, self._execution_dir)
+        workflow_job = WDLWorkflowJob(self._workflow, [self._inputs], [self._workflow.name], self._namespace, self._task_path, wdl_options=self._wdl_options)
         workflow_job.then_namespace(self._namespace)
         self.addChild(workflow_job)
         self.defer_postprocessing(workflow_job)
@@ -2642,6 +3081,9 @@ def main() -> None:
     # If we don't have a directory assigned, make one in the current directory.
     output_directory: str = options.output_directory if options.output_directory else mkdtemp(prefix='wdl-out-', dir=os.getcwd())

+    # Get the execution directory
+    execution_dir = os.getcwd()
+
     with Toil(options) as toil:
         if options.restart:
             output_bindings = toil.restart()
@@ -2668,7 +3110,7 @@ def main() -> None:
                 raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.inputs_uri, inputs_abspath, e.lineno, e.colno, e.lineno, e.colno + 1), "Cannot parse input JSON: " + e.msg) from e
         else:
             inputs = {}
-
+
         # Parse out the available and required inputs. Each key in the
         # JSON ought to start with the workflow's name and then a .
         # TODO: WDL's Bindings[] isn't variant in the right way, so we
@@ -2703,51 +3145,28 @@ def main() -> None:
             # Get the execution directory
             execution_dir = os.getcwd()

+            # Configure workflow interpreter options
+            wdl_options: Dict[str, str] = {}
+            wdl_options["execution_dir"] = execution_dir
+            wdl_options["container"] = options.container
+            assert wdl_options.get("container") is not None
+
             # Run the workflow and get its outputs namespaced with the workflow name.
-            root_job = WDLRootJob(document.workflow, input_bindings, execution_dir)
+            root_job = WDLRootJob(document.workflow, input_bindings, wdl_options=wdl_options)
             output_bindings = toil.start(root_job)
         if not isinstance(output_bindings, WDL.Env.Bindings):
             raise RuntimeError("The output of the WDL job is not a binding.")

         # Fetch all the output files
-        # TODO: deduplicate with _devirtualize_filename
         def devirtualize_output(filename: str) -> str:
             """
             'devirtualize' a file using the "toil" object instead of a filestore.
             Returns its local path.
             """
-            if is_url(filename):
-                if filename.startswith(TOIL_URI_SCHEME):
-                    # This is a reference to the Toil filestore.
-                    # Deserialize the FileID and required basename
-                    file_id, parent_id, file_basename = unpack_toil_uri(filename)
-                else:
-                    # Parse the URL and extract the basename
-                    file_basename = os.path.basename(urlsplit(filename).path)
-
-                # Figure out where it should go.
-                # If a UUID is included, it will be omitted
-                # TODO: Deal with name collisions in the export directory
-                dest_name = os.path.join(output_directory, file_basename)
-
-                if filename.startswith(TOIL_URI_SCHEME):
-                    # Export the file
-                    toil.export_file(file_id, dest_name)
-                else:
-                    # Download to a local file with the right name and execute bit.
-                    # Open it exclusively
-                    with open(dest_name, 'xb') as dest_file:
-                        # And save to it
-                        size, executable = AbstractJobStore.read_from_url(filename, dest_file)
-                        if executable:
-                            # Set the execute bit in the file's permissions
-                            os.chmod(dest_name, os.stat(dest_name).st_mode | stat.S_IXUSR)
-
-                # And return where we put it
-                return dest_name
-            else:
-                # We already had a path
-                return filename
+            # Make sure the output directory exists if we have output files
+            # that might need to use it.
+            os.makedirs(output_directory, exist_ok=True)
+            return ToilWDLStdLibBase.devirtualize_to(filename, output_directory, toil, execution_dir)

         # Make all the files local files
         output_bindings = map_over_files_in_bindings(output_bindings, devirtualize_output)