toil 6.1.0__py3-none-any.whl → 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. toil/__init__.py +1 -232
  2. toil/batchSystems/abstractBatchSystem.py +22 -13
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +59 -45
  4. toil/batchSystems/awsBatch.py +8 -8
  5. toil/batchSystems/contained_executor.py +4 -5
  6. toil/batchSystems/gridengine.py +1 -1
  7. toil/batchSystems/htcondor.py +5 -5
  8. toil/batchSystems/kubernetes.py +25 -11
  9. toil/batchSystems/local_support.py +3 -3
  10. toil/batchSystems/lsf.py +2 -2
  11. toil/batchSystems/mesos/batchSystem.py +4 -4
  12. toil/batchSystems/mesos/executor.py +3 -2
  13. toil/batchSystems/options.py +9 -0
  14. toil/batchSystems/singleMachine.py +11 -10
  15. toil/batchSystems/slurm.py +64 -22
  16. toil/batchSystems/torque.py +1 -1
  17. toil/bus.py +7 -3
  18. toil/common.py +36 -13
  19. toil/cwl/cwltoil.py +365 -312
  20. toil/deferred.py +1 -1
  21. toil/fileStores/abstractFileStore.py +17 -17
  22. toil/fileStores/cachingFileStore.py +2 -2
  23. toil/fileStores/nonCachingFileStore.py +1 -1
  24. toil/job.py +228 -60
  25. toil/jobStores/abstractJobStore.py +18 -10
  26. toil/jobStores/aws/jobStore.py +280 -218
  27. toil/jobStores/aws/utils.py +57 -29
  28. toil/jobStores/conftest.py +2 -2
  29. toil/jobStores/fileJobStore.py +2 -2
  30. toil/jobStores/googleJobStore.py +3 -4
  31. toil/leader.py +72 -24
  32. toil/lib/aws/__init__.py +26 -10
  33. toil/lib/aws/iam.py +2 -2
  34. toil/lib/aws/session.py +62 -22
  35. toil/lib/aws/utils.py +73 -37
  36. toil/lib/conversions.py +5 -1
  37. toil/lib/ec2.py +118 -69
  38. toil/lib/expando.py +1 -1
  39. toil/lib/io.py +14 -2
  40. toil/lib/misc.py +1 -3
  41. toil/lib/resources.py +55 -21
  42. toil/lib/retry.py +12 -5
  43. toil/lib/threading.py +2 -2
  44. toil/lib/throttle.py +1 -1
  45. toil/options/common.py +27 -24
  46. toil/provisioners/__init__.py +9 -3
  47. toil/provisioners/abstractProvisioner.py +9 -7
  48. toil/provisioners/aws/__init__.py +20 -15
  49. toil/provisioners/aws/awsProvisioner.py +406 -329
  50. toil/provisioners/gceProvisioner.py +2 -2
  51. toil/provisioners/node.py +13 -5
  52. toil/server/app.py +1 -1
  53. toil/statsAndLogging.py +58 -16
  54. toil/test/__init__.py +27 -12
  55. toil/test/batchSystems/batchSystemTest.py +40 -33
  56. toil/test/batchSystems/batch_system_plugin_test.py +79 -0
  57. toil/test/batchSystems/test_slurm.py +1 -1
  58. toil/test/cwl/cwlTest.py +8 -91
  59. toil/test/cwl/seqtk_seq.cwl +1 -1
  60. toil/test/docs/scriptsTest.py +10 -13
  61. toil/test/jobStores/jobStoreTest.py +33 -49
  62. toil/test/lib/aws/test_iam.py +2 -2
  63. toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
  64. toil/test/provisioners/clusterTest.py +90 -8
  65. toil/test/server/serverTest.py +2 -2
  66. toil/test/src/autoDeploymentTest.py +1 -1
  67. toil/test/src/dockerCheckTest.py +2 -1
  68. toil/test/src/environmentTest.py +125 -0
  69. toil/test/src/fileStoreTest.py +1 -1
  70. toil/test/src/jobDescriptionTest.py +18 -8
  71. toil/test/src/jobTest.py +1 -1
  72. toil/test/src/realtimeLoggerTest.py +4 -0
  73. toil/test/src/workerTest.py +52 -19
  74. toil/test/utils/toilDebugTest.py +61 -3
  75. toil/test/utils/utilsTest.py +20 -18
  76. toil/test/wdl/wdltoil_test.py +24 -71
  77. toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
  78. toil/toilState.py +68 -9
  79. toil/utils/toilDebugJob.py +153 -26
  80. toil/utils/toilLaunchCluster.py +12 -2
  81. toil/utils/toilRsyncCluster.py +7 -2
  82. toil/utils/toilSshCluster.py +7 -3
  83. toil/utils/toilStats.py +2 -1
  84. toil/utils/toilStatus.py +97 -51
  85. toil/version.py +10 -10
  86. toil/wdl/wdltoil.py +318 -51
  87. toil/worker.py +96 -69
  88. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
  89. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/METADATA +55 -21
  90. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/RECORD +93 -90
  91. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
  92. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
  93. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/wdl/wdltoil.py CHANGED
@@ -24,6 +24,7 @@ import shutil
24
24
  import stat
25
25
  import subprocess
26
26
  import sys
27
+ import textwrap
27
28
  import uuid
28
29
  from contextlib import ExitStack, contextmanager
29
30
  from graphlib import TopologicalSorter
@@ -47,14 +48,15 @@ from urllib.parse import quote, unquote, urljoin, urlsplit
47
48
 
48
49
  import WDL.Error
49
50
  import WDL.runtime.config
50
- from configargparse import ArgParser, SUPPRESS
51
+ from configargparse import ArgParser
51
52
  from WDL._util import byte_size_units, strip_leading_whitespace
52
53
  from WDL.CLI import print_error
53
54
  from WDL.runtime.backend.docker_swarm import SwarmContainer
54
55
  from WDL.runtime.backend.singularity import SingularityContainer
55
56
  from WDL.runtime.task_container import TaskContainer
56
57
 
57
- from toil.common import Toil, addOptions, check_and_create_default_config_file
58
+ from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
59
+ from toil.common import Toil, addOptions
58
60
  from toil.fileStores import FileID
59
61
  from toil.fileStores.abstractFileStore import AbstractFileStore
60
62
  from toil.job import (AcceleratorRequirement,
@@ -62,17 +64,19 @@ from toil.job import (AcceleratorRequirement,
62
64
  Promise,
63
65
  Promised,
64
66
  TemporaryID,
65
- accelerators_fully_satisfy,
66
67
  parse_accelerator,
67
68
  unwrap,
68
69
  unwrap_all)
69
- from toil.jobStores.abstractJobStore import (AbstractJobStore,
70
- UnimplementedURLException)
71
- from toil.lib.conversions import convert_units, human2bytes
70
+ from toil.jobStores.abstractJobStore import (AbstractJobStore, UnimplementedURLException,
71
+ InvalidImportExportUrlException, LocatorException)
72
+ from toil.lib.conversions import convert_units, human2bytes, strtobool
72
73
  from toil.lib.io import mkdtemp
73
74
  from toil.lib.memoize import memoize
74
75
  from toil.lib.misc import get_user_name
76
+ from toil.lib.resources import ResourceMonitor
75
77
  from toil.lib.threading import global_mutex
78
+ from toil.provisioners.clusterScaler import JobTooBigError
79
+
76
80
 
77
81
  logger = logging.getLogger(__name__)
78
82
 
@@ -86,19 +90,30 @@ def wdl_error_reporter(task: str, exit: bool = False, log: Callable[[str], None]
86
90
  try:
87
91
  yield
88
92
  except (
93
+ WDL.Error.EvalError,
89
94
  WDL.Error.SyntaxError,
90
95
  WDL.Error.ImportError,
91
96
  WDL.Error.ValidationError,
92
97
  WDL.Error.MultipleValidationErrors,
93
- FileNotFoundError
98
+ FileNotFoundError,
99
+ InsufficientSystemResources,
100
+ LocatorException,
101
+ InvalidImportExportUrlException,
102
+ UnimplementedURLException,
103
+ JobTooBigError
94
104
  ) as e:
95
- log("Could not " + task)
105
+ # Don't expose tracebacks to the user for exceptions that may be expected
106
+ log("Could not " + task + " because:")
107
+
96
108
  # These are the errors that MiniWDL's parser can raise and its reporter
97
- # can report. See
109
+ # can report (plus some extras). See
98
110
  # https://github.com/chanzuckerberg/miniwdl/blob/a780b1bf2db61f18de37616068968b2bb4c2d21c/WDL/CLI.py#L91-L97.
99
111
  #
100
112
  # We are going to use MiniWDL's pretty printer to print them.
113
+ # Make the MiniWDL stuff on stderr loud so people see it
114
+ sys.stderr.write("\n" + "🚨" * 3 + "\n")
101
115
  print_error(e)
116
+ sys.stderr.write("🚨" * 3 + "\n\n")
102
117
  if exit:
103
118
  # Stop right now
104
119
  sys.exit(1)
@@ -524,6 +539,8 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
524
539
  def __init__(self, file_store: AbstractFileStore, execution_dir: Optional[str] = None):
525
540
  """
526
541
  Set up the standard library.
542
+
543
+ :param execution_dir: Directory to use as the working directory for workflow code.
527
544
  """
528
545
  # TODO: Just always be the 1.2 standard library.
529
546
  wdl_version = "1.2"
@@ -542,27 +559,67 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
542
559
  # UUID to differentiate which node files are virtualized from
543
560
  self._parent_dir_to_ids: Dict[str, uuid.UUID] = dict()
544
561
 
562
+ # Map forward from virtualized files to absolute devirtualized ones.
563
+ self._virtualized_to_devirtualized: Dict[str, str] = {}
564
+ # Allow mapping back from absolute devirtualized files to virtualized
565
+ # paths, to save re-uploads.
566
+ self._devirtualized_to_virtualized: Dict[str, str] = {}
567
+
545
568
  self._execution_dir = execution_dir
546
569
 
570
+ def share_files(self, other: "ToilWDLStdLibBase") -> None:
571
+ """
572
+ Share caches for devirtualizing and virtualizing files with another instance.
573
+
574
+ Files devirtualized by one instance can be re-virtualized back to their
575
+ original virtualized filenames by the other.
576
+ """
577
+
578
+ if id(self._virtualized_to_devirtualized) != id(other._virtualized_to_devirtualized):
579
+ # Merge the virtualized to devirtualized mappings
580
+ self._virtualized_to_devirtualized.update(other._virtualized_to_devirtualized)
581
+ other._virtualized_to_devirtualized = self._virtualized_to_devirtualized
582
+
583
+ if id(self._devirtualized_to_virtualized) != id(other._devirtualized_to_virtualized):
584
+ # Merge the devirtualized to virtualized mappings
585
+ self._devirtualized_to_virtualized.update(other._devirtualized_to_virtualized)
586
+ other._devirtualized_to_virtualized = self._devirtualized_to_virtualized
587
+
547
588
  @memoize
548
589
  def _devirtualize_filename(self, filename: str) -> str:
549
590
  """
550
591
  'devirtualize' filename passed to a read_* function: return a filename that can be open()ed
551
592
  on the local host.
552
593
  """
553
-
554
- return self.devirtualze_to(filename, self._file_store.localTempDir, self._file_store, self._execution_dir)
594
+
595
+ result = self.devirtualize_to(filename, self._file_store.localTempDir, self._file_store, self._execution_dir)
596
+ # Store the back mapping
597
+ self._devirtualized_to_virtualized[result] = filename
598
+ # And the forward
599
+ self._virtualized_to_devirtualized[filename] = result
600
+ return result
555
601
 
556
602
  @staticmethod
557
- def devirtualze_to(filename: str, dest_dir: str, file_source: Union[AbstractFileStore, Toil], execution_dir: Optional[str]) -> str:
603
+ def devirtualize_to(filename: str, dest_dir: str, file_source: Union[AbstractFileStore, Toil], execution_dir: Optional[str]) -> str:
558
604
  """
559
605
  Download or export a WDL virtualized filename/URL to the given directory.
560
606
 
561
- Makes sure sibling files stay siblings and files with the same name don't clobber each other. Called from within this class for tasks, and statically at the end of the workflow for outputs.
607
+ The destination directory must already exist.
608
+
609
+ Makes sure sibling files stay siblings and files with the same name
610
+ don't clobber each other. Called from within this class for tasks, and
611
+ statically at the end of the workflow for outputs.
562
612
 
563
- Returns the local path to the file.
613
+ Returns the local path to the file. If it already had a local path
614
+ elsewhere, it might not actually be put in dest_dir.
564
615
  """
565
616
 
617
+ if not os.path.isdir(dest_dir):
618
+ # os.mkdir fails saying the directory *being made* caused a
619
+ # FileNotFoundError. So check the dest_dir before trying to make
620
+ # directories under it.
621
+ raise RuntimeError(f"Cannot devirtualize {filename} into nonexistent directory {dest_dir}")
622
+
566
623
  # TODO: Support people doing path operations (join, split, get parent directory) on the virtualized filenames.
567
624
  # TODO: For task inputs, we are supposed to make sure to put things in the same directory if they came from the same directory. See <https://github.com/openwdl/wdl/blob/main/versions/1.0/SPEC.md#task-input-localization>
568
625
  if is_url(filename):
@@ -597,8 +654,12 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
597
654
  if filename.startswith(TOIL_URI_SCHEME):
598
655
  # Get a local path to the file
599
656
  if isinstance(file_source, AbstractFileStore):
600
- # Read from the file store
601
- result = file_source.readGlobalFile(file_id, dest_path)
657
+ # Read from the file store.
658
+ # File is not allowed to be modified by the task. See
659
+ # <https://github.com/openwdl/wdl/issues/495>.
660
+ # We try to get away with symlinks and hope the task
661
+ # container can mount the destination file.
662
+ result = file_source.readGlobalFile(file_id, dest_path, mutable=False, symlink=True)
602
663
  elif isinstance(file_source, Toil):
603
664
  # Read from the Toil context
604
665
  file_source.export_file(file_id, dest_path)
@@ -628,6 +689,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
628
689
  raise RuntimeError(f"Virtualized file {filename} looks like a local file but isn't!")
629
690
  return result
630
691
 
692
+ @memoize
631
693
  def _virtualize_filename(self, filename: str) -> str:
632
694
  """
633
695
  from a local path in write_dir, 'virtualize' into the filename as it should present in a
@@ -636,21 +698,36 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
636
698
 
637
699
  if is_url(filename):
638
700
  # Already virtual
639
- logger.debug('Already virtualized %s as WDL file %s', filename, filename)
701
+ logger.debug('Already virtual: %s', filename)
640
702
  return filename
641
703
 
642
704
  # Otherwise this is a local file and we want to fake it as a Toil file store file
643
705
 
644
- # To support relative paths from execution directory, join the execution dir and filename
645
- # If filename is already an abs path, join() will not do anything
706
+ # Make it an absolute path
646
707
  if self._execution_dir is not None:
647
- file_id = self._file_store.writeGlobalFile(os.path.join(self._execution_dir, filename))
708
+ # To support relative paths from execution directory, join the execution dir and filename
709
+ # If filename is already an abs path, join() will not do anything
710
+ abs_filename = os.path.join(self._execution_dir, filename)
648
711
  else:
649
- file_id = self._file_store.writeGlobalFile(filename)
650
- dir = os.path.dirname(os.path.abspath(filename)) # is filename always an abspath?
651
- parent_id = self._parent_dir_to_ids.setdefault(dir, uuid.uuid4())
652
- result = pack_toil_uri(file_id, parent_id, os.path.basename(filename))
712
+ abs_filename = os.path.abspath(filename)
713
+
714
+ if abs_filename in self._devirtualized_to_virtualized:
715
+ # This is a previously devirtualized thing so we can just use the
716
+ # virtual version we remembered instead of reuploading it.
717
+ result = self._devirtualized_to_virtualized[abs_filename]
718
+ logger.debug("Re-using virtualized WDL file %s for %s", result, filename)
719
+ return result
720
+
721
+ file_id = self._file_store.writeGlobalFile(abs_filename)
722
+
723
+ file_dir = os.path.dirname(abs_filename)
724
+ parent_id = self._parent_dir_to_ids.setdefault(file_dir, uuid.uuid4())
725
+ result = pack_toil_uri(file_id, parent_id, os.path.basename(abs_filename))
653
726
  logger.debug('Virtualized %s as WDL file %s', filename, result)
727
+ # Remember the upload in case we share a cache
728
+ self._devirtualized_to_virtualized[abs_filename] = result
729
+ # And remember the local path in case we want a redownload
730
+ self._virtualized_to_devirtualized[result] = abs_filename
654
731
  return result
655
732
 
656
733
  class ToilWDLStdLibTaskCommand(ToilWDLStdLibBase):
@@ -695,7 +772,7 @@ class ToilWDLStdLibTaskCommand(ToilWDLStdLibBase):
695
772
  logger.debug('Devirtualized %s as out-of-container file %s', filename, result)
696
773
  return result
697
774
 
698
-
775
+ @memoize
699
776
  def _virtualize_filename(self, filename: str) -> str:
700
777
  """
701
778
  From a local path in write_dir, 'virtualize' into the filename as it should present in a
@@ -717,10 +794,11 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
717
794
  functions only allowed in task output sections.
718
795
  """
719
796
 
720
- def __init__(self, file_store: AbstractFileStore, stdout_path: str, stderr_path: str, current_directory_override: Optional[str] = None):
797
+ def __init__(self, file_store: AbstractFileStore, stdout_path: str, stderr_path: str, file_to_mountpoint: Dict[str, str], current_directory_override: Optional[str] = None):
721
798
  """
722
799
  Set up the standard library for a task output section. Needs to know
723
- where standard output and error from the task have been stored.
800
+ where standard output and error from the task have been stored, and
801
+ what local paths to pretend are where for resolving symlinks.
724
802
 
725
803
  If current_directory_override is set, resolves relative paths and globs
726
804
  from there instead of from the real current directory.
@@ -738,6 +816,9 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
738
816
  self._stdout_used = False
739
817
  self._stderr_used = False
740
818
 
819
+ # Reverse and store the file mount dict
820
+ self._mountpoint_to_file = {v: k for k, v in file_to_mountpoint.items()}
821
+
741
822
  # Remember current directory
742
823
  self._current_directory_override = current_directory_override
743
824
 
@@ -806,7 +887,7 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
806
887
  work_dir = '.' if not self._current_directory_override else self._current_directory_override
807
888
 
808
889
  # TODO: get this to run in the right container if there is one
809
- # Bash (now?) has a compgen builtin for shell completion that can evaluate a glob where the glob is in a quotes string that might have spaces in it. See <https://unix.stackexchange.com/a/616608>.
890
+ # Bash (now?) has a compgen builtin for shell completion that can evaluate a glob where the glob is in a quoted string that might have spaces in it. See <https://unix.stackexchange.com/a/616608>.
810
891
  # This will handle everything except newlines in the filenames.
811
892
  # TODO: Newlines in the filenames?
812
893
  # Since compgen will return 1 if nothing matches, we need to allow a failing exit code here.
@@ -844,6 +925,7 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
844
925
 
845
926
  return super()._devirtualize_filename(filename)
846
927
 
928
+ @memoize
847
929
  def _virtualize_filename(self, filename: str) -> str:
848
930
  """
849
931
  Go from a local disk filename to a virtualized WDL-side filename.
@@ -854,11 +936,46 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
854
936
  """
855
937
 
856
938
  if not is_url(filename) and not filename.startswith('/'):
857
- # We are getting a bare relative path the supposedly devirtualized side.
939
+ # We are getting a bare relative path on the supposedly devirtualized side.
858
940
  # Find a real path to it relative to the current directory override.
859
941
  work_dir = '.' if not self._current_directory_override else self._current_directory_override
860
942
  filename = os.path.join(work_dir, filename)
861
943
 
944
+ if filename in self._devirtualized_to_virtualized:
945
+ result = self._devirtualized_to_virtualized[filename]
946
+ logger.debug("Re-using virtualized filename %s for %s", result, filename)
947
+ return result
948
+
949
+ if os.path.islink(filename):
950
+ # Recursively resolve symlinks
951
+ here = filename
952
+ # Notice if we have a symlink loop
953
+ seen = {here}
954
+ while os.path.islink(here):
955
+ dest = os.readlink(here)
956
+ if not dest.startswith('/'):
957
+ # Make it absolute
958
+ dest = os.path.join(os.path.dirname(here), dest)
959
+ here = dest
960
+ if here in self._mountpoint_to_file:
961
+ # This points to something mounted into the container, so use that path instead.
962
+ here = self._mountpoint_to_file[here]
963
+ if here in self._devirtualized_to_virtualized:
964
+ # Check the virtualized filenames before following symlinks
965
+ # all the way back to workflow inputs.
966
+ result = self._devirtualized_to_virtualized[here]
967
+ logger.debug("Re-using virtualized filename %s for %s linked from %s", result, here, filename)
968
+ return result
969
+ if here in seen:
970
+ raise RuntimeError(f"Symlink {filename} leads to symlink loop at {here}")
971
+ seen.add(here)
972
+
973
+ if os.path.exists(here):
974
+ logger.debug("Handling symlink %s ultimately to %s", filename, here)
975
+ else:
976
+ logger.error("Handling broken symlink %s ultimately to %s", filename, here)
977
+ filename = here
978
+
862
979
  return super()._virtualize_filename(filename)
863
980
 
864
981
  def evaluate_named_expression(context: Union[WDL.Error.SourceNode, WDL.Error.SourcePosition], name: str, expected_type: Optional[WDL.Type.Base], expression: Optional[WDL.Expr.Base], environment: WDLBindings, stdlib: WDL.StdLib.Base) -> WDL.Value.Base:
@@ -1110,8 +1227,10 @@ def drop_missing_files(environment: WDLBindings, current_directory_override: Opt
1110
1227
  logger.warning('File %s with type %s does not actually exist at its URI', filename, value_type)
1111
1228
  return None
1112
1229
  else:
1230
+ # Get the absolute path, not resolving symlinks
1113
1231
  effective_path = os.path.abspath(os.path.join(work_dir, filename))
1114
- if os.path.exists(effective_path):
1232
+ if os.path.islink(effective_path) or os.path.exists(effective_path):
1233
+ # This is a broken symlink or a working symlink or a file.
1115
1234
  return filename
1116
1235
  else:
1117
1236
  logger.warning('File %s with type %s does not actually exist at %s', filename, value_type, effective_path)
@@ -1125,7 +1244,7 @@ def get_file_paths_in_bindings(environment: WDLBindings) -> List[str]:
1125
1244
  duplicates are removed.
1126
1245
 
1127
1246
  TODO: Duplicative with WDL.runtime.task._fspaths, except that is internal
1128
- and supports Direcotry objects.
1247
+ and supports Directory objects.
1129
1248
  """
1130
1249
 
1131
1250
  paths = []
@@ -1250,7 +1369,7 @@ class WDLBaseJob(Job):
1250
1369
  # may have coalesced postprocessing steps deferred by several levels of
1251
1370
  # jobs returning other jobs' promised RVs.
1252
1371
  self._postprocessing_steps: List[Tuple[str, Union[str, Promised[WDLBindings]]]] = []
1253
-
1372
+
1254
1373
  self._wdl_options = wdl_options if wdl_options is not None else {}
1255
1374
 
1256
1375
  assert self._wdl_options.get("container") is not None
@@ -1306,7 +1425,7 @@ class WDLBaseJob(Job):
1306
1425
 
1307
1426
  for action, argument in self._postprocessing_steps:
1308
1427
 
1309
- logger.debug("Apply postprocessing setp: (%s, %s)", action, argument)
1428
+ logger.debug("Apply postprocessing step: (%s, %s)", action, argument)
1310
1429
 
1311
1430
  # Interpret the mini language of postprocessing steps.
1312
1431
  # These are too small to justify being their own separate jobs.
@@ -1378,7 +1497,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
1378
1497
  self._namespace = namespace
1379
1498
  self._task_path = task_path
1380
1499
 
1381
- @report_wdl_errors("evaluate task code")
1500
+ @report_wdl_errors("evaluate task code", exit=True)
1382
1501
  def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
1383
1502
  """
1384
1503
  Evaluate inputs and runtime and schedule the task.
@@ -1407,7 +1526,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
1407
1526
  # Evaluate the runtime section
1408
1527
  runtime_bindings = evaluate_call_inputs(self._task, self._task.runtime, bindings, standard_library)
1409
1528
 
1410
- # Fill these in with not-None if the workflow asks for each resource.
1529
+ # Fill these in with not-None if the workflow asks for each resource.
1411
1530
  runtime_memory: Optional[int] = None
1412
1531
  runtime_cores: Optional[float] = None
1413
1532
  runtime_disk: Optional[int] = None
@@ -1529,6 +1648,123 @@ class WDLTaskJob(WDLBaseJob):
1529
1648
  self._namespace = namespace
1530
1649
  self._task_path = task_path
1531
1650
 
1651
+ ###
1652
+ # Runtime code injection system
1653
+ ###
1654
+
1655
+ # WDL runtime code injected in the container communicates back to the rest
1656
+ # of the runtime through files in this directory.
1657
+ INJECTED_MESSAGE_DIR = ".toil_wdl_runtime"
1658
+
1659
+ def add_injections(self, command_string: str, task_container: TaskContainer) -> str:
1660
+ """
1661
+ Inject extra Bash code from the Toil WDL runtime into the command for the container.
1662
+
1663
+ Currently doesn't implement the MiniWDL plugin system, but does add
1664
+ resource usage monitoring to Docker containers.
1665
+ """
1666
+ if isinstance(task_container, SwarmContainer):
1667
+ # We're running on Docker Swarm, so we need to monitor CPU usage
1668
+ # and so on from inside the container, since it won't be attributed
1669
+ # to Toil child processes in the leader's self-monitoring.
1670
+ # TODO: Mount this from a file Toil installs instead or something.
1671
+ script = textwrap.dedent("""\
1672
+ function _toil_resource_monitor () {
1673
+ # Turn off error checking and echo in here
1674
+ set +ex
1675
+ MESSAGE_DIR="${1}"
1676
+ mkdir -p "${MESSAGE_DIR}"
1677
+
1678
+ function sample_cpu_usec() {
1679
+ if [[ -f /sys/fs/cgroup/cpu.stat ]] ; then
1680
+ awk '{ if ($1 == "usage_usec") {print $2} }' /sys/fs/cgroup/cpu.stat
1681
+ elif [[ -f /sys/fs/cgroup/cpuacct/cpuacct.stat ]] ; then
1682
+ echo $(( $(head -n 1 /sys/fs/cgroup/cpuacct/cpuacct.stat | cut -f2 -d' ') * 10000 ))
1683
+ fi
1684
+ }
1685
+
1686
+ function sample_memory_bytes() {
1687
+ if [[ -f /sys/fs/cgroup/memory.stat ]] ; then
1688
+ awk '{ if ($1 == "anon") { print $2 } }' /sys/fs/cgroup/memory.stat
1689
+ elif [[ -f /sys/fs/cgroup/memory/memory.stat ]] ; then
1690
+ awk '{ if ($1 == "total_rss") { print $2 } }' /sys/fs/cgroup/memory/memory.stat
1691
+ fi
1692
+ }
1693
+
1694
+ while true ; do
1695
+ printf "CPU\\t" >> ${MESSAGE_DIR}/resources.tsv
1696
+ sample_cpu_usec >> ${MESSAGE_DIR}/resources.tsv
1697
+ printf "Memory\\t" >> ${MESSAGE_DIR}/resources.tsv
1698
+ sample_memory_bytes >> ${MESSAGE_DIR}/resources.tsv
1699
+ sleep 1
1700
+ done
1701
+ }
1702
+ """)
1703
+ parts = [script, f"_toil_resource_monitor {self.INJECTED_MESSAGE_DIR} &", command_string]
1704
+ return "\n".join(parts)
1705
+ else:
1706
+ return command_string
1707
+
1708
+ def handle_injection_messages(self, outputs_library: ToilWDLStdLibTaskOutputs) -> None:
1709
+ """
1710
+ Handle any data received from injected runtime code in the container.
1711
+ """
1712
+
1713
+ message_files = outputs_library._glob(WDL.Value.String(os.path.join(self.INJECTED_MESSAGE_DIR, "*")))
1714
+ logger.debug("Handling message files: %s", message_files)
1715
+ for message_file in message_files.value:
1716
+ self.handle_message_file(message_file.value)
1717
+
1718
+ def handle_message_file(self, file_path: str) -> None:
1719
+ """
1720
+ Handle a message file received from in-container injected code.
1721
+
1722
+ Takes the host-side path of the file.
1723
+ """
1724
+ if os.path.basename(file_path) == "resources.tsv":
1725
+ # This is a TSV of resource usage info.
1726
+ first_cpu_usec: Optional[int] = None
1727
+ last_cpu_usec: Optional[int] = None
1728
+ max_memory_bytes: Optional[int] = None
1729
+
1730
+ for line in open(file_path):
1731
+ if not line.endswith("\n"):
1732
+ # Skip partial lines
1733
+ continue
1734
+ # For each full line we got
1735
+ parts = line.strip().split("\t")
1736
+ if len(parts) != 2:
1737
+ # Skip odd-shaped lines
1738
+ continue
1739
+ if parts[0] == "CPU":
1740
+ # Parse CPU usage
1741
+ cpu_usec = int(parts[1])
1742
+ # Update summary stats
1743
+ if first_cpu_usec is None:
1744
+ first_cpu_usec = cpu_usec
1745
+ last_cpu_usec = cpu_usec
1746
+ elif parts[0] == "Memory":
1747
+ # Parse memory usage
1748
+ memory_bytes = int(parts[1])
1749
+ # Update summary stats
1750
+ if max_memory_bytes is None or max_memory_bytes < memory_bytes:
1751
+ max_memory_bytes = memory_bytes
1752
+
1753
+ if max_memory_bytes is not None:
1754
+ logger.info("Container used at about %s bytes of memory at peak", max_memory_bytes)
1755
+ # Treat it as if used by a child process
1756
+ ResourceMonitor.record_extra_memory(max_memory_bytes // 1024)
1757
+ if last_cpu_usec is not None:
1758
+ assert(first_cpu_usec is not None)
1759
+ cpu_seconds = (last_cpu_usec - first_cpu_usec) / 1000000
1760
+ logger.info("Container used about %s seconds of CPU time", cpu_seconds)
1761
+ # Treat it as if used by a child process
1762
+ ResourceMonitor.record_extra_cpu(cpu_seconds)
1763
+
1764
+ ###
1765
+ # Helper functions to work out which container runtime we can use
1766
+ ###
1767
+
1532
1768
  def can_fake_root(self) -> bool:
1533
1769
  """
1534
1770
  Determine if --fakeroot is likely to work for Singularity.
@@ -1559,7 +1795,7 @@ class WDLTaskJob(WDLBaseJob):
1559
1795
  """
1560
1796
  return "KUBERNETES_SERVICE_HOST" not in os.environ
1561
1797
 
1562
- @report_wdl_errors("run task command")
1798
+ @report_wdl_errors("run task command", exit=True)
1563
1799
  def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
1564
1800
  """
1565
1801
  Actually run the task.
@@ -1575,7 +1811,7 @@ class WDLTaskJob(WDLBaseJob):
1575
1811
  bindings = unwrap(self._task_internal_bindings)
1576
1812
  # And the bindings from evaluating the runtime section
1577
1813
  runtime_bindings = unwrap(self._runtime_bindings)
1578
-
1814
+
1579
1815
  # We have all the resources we need, so run the task
1580
1816
 
1581
1817
  if shutil.which('singularity') and self._wdl_options.get("container") in ["singularity", "auto"]:
@@ -1644,9 +1880,20 @@ class WDLTaskJob(WDLBaseJob):
1644
1880
  workdir_in_container: Optional[str] = None
1645
1881
 
1646
1882
  if self._task.command:
1647
- # When the command string references a File, we need to get a path to the file on a local disk, which the commnad will be able to actually use, accounting for e.g. containers.
1648
- # TODO: Figure out whan the command template actually uses File values and lazily download them.
1649
- # For now we just grab all the File values in the inside-the-task environment, since any of them *might* be used.
1883
+ # When the command string references a File, we need to get a path
1884
+ # to the file on a local disk, which the command will be able to
1885
+ # actually use, accounting for e.g. containers.
1886
+ #
1887
+ # TODO: Figure out when the command template actually uses File
1888
+ # values and lazily download them.
1889
+ #
1890
+ # For now we just grab all the File values in the inside-the-task
1891
+ # environment, since any of them *might* be used.
1892
+ #
1893
+ # Some also might be expected to be adjacent to files that are
1894
+ # used, like a BAI that doesn't get referenced in a command line
1895
+ # but must be next to its BAM.
1896
+ #
1650
1897
  # TODO: MiniWDL can parallelize the fetch
1651
1898
  bindings = devirtualize_files(bindings, standard_library)
1652
1899
 
@@ -1721,6 +1968,7 @@ class WDLTaskJob(WDLBaseJob):
1721
1968
  # them all new paths in task_container.input_path_map which we can
1722
1969
  # read. We also get a task_container.host_path() to go the other way.
1723
1970
  add_paths(task_container, get_file_paths_in_bindings(bindings))
1971
+ # This maps from outside container to inside container
1724
1972
  logger.debug("Using container path map: %s", task_container.input_path_map)
1725
1973
 
1726
1974
  # Replace everything with in-container paths for the command.
@@ -1767,6 +2015,9 @@ class WDLTaskJob(WDLBaseJob):
1767
2015
  # Work out the command string, and unwrap it
1768
2016
  command_string: str = hacky_dedent(evaluate_named_expression(self._task, "command", WDL.Type.String(), self._task.command, contained_bindings, command_library).coerce(WDL.Type.String()).value)
1769
2017
 
2018
+ # Do any command injection we might need to do
2019
+ command_string = self.add_injections(command_string, task_container)
2020
+
1770
2021
  # Grab the standard out and error paths. MyPy complains if we call
1771
2022
  # them because in the current MiniWDL version they are untyped.
1772
2023
  # TODO: MyPy will complain if we accommodate this and they later
@@ -1786,8 +2037,16 @@ class WDLTaskJob(WDLBaseJob):
1786
2037
  with ExitStack() as cleanup:
1787
2038
  task_container._pull(miniwdl_logger, cleanup)
1788
2039
 
1789
- # Run the command in the container
2040
+ # Log that we are about to run the command in the container
1790
2041
  logger.info('Executing command in %s: %s', task_container, command_string)
2042
+
2043
+ # Now our inputs are all downloaded. Let debugging break in (after command is logged).
2044
+ # But we need to hint which host paths are meant to be which container paths
2045
+ host_and_job_paths: List[Tuple[str, str]] = [(k, v) for k, v in task_container.input_path_map.items()]
2046
+ self.files_downloaded_hook(host_and_job_paths)
2047
+
2048
+ # TODO: Really we might want to set up a fake container working directory, to actually help the user.
2049
+
1791
2050
  try:
1792
2051
  task_container.run(miniwdl_logger, command_string)
1793
2052
  except Exception:
@@ -1834,15 +2093,14 @@ class WDLTaskJob(WDLBaseJob):
1834
2093
  # container-determined strings that are absolute paths to WDL File
1835
2094
  # objects, and like MiniWDL we can say we only support
1836
2095
  # working-directory-based relative paths for globs.
1837
- outputs_library = ToilWDLStdLibTaskOutputs(file_store, host_stdout_txt, host_stderr_txt, current_directory_override=workdir_in_container)
2096
+ outputs_library = ToilWDLStdLibTaskOutputs(file_store, host_stdout_txt, host_stderr_txt, task_container.input_path_map, current_directory_override=workdir_in_container)
2097
+ # Make sure files downloaded as inputs get re-used if we re-upload them.
2098
+ outputs_library.share_files(standard_library)
1838
2099
  output_bindings = evaluate_output_decls(self._task.outputs, bindings, outputs_library)
1839
2100
 
1840
2101
  # Now we know if the standard output and error were sent somewhere by
1841
2102
  # the workflow. If not, we should report them to the leader.
1842
2103
 
1843
- # Drop any files from the output which don't actually exist
1844
- output_bindings = drop_missing_files(output_bindings, current_directory_override=workdir_in_container)
1845
-
1846
2104
  if not outputs_library.stderr_used() and os.path.exists(host_stderr_txt):
1847
2105
  size = os.path.getsize(host_stderr_txt)
1848
2106
  logger.info('Unused standard error at %s of %d bytes', host_stderr_txt, size)
@@ -1857,10 +2115,17 @@ class WDLTaskJob(WDLBaseJob):
1857
2115
  # Save the whole output stream because the workflow didn't capture it.
1858
2116
  file_store.log_user_stream(self._task_path + '.stdout', open(host_stdout_txt, 'rb'))
1859
2117
 
1860
- # TODO: Check the output bindings against the types of the decls so we
1861
- # can tell if we have a null in a value that is supposed to not be
1862
- # nullable. We can't just look at the types on the values themselves
1863
- # because those are all the non-nullable versions.
2118
+ # Collect output messages from any code Toil injected into the task.
2119
+ self.handle_injection_messages(outputs_library)
2120
+
2121
+ # Drop any files from the output which don't actually exist
2122
+ output_bindings = drop_missing_files(output_bindings, current_directory_override=workdir_in_container)
2123
+ for decl in self._task.outputs:
2124
+ if not decl.type.optional and output_bindings[decl.name].value is None:
2125
+ # We have an unacceptable null value. This can happen if a file
2126
+ # is missing but not optional. Don't let it out to annoy the
2127
+ # next task.
2128
+ raise WDL.Error.EvalError(decl, f"non-optional value {decl.name} = {decl.expr} is missing")
1864
2129
 
1865
2130
  # Upload any files in the outputs if not uploaded already. Accounts for how relative paths may still need to be container-relative.
1866
2131
  output_bindings = virtualize_files(output_bindings, outputs_library)
@@ -2893,13 +3158,15 @@ def main() -> None:
2893
3158
  raise RuntimeError("The output of the WDL job is not a binding.")
2894
3159
 
2895
3160
  # Fetch all the output files
2896
- # TODO: deduplicate with _devirtualize_filename
2897
3161
  def devirtualize_output(filename: str) -> str:
2898
3162
  """
2899
3163
  'devirtualize' a file using the "toil" object instead of a filestore.
2900
3164
  Returns its local path.
2901
3165
  """
2902
- return ToilWDLStdLibBase.devirtualze_to(filename, output_directory, toil, execution_dir)
3166
+ # Make sure the output directory exists if we have output files
3167
+ # that might need to use it.
3168
+ os.makedirs(output_directory, exist_ok=True)
3169
+ return ToilWDLStdLibBase.devirtualize_to(filename, output_directory, toil, execution_dir)
2903
3170
 
2904
3171
  # Make all the files local files
2905
3172
  output_bindings = map_over_files_in_bindings(output_bindings, devirtualize_output)