toil 6.1.0__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +1 -232
- toil/batchSystems/abstractBatchSystem.py +22 -13
- toil/batchSystems/abstractGridEngineBatchSystem.py +59 -45
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/contained_executor.py +4 -5
- toil/batchSystems/gridengine.py +1 -1
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +25 -11
- toil/batchSystems/local_support.py +3 -3
- toil/batchSystems/lsf.py +2 -2
- toil/batchSystems/mesos/batchSystem.py +4 -4
- toil/batchSystems/mesos/executor.py +3 -2
- toil/batchSystems/options.py +9 -0
- toil/batchSystems/singleMachine.py +11 -10
- toil/batchSystems/slurm.py +64 -22
- toil/batchSystems/torque.py +1 -1
- toil/bus.py +7 -3
- toil/common.py +36 -13
- toil/cwl/cwltoil.py +365 -312
- toil/deferred.py +1 -1
- toil/fileStores/abstractFileStore.py +17 -17
- toil/fileStores/cachingFileStore.py +2 -2
- toil/fileStores/nonCachingFileStore.py +1 -1
- toil/job.py +228 -60
- toil/jobStores/abstractJobStore.py +18 -10
- toil/jobStores/aws/jobStore.py +280 -218
- toil/jobStores/aws/utils.py +57 -29
- toil/jobStores/conftest.py +2 -2
- toil/jobStores/fileJobStore.py +2 -2
- toil/jobStores/googleJobStore.py +3 -4
- toil/leader.py +72 -24
- toil/lib/aws/__init__.py +26 -10
- toil/lib/aws/iam.py +2 -2
- toil/lib/aws/session.py +62 -22
- toil/lib/aws/utils.py +73 -37
- toil/lib/conversions.py +5 -1
- toil/lib/ec2.py +118 -69
- toil/lib/expando.py +1 -1
- toil/lib/io.py +14 -2
- toil/lib/misc.py +1 -3
- toil/lib/resources.py +55 -21
- toil/lib/retry.py +12 -5
- toil/lib/threading.py +2 -2
- toil/lib/throttle.py +1 -1
- toil/options/common.py +27 -24
- toil/provisioners/__init__.py +9 -3
- toil/provisioners/abstractProvisioner.py +9 -7
- toil/provisioners/aws/__init__.py +20 -15
- toil/provisioners/aws/awsProvisioner.py +406 -329
- toil/provisioners/gceProvisioner.py +2 -2
- toil/provisioners/node.py +13 -5
- toil/server/app.py +1 -1
- toil/statsAndLogging.py +58 -16
- toil/test/__init__.py +27 -12
- toil/test/batchSystems/batchSystemTest.py +40 -33
- toil/test/batchSystems/batch_system_plugin_test.py +79 -0
- toil/test/batchSystems/test_slurm.py +1 -1
- toil/test/cwl/cwlTest.py +8 -91
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +10 -13
- toil/test/jobStores/jobStoreTest.py +33 -49
- toil/test/lib/aws/test_iam.py +2 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
- toil/test/provisioners/clusterTest.py +90 -8
- toil/test/server/serverTest.py +2 -2
- toil/test/src/autoDeploymentTest.py +1 -1
- toil/test/src/dockerCheckTest.py +2 -1
- toil/test/src/environmentTest.py +125 -0
- toil/test/src/fileStoreTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +18 -8
- toil/test/src/jobTest.py +1 -1
- toil/test/src/realtimeLoggerTest.py +4 -0
- toil/test/src/workerTest.py +52 -19
- toil/test/utils/toilDebugTest.py +61 -3
- toil/test/utils/utilsTest.py +20 -18
- toil/test/wdl/wdltoil_test.py +24 -71
- toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
- toil/toilState.py +68 -9
- toil/utils/toilDebugJob.py +153 -26
- toil/utils/toilLaunchCluster.py +12 -2
- toil/utils/toilRsyncCluster.py +7 -2
- toil/utils/toilSshCluster.py +7 -3
- toil/utils/toilStats.py +2 -1
- toil/utils/toilStatus.py +97 -51
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +318 -51
- toil/worker.py +96 -69
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/METADATA +55 -21
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/RECORD +93 -90
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/wdl/wdltoil.py
CHANGED
|
@@ -24,6 +24,7 @@ import shutil
|
|
|
24
24
|
import stat
|
|
25
25
|
import subprocess
|
|
26
26
|
import sys
|
|
27
|
+
import textwrap
|
|
27
28
|
import uuid
|
|
28
29
|
from contextlib import ExitStack, contextmanager
|
|
29
30
|
from graphlib import TopologicalSorter
|
|
@@ -47,14 +48,15 @@ from urllib.parse import quote, unquote, urljoin, urlsplit
|
|
|
47
48
|
|
|
48
49
|
import WDL.Error
|
|
49
50
|
import WDL.runtime.config
|
|
50
|
-
from configargparse import ArgParser
|
|
51
|
+
from configargparse import ArgParser
|
|
51
52
|
from WDL._util import byte_size_units, strip_leading_whitespace
|
|
52
53
|
from WDL.CLI import print_error
|
|
53
54
|
from WDL.runtime.backend.docker_swarm import SwarmContainer
|
|
54
55
|
from WDL.runtime.backend.singularity import SingularityContainer
|
|
55
56
|
from WDL.runtime.task_container import TaskContainer
|
|
56
57
|
|
|
57
|
-
from toil.
|
|
58
|
+
from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
|
|
59
|
+
from toil.common import Toil, addOptions
|
|
58
60
|
from toil.fileStores import FileID
|
|
59
61
|
from toil.fileStores.abstractFileStore import AbstractFileStore
|
|
60
62
|
from toil.job import (AcceleratorRequirement,
|
|
@@ -62,17 +64,19 @@ from toil.job import (AcceleratorRequirement,
|
|
|
62
64
|
Promise,
|
|
63
65
|
Promised,
|
|
64
66
|
TemporaryID,
|
|
65
|
-
accelerators_fully_satisfy,
|
|
66
67
|
parse_accelerator,
|
|
67
68
|
unwrap,
|
|
68
69
|
unwrap_all)
|
|
69
|
-
from toil.jobStores.abstractJobStore import (AbstractJobStore,
|
|
70
|
-
|
|
71
|
-
from toil.lib.conversions import convert_units, human2bytes
|
|
70
|
+
from toil.jobStores.abstractJobStore import (AbstractJobStore, UnimplementedURLException,
|
|
71
|
+
InvalidImportExportUrlException, LocatorException)
|
|
72
|
+
from toil.lib.conversions import convert_units, human2bytes, strtobool
|
|
72
73
|
from toil.lib.io import mkdtemp
|
|
73
74
|
from toil.lib.memoize import memoize
|
|
74
75
|
from toil.lib.misc import get_user_name
|
|
76
|
+
from toil.lib.resources import ResourceMonitor
|
|
75
77
|
from toil.lib.threading import global_mutex
|
|
78
|
+
from toil.provisioners.clusterScaler import JobTooBigError
|
|
79
|
+
|
|
76
80
|
|
|
77
81
|
logger = logging.getLogger(__name__)
|
|
78
82
|
|
|
@@ -86,19 +90,30 @@ def wdl_error_reporter(task: str, exit: bool = False, log: Callable[[str], None]
|
|
|
86
90
|
try:
|
|
87
91
|
yield
|
|
88
92
|
except (
|
|
93
|
+
WDL.Error.EvalError,
|
|
89
94
|
WDL.Error.SyntaxError,
|
|
90
95
|
WDL.Error.ImportError,
|
|
91
96
|
WDL.Error.ValidationError,
|
|
92
97
|
WDL.Error.MultipleValidationErrors,
|
|
93
|
-
FileNotFoundError
|
|
98
|
+
FileNotFoundError,
|
|
99
|
+
InsufficientSystemResources,
|
|
100
|
+
LocatorException,
|
|
101
|
+
InvalidImportExportUrlException,
|
|
102
|
+
UnimplementedURLException,
|
|
103
|
+
JobTooBigError
|
|
94
104
|
) as e:
|
|
95
|
-
|
|
105
|
+
# Don't expose tracebacks to the user for exceptions that may be expected
|
|
106
|
+
log("Could not " + task + " because:")
|
|
107
|
+
|
|
96
108
|
# These are the errors that MiniWDL's parser can raise and its reporter
|
|
97
|
-
# can report. See
|
|
109
|
+
# can report (plus some extras). See
|
|
98
110
|
# https://github.com/chanzuckerberg/miniwdl/blob/a780b1bf2db61f18de37616068968b2bb4c2d21c/WDL/CLI.py#L91-L97.
|
|
99
111
|
#
|
|
100
112
|
# We are going to use MiniWDL's pretty printer to print them.
|
|
113
|
+
# Make the MiniWDL stuff on stderr loud so people see it
|
|
114
|
+
sys.stderr.write("\n" + "🚨" * 3 + "\n")
|
|
101
115
|
print_error(e)
|
|
116
|
+
sys.stderr.write("🚨" * 3 + "\n\n")
|
|
102
117
|
if exit:
|
|
103
118
|
# Stop right now
|
|
104
119
|
sys.exit(1)
|
|
@@ -524,6 +539,8 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
524
539
|
def __init__(self, file_store: AbstractFileStore, execution_dir: Optional[str] = None):
|
|
525
540
|
"""
|
|
526
541
|
Set up the standard library.
|
|
542
|
+
|
|
543
|
+
:param execution_dir: Directory to use as the working directory for workflow code.
|
|
527
544
|
"""
|
|
528
545
|
# TODO: Just always be the 1.2 standard library.
|
|
529
546
|
wdl_version = "1.2"
|
|
@@ -542,27 +559,67 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
542
559
|
# UUID to differentiate which node files are virtualized from
|
|
543
560
|
self._parent_dir_to_ids: Dict[str, uuid.UUID] = dict()
|
|
544
561
|
|
|
562
|
+
# Map forward from virtualized files to absolute devirtualized ones.
|
|
563
|
+
self._virtualized_to_devirtualized: Dict[str, str] = {}
|
|
564
|
+
# Allow mapping back from absolute devirtualized files to virtualized
|
|
565
|
+
# paths, to save re-uploads.
|
|
566
|
+
self._devirtualized_to_virtualized: Dict[str, str] = {}
|
|
567
|
+
|
|
545
568
|
self._execution_dir = execution_dir
|
|
546
569
|
|
|
570
|
+
def share_files(self, other: "ToilWDLStdLibBase") -> None:
    """
    Share caches for devirtualizing and virtualizing files with another instance.

    Files devirtualized by one instance can be re-virtualized back to their
    original virtualized filenames by the other.

    Each of the two mappings held by ``other`` is merged into the matching
    mapping on this instance (an entry from ``other`` wins on a key
    collision), and ``other`` is then pointed at this instance's dictionary,
    so entries added later through either instance are visible to both.

    :param other: The instance to share the file mapping caches with.
    """

    # Compare by identity, not equality: two dictionaries with equal contents
    # are still separate caches until merged, while an already-shared
    # dictionary (or sharing with ourselves) needs no work.
    if self._virtualized_to_devirtualized is not other._virtualized_to_devirtualized:
        # Merge the virtualized to devirtualized mappings
        self._virtualized_to_devirtualized.update(other._virtualized_to_devirtualized)
        other._virtualized_to_devirtualized = self._virtualized_to_devirtualized

    if self._devirtualized_to_virtualized is not other._devirtualized_to_virtualized:
        # Merge the devirtualized to virtualized mappings
        self._devirtualized_to_virtualized.update(other._devirtualized_to_virtualized)
        other._devirtualized_to_virtualized = self._devirtualized_to_virtualized
|
|
587
|
+
|
|
547
588
|
@memoize
|
|
548
589
|
def _devirtualize_filename(self, filename: str) -> str:
|
|
549
590
|
"""
|
|
550
591
|
'devirtualize' filename passed to a read_* function: return a filename that can be open()ed
|
|
551
592
|
on the local host.
|
|
552
593
|
"""
|
|
553
|
-
|
|
554
|
-
|
|
594
|
+
|
|
595
|
+
result = self.devirtualize_to(filename, self._file_store.localTempDir, self._file_store, self._execution_dir)
|
|
596
|
+
# Store the back mapping
|
|
597
|
+
self._devirtualized_to_virtualized[result] = filename
|
|
598
|
+
# And the forward
|
|
599
|
+
self._virtualized_to_devirtualized[filename] = result
|
|
600
|
+
return result
|
|
555
601
|
|
|
556
602
|
@staticmethod
|
|
557
|
-
def
|
|
603
|
+
def devirtualize_to(filename: str, dest_dir: str, file_source: Union[AbstractFileStore, Toil], execution_dir: Optional[str]) -> str:
|
|
558
604
|
"""
|
|
559
605
|
Download or export a WDL virtualized filename/URL to the given directory.
|
|
560
606
|
|
|
561
|
-
|
|
607
|
+
The destination directory must already exist.
|
|
608
|
+
|
|
609
|
+
Makes sure sibling files stay siblings and files with the same name
|
|
610
|
+
don't clobber each other. Called from within this class for tasks, and
|
|
611
|
+
statically at the end of the workflow for outputs.
|
|
562
612
|
|
|
563
|
-
Returns the local path to the file.
|
|
613
|
+
Returns the local path to the file. If it already had a local path
|
|
614
|
+
elsewhere, it might not actually be put in dest_dir.
|
|
564
615
|
"""
|
|
565
616
|
|
|
617
|
+
if not os.path.isdir(dest_dir):
|
|
618
|
+
# os.mkdir fails saying the directory *being made* caused a
|
|
619
|
+
# FileNotFoundError. So check the dest_dir before trying to make
|
|
620
|
+
# directories under it.
|
|
621
|
+
raise RuntimeError(f"Cannot devirtualize {filename} into nonexistent directory {dest_dir}")
|
|
622
|
+
|
|
566
623
|
# TODO: Support people doing path operations (join, split, get parent directory) on the virtualized filenames.
|
|
567
624
|
# TODO: For task inputs, we are supposed to make sure to put things in the same directory if they came from the same directory. See <https://github.com/openwdl/wdl/blob/main/versions/1.0/SPEC.md#task-input-localization>
|
|
568
625
|
if is_url(filename):
|
|
@@ -597,8 +654,12 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
597
654
|
if filename.startswith(TOIL_URI_SCHEME):
|
|
598
655
|
# Get a local path to the file
|
|
599
656
|
if isinstance(file_source, AbstractFileStore):
|
|
600
|
-
# Read from the file store
|
|
601
|
-
|
|
657
|
+
# Read from the file store.
|
|
658
|
+
# File is not allowed to be modified by the task. See
|
|
659
|
+
# <https://github.com/openwdl/wdl/issues/495>.
|
|
660
|
+
# We try to get away with symlinks and hope the task
|
|
661
|
+
# container can mount the destination file.
|
|
662
|
+
result = file_source.readGlobalFile(file_id, dest_path, mutable=False, symlink=True)
|
|
602
663
|
elif isinstance(file_source, Toil):
|
|
603
664
|
# Read from the Toil context
|
|
604
665
|
file_source.export_file(file_id, dest_path)
|
|
@@ -628,6 +689,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
628
689
|
raise RuntimeError(f"Virtualized file {filename} looks like a local file but isn't!")
|
|
629
690
|
return result
|
|
630
691
|
|
|
692
|
+
@memoize
|
|
631
693
|
def _virtualize_filename(self, filename: str) -> str:
|
|
632
694
|
"""
|
|
633
695
|
from a local path in write_dir, 'virtualize' into the filename as it should present in a
|
|
@@ -636,21 +698,36 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
636
698
|
|
|
637
699
|
if is_url(filename):
|
|
638
700
|
# Already virtual
|
|
639
|
-
logger.debug('Already
|
|
701
|
+
logger.debug('Already virtual: %s', filename)
|
|
640
702
|
return filename
|
|
641
703
|
|
|
642
704
|
# Otherwise this is a local file and we want to fake it as a Toil file store file
|
|
643
705
|
|
|
644
|
-
#
|
|
645
|
-
# If filename is already an abs path, join() will not do anything
|
|
706
|
+
# Make it an absolute path
|
|
646
707
|
if self._execution_dir is not None:
|
|
647
|
-
|
|
708
|
+
# To support relative paths from execution directory, join the execution dir and filename
|
|
709
|
+
# If filename is already an abs path, join() will not do anything
|
|
710
|
+
abs_filename = os.path.join(self._execution_dir, filename)
|
|
648
711
|
else:
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
712
|
+
abs_filename = os.path.abspath(filename)
|
|
713
|
+
|
|
714
|
+
if abs_filename in self._devirtualized_to_virtualized:
|
|
715
|
+
# This is a previously devirtualized thing so we can just use the
|
|
716
|
+
# virtual version we remembered instead of reuploading it.
|
|
717
|
+
result = self._devirtualized_to_virtualized[abs_filename]
|
|
718
|
+
logger.debug("Re-using virtualized WDL file %s for %s", result, filename)
|
|
719
|
+
return result
|
|
720
|
+
|
|
721
|
+
file_id = self._file_store.writeGlobalFile(abs_filename)
|
|
722
|
+
|
|
723
|
+
file_dir = os.path.dirname(abs_filename)
|
|
724
|
+
parent_id = self._parent_dir_to_ids.setdefault(file_dir, uuid.uuid4())
|
|
725
|
+
result = pack_toil_uri(file_id, parent_id, os.path.basename(abs_filename))
|
|
653
726
|
logger.debug('Virtualized %s as WDL file %s', filename, result)
|
|
727
|
+
# Remember the upload in case we share a cache
|
|
728
|
+
self._devirtualized_to_virtualized[abs_filename] = result
|
|
729
|
+
# And remember the local path in case we want a redownload
|
|
730
|
+
self._virtualized_to_devirtualized[result] = abs_filename
|
|
654
731
|
return result
|
|
655
732
|
|
|
656
733
|
class ToilWDLStdLibTaskCommand(ToilWDLStdLibBase):
|
|
@@ -695,7 +772,7 @@ class ToilWDLStdLibTaskCommand(ToilWDLStdLibBase):
|
|
|
695
772
|
logger.debug('Devirtualized %s as out-of-container file %s', filename, result)
|
|
696
773
|
return result
|
|
697
774
|
|
|
698
|
-
|
|
775
|
+
@memoize
|
|
699
776
|
def _virtualize_filename(self, filename: str) -> str:
|
|
700
777
|
"""
|
|
701
778
|
From a local path in write_dir, 'virtualize' into the filename as it should present in a
|
|
@@ -717,10 +794,11 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
717
794
|
functions only allowed in task output sections.
|
|
718
795
|
"""
|
|
719
796
|
|
|
720
|
-
def __init__(self, file_store: AbstractFileStore, stdout_path: str, stderr_path: str, current_directory_override: Optional[str] = None):
|
|
797
|
+
def __init__(self, file_store: AbstractFileStore, stdout_path: str, stderr_path: str, file_to_mountpoint: Dict[str, str], current_directory_override: Optional[str] = None):
|
|
721
798
|
"""
|
|
722
799
|
Set up the standard library for a task output section. Needs to know
|
|
723
|
-
where standard output and error from the task have been stored
|
|
800
|
+
where standard output and error from the task have been stored, and
|
|
801
|
+
what local paths to pretend are where for resolving symlinks.
|
|
724
802
|
|
|
725
803
|
If current_directory_override is set, resolves relative paths and globs
|
|
726
804
|
from there instead of from the real current directory.
|
|
@@ -738,6 +816,9 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
738
816
|
self._stdout_used = False
|
|
739
817
|
self._stderr_used = False
|
|
740
818
|
|
|
819
|
+
# Reverse and store the file mount dict
|
|
820
|
+
self._mountpoint_to_file = {v: k for k, v in file_to_mountpoint.items()}
|
|
821
|
+
|
|
741
822
|
# Remember current directory
|
|
742
823
|
self._current_directory_override = current_directory_override
|
|
743
824
|
|
|
@@ -806,7 +887,7 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
806
887
|
work_dir = '.' if not self._current_directory_override else self._current_directory_override
|
|
807
888
|
|
|
808
889
|
# TODO: get this to run in the right container if there is one
|
|
809
|
-
# Bash (now?) has a compgen builtin for shell completion that can evaluate a glob where the glob is in a
|
|
890
|
+
# Bash (now?) has a compgen builtin for shell completion that can evaluate a glob where the glob is in a quoted string that might have spaces in it. See <https://unix.stackexchange.com/a/616608>.
|
|
810
891
|
# This will handle everything except newlines in the filenames.
|
|
811
892
|
# TODO: Newlines in the filenames?
|
|
812
893
|
# Since compgen will return 1 if nothing matches, we need to allow a failing exit code here.
|
|
@@ -844,6 +925,7 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
844
925
|
|
|
845
926
|
return super()._devirtualize_filename(filename)
|
|
846
927
|
|
|
928
|
+
@memoize
|
|
847
929
|
def _virtualize_filename(self, filename: str) -> str:
|
|
848
930
|
"""
|
|
849
931
|
Go from a local disk filename to a virtualized WDL-side filename.
|
|
@@ -854,11 +936,46 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
854
936
|
"""
|
|
855
937
|
|
|
856
938
|
if not is_url(filename) and not filename.startswith('/'):
|
|
857
|
-
# We are getting a bare relative path the supposedly devirtualized side.
|
|
939
|
+
# We are getting a bare relative path on the supposedly devirtualized side.
|
|
858
940
|
# Find a real path to it relative to the current directory override.
|
|
859
941
|
work_dir = '.' if not self._current_directory_override else self._current_directory_override
|
|
860
942
|
filename = os.path.join(work_dir, filename)
|
|
861
943
|
|
|
944
|
+
if filename in self._devirtualized_to_virtualized:
|
|
945
|
+
result = self._devirtualized_to_virtualized[filename]
|
|
946
|
+
logger.debug("Re-using virtualized filename %s for %s", result, filename)
|
|
947
|
+
return result
|
|
948
|
+
|
|
949
|
+
if os.path.islink(filename):
|
|
950
|
+
# Recursively resolve symlinks
|
|
951
|
+
here = filename
|
|
952
|
+
# Notice if we have a symlink loop
|
|
953
|
+
seen = {here}
|
|
954
|
+
while os.path.islink(here):
|
|
955
|
+
dest = os.readlink(here)
|
|
956
|
+
if not dest.startswith('/'):
|
|
957
|
+
# Make it absolute
|
|
958
|
+
dest = os.path.join(os.path.dirname(here), dest)
|
|
959
|
+
here = dest
|
|
960
|
+
if here in self._mountpoint_to_file:
|
|
961
|
+
# This points to something mounted into the container, so use that path instead.
|
|
962
|
+
here = self._mountpoint_to_file[here]
|
|
963
|
+
if here in self._devirtualized_to_virtualized:
|
|
964
|
+
# Check the virtualized filenames before following symlinks
|
|
965
|
+
# all the way back to workflow inputs.
|
|
966
|
+
result = self._devirtualized_to_virtualized[here]
|
|
967
|
+
logger.debug("Re-using virtualized filename %s for %s linked from %s", result, here, filename)
|
|
968
|
+
return result
|
|
969
|
+
if here in seen:
|
|
970
|
+
raise RuntimeError(f"Symlink {filename} leads to symlink loop at {here}")
|
|
971
|
+
seen.add(here)
|
|
972
|
+
|
|
973
|
+
if os.path.exists(here):
|
|
974
|
+
logger.debug("Handling symlink %s ultimately to %s", filename, here)
|
|
975
|
+
else:
|
|
976
|
+
logger.error("Handling broken symlink %s ultimately to %s", filename, here)
|
|
977
|
+
filename = here
|
|
978
|
+
|
|
862
979
|
return super()._virtualize_filename(filename)
|
|
863
980
|
|
|
864
981
|
def evaluate_named_expression(context: Union[WDL.Error.SourceNode, WDL.Error.SourcePosition], name: str, expected_type: Optional[WDL.Type.Base], expression: Optional[WDL.Expr.Base], environment: WDLBindings, stdlib: WDL.StdLib.Base) -> WDL.Value.Base:
|
|
@@ -1110,8 +1227,10 @@ def drop_missing_files(environment: WDLBindings, current_directory_override: Opt
|
|
|
1110
1227
|
logger.warning('File %s with type %s does not actually exist at its URI', filename, value_type)
|
|
1111
1228
|
return None
|
|
1112
1229
|
else:
|
|
1230
|
+
# Get the absolute path, not resolving symlinks
|
|
1113
1231
|
effective_path = os.path.abspath(os.path.join(work_dir, filename))
|
|
1114
|
-
if os.path.exists(effective_path):
|
|
1232
|
+
if os.path.islink(effective_path) or os.path.exists(effective_path):
|
|
1233
|
+
# This is a broken symlink or a working symlink or a file.
|
|
1115
1234
|
return filename
|
|
1116
1235
|
else:
|
|
1117
1236
|
logger.warning('File %s with type %s does not actually exist at %s', filename, value_type, effective_path)
|
|
@@ -1125,7 +1244,7 @@ def get_file_paths_in_bindings(environment: WDLBindings) -> List[str]:
|
|
|
1125
1244
|
duplicates are removed.
|
|
1126
1245
|
|
|
1127
1246
|
TODO: Duplicative with WDL.runtime.task._fspaths, except that is internal
|
|
1128
|
-
and supports
|
|
1247
|
+
and supports Directory objects.
|
|
1129
1248
|
"""
|
|
1130
1249
|
|
|
1131
1250
|
paths = []
|
|
@@ -1250,7 +1369,7 @@ class WDLBaseJob(Job):
|
|
|
1250
1369
|
# may have coalesced postprocessing steps deferred by several levels of
|
|
1251
1370
|
# jobs returning other jobs' promised RVs.
|
|
1252
1371
|
self._postprocessing_steps: List[Tuple[str, Union[str, Promised[WDLBindings]]]] = []
|
|
1253
|
-
|
|
1372
|
+
|
|
1254
1373
|
self._wdl_options = wdl_options if wdl_options is not None else {}
|
|
1255
1374
|
|
|
1256
1375
|
assert self._wdl_options.get("container") is not None
|
|
@@ -1306,7 +1425,7 @@ class WDLBaseJob(Job):
|
|
|
1306
1425
|
|
|
1307
1426
|
for action, argument in self._postprocessing_steps:
|
|
1308
1427
|
|
|
1309
|
-
logger.debug("Apply postprocessing
|
|
1428
|
+
logger.debug("Apply postprocessing step: (%s, %s)", action, argument)
|
|
1310
1429
|
|
|
1311
1430
|
# Interpret the mini language of postprocessing steps.
|
|
1312
1431
|
# These are too small to justify being their own separate jobs.
|
|
@@ -1378,7 +1497,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
1378
1497
|
self._namespace = namespace
|
|
1379
1498
|
self._task_path = task_path
|
|
1380
1499
|
|
|
1381
|
-
@report_wdl_errors("evaluate task code")
|
|
1500
|
+
@report_wdl_errors("evaluate task code", exit=True)
|
|
1382
1501
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1383
1502
|
"""
|
|
1384
1503
|
Evaluate inputs and runtime and schedule the task.
|
|
@@ -1407,7 +1526,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
1407
1526
|
# Evaluate the runtime section
|
|
1408
1527
|
runtime_bindings = evaluate_call_inputs(self._task, self._task.runtime, bindings, standard_library)
|
|
1409
1528
|
|
|
1410
|
-
# Fill these in with not-None if the workflow asks for each resource.
|
|
1529
|
+
# Fill these in with not-None if the workflow asks for each resource.
|
|
1411
1530
|
runtime_memory: Optional[int] = None
|
|
1412
1531
|
runtime_cores: Optional[float] = None
|
|
1413
1532
|
runtime_disk: Optional[int] = None
|
|
@@ -1529,6 +1648,123 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1529
1648
|
self._namespace = namespace
|
|
1530
1649
|
self._task_path = task_path
|
|
1531
1650
|
|
|
1651
|
+
###
|
|
1652
|
+
# Runtime code injection system
|
|
1653
|
+
###
|
|
1654
|
+
|
|
1655
|
+
# WDL runtime code injected in the container communicates back to the rest
|
|
1656
|
+
# of the runtime through files in this directory.
|
|
1657
|
+
INJECTED_MESSAGE_DIR = ".toil_wdl_runtime"
|
|
1658
|
+
|
|
1659
|
+
def add_injections(self, command_string: str, task_container: TaskContainer) -> str:
|
|
1660
|
+
"""
|
|
1661
|
+
Inject extra Bash code from the Toil WDL runtime into the command for the container.
|
|
1662
|
+
|
|
1663
|
+
Currently doesn't implement the MiniWDL plugin system, but does add
|
|
1664
|
+
resource usage monitoring to Docker containers.
|
|
1665
|
+
"""
|
|
1666
|
+
if isinstance(task_container, SwarmContainer):
|
|
1667
|
+
# We're running on Docker Swarm, so we need to monitor CPU usage
|
|
1668
|
+
# and so on from inside the container, since it won't be attributed
|
|
1669
|
+
# to Toil child processes in the leader's self-monitoring.
|
|
1670
|
+
# TODO: Mount this from a file Toil installs instead or something.
|
|
1671
|
+
script = textwrap.dedent("""\
|
|
1672
|
+
function _toil_resource_monitor () {
|
|
1673
|
+
# Turn off error checking and echo in here
|
|
1674
|
+
set +ex
|
|
1675
|
+
MESSAGE_DIR="${1}"
|
|
1676
|
+
mkdir -p "${MESSAGE_DIR}"
|
|
1677
|
+
|
|
1678
|
+
function sample_cpu_usec() {
|
|
1679
|
+
if [[ -f /sys/fs/cgroup/cpu.stat ]] ; then
|
|
1680
|
+
awk '{ if ($1 == "usage_usec") {print $2} }' /sys/fs/cgroup/cpu.stat
|
|
1681
|
+
elif [[ -f /sys/fs/cgroup/cpuacct/cpuacct.stat ]] ; then
|
|
1682
|
+
echo $(( $(head -n 1 /sys/fs/cgroup/cpuacct/cpuacct.stat | cut -f2 -d' ') * 10000 ))
|
|
1683
|
+
fi
|
|
1684
|
+
}
|
|
1685
|
+
|
|
1686
|
+
function sample_memory_bytes() {
|
|
1687
|
+
if [[ -f /sys/fs/cgroup/memory.stat ]] ; then
|
|
1688
|
+
awk '{ if ($1 == "anon") { print $2 } }' /sys/fs/cgroup/memory.stat
|
|
1689
|
+
elif [[ -f /sys/fs/cgroup/memory/memory.stat ]] ; then
|
|
1690
|
+
awk '{ if ($1 == "total_rss") { print $2 } }' /sys/fs/cgroup/memory/memory.stat
|
|
1691
|
+
fi
|
|
1692
|
+
}
|
|
1693
|
+
|
|
1694
|
+
while true ; do
|
|
1695
|
+
printf "CPU\\t" >> ${MESSAGE_DIR}/resources.tsv
|
|
1696
|
+
sample_cpu_usec >> ${MESSAGE_DIR}/resources.tsv
|
|
1697
|
+
printf "Memory\\t" >> ${MESSAGE_DIR}/resources.tsv
|
|
1698
|
+
sample_memory_bytes >> ${MESSAGE_DIR}/resources.tsv
|
|
1699
|
+
sleep 1
|
|
1700
|
+
done
|
|
1701
|
+
}
|
|
1702
|
+
""")
|
|
1703
|
+
parts = [script, f"_toil_resource_monitor {self.INJECTED_MESSAGE_DIR} &", command_string]
|
|
1704
|
+
return "\n".join(parts)
|
|
1705
|
+
else:
|
|
1706
|
+
return command_string
|
|
1707
|
+
|
|
1708
|
+
def handle_injection_messages(self, outputs_library: ToilWDLStdLibTaskOutputs) -> None:
    """
    Process everything the injected in-container runtime code left behind.

    Globs for every entry under the injected message directory using the
    given task outputs standard library, and passes each match to
    handle_message_file().

    :param outputs_library: Task outputs standard library whose _glob() is
        used to find the message files.
    """

    # Build the glob pattern matching everything in the message directory
    pattern = WDL.Value.String(os.path.join(self.INJECTED_MESSAGE_DIR, "*"))
    found = outputs_library._glob(pattern)
    logger.debug("Handling message files: %s", found)
    for entry in found.value:
        # Each match wraps its path in .value
        self.handle_message_file(entry.value)
|
|
1717
|
+
|
|
1718
|
+
def handle_message_file(self, file_path: str) -> None:
    """
    Handle a message file received from in-container injected code.

    Takes the host-side path of the file.

    Only a file named ``resources.tsv`` is interpreted; any other file is
    ignored. Each complete line of that file is expected to hold a
    tab-separated ``CPU`` or ``Memory`` sample with an integer value. The
    difference between the last and first CPU samples (divided by 1000000
    to get seconds) and the largest memory sample (integer-divided by 1024)
    are recorded with ResourceMonitor.

    Lines that are incomplete, that do not have exactly two fields, or whose
    value is not an integer are skipped.

    :param file_path: Host-side path of the message file.
    """
    if os.path.basename(file_path) == "resources.tsv":
        # This is a TSV of resource usage info.
        first_cpu_usec: Optional[int] = None
        last_cpu_usec: Optional[int] = None
        max_memory_bytes: Optional[int] = None

        # Use a context manager so the file is closed even if parsing fails.
        with open(file_path) as stream:
            for line in stream:
                if not line.endswith("\n"):
                    # Skip partial lines
                    continue
                # For each full line we got
                parts = line.strip().split("\t")
                if len(parts) != 2:
                    # Skip odd-shaped lines
                    continue
                kind, raw_value = parts
                try:
                    value = int(raw_value)
                except ValueError:
                    # Skip lines whose sample is not an integer, like other
                    # odd-shaped lines, instead of failing the whole task.
                    continue
                if kind == "CPU":
                    # Update CPU summary stats
                    if first_cpu_usec is None:
                        first_cpu_usec = value
                    last_cpu_usec = value
                elif kind == "Memory":
                    # Update memory summary stats
                    if max_memory_bytes is None or max_memory_bytes < value:
                        max_memory_bytes = value

        if max_memory_bytes is not None:
            logger.info("Container used at about %s bytes of memory at peak", max_memory_bytes)
            # Treat it as if used by a child process
            ResourceMonitor.record_extra_memory(max_memory_bytes // 1024)
        if last_cpu_usec is not None:
            # The first sample is always set whenever the last one is
            assert first_cpu_usec is not None
            cpu_seconds = (last_cpu_usec - first_cpu_usec) / 1000000
            logger.info("Container used about %s seconds of CPU time", cpu_seconds)
            # Treat it as if used by a child process
            ResourceMonitor.record_extra_cpu(cpu_seconds)
|
|
1763
|
+
|
|
1764
|
+
###
|
|
1765
|
+
# Helper functions to work out which container runtime we can use
|
|
1766
|
+
###
|
|
1767
|
+
|
|
1532
1768
|
def can_fake_root(self) -> bool:
|
|
1533
1769
|
"""
|
|
1534
1770
|
Determine if --fakeroot is likely to work for Singularity.
|
|
@@ -1559,7 +1795,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1559
1795
|
"""
|
|
1560
1796
|
return "KUBERNETES_SERVICE_HOST" not in os.environ
|
|
1561
1797
|
|
|
1562
|
-
@report_wdl_errors("run task command")
|
|
1798
|
+
@report_wdl_errors("run task command", exit=True)
|
|
1563
1799
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1564
1800
|
"""
|
|
1565
1801
|
Actually run the task.
|
|
@@ -1575,7 +1811,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1575
1811
|
bindings = unwrap(self._task_internal_bindings)
|
|
1576
1812
|
# And the bindings from evaluating the runtime section
|
|
1577
1813
|
runtime_bindings = unwrap(self._runtime_bindings)
|
|
1578
|
-
|
|
1814
|
+
|
|
1579
1815
|
# We have all the resources we need, so run the task
|
|
1580
1816
|
|
|
1581
1817
|
if shutil.which('singularity') and self._wdl_options.get("container") in ["singularity", "auto"]:
|
|
@@ -1644,9 +1880,20 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1644
1880
|
workdir_in_container: Optional[str] = None
|
|
1645
1881
|
|
|
1646
1882
|
if self._task.command:
|
|
1647
|
-
# When the command string references a File, we need to get a path
|
|
1648
|
-
#
|
|
1649
|
-
#
|
|
1883
|
+
# When the command string references a File, we need to get a path
|
|
1884
|
+
# to the file on a local disk, which the command will be able to
|
|
1885
|
+
# actually use, accounting for e.g. containers.
|
|
1886
|
+
#
|
|
1887
|
+
# TODO: Figure out when the command template actually uses File
|
|
1888
|
+
# values and lazily download them.
|
|
1889
|
+
#
|
|
1890
|
+
# For now we just grab all the File values in the inside-the-task
|
|
1891
|
+
# environment, since any of them *might* be used.
|
|
1892
|
+
#
|
|
1893
|
+
# Some also might be expected to be adjacent to files that are
|
|
1894
|
+
# used, like a BAI that doesn't get referenced in a command line
|
|
1895
|
+
# but must be next to its BAM.
|
|
1896
|
+
#
|
|
1650
1897
|
# TODO: MiniWDL can parallelize the fetch
|
|
1651
1898
|
bindings = devirtualize_files(bindings, standard_library)
|
|
1652
1899
|
|
|
@@ -1721,6 +1968,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1721
1968
|
# them all new paths in task_container.input_path_map which we can
|
|
1722
1969
|
# read. We also get a task_container.host_path() to go the other way.
|
|
1723
1970
|
add_paths(task_container, get_file_paths_in_bindings(bindings))
|
|
1971
|
+
# This maps from outside container to inside container
|
|
1724
1972
|
logger.debug("Using container path map: %s", task_container.input_path_map)
|
|
1725
1973
|
|
|
1726
1974
|
# Replace everything with in-container paths for the command.
|
|
@@ -1767,6 +2015,9 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1767
2015
|
# Work out the command string, and unwrap it
|
|
1768
2016
|
command_string: str = hacky_dedent(evaluate_named_expression(self._task, "command", WDL.Type.String(), self._task.command, contained_bindings, command_library).coerce(WDL.Type.String()).value)
|
|
1769
2017
|
|
|
2018
|
+
# Do any command injection we might need to do
|
|
2019
|
+
command_string = self.add_injections(command_string, task_container)
|
|
2020
|
+
|
|
1770
2021
|
# Grab the standard out and error paths. MyPy complains if we call
|
|
1771
2022
|
# them because in the current MiniWDL version they are untyped.
|
|
1772
2023
|
# TODO: MyPy will complain if we accommodate this and they later
|
|
@@ -1786,8 +2037,16 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1786
2037
|
with ExitStack() as cleanup:
|
|
1787
2038
|
task_container._pull(miniwdl_logger, cleanup)
|
|
1788
2039
|
|
|
1789
|
-
#
|
|
2040
|
+
# Log that we are about to run the command in the container
|
|
1790
2041
|
logger.info('Executing command in %s: %s', task_container, command_string)
|
|
2042
|
+
|
|
2043
|
+
# Now our inputs are all downloaded. Let debugging break in (after command is logged).
|
|
2044
|
+
# But we need to hint which host paths are meant to be which container paths
|
|
2045
|
+
host_and_job_paths: List[Tuple[str, str]] = [(k, v) for k, v in task_container.input_path_map.items()]
|
|
2046
|
+
self.files_downloaded_hook(host_and_job_paths)
|
|
2047
|
+
|
|
2048
|
+
# TODO: Really we might want to set up a fake container working directory, to actually help the user.
|
|
2049
|
+
|
|
1791
2050
|
try:
|
|
1792
2051
|
task_container.run(miniwdl_logger, command_string)
|
|
1793
2052
|
except Exception:
|
|
@@ -1834,15 +2093,14 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1834
2093
|
# container-determined strings that are absolute paths to WDL File
|
|
1835
2094
|
# objects, and like MiniWDL we can say we only support
|
|
1836
2095
|
# working-directory-based relative paths for globs.
|
|
1837
|
-
outputs_library = ToilWDLStdLibTaskOutputs(file_store, host_stdout_txt, host_stderr_txt, current_directory_override=workdir_in_container)
|
|
2096
|
+
outputs_library = ToilWDLStdLibTaskOutputs(file_store, host_stdout_txt, host_stderr_txt, task_container.input_path_map, current_directory_override=workdir_in_container)
|
|
2097
|
+
# Make sure files downloaded as inputs get re-used if we re-upload them.
|
|
2098
|
+
outputs_library.share_files(standard_library)
|
|
1838
2099
|
output_bindings = evaluate_output_decls(self._task.outputs, bindings, outputs_library)
|
|
1839
2100
|
|
|
1840
2101
|
# Now we know if the standard output and error were sent somewhere by
|
|
1841
2102
|
# the workflow. If not, we should report them to the leader.
|
|
1842
2103
|
|
|
1843
|
-
# Drop any files from the output which don't actually exist
|
|
1844
|
-
output_bindings = drop_missing_files(output_bindings, current_directory_override=workdir_in_container)
|
|
1845
|
-
|
|
1846
2104
|
if not outputs_library.stderr_used() and os.path.exists(host_stderr_txt):
|
|
1847
2105
|
size = os.path.getsize(host_stderr_txt)
|
|
1848
2106
|
logger.info('Unused standard error at %s of %d bytes', host_stderr_txt, size)
|
|
@@ -1857,10 +2115,17 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1857
2115
|
# Save the whole output stream because the workflow didn't capture it.
|
|
1858
2116
|
file_store.log_user_stream(self._task_path + '.stdout', open(host_stdout_txt, 'rb'))
|
|
1859
2117
|
|
|
1860
|
-
#
|
|
1861
|
-
|
|
1862
|
-
|
|
1863
|
-
#
|
|
2118
|
+
# Collect output messages from any code Toil injected into the task.
|
|
2119
|
+
self.handle_injection_messages(outputs_library)
|
|
2120
|
+
|
|
2121
|
+
# Drop any files from the output which don't actually exist
|
|
2122
|
+
output_bindings = drop_missing_files(output_bindings, current_directory_override=workdir_in_container)
|
|
2123
|
+
for decl in self._task.outputs:
|
|
2124
|
+
if not decl.type.optional and output_bindings[decl.name].value is None:
|
|
2125
|
+
# We have an unacceptable null value. This can happen if a file
|
|
2126
|
+
# is missing but not optional. Don't let it out to annoy the
|
|
2127
|
+
# next task.
|
|
2128
|
+
raise WDL.Error.EvalError(decl, f"non-optional value {decl.name} = {decl.expr} is missing")
|
|
1864
2129
|
|
|
1865
2130
|
# Upload any files in the outputs if not uploaded already. Accounts for how relative paths may still need to be container-relative.
|
|
1866
2131
|
output_bindings = virtualize_files(output_bindings, outputs_library)
|
|
@@ -2893,13 +3158,15 @@ def main() -> None:
|
|
|
2893
3158
|
raise RuntimeError("The output of the WDL job is not a binding.")
|
|
2894
3159
|
|
|
2895
3160
|
# Fetch all the output files
|
|
2896
|
-
# TODO: deduplicate with _devirtualize_filename
|
|
2897
3161
|
def devirtualize_output(filename: str) -> str:
|
|
2898
3162
|
"""
|
|
2899
3163
|
'devirtualize' a file using the "toil" object instead of a filestore.
|
|
2900
3164
|
Returns its local path.
|
|
2901
3165
|
"""
|
|
2902
|
-
|
|
3166
|
+
# Make sure the output directory exists if we have output files
|
|
3167
|
+
# that might need to use it.
|
|
3168
|
+
os.makedirs(output_directory, exist_ok=True)
|
|
3169
|
+
return ToilWDLStdLibBase.devirtualize_to(filename, output_directory, toil, execution_dir)
|
|
2903
3170
|
|
|
2904
3171
|
# Make all the files local files
|
|
2905
3172
|
output_bindings = map_over_files_in_bindings(output_bindings, devirtualize_output)
|