toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +39 -13
- toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/lsf.py +7 -7
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +87 -16
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +44 -8
- toil/common.py +544 -753
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +595 -574
- toil/cwl/utils.py +55 -10
- toil/exceptions.py +1 -1
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +88 -14
- toil/fileStores/cachingFileStore.py +610 -549
- toil/fileStores/nonCachingFileStore.py +46 -22
- toil/job.py +182 -101
- toil/jobStores/abstractJobStore.py +161 -95
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +6 -6
- toil/jobStores/fileJobStore.py +116 -18
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +87 -56
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +26 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +49 -2
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +9 -2
- toil/lib/threading.py +101 -38
- toil/options/common.py +736 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +37 -0
- toil/provisioners/abstractProvisioner.py +9 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +41 -9
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +24 -8
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +438 -223
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +2 -3
- toil/test/jobStores/jobStoreTest.py +34 -21
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +12 -17
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
- toil/test/options/options.py +37 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +23 -11
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +23 -3
- toil/test/wdl/wdltoil_test.py +223 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +445 -305
- toil/utils/toilStatus.py +2 -5
- toil/version.py +10 -10
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +1257 -492
- toil/worker.py +55 -46
- toil-6.1.0.dist-info/METADATA +124 -0
- toil-6.1.0.dist-info/RECORD +241 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/METADATA +0 -118
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/job.py
CHANGED
|
@@ -43,12 +43,16 @@ from typing import (TYPE_CHECKING,
|
|
|
43
43
|
cast,
|
|
44
44
|
overload)
|
|
45
45
|
|
|
46
|
+
from configargparse import ArgParser
|
|
47
|
+
|
|
48
|
+
from toil.bus import Names
|
|
46
49
|
from toil.lib.compatibility import deprecated
|
|
47
50
|
|
|
48
51
|
if sys.version_info >= (3, 8):
|
|
49
52
|
from typing import TypedDict
|
|
50
53
|
else:
|
|
51
54
|
from typing_extensions import TypedDict
|
|
55
|
+
|
|
52
56
|
import dill
|
|
53
57
|
# TODO: When this gets into the standard library, get it from there and drop
|
|
54
58
|
# typing-extensions dependency on Pythons that are new enough.
|
|
@@ -70,10 +74,11 @@ from toil.resource import ModuleDescriptor
|
|
|
70
74
|
from toil.statsAndLogging import set_logging_from_options
|
|
71
75
|
|
|
72
76
|
if TYPE_CHECKING:
|
|
77
|
+
from optparse import OptionParser
|
|
78
|
+
|
|
73
79
|
from toil.batchSystems.abstractBatchSystem import BatchJobExitReason
|
|
74
80
|
from toil.fileStores.abstractFileStore import AbstractFileStore
|
|
75
81
|
from toil.jobStores.abstractJobStore import AbstractJobStore
|
|
76
|
-
from optparse import OptionParser
|
|
77
82
|
|
|
78
83
|
logger = logging.getLogger(__name__)
|
|
79
84
|
|
|
@@ -266,7 +271,8 @@ def parse_accelerator(spec: Union[int, str, Dict[str, Union[str, int]]]) -> Acce
|
|
|
266
271
|
elif possible_description in APIS:
|
|
267
272
|
parsed['api'] = possible_description
|
|
268
273
|
else:
|
|
269
|
-
|
|
274
|
+
if possible_description is not None:
|
|
275
|
+
parsed['model'] = possible_description
|
|
270
276
|
elif isinstance(spec, dict):
|
|
271
277
|
# It's a dict, so merge with the defaults.
|
|
272
278
|
parsed.update(spec)
|
|
@@ -423,6 +429,7 @@ class Requirer:
|
|
|
423
429
|
raise RuntimeError(f"Config assigned multiple times to {self}")
|
|
424
430
|
self._config = config
|
|
425
431
|
|
|
432
|
+
|
|
426
433
|
def __getstate__(self) -> Dict[str, Any]:
|
|
427
434
|
"""Return the dict to use as the instance's __dict__ when pickling."""
|
|
428
435
|
# We want to exclude the config from pickling.
|
|
@@ -449,19 +456,15 @@ class Requirer:
|
|
|
449
456
|
|
|
450
457
|
def __deepcopy__(self, memo: Any) -> "Requirer":
|
|
451
458
|
"""Return a semantically-deep copy of the object, for :meth:`copy.deepcopy`."""
|
|
452
|
-
#
|
|
453
|
-
# that
|
|
454
|
-
|
|
455
|
-
#
|
|
456
|
-
implementation = self.__deepcopy__
|
|
457
|
-
self.__deepcopy__ = None # type: ignore[assignment]
|
|
458
|
-
|
|
459
|
-
# Do the deepcopy which omits the config via __getstate__ override
|
|
460
|
-
clone = copy.deepcopy(self, memo)
|
|
459
|
+
# We used to use <https://stackoverflow.com/a/40484215> and
|
|
460
|
+
# <https://stackoverflow.com/a/71125311> but that would result in
|
|
461
|
+
# copies sometimes resurrecting weirdly old job versions. So now we
|
|
462
|
+
# just actually implement __deepcopy__.
|
|
461
463
|
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
464
|
+
clone = type(self).__new__(self.__class__)
|
|
465
|
+
state = self.__getstate__()
|
|
466
|
+
clone_state = copy.deepcopy(state, memo)
|
|
467
|
+
clone.__dict__.update(clone_state)
|
|
465
468
|
|
|
466
469
|
if self._config is not None:
|
|
467
470
|
# Share a config reference
|
|
@@ -598,7 +601,8 @@ class Requirer:
|
|
|
598
601
|
)
|
|
599
602
|
return value
|
|
600
603
|
elif self._config is not None:
|
|
601
|
-
|
|
604
|
+
values = [getattr(self._config, 'default_' + requirement, None), getattr(self._config, 'default' + requirement.capitalize(), None)]
|
|
605
|
+
value = values[0] if values[0] is not None else values[1]
|
|
602
606
|
if value is None:
|
|
603
607
|
raise AttributeError(
|
|
604
608
|
f"Encountered None for default '{requirement}' requirement "
|
|
@@ -707,7 +711,6 @@ class Requirer:
|
|
|
707
711
|
parts = ['no requirements']
|
|
708
712
|
return ', '.join(parts)
|
|
709
713
|
|
|
710
|
-
|
|
711
714
|
class JobDescription(Requirer):
|
|
712
715
|
"""
|
|
713
716
|
Stores all the information that the Toil Leader ever needs to know about a Job.
|
|
@@ -795,15 +798,30 @@ class JobDescription(Requirer):
|
|
|
795
798
|
# default value for this workflow execution.
|
|
796
799
|
self._remainingTryCount = None
|
|
797
800
|
|
|
798
|
-
# Holds FileStore FileIDs of the files that
|
|
799
|
-
#
|
|
800
|
-
#
|
|
801
|
-
#
|
|
801
|
+
# Holds FileStore FileIDs of the files that should be seen as deleted,
|
|
802
|
+
# as part of a transaction with the writing of this version of the job
|
|
803
|
+
# to the job store. Used to journal deletions of files and recover from
|
|
804
|
+
# a worker crash between committing a JobDescription update (for
|
|
805
|
+
# example, severing the body of a completed job from the
|
|
806
|
+
# JobDescription) and actually executing the requested deletions (i.e.
|
|
807
|
+
# the deletions made by executing the body).
|
|
808
|
+
#
|
|
809
|
+
# Since the files being deleted might be required to execute the job
|
|
810
|
+
# body, we can't delete them first, but we also don't want to leave
|
|
811
|
+
# them behind if we die right after saving the JobDescription.
|
|
812
|
+
#
|
|
813
|
+
# This will be empty at all times except when a new version of a job is
|
|
814
|
+
# in the process of being committed.
|
|
802
815
|
self.filesToDelete = []
|
|
803
816
|
|
|
804
|
-
# Holds
|
|
805
|
-
# job, and which should be deleted when this job finally is deleted
|
|
806
|
-
|
|
817
|
+
# Holds job names and IDs of the jobs that have been chained into this
|
|
818
|
+
# job, and which should be deleted when this job finally is deleted
|
|
819
|
+
# (but not before). The successor relationships with them will have
|
|
820
|
+
# been cut, so we need to hold onto them somehow. Includes each
|
|
821
|
+
# chained-in job with its original ID, and also this job's ID with its
|
|
822
|
+
# original names, or is empty if no chaining has happened.
|
|
823
|
+
# The first job in the chain comes first in the list.
|
|
824
|
+
self._merged_job_names: List[Names] = []
|
|
807
825
|
|
|
808
826
|
# The number of direct predecessors of the job. Needs to be stored at
|
|
809
827
|
# the JobDescription to support dynamically-created jobs with multiple
|
|
@@ -849,10 +867,29 @@ class JobDescription(Requirer):
|
|
|
849
867
|
# Every time we update a job description in place in the job store, we
|
|
850
868
|
# increment this.
|
|
851
869
|
self._job_version = 0
|
|
870
|
+
# And we log who made the version (by PID)
|
|
871
|
+
self._job_version_writer = 0
|
|
872
|
+
|
|
873
|
+
def get_names(self) -> Names:
|
|
874
|
+
"""
|
|
875
|
+
Get the names and ID of this job as a named tuple.
|
|
876
|
+
"""
|
|
877
|
+
return Names(self.jobName, self.unitName, self.displayName, self.displayName, str(self.jobStoreID))
|
|
852
878
|
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
879
|
+
def get_chain(self) -> List[Names]:
|
|
880
|
+
"""
|
|
881
|
+
Get all the jobs that executed in this job's chain, in order.
|
|
882
|
+
|
|
883
|
+
For each job, produces a named tuple with its various names and its
|
|
884
|
+
original job store ID. The jobs in the chain are in execution order.
|
|
885
|
+
|
|
886
|
+
If the job hasn't run yet or it didn't chain, produces a one-item list.
|
|
887
|
+
"""
|
|
888
|
+
if len(self._merged_job_names) == 0:
|
|
889
|
+
# We haven't merged so we're just ourselves.
|
|
890
|
+
return [self.get_names()]
|
|
891
|
+
else:
|
|
892
|
+
return list(self._merged_job_names)
|
|
856
893
|
|
|
857
894
|
def serviceHostIDsInBatches(self) -> Iterator[List[str]]:
|
|
858
895
|
"""
|
|
@@ -1027,17 +1064,40 @@ class JobDescription(Requirer):
|
|
|
1027
1064
|
logger.debug('%s is adopting successor phases from %s of: %s', self, other, old_phases)
|
|
1028
1065
|
self.successor_phases = old_phases + self.successor_phases
|
|
1029
1066
|
|
|
1030
|
-
#
|
|
1031
|
-
#
|
|
1032
|
-
|
|
1067
|
+
# When deleting, we need to delete the files for our old ID, and also
|
|
1068
|
+
# anything that needed to be deleted for the job we are replacing. And
|
|
1069
|
+
# we need to keep track of all the names of jobs involved for logging.
|
|
1070
|
+
|
|
1071
|
+
# We need first the job we are merging into if nothing has merged into
|
|
1072
|
+
# it yet, then anything that already merged into it (including it),
|
|
1073
|
+
# then us if nothing has yet merged into us, then anything that merged
|
|
1074
|
+
# into us (including us)
|
|
1075
|
+
_merged_job_names = []
|
|
1076
|
+
if len(other._merged_job_names) == 0:
|
|
1077
|
+
_merged_job_names.append(other.get_names())
|
|
1078
|
+
_merged_job_names += other._merged_job_names
|
|
1079
|
+
if len(self._merged_job_names) == 0:
|
|
1080
|
+
_merged_job_names.append(self.get_names())
|
|
1081
|
+
_merged_job_names += self._merged_job_names
|
|
1082
|
+
self._merged_job_names = _merged_job_names
|
|
1083
|
+
|
|
1084
|
+
# Now steal its ID.
|
|
1033
1085
|
self.jobStoreID = other.jobStoreID
|
|
1034
1086
|
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
self.filesToDelete
|
|
1038
|
-
|
|
1087
|
+
if len(other.filesToDelete) > 0:
|
|
1088
|
+
raise RuntimeError("Trying to take on the ID of a job that is in the process of being committed!")
|
|
1089
|
+
if len(self.filesToDelete) > 0:
|
|
1090
|
+
raise RuntimeError("Trying to take on the ID of anothe job while in the process of being committed!")
|
|
1039
1091
|
|
|
1040
1092
|
self._job_version = other._job_version
|
|
1093
|
+
self._job_version_writer = os.getpid()
|
|
1094
|
+
|
|
1095
|
+
def check_new_version(self, other: "JobDescription") -> None:
|
|
1096
|
+
"""
|
|
1097
|
+
Make sure a prospective new version of the JobDescription is actually moving forward in time and not backward.
|
|
1098
|
+
"""
|
|
1099
|
+
if other._job_version < self._job_version:
|
|
1100
|
+
raise RuntimeError(f"Cannot replace {self} from PID {self._job_version_writer} with older version {other} from PID {other._job_version_writer}")
|
|
1041
1101
|
|
|
1042
1102
|
def addChild(self, childID: str) -> None:
|
|
1043
1103
|
"""Make the job with the given ID a child of the described job."""
|
|
@@ -1055,7 +1115,8 @@ class JobDescription(Requirer):
|
|
|
1055
1115
|
first, and must have already been added.
|
|
1056
1116
|
"""
|
|
1057
1117
|
# Make sure we aren't clobbering something
|
|
1058
|
-
|
|
1118
|
+
if serviceID in self.serviceTree:
|
|
1119
|
+
raise RuntimeError("Job is already in the service tree.")
|
|
1059
1120
|
self.serviceTree[serviceID] = []
|
|
1060
1121
|
if parentServiceID is not None:
|
|
1061
1122
|
self.serviceTree[parentServiceID].append(serviceID)
|
|
@@ -1124,9 +1185,11 @@ class JobDescription(Requirer):
|
|
|
1124
1185
|
from toil.batchSystems.abstractBatchSystem import BatchJobExitReason
|
|
1125
1186
|
|
|
1126
1187
|
# Old version of this function used to take a config. Make sure that isn't happening.
|
|
1127
|
-
|
|
1188
|
+
if isinstance(exit_status, Config):
|
|
1189
|
+
raise RuntimeError("Passing a Config as an exit status.")
|
|
1128
1190
|
# Make sure we have an assigned config.
|
|
1129
|
-
|
|
1191
|
+
if self._config is None:
|
|
1192
|
+
raise RuntimeError("The job's config is not assigned.")
|
|
1130
1193
|
|
|
1131
1194
|
if self._config.enableUnlimitedPreemptibleRetries and exit_reason == BatchJobExitReason.LOST:
|
|
1132
1195
|
logger.info("*Not* reducing try count (%s) of job %s with ID %s",
|
|
@@ -1217,6 +1280,14 @@ class JobDescription(Requirer):
|
|
|
1217
1280
|
def __repr__(self):
|
|
1218
1281
|
return f'{self.__class__.__name__}( **{self.__dict__!r} )'
|
|
1219
1282
|
|
|
1283
|
+
def reserve_versions(self, count: int) -> None:
|
|
1284
|
+
"""
|
|
1285
|
+
Reserve a job version number for later, for journaling asynchronously.
|
|
1286
|
+
"""
|
|
1287
|
+
self._job_version += count
|
|
1288
|
+
self._job_version_writer = os.getpid()
|
|
1289
|
+
logger.debug("Skip ahead to job version: %s", self)
|
|
1290
|
+
|
|
1220
1291
|
def pre_update_hook(self) -> None:
|
|
1221
1292
|
"""
|
|
1222
1293
|
Run before pickling and saving a created or updated version of this job.
|
|
@@ -1224,28 +1295,9 @@ class JobDescription(Requirer):
|
|
|
1224
1295
|
Called by the job store.
|
|
1225
1296
|
"""
|
|
1226
1297
|
self._job_version += 1
|
|
1298
|
+
self._job_version_writer = os.getpid()
|
|
1227
1299
|
logger.debug("New job version: %s", self)
|
|
1228
1300
|
|
|
1229
|
-
def get_job_kind(self) -> str:
|
|
1230
|
-
"""
|
|
1231
|
-
Return an identifying string for the job.
|
|
1232
|
-
|
|
1233
|
-
The result may contain spaces.
|
|
1234
|
-
|
|
1235
|
-
Returns: Either the unit name, job name, or display name, which identifies
|
|
1236
|
-
the kind of job it is to toil.
|
|
1237
|
-
Otherwise "Unknown Job" in case no identifier is available
|
|
1238
|
-
"""
|
|
1239
|
-
if self.unitName:
|
|
1240
|
-
return self.unitName
|
|
1241
|
-
elif self.jobName:
|
|
1242
|
-
return self.jobName
|
|
1243
|
-
elif self.displayName:
|
|
1244
|
-
return self.displayName
|
|
1245
|
-
else:
|
|
1246
|
-
return "Unknown Job"
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
1301
|
class ServiceJobDescription(JobDescription):
|
|
1250
1302
|
"""A description of a job that hosts a service."""
|
|
1251
1303
|
|
|
@@ -1309,12 +1361,14 @@ class CheckpointJobDescription(JobDescription):
|
|
|
1309
1361
|
|
|
1310
1362
|
Returns a list with the IDs of any successors deleted.
|
|
1311
1363
|
"""
|
|
1312
|
-
|
|
1364
|
+
if self.checkpoint is None:
|
|
1365
|
+
raise RuntimeError("Cannot restart a checkpoint job. The checkpoint was never set.")
|
|
1313
1366
|
successorsDeleted = []
|
|
1314
1367
|
all_successors = list(self.allSuccessors())
|
|
1315
1368
|
if len(all_successors) > 0 or self.serviceTree or self.command is not None:
|
|
1316
1369
|
if self.command is not None:
|
|
1317
|
-
|
|
1370
|
+
if self.command != self.checkpoint:
|
|
1371
|
+
raise RuntimeError("The command and checkpoint are not the same.")
|
|
1318
1372
|
logger.debug("Checkpoint job already has command set to run")
|
|
1319
1373
|
else:
|
|
1320
1374
|
self.command = self.checkpoint
|
|
@@ -1600,8 +1654,8 @@ class Job:
|
|
|
1600
1654
|
|
|
1601
1655
|
:return: childJob: for call chaining
|
|
1602
1656
|
"""
|
|
1603
|
-
|
|
1604
|
-
|
|
1657
|
+
if not isinstance(childJob, Job):
|
|
1658
|
+
raise RuntimeError("The type of the child job is not a job.")
|
|
1605
1659
|
# Join the job graphs
|
|
1606
1660
|
self._jobGraphsJoined(childJob)
|
|
1607
1661
|
# Remember the child relationship
|
|
@@ -1627,8 +1681,8 @@ class Job:
|
|
|
1627
1681
|
|
|
1628
1682
|
:return: followOnJob for call chaining
|
|
1629
1683
|
"""
|
|
1630
|
-
|
|
1631
|
-
|
|
1684
|
+
if not isinstance(followOnJob, Job):
|
|
1685
|
+
raise RuntimeError("The type of the follow-on job is not a job.")
|
|
1632
1686
|
# Join the job graphs
|
|
1633
1687
|
self._jobGraphsJoined(followOnJob)
|
|
1634
1688
|
# Remember the follow-on relationship
|
|
@@ -1651,7 +1705,7 @@ class Job:
|
|
|
1651
1705
|
return self._description.hasChild(followOnJob.jobStoreID)
|
|
1652
1706
|
|
|
1653
1707
|
def addService(
|
|
1654
|
-
self, service: "Service", parentService: Optional["Service"] = None
|
|
1708
|
+
self, service: "Job.Service", parentService: Optional["Job.Service"] = None
|
|
1655
1709
|
) -> "Promise":
|
|
1656
1710
|
"""
|
|
1657
1711
|
Add a service.
|
|
@@ -1698,7 +1752,7 @@ class Job:
|
|
|
1698
1752
|
# Return the promise for the service's startup result
|
|
1699
1753
|
return hostingJob.rv()
|
|
1700
1754
|
|
|
1701
|
-
def hasService(self, service: "Service") -> bool:
|
|
1755
|
+
def hasService(self, service: "Job.Service") -> bool:
|
|
1702
1756
|
"""Return True if the given Service is a service of this job, and False otherwise."""
|
|
1703
1757
|
return service.hostID is None or self._description.hasServiceHostJob(service.hostID)
|
|
1704
1758
|
|
|
@@ -1781,8 +1835,8 @@ class Job:
|
|
|
1781
1835
|
return self._tempDir
|
|
1782
1836
|
|
|
1783
1837
|
def log(self, text: str, level=logging.INFO) -> None:
|
|
1784
|
-
"""Log using :func:`fileStore.
|
|
1785
|
-
self._fileStore.
|
|
1838
|
+
"""Log using :func:`fileStore.log_to_leader`."""
|
|
1839
|
+
self._fileStore.log_to_leader(text, level)
|
|
1786
1840
|
|
|
1787
1841
|
@staticmethod
|
|
1788
1842
|
def wrapFn(fn, *args, **kwargs) -> "FunctionWrappingJob":
|
|
@@ -1991,7 +2045,8 @@ class Job:
|
|
|
1991
2045
|
for successor in [self._registry[jID] for jID in self.description.allSuccessors() if jID in self._registry] + extraEdges[self]:
|
|
1992
2046
|
# Grab all the successors in the current registry (i.e. added from this node) and look at them.
|
|
1993
2047
|
successor._checkJobGraphAcylicDFS(stack, visited, extraEdges)
|
|
1994
|
-
|
|
2048
|
+
if stack.pop() != self:
|
|
2049
|
+
raise RuntimeError("The stack ordering/elements was changed.")
|
|
1995
2050
|
if self in stack:
|
|
1996
2051
|
stack.append(self)
|
|
1997
2052
|
raise JobGraphDeadlockException("A cycle of job dependencies has been detected '%s'" % stack)
|
|
@@ -2109,37 +2164,49 @@ class Job:
|
|
|
2109
2164
|
"""Used to setup and run Toil workflow."""
|
|
2110
2165
|
|
|
2111
2166
|
@staticmethod
|
|
2112
|
-
def getDefaultArgumentParser() -> ArgumentParser:
|
|
2167
|
+
def getDefaultArgumentParser(jobstore_as_flag: bool = False) -> ArgumentParser:
|
|
2113
2168
|
"""
|
|
2114
2169
|
Get argument parser with added toil workflow options.
|
|
2115
2170
|
|
|
2171
|
+
:param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
|
|
2116
2172
|
:returns: The argument parser used by a toil workflow with added Toil options.
|
|
2117
2173
|
"""
|
|
2118
|
-
parser =
|
|
2119
|
-
Job.Runner.addToilOptions(parser)
|
|
2174
|
+
parser = ArgParser(formatter_class=ArgumentDefaultsHelpFormatter)
|
|
2175
|
+
Job.Runner.addToilOptions(parser, jobstore_as_flag=jobstore_as_flag)
|
|
2120
2176
|
return parser
|
|
2121
2177
|
|
|
2122
2178
|
@staticmethod
|
|
2123
|
-
def getDefaultOptions(jobStore: str) -> Namespace:
|
|
2179
|
+
def getDefaultOptions(jobStore: Optional[str] = None, jobstore_as_flag: bool = False) -> Namespace:
|
|
2124
2180
|
"""
|
|
2125
2181
|
Get default options for a toil workflow.
|
|
2126
2182
|
|
|
2127
2183
|
:param jobStore: A string describing the jobStore \
|
|
2128
2184
|
for the workflow.
|
|
2185
|
+
:param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
|
|
2129
2186
|
:returns: The options used by a toil workflow.
|
|
2130
2187
|
"""
|
|
2131
|
-
|
|
2132
|
-
|
|
2188
|
+
# setting jobstore_as_flag to True allows the user to declare the jobstore in the config file instead
|
|
2189
|
+
if not jobstore_as_flag and jobStore is None:
|
|
2190
|
+
raise RuntimeError("The jobstore argument cannot be missing if the jobstore_as_flag argument is set "
|
|
2191
|
+
"to False!")
|
|
2192
|
+
parser = Job.Runner.getDefaultArgumentParser(jobstore_as_flag=jobstore_as_flag)
|
|
2193
|
+
arguments = []
|
|
2194
|
+
if jobstore_as_flag and jobStore is not None:
|
|
2195
|
+
arguments = ["--jobstore", jobStore]
|
|
2196
|
+
if not jobstore_as_flag and jobStore is not None:
|
|
2197
|
+
arguments = [jobStore]
|
|
2198
|
+
return parser.parse_args(args=arguments)
|
|
2133
2199
|
|
|
2134
2200
|
@staticmethod
|
|
2135
|
-
def addToilOptions(parser: Union["OptionParser", ArgumentParser]) -> None:
|
|
2201
|
+
def addToilOptions(parser: Union["OptionParser", ArgumentParser], jobstore_as_flag: bool = False) -> None:
|
|
2136
2202
|
"""
|
|
2137
2203
|
Adds the default toil options to an :mod:`optparse` or :mod:`argparse`
|
|
2138
2204
|
parser object.
|
|
2139
2205
|
|
|
2140
2206
|
:param parser: Options object to add toil options to.
|
|
2207
|
+
:param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
|
|
2141
2208
|
"""
|
|
2142
|
-
addOptions(parser)
|
|
2209
|
+
addOptions(parser, jobstore_as_flag=jobstore_as_flag)
|
|
2143
2210
|
|
|
2144
2211
|
@staticmethod
|
|
2145
2212
|
def startToil(job: "Job", options) -> Any:
|
|
@@ -2279,8 +2346,8 @@ class Job:
|
|
|
2279
2346
|
unpickler = FilteredUnpickler(fileHandle)
|
|
2280
2347
|
|
|
2281
2348
|
runnable = unpickler.load()
|
|
2282
|
-
if requireInstanceOf is not None:
|
|
2283
|
-
|
|
2349
|
+
if requireInstanceOf is not None and not isinstance(runnable, requireInstanceOf):
|
|
2350
|
+
raise RuntimeError(f"Did not find a {requireInstanceOf} when expected")
|
|
2284
2351
|
|
|
2285
2352
|
return runnable
|
|
2286
2353
|
|
|
@@ -2450,7 +2517,8 @@ class Job:
|
|
|
2450
2517
|
|
|
2451
2518
|
# We can't save the job in the right place for cleanup unless the
|
|
2452
2519
|
# description has a real ID.
|
|
2453
|
-
|
|
2520
|
+
if isinstance(self.jobStoreID, TemporaryID):
|
|
2521
|
+
raise RuntimeError(f"Tried to save job {self} without ID assigned!")
|
|
2454
2522
|
|
|
2455
2523
|
# Note that we can't accept any more requests for our return value
|
|
2456
2524
|
self._disablePromiseRegistration()
|
|
@@ -2553,10 +2621,11 @@ class Job:
|
|
|
2553
2621
|
# Set up to save last job first, so promises flow the right way
|
|
2554
2622
|
ordering.reverse()
|
|
2555
2623
|
|
|
2556
|
-
logger.
|
|
2624
|
+
logger.debug("Saving graph of %d jobs, %d non-service, %d new", len(allJobs), len(ordering), len(fakeToReal))
|
|
2557
2625
|
|
|
2558
2626
|
# Make sure we're the root
|
|
2559
|
-
|
|
2627
|
+
if ordering[-1] != self:
|
|
2628
|
+
raise RuntimeError("The current job is not the root.")
|
|
2560
2629
|
|
|
2561
2630
|
# Don't verify the ordering length: it excludes service host jobs.
|
|
2562
2631
|
ordered_ids = {o.jobStoreID for o in ordering}
|
|
@@ -2572,17 +2641,17 @@ class Job:
|
|
|
2572
2641
|
self._fulfillPromises(returnValues, jobStore)
|
|
2573
2642
|
|
|
2574
2643
|
for job in ordering:
|
|
2575
|
-
logger.
|
|
2644
|
+
logger.debug("Processing job %s", job.description)
|
|
2576
2645
|
for serviceBatch in reversed(list(job.description.serviceHostIDsInBatches())):
|
|
2577
2646
|
# For each batch of service host jobs in reverse order they start
|
|
2578
2647
|
for serviceID in serviceBatch:
|
|
2579
|
-
logger.
|
|
2648
|
+
logger.debug("Processing service %s", serviceID)
|
|
2580
2649
|
if serviceID in self._registry:
|
|
2581
2650
|
# It's a new service
|
|
2582
2651
|
|
|
2583
2652
|
# Find the actual job
|
|
2584
2653
|
serviceJob = self._registry[serviceID]
|
|
2585
|
-
logger.
|
|
2654
|
+
logger.debug("Saving service %s", serviceJob.description)
|
|
2586
2655
|
# Pickle the service body, which triggers all the promise stuff
|
|
2587
2656
|
serviceJob.saveBody(jobStore)
|
|
2588
2657
|
if job != self or saveSelf:
|
|
@@ -2641,7 +2710,8 @@ class Job:
|
|
|
2641
2710
|
command = jobDescription.command
|
|
2642
2711
|
|
|
2643
2712
|
commandTokens = command.split()
|
|
2644
|
-
|
|
2713
|
+
if "_toil" != commandTokens[0]:
|
|
2714
|
+
raise RuntimeError("An invalid command was passed into the job.")
|
|
2645
2715
|
userModule = ModuleDescriptor.fromCommand(commandTokens[2:])
|
|
2646
2716
|
logger.debug('Loading user module %s.', userModule)
|
|
2647
2717
|
userModule = cls._loadUserModule(userModule)
|
|
@@ -2732,7 +2802,8 @@ class Job:
|
|
|
2732
2802
|
clock=str(totalCpuTime - startClock),
|
|
2733
2803
|
class_name=self._jobName(),
|
|
2734
2804
|
memory=str(totalMemoryUsage),
|
|
2735
|
-
requested_cores=str(self.cores)
|
|
2805
|
+
requested_cores=str(self.cores),
|
|
2806
|
+
disk=str(fileStore.get_disk_usage())
|
|
2736
2807
|
)
|
|
2737
2808
|
)
|
|
2738
2809
|
|
|
@@ -3025,22 +3096,23 @@ class EncapsulatedJob(Job):
|
|
|
3025
3096
|
self.encapsulatedFollowOn = None
|
|
3026
3097
|
|
|
3027
3098
|
def addChild(self, childJob):
|
|
3028
|
-
|
|
3029
|
-
"Children cannot be added to EncapsulatedJob while it is running"
|
|
3099
|
+
if self.encapsulatedFollowOn is None:
|
|
3100
|
+
raise RuntimeError("Children cannot be added to EncapsulatedJob while it is running")
|
|
3030
3101
|
return Job.addChild(self.encapsulatedFollowOn, childJob)
|
|
3031
3102
|
|
|
3032
3103
|
def addService(self, service, parentService=None):
|
|
3033
|
-
|
|
3034
|
-
"Services cannot be added to EncapsulatedJob while it is running"
|
|
3104
|
+
if self.encapsulatedFollowOn is None:
|
|
3105
|
+
raise RuntimeError("Services cannot be added to EncapsulatedJob while it is running")
|
|
3035
3106
|
return Job.addService(self.encapsulatedFollowOn, service, parentService=parentService)
|
|
3036
3107
|
|
|
3037
3108
|
def addFollowOn(self, followOnJob):
|
|
3038
|
-
|
|
3039
|
-
"Follow-ons cannot be added to EncapsulatedJob while it is running"
|
|
3109
|
+
if self.encapsulatedFollowOn is None:
|
|
3110
|
+
raise RuntimeError("Follow-ons cannot be added to EncapsulatedJob while it is running")
|
|
3040
3111
|
return Job.addFollowOn(self.encapsulatedFollowOn, followOnJob)
|
|
3041
3112
|
|
|
3042
3113
|
def rv(self, *path) -> "Promise":
|
|
3043
|
-
|
|
3114
|
+
if self.encapsulatedJob is None:
|
|
3115
|
+
raise RuntimeError("The encapsulated job was not set.")
|
|
3044
3116
|
return self.encapsulatedJob.rv(*path)
|
|
3045
3117
|
|
|
3046
3118
|
def prepareForPromiseRegistration(self, jobStore):
|
|
@@ -3052,7 +3124,8 @@ class EncapsulatedJob(Job):
|
|
|
3052
3124
|
self.encapsulatedJob.prepareForPromiseRegistration(jobStore)
|
|
3053
3125
|
|
|
3054
3126
|
def _disablePromiseRegistration(self):
|
|
3055
|
-
|
|
3127
|
+
if self.encapsulatedJob is None:
|
|
3128
|
+
raise RuntimeError("The encapsulated job was not set.")
|
|
3056
3129
|
super()._disablePromiseRegistration()
|
|
3057
3130
|
self.encapsulatedJob._disablePromiseRegistration()
|
|
3058
3131
|
|
|
@@ -3068,7 +3141,8 @@ class EncapsulatedJob(Job):
|
|
|
3068
3141
|
return self.__class__, (None,)
|
|
3069
3142
|
|
|
3070
3143
|
def getUserScript(self):
|
|
3071
|
-
|
|
3144
|
+
if self.encapsulatedJob is None:
|
|
3145
|
+
raise RuntimeError("The encapsulated job was not set.")
|
|
3072
3146
|
return self.encapsulatedJob.getUserScript()
|
|
3073
3147
|
|
|
3074
3148
|
|
|
@@ -3085,7 +3159,8 @@ class ServiceHostJob(Job):
|
|
|
3085
3159
|
"""
|
|
3086
3160
|
|
|
3087
3161
|
# Make sure the service hasn't been given a host already.
|
|
3088
|
-
|
|
3162
|
+
if service.hostID is not None:
|
|
3163
|
+
raise RuntimeError("Cannot set the host. The service has already been given a host.")
|
|
3089
3164
|
|
|
3090
3165
|
# Make ourselves with name info from the Service and a
|
|
3091
3166
|
# ServiceJobDescription that has the service control flags.
|
|
@@ -3172,14 +3247,17 @@ class ServiceHostJob(Job):
|
|
|
3172
3247
|
|
|
3173
3248
|
#Now flag that the service is running so jobs can connect to it
|
|
3174
3249
|
logger.debug("Removing the start jobStoreID to indicate that establishment of the service")
|
|
3175
|
-
|
|
3250
|
+
if self.description.startJobStoreID is None:
|
|
3251
|
+
raise RuntimeError("No start jobStoreID to remove.")
|
|
3176
3252
|
if fileStore.jobStore.file_exists(self.description.startJobStoreID):
|
|
3177
3253
|
fileStore.jobStore.delete_file(self.description.startJobStoreID)
|
|
3178
|
-
|
|
3254
|
+
if fileStore.jobStore.file_exists(self.description.startJobStoreID):
|
|
3255
|
+
raise RuntimeError("The start jobStoreID is not a file.")
|
|
3179
3256
|
|
|
3180
3257
|
#Now block until we are told to stop, which is indicated by the removal
|
|
3181
3258
|
#of a file
|
|
3182
|
-
|
|
3259
|
+
if self.description.terminateJobStoreID is None:
|
|
3260
|
+
raise RuntimeError("No terminate jobStoreID to use.")
|
|
3183
3261
|
while True:
|
|
3184
3262
|
# Check for the terminate signal
|
|
3185
3263
|
if not fileStore.jobStore.file_exists(self.description.terminateJobStoreID):
|
|
@@ -3273,7 +3351,8 @@ class Promise:
|
|
|
3273
3351
|
@staticmethod
|
|
3274
3352
|
def __new__(cls, *args) -> "Promise":
|
|
3275
3353
|
"""Instantiate this Promise."""
|
|
3276
|
-
|
|
3354
|
+
if len(args) != 2:
|
|
3355
|
+
raise RuntimeError("Cannot instantiate promise. Invalid number of arguments given (Expected 2).")
|
|
3277
3356
|
if isinstance(args[0], Job):
|
|
3278
3357
|
# Regular instantiation when promise is created, before it is being pickled
|
|
3279
3358
|
return super().__new__(cls)
|
|
@@ -3357,10 +3436,12 @@ class PromisedRequirement:
|
|
|
3357
3436
|
:type args: int or .Promise
|
|
3358
3437
|
"""
|
|
3359
3438
|
if hasattr(valueOrCallable, '__call__'):
|
|
3360
|
-
|
|
3439
|
+
if len(args) == 0:
|
|
3440
|
+
raise RuntimeError('Need parameters for PromisedRequirement function.')
|
|
3361
3441
|
func = valueOrCallable
|
|
3362
3442
|
else:
|
|
3363
|
-
|
|
3443
|
+
if len(args) != 0:
|
|
3444
|
+
raise RuntimeError('Define a PromisedRequirement function to handle multiple arguments.')
|
|
3364
3445
|
func = lambda x: x
|
|
3365
3446
|
args = [valueOrCallable]
|
|
3366
3447
|
|