toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +21 -10
- toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +3 -3
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +6 -8
- toil/common.py +532 -743
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +523 -520
- toil/cwl/utils.py +55 -10
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +36 -11
- toil/fileStores/cachingFileStore.py +607 -530
- toil/fileStores/nonCachingFileStore.py +43 -10
- toil/job.py +140 -75
- toil/jobStores/abstractJobStore.py +147 -79
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +1 -2
- toil/jobStores/fileJobStore.py +117 -19
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +71 -43
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +7 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +21 -0
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +1 -1
- toil/lib/threading.py +74 -26
- toil/options/common.py +738 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +32 -0
- toil/provisioners/abstractProvisioner.py +1 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +5 -1
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +3 -2
- toil/test/cwl/cwlTest.py +213 -90
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +0 -1
- toil/test/jobStores/jobStoreTest.py +27 -16
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +11 -16
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +14 -3
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +20 -0
- toil/test/wdl/wdltoil_test.py +148 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +240 -143
- toil/utils/toilStatus.py +1 -4
- toil/version.py +11 -11
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +999 -386
- toil/worker.py +25 -31
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
- toil-6.1.0a1.dist-info/RECORD +237 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/conftest.py +0 -23
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
|
@@ -21,14 +21,17 @@ from contextlib import contextmanager
|
|
|
21
21
|
from typing import (IO,
|
|
22
22
|
Any,
|
|
23
23
|
Callable,
|
|
24
|
+
ContextManager,
|
|
24
25
|
DefaultDict,
|
|
25
26
|
Dict,
|
|
26
27
|
Generator,
|
|
27
28
|
Iterator,
|
|
28
29
|
List,
|
|
30
|
+
Literal,
|
|
29
31
|
Optional,
|
|
30
32
|
Union,
|
|
31
|
-
cast
|
|
33
|
+
cast,
|
|
34
|
+
overload)
|
|
32
35
|
|
|
33
36
|
import dill
|
|
34
37
|
|
|
@@ -40,7 +43,7 @@ from toil.jobStores.abstractJobStore import AbstractJobStore
|
|
|
40
43
|
from toil.lib.compatibility import deprecated
|
|
41
44
|
from toil.lib.conversions import bytes2human
|
|
42
45
|
from toil.lib.io import make_public_dir, robust_rmtree
|
|
43
|
-
from toil.lib.retry import
|
|
46
|
+
from toil.lib.retry import ErrorCondition, retry
|
|
44
47
|
from toil.lib.threading import get_process_name, process_name_exists
|
|
45
48
|
|
|
46
49
|
logger: logging.Logger = logging.getLogger(__name__)
|
|
@@ -118,15 +121,19 @@ class NonCachingFileStore(AbstractFileStore):
|
|
|
118
121
|
disk_usage = (f"Job {self.jobName} used {percent:.2f}% disk ({bytes2human(disk)}B [{disk}B] used, "
|
|
119
122
|
f"{bytes2human(jobReqs)}B [{jobReqs}B] requested).")
|
|
120
123
|
if disk > jobReqs:
|
|
121
|
-
self.
|
|
124
|
+
self.log_to_leader("Job used more disk than requested. For CWL, consider increasing the outdirMin "
|
|
122
125
|
f"requirement, otherwise, consider increasing the disk requirement. {disk_usage}",
|
|
123
126
|
level=logging.WARNING)
|
|
124
127
|
else:
|
|
125
|
-
self.
|
|
128
|
+
self.log_to_leader(disk_usage, level=logging.DEBUG)
|
|
126
129
|
os.chdir(startingDir)
|
|
127
130
|
# Finally delete the job from the worker
|
|
128
131
|
self.check_for_state_corruption()
|
|
129
|
-
|
|
132
|
+
try:
|
|
133
|
+
os.remove(self.jobStateFile)
|
|
134
|
+
except FileNotFoundError:
|
|
135
|
+
logger.exception('Job state file %s has gone missing unexpectedly; some cleanup for failed jobs may be getting skipped!', self.jobStateFile)
|
|
136
|
+
pass
|
|
130
137
|
|
|
131
138
|
def writeGlobalFile(self, localFileName: str, cleanup: bool=False) -> FileID:
|
|
132
139
|
absLocalFileName = self._resolveAbsoluteLocalPath(localFileName)
|
|
@@ -152,7 +159,25 @@ class NonCachingFileStore(AbstractFileStore):
|
|
|
152
159
|
self.logAccess(fileStoreID, localFilePath)
|
|
153
160
|
return localFilePath
|
|
154
161
|
|
|
155
|
-
@
|
|
162
|
+
@overload
|
|
163
|
+
def readGlobalFileStream(
|
|
164
|
+
self,
|
|
165
|
+
fileStoreID: str,
|
|
166
|
+
encoding: Literal[None] = None,
|
|
167
|
+
errors: Optional[str] = None,
|
|
168
|
+
) -> ContextManager[IO[bytes]]:
|
|
169
|
+
...
|
|
170
|
+
|
|
171
|
+
@overload
|
|
172
|
+
def readGlobalFileStream(
|
|
173
|
+
self, fileStoreID: str, encoding: str, errors: Optional[str] = None
|
|
174
|
+
) -> ContextManager[IO[str]]:
|
|
175
|
+
...
|
|
176
|
+
|
|
177
|
+
# TODO: This seems to hit https://github.com/python/mypy/issues/11373
|
|
178
|
+
# But that is supposedly fixed.
|
|
179
|
+
|
|
180
|
+
@contextmanager # type: ignore
|
|
156
181
|
def readGlobalFileStream(self, fileStoreID: str, encoding: Optional[str] = None, errors: Optional[str] = None) -> Iterator[Union[IO[bytes], IO[str]]]:
|
|
157
182
|
with self.jobStore.read_file_stream(fileStoreID, encoding=encoding, errors=errors) as f:
|
|
158
183
|
self.logAccess(fileStoreID)
|
|
@@ -194,18 +219,21 @@ class NonCachingFileStore(AbstractFileStore):
|
|
|
194
219
|
if self.waitForPreviousCommit is not None:
|
|
195
220
|
self.waitForPreviousCommit()
|
|
196
221
|
|
|
222
|
+
# We are going to commit synchronously, so no need to clone a snapshot
|
|
223
|
+
# of the job description or mess with its version numbering.
|
|
224
|
+
|
|
197
225
|
if not jobState:
|
|
198
226
|
# All our operations that need committing are job state related
|
|
199
227
|
return
|
|
200
228
|
|
|
201
229
|
try:
|
|
202
|
-
# Indicate any files that should be deleted once the
|
|
203
|
-
# the job
|
|
230
|
+
# Indicate any files that should be seen as deleted once the
|
|
231
|
+
# update of the job description is visible.
|
|
232
|
+
if len(self.jobDesc.filesToDelete) > 0:
|
|
233
|
+
raise RuntimeError("Job is already in the process of being committed!")
|
|
204
234
|
self.jobDesc.filesToDelete = list(self.filesToDelete)
|
|
205
235
|
# Complete the job
|
|
206
236
|
self.jobStore.update_job(self.jobDesc)
|
|
207
|
-
# Delete any remnant jobs
|
|
208
|
-
list(map(self.jobStore.delete_job, self.jobsToDelete))
|
|
209
237
|
# Delete any remnant files
|
|
210
238
|
list(map(self.jobStore.delete_file, self.filesToDelete))
|
|
211
239
|
# Remove the files to delete list, having successfully removed the files
|
|
@@ -217,6 +245,7 @@ class NonCachingFileStore(AbstractFileStore):
|
|
|
217
245
|
self._terminateEvent.set()
|
|
218
246
|
raise
|
|
219
247
|
|
|
248
|
+
|
|
220
249
|
def __del__(self) -> None:
|
|
221
250
|
"""
|
|
222
251
|
Cleanup function that is run when destroying the class instance. Nothing to do since there
|
|
@@ -299,6 +328,10 @@ class NonCachingFileStore(AbstractFileStore):
|
|
|
299
328
|
# This is a FileNotFoundError.
|
|
300
329
|
# job finished & deleted its jobState file since the jobState files were discovered
|
|
301
330
|
continue
|
|
331
|
+
elif e.errno == 5:
|
|
332
|
+
# This is a OSError: [Errno 5] Input/output error (jobStatefile seems to disappear
|
|
333
|
+
# on network file system sometimes)
|
|
334
|
+
continue
|
|
302
335
|
else:
|
|
303
336
|
raise
|
|
304
337
|
|
toil/job.py
CHANGED
|
@@ -43,12 +43,15 @@ from typing import (TYPE_CHECKING,
|
|
|
43
43
|
cast,
|
|
44
44
|
overload)
|
|
45
45
|
|
|
46
|
+
from configargparse import ArgParser
|
|
47
|
+
|
|
46
48
|
from toil.lib.compatibility import deprecated
|
|
47
49
|
|
|
48
50
|
if sys.version_info >= (3, 8):
|
|
49
51
|
from typing import TypedDict
|
|
50
52
|
else:
|
|
51
53
|
from typing_extensions import TypedDict
|
|
54
|
+
|
|
52
55
|
import dill
|
|
53
56
|
# TODO: When this gets into the standard library, get it from there and drop
|
|
54
57
|
# typing-extensions dependency on Pythons that are new enough.
|
|
@@ -70,10 +73,11 @@ from toil.resource import ModuleDescriptor
|
|
|
70
73
|
from toil.statsAndLogging import set_logging_from_options
|
|
71
74
|
|
|
72
75
|
if TYPE_CHECKING:
|
|
76
|
+
from optparse import OptionParser
|
|
77
|
+
|
|
73
78
|
from toil.batchSystems.abstractBatchSystem import BatchJobExitReason
|
|
74
79
|
from toil.fileStores.abstractFileStore import AbstractFileStore
|
|
75
80
|
from toil.jobStores.abstractJobStore import AbstractJobStore
|
|
76
|
-
from optparse import OptionParser
|
|
77
81
|
|
|
78
82
|
logger = logging.getLogger(__name__)
|
|
79
83
|
|
|
@@ -266,7 +270,8 @@ def parse_accelerator(spec: Union[int, str, Dict[str, Union[str, int]]]) -> Acce
|
|
|
266
270
|
elif possible_description in APIS:
|
|
267
271
|
parsed['api'] = possible_description
|
|
268
272
|
else:
|
|
269
|
-
|
|
273
|
+
if possible_description is not None:
|
|
274
|
+
parsed['model'] = possible_description
|
|
270
275
|
elif isinstance(spec, dict):
|
|
271
276
|
# It's a dict, so merge with the defaults.
|
|
272
277
|
parsed.update(spec)
|
|
@@ -423,6 +428,7 @@ class Requirer:
|
|
|
423
428
|
raise RuntimeError(f"Config assigned multiple times to {self}")
|
|
424
429
|
self._config = config
|
|
425
430
|
|
|
431
|
+
|
|
426
432
|
def __getstate__(self) -> Dict[str, Any]:
|
|
427
433
|
"""Return the dict to use as the instance's __dict__ when pickling."""
|
|
428
434
|
# We want to exclude the config from pickling.
|
|
@@ -449,19 +455,15 @@ class Requirer:
|
|
|
449
455
|
|
|
450
456
|
def __deepcopy__(self, memo: Any) -> "Requirer":
|
|
451
457
|
"""Return a semantically-deep copy of the object, for :meth:`copy.deepcopy`."""
|
|
452
|
-
#
|
|
453
|
-
# that
|
|
458
|
+
# We used to use <https://stackoverflow.com/a/40484215> and
|
|
459
|
+
# <https://stackoverflow.com/a/71125311> but that would result in
|
|
460
|
+
# copies sometimes resurrecting weirdly old job versions. So now we
|
|
461
|
+
# just actually implement __deepcopy__.
|
|
454
462
|
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
# Do the deepcopy which omits the config via __getstate__ override
|
|
460
|
-
clone = copy.deepcopy(self, memo)
|
|
461
|
-
|
|
462
|
-
# Put back the override on us and the copy
|
|
463
|
-
self.__deepcopy__ = implementation # type: ignore[assignment]
|
|
464
|
-
clone.__deepcopy__ = implementation # type: ignore[assignment]
|
|
463
|
+
clone = type(self).__new__(self.__class__)
|
|
464
|
+
state = self.__getstate__()
|
|
465
|
+
clone_state = copy.deepcopy(state, memo)
|
|
466
|
+
clone.__dict__.update(clone_state)
|
|
465
467
|
|
|
466
468
|
if self._config is not None:
|
|
467
469
|
# Share a config reference
|
|
@@ -598,7 +600,8 @@ class Requirer:
|
|
|
598
600
|
)
|
|
599
601
|
return value
|
|
600
602
|
elif self._config is not None:
|
|
601
|
-
|
|
603
|
+
values = [getattr(self._config, 'default_' + requirement, None), getattr(self._config, 'default' + requirement.capitalize(), None)]
|
|
604
|
+
value = values[0] if values[0] is not None else values[1]
|
|
602
605
|
if value is None:
|
|
603
606
|
raise AttributeError(
|
|
604
607
|
f"Encountered None for default '{requirement}' requirement "
|
|
@@ -795,15 +798,27 @@ class JobDescription(Requirer):
|
|
|
795
798
|
# default value for this workflow execution.
|
|
796
799
|
self._remainingTryCount = None
|
|
797
800
|
|
|
798
|
-
# Holds FileStore FileIDs of the files that
|
|
799
|
-
#
|
|
800
|
-
#
|
|
801
|
-
#
|
|
801
|
+
# Holds FileStore FileIDs of the files that should be seen as deleted,
|
|
802
|
+
# as part of a transaction with the writing of this version of the job
|
|
803
|
+
# to the job store. Used to journal deletions of files and recover from
|
|
804
|
+
# a worker crash between committing a JobDescription update (for
|
|
805
|
+
# example, severing the body of a completed job from the
|
|
806
|
+
# JobDescription) and actually executing the requested deletions (i.e.
|
|
807
|
+
# the deletions made by executing the body).
|
|
808
|
+
#
|
|
809
|
+
# Since the files being deleted might be required to execute the job
|
|
810
|
+
# body, we can't delete them first, but we also don't want to leave
|
|
811
|
+
# them behind if we die right after saving the JobDescription.
|
|
812
|
+
#
|
|
813
|
+
# This will be empty at all times except when a new version of a job is
|
|
814
|
+
# in the process of being committed.
|
|
802
815
|
self.filesToDelete = []
|
|
803
816
|
|
|
804
817
|
# Holds JobStore Job IDs of the jobs that have been chained into this
|
|
805
|
-
# job, and which should be deleted when this job finally is deleted
|
|
806
|
-
|
|
818
|
+
# job, and which should be deleted when this job finally is deleted
|
|
819
|
+
# (but not before). The successor relationships with them will have
|
|
820
|
+
# been cut, so we need to hold onto them somehow.
|
|
821
|
+
self.merged_jobs = []
|
|
807
822
|
|
|
808
823
|
# The number of direct predecessors of the job. Needs to be stored at
|
|
809
824
|
# the JobDescription to support dynamically-created jobs with multiple
|
|
@@ -849,6 +864,8 @@ class JobDescription(Requirer):
|
|
|
849
864
|
# Every time we update a job description in place in the job store, we
|
|
850
865
|
# increment this.
|
|
851
866
|
self._job_version = 0
|
|
867
|
+
# And we log who made the version (by PID)
|
|
868
|
+
self._job_version_writer = 0
|
|
852
869
|
|
|
853
870
|
# Human-readable names of jobs that were run as part of this job's
|
|
854
871
|
# invocation, starting with this job
|
|
@@ -1027,17 +1044,25 @@ class JobDescription(Requirer):
|
|
|
1027
1044
|
logger.debug('%s is adopting successor phases from %s of: %s', self, other, old_phases)
|
|
1028
1045
|
self.successor_phases = old_phases + self.successor_phases
|
|
1029
1046
|
|
|
1030
|
-
#
|
|
1031
|
-
#
|
|
1032
|
-
|
|
1047
|
+
# When deleting, we need to delete the files for our old ID, and also
|
|
1048
|
+
# anything that needed to be deleted for the job we are replacing.
|
|
1049
|
+
self.merged_jobs += [self.jobStoreID] + other.merged_jobs
|
|
1033
1050
|
self.jobStoreID = other.jobStoreID
|
|
1034
1051
|
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
self.filesToDelete
|
|
1038
|
-
|
|
1052
|
+
if len(other.filesToDelete) > 0:
|
|
1053
|
+
raise RuntimeError("Trying to take on the ID of a job that is in the process of being committed!")
|
|
1054
|
+
if len(self.filesToDelete) > 0:
|
|
1055
|
+
raise RuntimeError("Trying to take on the ID of anothe job while in the process of being committed!")
|
|
1039
1056
|
|
|
1040
1057
|
self._job_version = other._job_version
|
|
1058
|
+
self._job_version_writer = os.getpid()
|
|
1059
|
+
|
|
1060
|
+
def check_new_version(self, other: "JobDescription") -> None:
|
|
1061
|
+
"""
|
|
1062
|
+
Make sure a prospective new version of the JobDescription is actually moving forward in time and not backward.
|
|
1063
|
+
"""
|
|
1064
|
+
if other._job_version < self._job_version:
|
|
1065
|
+
raise RuntimeError(f"Cannot replace {self} from PID {self._job_version_writer} with older version {other} from PID {other._job_version_writer}")
|
|
1041
1066
|
|
|
1042
1067
|
def addChild(self, childID: str) -> None:
|
|
1043
1068
|
"""Make the job with the given ID a child of the described job."""
|
|
@@ -1055,7 +1080,8 @@ class JobDescription(Requirer):
|
|
|
1055
1080
|
first, and must have already been added.
|
|
1056
1081
|
"""
|
|
1057
1082
|
# Make sure we aren't clobbering something
|
|
1058
|
-
|
|
1083
|
+
if serviceID in self.serviceTree:
|
|
1084
|
+
raise RuntimeError("Job is already in the service tree.")
|
|
1059
1085
|
self.serviceTree[serviceID] = []
|
|
1060
1086
|
if parentServiceID is not None:
|
|
1061
1087
|
self.serviceTree[parentServiceID].append(serviceID)
|
|
@@ -1124,9 +1150,11 @@ class JobDescription(Requirer):
|
|
|
1124
1150
|
from toil.batchSystems.abstractBatchSystem import BatchJobExitReason
|
|
1125
1151
|
|
|
1126
1152
|
# Old version of this function used to take a config. Make sure that isn't happening.
|
|
1127
|
-
|
|
1153
|
+
if isinstance(exit_status, Config):
|
|
1154
|
+
raise RuntimeError("Passing a Config as an exit status.")
|
|
1128
1155
|
# Make sure we have an assigned config.
|
|
1129
|
-
|
|
1156
|
+
if self._config is None:
|
|
1157
|
+
raise RuntimeError("The job's config is not assigned.")
|
|
1130
1158
|
|
|
1131
1159
|
if self._config.enableUnlimitedPreemptibleRetries and exit_reason == BatchJobExitReason.LOST:
|
|
1132
1160
|
logger.info("*Not* reducing try count (%s) of job %s with ID %s",
|
|
@@ -1217,6 +1245,14 @@ class JobDescription(Requirer):
|
|
|
1217
1245
|
def __repr__(self):
|
|
1218
1246
|
return f'{self.__class__.__name__}( **{self.__dict__!r} )'
|
|
1219
1247
|
|
|
1248
|
+
def reserve_versions(self, count: int) -> None:
|
|
1249
|
+
"""
|
|
1250
|
+
Reserve a job version number for later, for journaling asynchronously.
|
|
1251
|
+
"""
|
|
1252
|
+
self._job_version += count
|
|
1253
|
+
self._job_version_writer = os.getpid()
|
|
1254
|
+
logger.debug("Skip ahead to job version: %s", self)
|
|
1255
|
+
|
|
1220
1256
|
def pre_update_hook(self) -> None:
|
|
1221
1257
|
"""
|
|
1222
1258
|
Run before pickling and saving a created or updated version of this job.
|
|
@@ -1224,6 +1260,7 @@ class JobDescription(Requirer):
|
|
|
1224
1260
|
Called by the job store.
|
|
1225
1261
|
"""
|
|
1226
1262
|
self._job_version += 1
|
|
1263
|
+
self._job_version_writer = os.getpid()
|
|
1227
1264
|
logger.debug("New job version: %s", self)
|
|
1228
1265
|
|
|
1229
1266
|
def get_job_kind(self) -> str:
|
|
@@ -1309,12 +1346,14 @@ class CheckpointJobDescription(JobDescription):
|
|
|
1309
1346
|
|
|
1310
1347
|
Returns a list with the IDs of any successors deleted.
|
|
1311
1348
|
"""
|
|
1312
|
-
|
|
1349
|
+
if self.checkpoint is None:
|
|
1350
|
+
raise RuntimeError("Cannot restart a checkpoint job. The checkpoint was never set.")
|
|
1313
1351
|
successorsDeleted = []
|
|
1314
1352
|
all_successors = list(self.allSuccessors())
|
|
1315
1353
|
if len(all_successors) > 0 or self.serviceTree or self.command is not None:
|
|
1316
1354
|
if self.command is not None:
|
|
1317
|
-
|
|
1355
|
+
if self.command != self.checkpoint:
|
|
1356
|
+
raise RuntimeError("The command and checkpoint are not the same.")
|
|
1318
1357
|
logger.debug("Checkpoint job already has command set to run")
|
|
1319
1358
|
else:
|
|
1320
1359
|
self.command = self.checkpoint
|
|
@@ -1600,8 +1639,8 @@ class Job:
|
|
|
1600
1639
|
|
|
1601
1640
|
:return: childJob: for call chaining
|
|
1602
1641
|
"""
|
|
1603
|
-
|
|
1604
|
-
|
|
1642
|
+
if not isinstance(childJob, Job):
|
|
1643
|
+
raise RuntimeError("The type of the child job is not a job.")
|
|
1605
1644
|
# Join the job graphs
|
|
1606
1645
|
self._jobGraphsJoined(childJob)
|
|
1607
1646
|
# Remember the child relationship
|
|
@@ -1627,8 +1666,8 @@ class Job:
|
|
|
1627
1666
|
|
|
1628
1667
|
:return: followOnJob for call chaining
|
|
1629
1668
|
"""
|
|
1630
|
-
|
|
1631
|
-
|
|
1669
|
+
if not isinstance(followOnJob, Job):
|
|
1670
|
+
raise RuntimeError("The type of the follow-on job is not a job.")
|
|
1632
1671
|
# Join the job graphs
|
|
1633
1672
|
self._jobGraphsJoined(followOnJob)
|
|
1634
1673
|
# Remember the follow-on relationship
|
|
@@ -1651,7 +1690,7 @@ class Job:
|
|
|
1651
1690
|
return self._description.hasChild(followOnJob.jobStoreID)
|
|
1652
1691
|
|
|
1653
1692
|
def addService(
|
|
1654
|
-
self, service: "Service", parentService: Optional["Service"] = None
|
|
1693
|
+
self, service: "Job.Service", parentService: Optional["Job.Service"] = None
|
|
1655
1694
|
) -> "Promise":
|
|
1656
1695
|
"""
|
|
1657
1696
|
Add a service.
|
|
@@ -1698,7 +1737,7 @@ class Job:
|
|
|
1698
1737
|
# Return the promise for the service's startup result
|
|
1699
1738
|
return hostingJob.rv()
|
|
1700
1739
|
|
|
1701
|
-
def hasService(self, service: "Service") -> bool:
|
|
1740
|
+
def hasService(self, service: "Job.Service") -> bool:
|
|
1702
1741
|
"""Return True if the given Service is a service of this job, and False otherwise."""
|
|
1703
1742
|
return service.hostID is None or self._description.hasServiceHostJob(service.hostID)
|
|
1704
1743
|
|
|
@@ -1781,8 +1820,8 @@ class Job:
|
|
|
1781
1820
|
return self._tempDir
|
|
1782
1821
|
|
|
1783
1822
|
def log(self, text: str, level=logging.INFO) -> None:
|
|
1784
|
-
"""Log using :func:`fileStore.
|
|
1785
|
-
self._fileStore.
|
|
1823
|
+
"""Log using :func:`fileStore.log_to_leader`."""
|
|
1824
|
+
self._fileStore.log_to_leader(text, level)
|
|
1786
1825
|
|
|
1787
1826
|
@staticmethod
|
|
1788
1827
|
def wrapFn(fn, *args, **kwargs) -> "FunctionWrappingJob":
|
|
@@ -1991,7 +2030,8 @@ class Job:
|
|
|
1991
2030
|
for successor in [self._registry[jID] for jID in self.description.allSuccessors() if jID in self._registry] + extraEdges[self]:
|
|
1992
2031
|
# Grab all the successors in the current registry (i.e. added form this node) and look at them.
|
|
1993
2032
|
successor._checkJobGraphAcylicDFS(stack, visited, extraEdges)
|
|
1994
|
-
|
|
2033
|
+
if stack.pop() != self:
|
|
2034
|
+
raise RuntimeError("The stack ordering/elements was changed.")
|
|
1995
2035
|
if self in stack:
|
|
1996
2036
|
stack.append(self)
|
|
1997
2037
|
raise JobGraphDeadlockException("A cycle of job dependencies has been detected '%s'" % stack)
|
|
@@ -2109,37 +2149,49 @@ class Job:
|
|
|
2109
2149
|
"""Used to setup and run Toil workflow."""
|
|
2110
2150
|
|
|
2111
2151
|
@staticmethod
|
|
2112
|
-
def getDefaultArgumentParser() -> ArgumentParser:
|
|
2152
|
+
def getDefaultArgumentParser(jobstore_as_flag: bool = False) -> ArgumentParser:
|
|
2113
2153
|
"""
|
|
2114
2154
|
Get argument parser with added toil workflow options.
|
|
2115
2155
|
|
|
2156
|
+
:param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
|
|
2116
2157
|
:returns: The argument parser used by a toil workflow with added Toil options.
|
|
2117
2158
|
"""
|
|
2118
|
-
parser =
|
|
2119
|
-
Job.Runner.addToilOptions(parser)
|
|
2159
|
+
parser = ArgParser(formatter_class=ArgumentDefaultsHelpFormatter)
|
|
2160
|
+
Job.Runner.addToilOptions(parser, jobstore_as_flag=jobstore_as_flag)
|
|
2120
2161
|
return parser
|
|
2121
2162
|
|
|
2122
2163
|
@staticmethod
|
|
2123
|
-
def getDefaultOptions(jobStore: str) -> Namespace:
|
|
2164
|
+
def getDefaultOptions(jobStore: Optional[str] = None, jobstore_as_flag: bool = False) -> Namespace:
|
|
2124
2165
|
"""
|
|
2125
2166
|
Get default options for a toil workflow.
|
|
2126
2167
|
|
|
2127
2168
|
:param jobStore: A string describing the jobStore \
|
|
2128
2169
|
for the workflow.
|
|
2170
|
+
:param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
|
|
2129
2171
|
:returns: The options used by a toil workflow.
|
|
2130
2172
|
"""
|
|
2131
|
-
|
|
2132
|
-
|
|
2173
|
+
# setting jobstore_as_flag to True allows the user to declare the jobstore in the config file instead
|
|
2174
|
+
if not jobstore_as_flag and jobStore is None:
|
|
2175
|
+
raise RuntimeError("The jobstore argument cannot be missing if the jobstore_as_flag argument is set "
|
|
2176
|
+
"to False!")
|
|
2177
|
+
parser = Job.Runner.getDefaultArgumentParser(jobstore_as_flag=jobstore_as_flag)
|
|
2178
|
+
arguments = []
|
|
2179
|
+
if jobstore_as_flag and jobStore is not None:
|
|
2180
|
+
arguments = ["--jobstore", jobStore]
|
|
2181
|
+
if not jobstore_as_flag and jobStore is not None:
|
|
2182
|
+
arguments = [jobStore]
|
|
2183
|
+
return parser.parse_args(args=arguments)
|
|
2133
2184
|
|
|
2134
2185
|
@staticmethod
|
|
2135
|
-
def addToilOptions(parser: Union["OptionParser", ArgumentParser]) -> None:
|
|
2186
|
+
def addToilOptions(parser: Union["OptionParser", ArgumentParser], jobstore_as_flag: bool = False) -> None:
|
|
2136
2187
|
"""
|
|
2137
2188
|
Adds the default toil options to an :mod:`optparse` or :mod:`argparse`
|
|
2138
2189
|
parser object.
|
|
2139
2190
|
|
|
2140
2191
|
:param parser: Options object to add toil options to.
|
|
2192
|
+
:param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
|
|
2141
2193
|
"""
|
|
2142
|
-
addOptions(parser)
|
|
2194
|
+
addOptions(parser, jobstore_as_flag=jobstore_as_flag)
|
|
2143
2195
|
|
|
2144
2196
|
@staticmethod
|
|
2145
2197
|
def startToil(job: "Job", options) -> Any:
|
|
@@ -2279,8 +2331,8 @@ class Job:
|
|
|
2279
2331
|
unpickler = FilteredUnpickler(fileHandle)
|
|
2280
2332
|
|
|
2281
2333
|
runnable = unpickler.load()
|
|
2282
|
-
if requireInstanceOf is not None:
|
|
2283
|
-
|
|
2334
|
+
if requireInstanceOf is not None and not isinstance(runnable, requireInstanceOf):
|
|
2335
|
+
raise RuntimeError(f"Did not find a {requireInstanceOf} when expected")
|
|
2284
2336
|
|
|
2285
2337
|
return runnable
|
|
2286
2338
|
|
|
@@ -2450,7 +2502,8 @@ class Job:
|
|
|
2450
2502
|
|
|
2451
2503
|
# We can't save the job in the right place for cleanup unless the
|
|
2452
2504
|
# description has a real ID.
|
|
2453
|
-
|
|
2505
|
+
if isinstance(self.jobStoreID, TemporaryID):
|
|
2506
|
+
raise RuntimeError(f"Tried to save job {self} without ID assigned!")
|
|
2454
2507
|
|
|
2455
2508
|
# Note that we can't accept any more requests for our return value
|
|
2456
2509
|
self._disablePromiseRegistration()
|
|
@@ -2553,10 +2606,11 @@ class Job:
|
|
|
2553
2606
|
# Set up to save last job first, so promises flow the right way
|
|
2554
2607
|
ordering.reverse()
|
|
2555
2608
|
|
|
2556
|
-
logger.
|
|
2609
|
+
logger.debug("Saving graph of %d jobs, %d non-service, %d new", len(allJobs), len(ordering), len(fakeToReal))
|
|
2557
2610
|
|
|
2558
2611
|
# Make sure we're the root
|
|
2559
|
-
|
|
2612
|
+
if ordering[-1] != self:
|
|
2613
|
+
raise RuntimeError("The current job is not the root.")
|
|
2560
2614
|
|
|
2561
2615
|
# Don't verify the ordering length: it excludes service host jobs.
|
|
2562
2616
|
ordered_ids = {o.jobStoreID for o in ordering}
|
|
@@ -2572,17 +2626,17 @@ class Job:
|
|
|
2572
2626
|
self._fulfillPromises(returnValues, jobStore)
|
|
2573
2627
|
|
|
2574
2628
|
for job in ordering:
|
|
2575
|
-
logger.
|
|
2629
|
+
logger.debug("Processing job %s", job.description)
|
|
2576
2630
|
for serviceBatch in reversed(list(job.description.serviceHostIDsInBatches())):
|
|
2577
2631
|
# For each batch of service host jobs in reverse order they start
|
|
2578
2632
|
for serviceID in serviceBatch:
|
|
2579
|
-
logger.
|
|
2633
|
+
logger.debug("Processing service %s", serviceID)
|
|
2580
2634
|
if serviceID in self._registry:
|
|
2581
2635
|
# It's a new service
|
|
2582
2636
|
|
|
2583
2637
|
# Find the actual job
|
|
2584
2638
|
serviceJob = self._registry[serviceID]
|
|
2585
|
-
logger.
|
|
2639
|
+
logger.debug("Saving service %s", serviceJob.description)
|
|
2586
2640
|
# Pickle the service body, which triggers all the promise stuff
|
|
2587
2641
|
serviceJob.saveBody(jobStore)
|
|
2588
2642
|
if job != self or saveSelf:
|
|
@@ -2641,7 +2695,8 @@ class Job:
|
|
|
2641
2695
|
command = jobDescription.command
|
|
2642
2696
|
|
|
2643
2697
|
commandTokens = command.split()
|
|
2644
|
-
|
|
2698
|
+
if "_toil" != commandTokens[0]:
|
|
2699
|
+
raise RuntimeError("An invalid command was passed into the job.")
|
|
2645
2700
|
userModule = ModuleDescriptor.fromCommand(commandTokens[2:])
|
|
2646
2701
|
logger.debug('Loading user module %s.', userModule)
|
|
2647
2702
|
userModule = cls._loadUserModule(userModule)
|
|
@@ -3025,22 +3080,23 @@ class EncapsulatedJob(Job):
|
|
|
3025
3080
|
self.encapsulatedFollowOn = None
|
|
3026
3081
|
|
|
3027
3082
|
def addChild(self, childJob):
|
|
3028
|
-
|
|
3029
|
-
"Children cannot be added to EncapsulatedJob while it is running"
|
|
3083
|
+
if self.encapsulatedFollowOn is None:
|
|
3084
|
+
raise RuntimeError("Children cannot be added to EncapsulatedJob while it is running")
|
|
3030
3085
|
return Job.addChild(self.encapsulatedFollowOn, childJob)
|
|
3031
3086
|
|
|
3032
3087
|
def addService(self, service, parentService=None):
|
|
3033
|
-
|
|
3034
|
-
"Services cannot be added to EncapsulatedJob while it is running"
|
|
3088
|
+
if self.encapsulatedFollowOn is None:
|
|
3089
|
+
raise RuntimeError("Services cannot be added to EncapsulatedJob while it is running")
|
|
3035
3090
|
return Job.addService(self.encapsulatedFollowOn, service, parentService=parentService)
|
|
3036
3091
|
|
|
3037
3092
|
def addFollowOn(self, followOnJob):
|
|
3038
|
-
|
|
3039
|
-
"Follow-ons cannot be added to EncapsulatedJob while it is running"
|
|
3093
|
+
if self.encapsulatedFollowOn is None:
|
|
3094
|
+
raise RuntimeError("Follow-ons cannot be added to EncapsulatedJob while it is running")
|
|
3040
3095
|
return Job.addFollowOn(self.encapsulatedFollowOn, followOnJob)
|
|
3041
3096
|
|
|
3042
3097
|
def rv(self, *path) -> "Promise":
|
|
3043
|
-
|
|
3098
|
+
if self.encapsulatedJob is None:
|
|
3099
|
+
raise RuntimeError("The encapsulated job was not set.")
|
|
3044
3100
|
return self.encapsulatedJob.rv(*path)
|
|
3045
3101
|
|
|
3046
3102
|
def prepareForPromiseRegistration(self, jobStore):
|
|
@@ -3052,7 +3108,8 @@ class EncapsulatedJob(Job):
|
|
|
3052
3108
|
self.encapsulatedJob.prepareForPromiseRegistration(jobStore)
|
|
3053
3109
|
|
|
3054
3110
|
def _disablePromiseRegistration(self):
|
|
3055
|
-
|
|
3111
|
+
if self.encapsulatedJob is None:
|
|
3112
|
+
raise RuntimeError("The encapsulated job was not set.")
|
|
3056
3113
|
super()._disablePromiseRegistration()
|
|
3057
3114
|
self.encapsulatedJob._disablePromiseRegistration()
|
|
3058
3115
|
|
|
@@ -3068,7 +3125,8 @@ class EncapsulatedJob(Job):
|
|
|
3068
3125
|
return self.__class__, (None,)
|
|
3069
3126
|
|
|
3070
3127
|
def getUserScript(self):
|
|
3071
|
-
|
|
3128
|
+
if self.encapsulatedJob is None:
|
|
3129
|
+
raise RuntimeError("The encapsulated job was not set.")
|
|
3072
3130
|
return self.encapsulatedJob.getUserScript()
|
|
3073
3131
|
|
|
3074
3132
|
|
|
@@ -3085,7 +3143,8 @@ class ServiceHostJob(Job):
|
|
|
3085
3143
|
"""
|
|
3086
3144
|
|
|
3087
3145
|
# Make sure the service hasn't been given a host already.
|
|
3088
|
-
|
|
3146
|
+
if service.hostID is not None:
|
|
3147
|
+
raise RuntimeError("Cannot set the host. The service has already been given a host.")
|
|
3089
3148
|
|
|
3090
3149
|
# Make ourselves with name info from the Service and a
|
|
3091
3150
|
# ServiceJobDescription that has the service control flags.
|
|
@@ -3172,14 +3231,17 @@ class ServiceHostJob(Job):
|
|
|
3172
3231
|
|
|
3173
3232
|
#Now flag that the service is running jobs can connect to it
|
|
3174
3233
|
logger.debug("Removing the start jobStoreID to indicate that establishment of the service")
|
|
3175
|
-
|
|
3234
|
+
if self.description.startJobStoreID is None:
|
|
3235
|
+
raise RuntimeError("No start jobStoreID to remove.")
|
|
3176
3236
|
if fileStore.jobStore.file_exists(self.description.startJobStoreID):
|
|
3177
3237
|
fileStore.jobStore.delete_file(self.description.startJobStoreID)
|
|
3178
|
-
|
|
3238
|
+
if fileStore.jobStore.file_exists(self.description.startJobStoreID):
|
|
3239
|
+
raise RuntimeError("The start jobStoreID is not a file.")
|
|
3179
3240
|
|
|
3180
3241
|
#Now block until we are told to stop, which is indicated by the removal
|
|
3181
3242
|
#of a file
|
|
3182
|
-
|
|
3243
|
+
if self.description.terminateJobStoreID is None:
|
|
3244
|
+
raise RuntimeError("No terminate jobStoreID to use.")
|
|
3183
3245
|
while True:
|
|
3184
3246
|
# Check for the terminate signal
|
|
3185
3247
|
if not fileStore.jobStore.file_exists(self.description.terminateJobStoreID):
|
|
@@ -3273,7 +3335,8 @@ class Promise:
|
|
|
3273
3335
|
@staticmethod
|
|
3274
3336
|
def __new__(cls, *args) -> "Promise":
|
|
3275
3337
|
"""Instantiate this Promise."""
|
|
3276
|
-
|
|
3338
|
+
if len(args) != 2:
|
|
3339
|
+
raise RuntimeError("Cannot instantiate promise. Invalid number of arguments given (Expected 2).")
|
|
3277
3340
|
if isinstance(args[0], Job):
|
|
3278
3341
|
# Regular instantiation when promise is created, before it is being pickled
|
|
3279
3342
|
return super().__new__(cls)
|
|
@@ -3357,10 +3420,12 @@ class PromisedRequirement:
|
|
|
3357
3420
|
:type args: int or .Promise
|
|
3358
3421
|
"""
|
|
3359
3422
|
if hasattr(valueOrCallable, '__call__'):
|
|
3360
|
-
|
|
3423
|
+
if len(args) == 0:
|
|
3424
|
+
raise RuntimeError('Need parameters for PromisedRequirement function.')
|
|
3361
3425
|
func = valueOrCallable
|
|
3362
3426
|
else:
|
|
3363
|
-
|
|
3427
|
+
if len(args) != 0:
|
|
3428
|
+
raise RuntimeError('Define a PromisedRequirement function to handle multiple arguments.')
|
|
3364
3429
|
func = lambda x: x
|
|
3365
3430
|
args = [valueOrCallable]
|
|
3366
3431
|
|