toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. toil/__init__.py +18 -13
  2. toil/batchSystems/abstractBatchSystem.py +21 -10
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
  4. toil/batchSystems/awsBatch.py +14 -14
  5. toil/batchSystems/contained_executor.py +3 -3
  6. toil/batchSystems/htcondor.py +0 -1
  7. toil/batchSystems/kubernetes.py +34 -31
  8. toil/batchSystems/local_support.py +3 -1
  9. toil/batchSystems/mesos/batchSystem.py +7 -7
  10. toil/batchSystems/options.py +32 -83
  11. toil/batchSystems/registry.py +104 -23
  12. toil/batchSystems/singleMachine.py +16 -13
  13. toil/batchSystems/slurm.py +3 -3
  14. toil/batchSystems/torque.py +0 -1
  15. toil/bus.py +6 -8
  16. toil/common.py +532 -743
  17. toil/cwl/__init__.py +28 -32
  18. toil/cwl/cwltoil.py +523 -520
  19. toil/cwl/utils.py +55 -10
  20. toil/fileStores/__init__.py +2 -2
  21. toil/fileStores/abstractFileStore.py +36 -11
  22. toil/fileStores/cachingFileStore.py +607 -530
  23. toil/fileStores/nonCachingFileStore.py +43 -10
  24. toil/job.py +140 -75
  25. toil/jobStores/abstractJobStore.py +147 -79
  26. toil/jobStores/aws/jobStore.py +23 -9
  27. toil/jobStores/aws/utils.py +1 -2
  28. toil/jobStores/fileJobStore.py +117 -19
  29. toil/jobStores/googleJobStore.py +16 -7
  30. toil/jobStores/utils.py +5 -6
  31. toil/leader.py +71 -43
  32. toil/lib/accelerators.py +10 -5
  33. toil/lib/aws/__init__.py +3 -14
  34. toil/lib/aws/ami.py +22 -9
  35. toil/lib/aws/iam.py +21 -13
  36. toil/lib/aws/session.py +2 -16
  37. toil/lib/aws/utils.py +4 -5
  38. toil/lib/compatibility.py +1 -1
  39. toil/lib/conversions.py +7 -3
  40. toil/lib/docker.py +22 -23
  41. toil/lib/ec2.py +10 -6
  42. toil/lib/ec2nodes.py +106 -100
  43. toil/lib/encryption/_nacl.py +2 -1
  44. toil/lib/generatedEC2Lists.py +325 -18
  45. toil/lib/io.py +21 -0
  46. toil/lib/misc.py +1 -1
  47. toil/lib/resources.py +1 -1
  48. toil/lib/threading.py +74 -26
  49. toil/options/common.py +738 -0
  50. toil/options/cwl.py +336 -0
  51. toil/options/wdl.py +32 -0
  52. toil/provisioners/abstractProvisioner.py +1 -4
  53. toil/provisioners/aws/__init__.py +3 -6
  54. toil/provisioners/aws/awsProvisioner.py +6 -0
  55. toil/provisioners/clusterScaler.py +3 -2
  56. toil/provisioners/gceProvisioner.py +2 -2
  57. toil/realtimeLogger.py +2 -1
  58. toil/resource.py +24 -18
  59. toil/server/app.py +2 -3
  60. toil/server/cli/wes_cwl_runner.py +4 -4
  61. toil/server/utils.py +1 -1
  62. toil/server/wes/abstract_backend.py +3 -2
  63. toil/server/wes/amazon_wes_utils.py +5 -4
  64. toil/server/wes/tasks.py +2 -3
  65. toil/server/wes/toil_backend.py +2 -10
  66. toil/server/wsgi_app.py +2 -0
  67. toil/serviceManager.py +12 -10
  68. toil/statsAndLogging.py +5 -1
  69. toil/test/__init__.py +29 -54
  70. toil/test/batchSystems/batchSystemTest.py +11 -111
  71. toil/test/batchSystems/test_slurm.py +3 -2
  72. toil/test/cwl/cwlTest.py +213 -90
  73. toil/test/cwl/glob_dir.cwl +15 -0
  74. toil/test/cwl/preemptible.cwl +21 -0
  75. toil/test/cwl/preemptible_expression.cwl +28 -0
  76. toil/test/cwl/revsort.cwl +1 -1
  77. toil/test/cwl/revsort2.cwl +1 -1
  78. toil/test/docs/scriptsTest.py +0 -1
  79. toil/test/jobStores/jobStoreTest.py +27 -16
  80. toil/test/lib/aws/test_iam.py +4 -14
  81. toil/test/lib/aws/test_utils.py +0 -3
  82. toil/test/lib/dockerTest.py +4 -4
  83. toil/test/lib/test_ec2.py +11 -16
  84. toil/test/mesos/helloWorld.py +4 -5
  85. toil/test/mesos/stress.py +1 -1
  86. toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
  87. toil/test/provisioners/clusterScalerTest.py +6 -4
  88. toil/test/provisioners/clusterTest.py +14 -3
  89. toil/test/provisioners/gceProvisionerTest.py +0 -6
  90. toil/test/provisioners/restartScript.py +3 -2
  91. toil/test/server/serverTest.py +1 -1
  92. toil/test/sort/restart_sort.py +2 -1
  93. toil/test/sort/sort.py +2 -1
  94. toil/test/sort/sortTest.py +2 -13
  95. toil/test/src/autoDeploymentTest.py +45 -45
  96. toil/test/src/busTest.py +5 -5
  97. toil/test/src/checkpointTest.py +2 -2
  98. toil/test/src/deferredFunctionTest.py +1 -1
  99. toil/test/src/fileStoreTest.py +32 -16
  100. toil/test/src/helloWorldTest.py +1 -1
  101. toil/test/src/importExportFileTest.py +1 -1
  102. toil/test/src/jobDescriptionTest.py +2 -1
  103. toil/test/src/jobServiceTest.py +1 -1
  104. toil/test/src/jobTest.py +18 -18
  105. toil/test/src/miscTests.py +5 -3
  106. toil/test/src/promisedRequirementTest.py +3 -3
  107. toil/test/src/realtimeLoggerTest.py +1 -1
  108. toil/test/src/resourceTest.py +2 -2
  109. toil/test/src/restartDAGTest.py +1 -1
  110. toil/test/src/resumabilityTest.py +36 -2
  111. toil/test/src/retainTempDirTest.py +1 -1
  112. toil/test/src/systemTest.py +2 -2
  113. toil/test/src/toilContextManagerTest.py +2 -2
  114. toil/test/src/userDefinedJobArgTypeTest.py +1 -1
  115. toil/test/utils/toilDebugTest.py +98 -32
  116. toil/test/utils/toilKillTest.py +2 -2
  117. toil/test/utils/utilsTest.py +20 -0
  118. toil/test/wdl/wdltoil_test.py +148 -45
  119. toil/toilState.py +7 -6
  120. toil/utils/toilClean.py +1 -1
  121. toil/utils/toilConfig.py +36 -0
  122. toil/utils/toilDebugFile.py +60 -33
  123. toil/utils/toilDebugJob.py +39 -12
  124. toil/utils/toilDestroyCluster.py +1 -1
  125. toil/utils/toilKill.py +1 -1
  126. toil/utils/toilLaunchCluster.py +13 -2
  127. toil/utils/toilMain.py +3 -2
  128. toil/utils/toilRsyncCluster.py +1 -1
  129. toil/utils/toilSshCluster.py +1 -1
  130. toil/utils/toilStats.py +240 -143
  131. toil/utils/toilStatus.py +1 -4
  132. toil/version.py +11 -11
  133. toil/wdl/utils.py +2 -122
  134. toil/wdl/wdltoil.py +999 -386
  135. toil/worker.py +25 -31
  136. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
  137. toil-6.1.0a1.dist-info/RECORD +237 -0
  138. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
  139. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
  140. toil/batchSystems/parasol.py +0 -379
  141. toil/batchSystems/tes.py +0 -459
  142. toil/test/batchSystems/parasolTestSupport.py +0 -117
  143. toil/test/wdl/builtinTest.py +0 -506
  144. toil/test/wdl/conftest.py +0 -23
  145. toil/test/wdl/toilwdlTest.py +0 -522
  146. toil/wdl/toilwdl.py +0 -141
  147. toil/wdl/versions/dev.py +0 -107
  148. toil/wdl/versions/draft2.py +0 -980
  149. toil/wdl/versions/v1.py +0 -794
  150. toil/wdl/wdl_analysis.py +0 -116
  151. toil/wdl/wdl_functions.py +0 -997
  152. toil/wdl/wdl_synthesis.py +0 -1011
  153. toil/wdl/wdl_types.py +0 -243
  154. toil-5.12.0.dist-info/RECORD +0 -244
  155. /toil/{wdl/versions → options}/__init__.py +0 -0
  156. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
  157. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
@@ -21,14 +21,17 @@ from contextlib import contextmanager
21
21
  from typing import (IO,
22
22
  Any,
23
23
  Callable,
24
+ ContextManager,
24
25
  DefaultDict,
25
26
  Dict,
26
27
  Generator,
27
28
  Iterator,
28
29
  List,
30
+ Literal,
29
31
  Optional,
30
32
  Union,
31
- cast)
33
+ cast,
34
+ overload)
32
35
 
33
36
  import dill
34
37
 
@@ -40,7 +43,7 @@ from toil.jobStores.abstractJobStore import AbstractJobStore
40
43
  from toil.lib.compatibility import deprecated
41
44
  from toil.lib.conversions import bytes2human
42
45
  from toil.lib.io import make_public_dir, robust_rmtree
43
- from toil.lib.retry import retry, ErrorCondition
46
+ from toil.lib.retry import ErrorCondition, retry
44
47
  from toil.lib.threading import get_process_name, process_name_exists
45
48
 
46
49
  logger: logging.Logger = logging.getLogger(__name__)
@@ -118,15 +121,19 @@ class NonCachingFileStore(AbstractFileStore):
118
121
  disk_usage = (f"Job {self.jobName} used {percent:.2f}% disk ({bytes2human(disk)}B [{disk}B] used, "
119
122
  f"{bytes2human(jobReqs)}B [{jobReqs}B] requested).")
120
123
  if disk > jobReqs:
121
- self.logToMaster("Job used more disk than requested. For CWL, consider increasing the outdirMin "
124
+ self.log_to_leader("Job used more disk than requested. For CWL, consider increasing the outdirMin "
122
125
  f"requirement, otherwise, consider increasing the disk requirement. {disk_usage}",
123
126
  level=logging.WARNING)
124
127
  else:
125
- self.logToMaster(disk_usage, level=logging.DEBUG)
128
+ self.log_to_leader(disk_usage, level=logging.DEBUG)
126
129
  os.chdir(startingDir)
127
130
  # Finally delete the job from the worker
128
131
  self.check_for_state_corruption()
129
- os.remove(self.jobStateFile)
132
+ try:
133
+ os.remove(self.jobStateFile)
134
+ except FileNotFoundError:
135
+ logger.exception('Job state file %s has gone missing unexpectedly; some cleanup for failed jobs may be getting skipped!', self.jobStateFile)
136
+ pass
130
137
 
131
138
  def writeGlobalFile(self, localFileName: str, cleanup: bool=False) -> FileID:
132
139
  absLocalFileName = self._resolveAbsoluteLocalPath(localFileName)
@@ -152,7 +159,25 @@ class NonCachingFileStore(AbstractFileStore):
152
159
  self.logAccess(fileStoreID, localFilePath)
153
160
  return localFilePath
154
161
 
155
- @contextmanager
162
+ @overload
163
+ def readGlobalFileStream(
164
+ self,
165
+ fileStoreID: str,
166
+ encoding: Literal[None] = None,
167
+ errors: Optional[str] = None,
168
+ ) -> ContextManager[IO[bytes]]:
169
+ ...
170
+
171
+ @overload
172
+ def readGlobalFileStream(
173
+ self, fileStoreID: str, encoding: str, errors: Optional[str] = None
174
+ ) -> ContextManager[IO[str]]:
175
+ ...
176
+
177
+ # TODO: This seems to hit https://github.com/python/mypy/issues/11373
178
+ # But that is supposedly fixed.
179
+
180
+ @contextmanager # type: ignore
156
181
  def readGlobalFileStream(self, fileStoreID: str, encoding: Optional[str] = None, errors: Optional[str] = None) -> Iterator[Union[IO[bytes], IO[str]]]:
157
182
  with self.jobStore.read_file_stream(fileStoreID, encoding=encoding, errors=errors) as f:
158
183
  self.logAccess(fileStoreID)
@@ -194,18 +219,21 @@ class NonCachingFileStore(AbstractFileStore):
194
219
  if self.waitForPreviousCommit is not None:
195
220
  self.waitForPreviousCommit()
196
221
 
222
+ # We are going to commit synchronously, so no need to clone a snapshot
223
+ # of the job description or mess with its version numbering.
224
+
197
225
  if not jobState:
198
226
  # All our operations that need committing are job state related
199
227
  return
200
228
 
201
229
  try:
202
- # Indicate any files that should be deleted once the update of
203
- # the job wrapper is completed.
230
+ # Indicate any files that should be seen as deleted once the
231
+ # update of the job description is visible.
232
+ if len(self.jobDesc.filesToDelete) > 0:
233
+ raise RuntimeError("Job is already in the process of being committed!")
204
234
  self.jobDesc.filesToDelete = list(self.filesToDelete)
205
235
  # Complete the job
206
236
  self.jobStore.update_job(self.jobDesc)
207
- # Delete any remnant jobs
208
- list(map(self.jobStore.delete_job, self.jobsToDelete))
209
237
  # Delete any remnant files
210
238
  list(map(self.jobStore.delete_file, self.filesToDelete))
211
239
  # Remove the files to delete list, having successfully removed the files
@@ -217,6 +245,7 @@ class NonCachingFileStore(AbstractFileStore):
217
245
  self._terminateEvent.set()
218
246
  raise
219
247
 
248
+
220
249
  def __del__(self) -> None:
221
250
  """
222
251
  Cleanup function that is run when destroying the class instance. Nothing to do since there
@@ -299,6 +328,10 @@ class NonCachingFileStore(AbstractFileStore):
299
328
  # This is a FileNotFoundError.
300
329
  # job finished & deleted its jobState file since the jobState files were discovered
301
330
  continue
331
+ elif e.errno == 5:
332
+ # This is a OSError: [Errno 5] Input/output error (jobStatefile seems to disappear
333
+ # on network file system sometimes)
334
+ continue
302
335
  else:
303
336
  raise
304
337
 
toil/job.py CHANGED
@@ -43,12 +43,15 @@ from typing import (TYPE_CHECKING,
43
43
  cast,
44
44
  overload)
45
45
 
46
+ from configargparse import ArgParser
47
+
46
48
  from toil.lib.compatibility import deprecated
47
49
 
48
50
  if sys.version_info >= (3, 8):
49
51
  from typing import TypedDict
50
52
  else:
51
53
  from typing_extensions import TypedDict
54
+
52
55
  import dill
53
56
  # TODO: When this gets into the standard library, get it from there and drop
54
57
  # typing-extensions dependency on Pythons that are new enough.
@@ -70,10 +73,11 @@ from toil.resource import ModuleDescriptor
70
73
  from toil.statsAndLogging import set_logging_from_options
71
74
 
72
75
  if TYPE_CHECKING:
76
+ from optparse import OptionParser
77
+
73
78
  from toil.batchSystems.abstractBatchSystem import BatchJobExitReason
74
79
  from toil.fileStores.abstractFileStore import AbstractFileStore
75
80
  from toil.jobStores.abstractJobStore import AbstractJobStore
76
- from optparse import OptionParser
77
81
 
78
82
  logger = logging.getLogger(__name__)
79
83
 
@@ -266,7 +270,8 @@ def parse_accelerator(spec: Union[int, str, Dict[str, Union[str, int]]]) -> Acce
266
270
  elif possible_description in APIS:
267
271
  parsed['api'] = possible_description
268
272
  else:
269
- parsed['model'] = possible_description
273
+ if possible_description is not None:
274
+ parsed['model'] = possible_description
270
275
  elif isinstance(spec, dict):
271
276
  # It's a dict, so merge with the defaults.
272
277
  parsed.update(spec)
@@ -423,6 +428,7 @@ class Requirer:
423
428
  raise RuntimeError(f"Config assigned multiple times to {self}")
424
429
  self._config = config
425
430
 
431
+
426
432
  def __getstate__(self) -> Dict[str, Any]:
427
433
  """Return the dict to use as the instance's __dict__ when pickling."""
428
434
  # We want to exclude the config from pickling.
@@ -449,19 +455,15 @@ class Requirer:
449
455
 
450
456
  def __deepcopy__(self, memo: Any) -> "Requirer":
451
457
  """Return a semantically-deep copy of the object, for :meth:`copy.deepcopy`."""
452
- # See https://stackoverflow.com/a/40484215 for how to do an override
453
- # that uses the base implementation
458
+ # We used to use <https://stackoverflow.com/a/40484215> and
459
+ # <https://stackoverflow.com/a/71125311> but that would result in
460
+ # copies sometimes resurrecting weirdly old job versions. So now we
461
+ # just actually implement __deepcopy__.
454
462
 
455
- # Hide this override
456
- implementation = self.__deepcopy__
457
- self.__deepcopy__ = None # type: ignore[assignment]
458
-
459
- # Do the deepcopy which omits the config via __getstate__ override
460
- clone = copy.deepcopy(self, memo)
461
-
462
- # Put back the override on us and the copy
463
- self.__deepcopy__ = implementation # type: ignore[assignment]
464
- clone.__deepcopy__ = implementation # type: ignore[assignment]
463
+ clone = type(self).__new__(self.__class__)
464
+ state = self.__getstate__()
465
+ clone_state = copy.deepcopy(state, memo)
466
+ clone.__dict__.update(clone_state)
465
467
 
466
468
  if self._config is not None:
467
469
  # Share a config reference
@@ -598,7 +600,8 @@ class Requirer:
598
600
  )
599
601
  return value
600
602
  elif self._config is not None:
601
- value = getattr(self._config, 'default' + requirement.capitalize())
603
+ values = [getattr(self._config, 'default_' + requirement, None), getattr(self._config, 'default' + requirement.capitalize(), None)]
604
+ value = values[0] if values[0] is not None else values[1]
602
605
  if value is None:
603
606
  raise AttributeError(
604
607
  f"Encountered None for default '{requirement}' requirement "
@@ -795,15 +798,27 @@ class JobDescription(Requirer):
795
798
  # default value for this workflow execution.
796
799
  self._remainingTryCount = None
797
800
 
798
- # Holds FileStore FileIDs of the files that this job has deleted. Used
799
- # to journal deletions of files and recover from a worker crash between
800
- # committing a JobDescription update and actually executing the
801
- # requested deletions.
801
+ # Holds FileStore FileIDs of the files that should be seen as deleted,
802
+ # as part of a transaction with the writing of this version of the job
803
+ # to the job store. Used to journal deletions of files and recover from
804
+ # a worker crash between committing a JobDescription update (for
805
+ # example, severing the body of a completed job from the
806
+ # JobDescription) and actually executing the requested deletions (i.e.
807
+ # the deletions made by executing the body).
808
+ #
809
+ # Since the files being deleted might be required to execute the job
810
+ # body, we can't delete them first, but we also don't want to leave
811
+ # them behind if we die right after saving the JobDescription.
812
+ #
813
+ # This will be empty at all times except when a new version of a job is
814
+ # in the process of being committed.
802
815
  self.filesToDelete = []
803
816
 
804
817
  # Holds JobStore Job IDs of the jobs that have been chained into this
805
- # job, and which should be deleted when this job finally is deleted.
806
- self.jobsToDelete = []
818
+ # job, and which should be deleted when this job finally is deleted
819
+ # (but not before). The successor relationships with them will have
820
+ # been cut, so we need to hold onto them somehow.
821
+ self.merged_jobs = []
807
822
 
808
823
  # The number of direct predecessors of the job. Needs to be stored at
809
824
  # the JobDescription to support dynamically-created jobs with multiple
@@ -849,6 +864,8 @@ class JobDescription(Requirer):
849
864
  # Every time we update a job description in place in the job store, we
850
865
  # increment this.
851
866
  self._job_version = 0
867
+ # And we log who made the version (by PID)
868
+ self._job_version_writer = 0
852
869
 
853
870
  # Human-readable names of jobs that were run as part of this job's
854
871
  # invocation, starting with this job
@@ -1027,17 +1044,25 @@ class JobDescription(Requirer):
1027
1044
  logger.debug('%s is adopting successor phases from %s of: %s', self, other, old_phases)
1028
1045
  self.successor_phases = old_phases + self.successor_phases
1029
1046
 
1030
- # TODO: also be able to take on the successors of the other job, under
1031
- # ours on the stack, somehow.
1032
-
1047
+ # When deleting, we need to delete the files for our old ID, and also
1048
+ # anything that needed to be deleted for the job we are replacing.
1049
+ self.merged_jobs += [self.jobStoreID] + other.merged_jobs
1033
1050
  self.jobStoreID = other.jobStoreID
1034
1051
 
1035
- # Save files and jobs to delete from the job we replaced, so we can
1036
- # roll up a whole chain of jobs and delete them when they're all done.
1037
- self.filesToDelete += other.filesToDelete
1038
- self.jobsToDelete += other.jobsToDelete
1052
+ if len(other.filesToDelete) > 0:
1053
+ raise RuntimeError("Trying to take on the ID of a job that is in the process of being committed!")
1054
+ if len(self.filesToDelete) > 0:
1055
+ raise RuntimeError("Trying to take on the ID of anothe job while in the process of being committed!")
1039
1056
 
1040
1057
  self._job_version = other._job_version
1058
+ self._job_version_writer = os.getpid()
1059
+
1060
+ def check_new_version(self, other: "JobDescription") -> None:
1061
+ """
1062
+ Make sure a prospective new version of the JobDescription is actually moving forward in time and not backward.
1063
+ """
1064
+ if other._job_version < self._job_version:
1065
+ raise RuntimeError(f"Cannot replace {self} from PID {self._job_version_writer} with older version {other} from PID {other._job_version_writer}")
1041
1066
 
1042
1067
  def addChild(self, childID: str) -> None:
1043
1068
  """Make the job with the given ID a child of the described job."""
@@ -1055,7 +1080,8 @@ class JobDescription(Requirer):
1055
1080
  first, and must have already been added.
1056
1081
  """
1057
1082
  # Make sure we aren't clobbering something
1058
- assert serviceID not in self.serviceTree
1083
+ if serviceID in self.serviceTree:
1084
+ raise RuntimeError("Job is already in the service tree.")
1059
1085
  self.serviceTree[serviceID] = []
1060
1086
  if parentServiceID is not None:
1061
1087
  self.serviceTree[parentServiceID].append(serviceID)
@@ -1124,9 +1150,11 @@ class JobDescription(Requirer):
1124
1150
  from toil.batchSystems.abstractBatchSystem import BatchJobExitReason
1125
1151
 
1126
1152
  # Old version of this function used to take a config. Make sure that isn't happening.
1127
- assert not isinstance(exit_status, Config), "Passing a Config as an exit status"
1153
+ if isinstance(exit_status, Config):
1154
+ raise RuntimeError("Passing a Config as an exit status.")
1128
1155
  # Make sure we have an assigned config.
1129
- assert self._config is not None
1156
+ if self._config is None:
1157
+ raise RuntimeError("The job's config is not assigned.")
1130
1158
 
1131
1159
  if self._config.enableUnlimitedPreemptibleRetries and exit_reason == BatchJobExitReason.LOST:
1132
1160
  logger.info("*Not* reducing try count (%s) of job %s with ID %s",
@@ -1217,6 +1245,14 @@ class JobDescription(Requirer):
1217
1245
  def __repr__(self):
1218
1246
  return f'{self.__class__.__name__}( **{self.__dict__!r} )'
1219
1247
 
1248
+ def reserve_versions(self, count: int) -> None:
1249
+ """
1250
+ Reserve a job version number for later, for journaling asynchronously.
1251
+ """
1252
+ self._job_version += count
1253
+ self._job_version_writer = os.getpid()
1254
+ logger.debug("Skip ahead to job version: %s", self)
1255
+
1220
1256
  def pre_update_hook(self) -> None:
1221
1257
  """
1222
1258
  Run before pickling and saving a created or updated version of this job.
@@ -1224,6 +1260,7 @@ class JobDescription(Requirer):
1224
1260
  Called by the job store.
1225
1261
  """
1226
1262
  self._job_version += 1
1263
+ self._job_version_writer = os.getpid()
1227
1264
  logger.debug("New job version: %s", self)
1228
1265
 
1229
1266
  def get_job_kind(self) -> str:
@@ -1309,12 +1346,14 @@ class CheckpointJobDescription(JobDescription):
1309
1346
 
1310
1347
  Returns a list with the IDs of any successors deleted.
1311
1348
  """
1312
- assert self.checkpoint is not None
1349
+ if self.checkpoint is None:
1350
+ raise RuntimeError("Cannot restart a checkpoint job. The checkpoint was never set.")
1313
1351
  successorsDeleted = []
1314
1352
  all_successors = list(self.allSuccessors())
1315
1353
  if len(all_successors) > 0 or self.serviceTree or self.command is not None:
1316
1354
  if self.command is not None:
1317
- assert self.command == self.checkpoint
1355
+ if self.command != self.checkpoint:
1356
+ raise RuntimeError("The command and checkpoint are not the same.")
1318
1357
  logger.debug("Checkpoint job already has command set to run")
1319
1358
  else:
1320
1359
  self.command = self.checkpoint
@@ -1600,8 +1639,8 @@ class Job:
1600
1639
 
1601
1640
  :return: childJob: for call chaining
1602
1641
  """
1603
- assert isinstance(childJob, Job)
1604
-
1642
+ if not isinstance(childJob, Job):
1643
+ raise RuntimeError("The type of the child job is not a job.")
1605
1644
  # Join the job graphs
1606
1645
  self._jobGraphsJoined(childJob)
1607
1646
  # Remember the child relationship
@@ -1627,8 +1666,8 @@ class Job:
1627
1666
 
1628
1667
  :return: followOnJob for call chaining
1629
1668
  """
1630
- assert isinstance(followOnJob, Job)
1631
-
1669
+ if not isinstance(followOnJob, Job):
1670
+ raise RuntimeError("The type of the follow-on job is not a job.")
1632
1671
  # Join the job graphs
1633
1672
  self._jobGraphsJoined(followOnJob)
1634
1673
  # Remember the follow-on relationship
@@ -1651,7 +1690,7 @@ class Job:
1651
1690
  return self._description.hasChild(followOnJob.jobStoreID)
1652
1691
 
1653
1692
  def addService(
1654
- self, service: "Service", parentService: Optional["Service"] = None
1693
+ self, service: "Job.Service", parentService: Optional["Job.Service"] = None
1655
1694
  ) -> "Promise":
1656
1695
  """
1657
1696
  Add a service.
@@ -1698,7 +1737,7 @@ class Job:
1698
1737
  # Return the promise for the service's startup result
1699
1738
  return hostingJob.rv()
1700
1739
 
1701
- def hasService(self, service: "Service") -> bool:
1740
+ def hasService(self, service: "Job.Service") -> bool:
1702
1741
  """Return True if the given Service is a service of this job, and False otherwise."""
1703
1742
  return service.hostID is None or self._description.hasServiceHostJob(service.hostID)
1704
1743
 
@@ -1781,8 +1820,8 @@ class Job:
1781
1820
  return self._tempDir
1782
1821
 
1783
1822
  def log(self, text: str, level=logging.INFO) -> None:
1784
- """Log using :func:`fileStore.logToMaster`."""
1785
- self._fileStore.logToMaster(text, level)
1823
+ """Log using :func:`fileStore.log_to_leader`."""
1824
+ self._fileStore.log_to_leader(text, level)
1786
1825
 
1787
1826
  @staticmethod
1788
1827
  def wrapFn(fn, *args, **kwargs) -> "FunctionWrappingJob":
@@ -1991,7 +2030,8 @@ class Job:
1991
2030
  for successor in [self._registry[jID] for jID in self.description.allSuccessors() if jID in self._registry] + extraEdges[self]:
1992
2031
  # Grab all the successors in the current registry (i.e. added form this node) and look at them.
1993
2032
  successor._checkJobGraphAcylicDFS(stack, visited, extraEdges)
1994
- assert stack.pop() == self
2033
+ if stack.pop() != self:
2034
+ raise RuntimeError("The stack ordering/elements was changed.")
1995
2035
  if self in stack:
1996
2036
  stack.append(self)
1997
2037
  raise JobGraphDeadlockException("A cycle of job dependencies has been detected '%s'" % stack)
@@ -2109,37 +2149,49 @@ class Job:
2109
2149
  """Used to setup and run Toil workflow."""
2110
2150
 
2111
2151
  @staticmethod
2112
- def getDefaultArgumentParser() -> ArgumentParser:
2152
+ def getDefaultArgumentParser(jobstore_as_flag: bool = False) -> ArgumentParser:
2113
2153
  """
2114
2154
  Get argument parser with added toil workflow options.
2115
2155
 
2156
+ :param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
2116
2157
  :returns: The argument parser used by a toil workflow with added Toil options.
2117
2158
  """
2118
- parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
2119
- Job.Runner.addToilOptions(parser)
2159
+ parser = ArgParser(formatter_class=ArgumentDefaultsHelpFormatter)
2160
+ Job.Runner.addToilOptions(parser, jobstore_as_flag=jobstore_as_flag)
2120
2161
  return parser
2121
2162
 
2122
2163
  @staticmethod
2123
- def getDefaultOptions(jobStore: str) -> Namespace:
2164
+ def getDefaultOptions(jobStore: Optional[str] = None, jobstore_as_flag: bool = False) -> Namespace:
2124
2165
  """
2125
2166
  Get default options for a toil workflow.
2126
2167
 
2127
2168
  :param jobStore: A string describing the jobStore \
2128
2169
  for the workflow.
2170
+ :param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
2129
2171
  :returns: The options used by a toil workflow.
2130
2172
  """
2131
- parser = Job.Runner.getDefaultArgumentParser()
2132
- return parser.parse_args(args=[jobStore])
2173
+ # setting jobstore_as_flag to True allows the user to declare the jobstore in the config file instead
2174
+ if not jobstore_as_flag and jobStore is None:
2175
+ raise RuntimeError("The jobstore argument cannot be missing if the jobstore_as_flag argument is set "
2176
+ "to False!")
2177
+ parser = Job.Runner.getDefaultArgumentParser(jobstore_as_flag=jobstore_as_flag)
2178
+ arguments = []
2179
+ if jobstore_as_flag and jobStore is not None:
2180
+ arguments = ["--jobstore", jobStore]
2181
+ if not jobstore_as_flag and jobStore is not None:
2182
+ arguments = [jobStore]
2183
+ return parser.parse_args(args=arguments)
2133
2184
 
2134
2185
  @staticmethod
2135
- def addToilOptions(parser: Union["OptionParser", ArgumentParser]) -> None:
2186
+ def addToilOptions(parser: Union["OptionParser", ArgumentParser], jobstore_as_flag: bool = False) -> None:
2136
2187
  """
2137
2188
  Adds the default toil options to an :mod:`optparse` or :mod:`argparse`
2138
2189
  parser object.
2139
2190
 
2140
2191
  :param parser: Options object to add toil options to.
2192
+ :param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
2141
2193
  """
2142
- addOptions(parser)
2194
+ addOptions(parser, jobstore_as_flag=jobstore_as_flag)
2143
2195
 
2144
2196
  @staticmethod
2145
2197
  def startToil(job: "Job", options) -> Any:
@@ -2279,8 +2331,8 @@ class Job:
2279
2331
  unpickler = FilteredUnpickler(fileHandle)
2280
2332
 
2281
2333
  runnable = unpickler.load()
2282
- if requireInstanceOf is not None:
2283
- assert isinstance(runnable, requireInstanceOf), f"Did not find a {requireInstanceOf} when expected"
2334
+ if requireInstanceOf is not None and not isinstance(runnable, requireInstanceOf):
2335
+ raise RuntimeError(f"Did not find a {requireInstanceOf} when expected")
2284
2336
 
2285
2337
  return runnable
2286
2338
 
@@ -2450,7 +2502,8 @@ class Job:
2450
2502
 
2451
2503
  # We can't save the job in the right place for cleanup unless the
2452
2504
  # description has a real ID.
2453
- assert not isinstance(self.jobStoreID, TemporaryID), f"Tried to save job {self} without ID assigned!"
2505
+ if isinstance(self.jobStoreID, TemporaryID):
2506
+ raise RuntimeError(f"Tried to save job {self} without ID assigned!")
2454
2507
 
2455
2508
  # Note that we can't accept any more requests for our return value
2456
2509
  self._disablePromiseRegistration()
@@ -2553,10 +2606,11 @@ class Job:
2553
2606
  # Set up to save last job first, so promises flow the right way
2554
2607
  ordering.reverse()
2555
2608
 
2556
- logger.info("Saving graph of %d jobs, %d non-service, %d new", len(allJobs), len(ordering), len(fakeToReal))
2609
+ logger.debug("Saving graph of %d jobs, %d non-service, %d new", len(allJobs), len(ordering), len(fakeToReal))
2557
2610
 
2558
2611
  # Make sure we're the root
2559
- assert ordering[-1] == self
2612
+ if ordering[-1] != self:
2613
+ raise RuntimeError("The current job is not the root.")
2560
2614
 
2561
2615
  # Don't verify the ordering length: it excludes service host jobs.
2562
2616
  ordered_ids = {o.jobStoreID for o in ordering}
@@ -2572,17 +2626,17 @@ class Job:
2572
2626
  self._fulfillPromises(returnValues, jobStore)
2573
2627
 
2574
2628
  for job in ordering:
2575
- logger.info("Processing job %s", job.description)
2629
+ logger.debug("Processing job %s", job.description)
2576
2630
  for serviceBatch in reversed(list(job.description.serviceHostIDsInBatches())):
2577
2631
  # For each batch of service host jobs in reverse order they start
2578
2632
  for serviceID in serviceBatch:
2579
- logger.info("Processing service %s", serviceID)
2633
+ logger.debug("Processing service %s", serviceID)
2580
2634
  if serviceID in self._registry:
2581
2635
  # It's a new service
2582
2636
 
2583
2637
  # Find the actual job
2584
2638
  serviceJob = self._registry[serviceID]
2585
- logger.info("Saving service %s", serviceJob.description)
2639
+ logger.debug("Saving service %s", serviceJob.description)
2586
2640
  # Pickle the service body, which triggers all the promise stuff
2587
2641
  serviceJob.saveBody(jobStore)
2588
2642
  if job != self or saveSelf:
@@ -2641,7 +2695,8 @@ class Job:
2641
2695
  command = jobDescription.command
2642
2696
 
2643
2697
  commandTokens = command.split()
2644
- assert "_toil" == commandTokens[0]
2698
+ if "_toil" != commandTokens[0]:
2699
+ raise RuntimeError("An invalid command was passed into the job.")
2645
2700
  userModule = ModuleDescriptor.fromCommand(commandTokens[2:])
2646
2701
  logger.debug('Loading user module %s.', userModule)
2647
2702
  userModule = cls._loadUserModule(userModule)
@@ -3025,22 +3080,23 @@ class EncapsulatedJob(Job):
3025
3080
  self.encapsulatedFollowOn = None
3026
3081
 
3027
3082
  def addChild(self, childJob):
3028
- assert self.encapsulatedFollowOn is not None, \
3029
- "Children cannot be added to EncapsulatedJob while it is running"
3083
+ if self.encapsulatedFollowOn is None:
3084
+ raise RuntimeError("Children cannot be added to EncapsulatedJob while it is running")
3030
3085
  return Job.addChild(self.encapsulatedFollowOn, childJob)
3031
3086
 
3032
3087
  def addService(self, service, parentService=None):
3033
- assert self.encapsulatedFollowOn is not None, \
3034
- "Services cannot be added to EncapsulatedJob while it is running"
3088
+ if self.encapsulatedFollowOn is None:
3089
+ raise RuntimeError("Services cannot be added to EncapsulatedJob while it is running")
3035
3090
  return Job.addService(self.encapsulatedFollowOn, service, parentService=parentService)
3036
3091
 
3037
3092
  def addFollowOn(self, followOnJob):
3038
- assert self.encapsulatedFollowOn is not None, \
3039
- "Follow-ons cannot be added to EncapsulatedJob while it is running"
3093
+ if self.encapsulatedFollowOn is None:
3094
+ raise RuntimeError("Follow-ons cannot be added to EncapsulatedJob while it is running")
3040
3095
  return Job.addFollowOn(self.encapsulatedFollowOn, followOnJob)
3041
3096
 
3042
3097
  def rv(self, *path) -> "Promise":
3043
- assert self.encapsulatedJob is not None
3098
+ if self.encapsulatedJob is None:
3099
+ raise RuntimeError("The encapsulated job was not set.")
3044
3100
  return self.encapsulatedJob.rv(*path)
3045
3101
 
3046
3102
  def prepareForPromiseRegistration(self, jobStore):
@@ -3052,7 +3108,8 @@ class EncapsulatedJob(Job):
3052
3108
  self.encapsulatedJob.prepareForPromiseRegistration(jobStore)
3053
3109
 
3054
3110
  def _disablePromiseRegistration(self):
3055
- assert self.encapsulatedJob is not None
3111
+ if self.encapsulatedJob is None:
3112
+ raise RuntimeError("The encapsulated job was not set.")
3056
3113
  super()._disablePromiseRegistration()
3057
3114
  self.encapsulatedJob._disablePromiseRegistration()
3058
3115
 
@@ -3068,7 +3125,8 @@ class EncapsulatedJob(Job):
3068
3125
  return self.__class__, (None,)
3069
3126
 
3070
3127
  def getUserScript(self):
3071
- assert self.encapsulatedJob is not None
3128
+ if self.encapsulatedJob is None:
3129
+ raise RuntimeError("The encapsulated job was not set.")
3072
3130
  return self.encapsulatedJob.getUserScript()
3073
3131
 
3074
3132
 
@@ -3085,7 +3143,8 @@ class ServiceHostJob(Job):
3085
3143
  """
3086
3144
 
3087
3145
  # Make sure the service hasn't been given a host already.
3088
- assert service.hostID is None
3146
+ if service.hostID is not None:
3147
+ raise RuntimeError("Cannot set the host. The service has already been given a host.")
3089
3148
 
3090
3149
  # Make ourselves with name info from the Service and a
3091
3150
  # ServiceJobDescription that has the service control flags.
@@ -3172,14 +3231,17 @@ class ServiceHostJob(Job):
3172
3231
 
3173
3232
  #Now flag that the service is running jobs can connect to it
3174
3233
  logger.debug("Removing the start jobStoreID to indicate that establishment of the service")
3175
- assert self.description.startJobStoreID != None
3234
+ if self.description.startJobStoreID is None:
3235
+ raise RuntimeError("No start jobStoreID to remove.")
3176
3236
  if fileStore.jobStore.file_exists(self.description.startJobStoreID):
3177
3237
  fileStore.jobStore.delete_file(self.description.startJobStoreID)
3178
- assert not fileStore.jobStore.file_exists(self.description.startJobStoreID)
3238
+ if fileStore.jobStore.file_exists(self.description.startJobStoreID):
3239
+ raise RuntimeError("The start jobStoreID is not a file.")
3179
3240
 
3180
3241
  #Now block until we are told to stop, which is indicated by the removal
3181
3242
  #of a file
3182
- assert self.description.terminateJobStoreID != None
3243
+ if self.description.terminateJobStoreID is None:
3244
+ raise RuntimeError("No terminate jobStoreID to use.")
3183
3245
  while True:
3184
3246
  # Check for the terminate signal
3185
3247
  if not fileStore.jobStore.file_exists(self.description.terminateJobStoreID):
@@ -3273,7 +3335,8 @@ class Promise:
3273
3335
  @staticmethod
3274
3336
  def __new__(cls, *args) -> "Promise":
3275
3337
  """Instantiate this Promise."""
3276
- assert len(args) == 2
3338
+ if len(args) != 2:
3339
+ raise RuntimeError("Cannot instantiate promise. Invalid number of arguments given (Expected 2).")
3277
3340
  if isinstance(args[0], Job):
3278
3341
  # Regular instantiation when promise is created, before it is being pickled
3279
3342
  return super().__new__(cls)
@@ -3357,10 +3420,12 @@ class PromisedRequirement:
3357
3420
  :type args: int or .Promise
3358
3421
  """
3359
3422
  if hasattr(valueOrCallable, '__call__'):
3360
- assert len(args) != 0, 'Need parameters for PromisedRequirement function.'
3423
+ if len(args) == 0:
3424
+ raise RuntimeError('Need parameters for PromisedRequirement function.')
3361
3425
  func = valueOrCallable
3362
3426
  else:
3363
- assert len(args) == 0, 'Define a PromisedRequirement function to handle multiple arguments.'
3427
+ if len(args) != 0:
3428
+ raise RuntimeError('Define a PromisedRequirement function to handle multiple arguments.')
3364
3429
  func = lambda x: x
3365
3430
  args = [valueOrCallable]
3366
3431