toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. toil/__init__.py +18 -13
  2. toil/batchSystems/abstractBatchSystem.py +39 -13
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
  4. toil/batchSystems/awsBatch.py +14 -14
  5. toil/batchSystems/cleanup_support.py +7 -3
  6. toil/batchSystems/contained_executor.py +3 -3
  7. toil/batchSystems/htcondor.py +0 -1
  8. toil/batchSystems/kubernetes.py +34 -31
  9. toil/batchSystems/local_support.py +3 -1
  10. toil/batchSystems/lsf.py +7 -7
  11. toil/batchSystems/mesos/batchSystem.py +7 -7
  12. toil/batchSystems/options.py +32 -83
  13. toil/batchSystems/registry.py +104 -23
  14. toil/batchSystems/singleMachine.py +16 -13
  15. toil/batchSystems/slurm.py +87 -16
  16. toil/batchSystems/torque.py +0 -1
  17. toil/bus.py +44 -8
  18. toil/common.py +544 -753
  19. toil/cwl/__init__.py +28 -32
  20. toil/cwl/cwltoil.py +595 -574
  21. toil/cwl/utils.py +55 -10
  22. toil/exceptions.py +1 -1
  23. toil/fileStores/__init__.py +2 -2
  24. toil/fileStores/abstractFileStore.py +88 -14
  25. toil/fileStores/cachingFileStore.py +610 -549
  26. toil/fileStores/nonCachingFileStore.py +46 -22
  27. toil/job.py +182 -101
  28. toil/jobStores/abstractJobStore.py +161 -95
  29. toil/jobStores/aws/jobStore.py +23 -9
  30. toil/jobStores/aws/utils.py +6 -6
  31. toil/jobStores/fileJobStore.py +116 -18
  32. toil/jobStores/googleJobStore.py +16 -7
  33. toil/jobStores/utils.py +5 -6
  34. toil/leader.py +87 -56
  35. toil/lib/accelerators.py +10 -5
  36. toil/lib/aws/__init__.py +3 -14
  37. toil/lib/aws/ami.py +22 -9
  38. toil/lib/aws/iam.py +21 -13
  39. toil/lib/aws/session.py +2 -16
  40. toil/lib/aws/utils.py +4 -5
  41. toil/lib/compatibility.py +1 -1
  42. toil/lib/conversions.py +26 -3
  43. toil/lib/docker.py +22 -23
  44. toil/lib/ec2.py +10 -6
  45. toil/lib/ec2nodes.py +106 -100
  46. toil/lib/encryption/_nacl.py +2 -1
  47. toil/lib/generatedEC2Lists.py +325 -18
  48. toil/lib/io.py +49 -2
  49. toil/lib/misc.py +1 -1
  50. toil/lib/resources.py +9 -2
  51. toil/lib/threading.py +101 -38
  52. toil/options/common.py +736 -0
  53. toil/options/cwl.py +336 -0
  54. toil/options/wdl.py +37 -0
  55. toil/provisioners/abstractProvisioner.py +9 -4
  56. toil/provisioners/aws/__init__.py +3 -6
  57. toil/provisioners/aws/awsProvisioner.py +6 -0
  58. toil/provisioners/clusterScaler.py +3 -2
  59. toil/provisioners/gceProvisioner.py +2 -2
  60. toil/realtimeLogger.py +2 -1
  61. toil/resource.py +24 -18
  62. toil/server/app.py +2 -3
  63. toil/server/cli/wes_cwl_runner.py +4 -4
  64. toil/server/utils.py +1 -1
  65. toil/server/wes/abstract_backend.py +3 -2
  66. toil/server/wes/amazon_wes_utils.py +5 -4
  67. toil/server/wes/tasks.py +2 -3
  68. toil/server/wes/toil_backend.py +2 -10
  69. toil/server/wsgi_app.py +2 -0
  70. toil/serviceManager.py +12 -10
  71. toil/statsAndLogging.py +41 -9
  72. toil/test/__init__.py +29 -54
  73. toil/test/batchSystems/batchSystemTest.py +11 -111
  74. toil/test/batchSystems/test_slurm.py +24 -8
  75. toil/test/cactus/__init__.py +0 -0
  76. toil/test/cactus/test_cactus_integration.py +58 -0
  77. toil/test/cwl/cwlTest.py +438 -223
  78. toil/test/cwl/glob_dir.cwl +15 -0
  79. toil/test/cwl/preemptible.cwl +21 -0
  80. toil/test/cwl/preemptible_expression.cwl +28 -0
  81. toil/test/cwl/revsort.cwl +1 -1
  82. toil/test/cwl/revsort2.cwl +1 -1
  83. toil/test/docs/scriptsTest.py +2 -3
  84. toil/test/jobStores/jobStoreTest.py +34 -21
  85. toil/test/lib/aws/test_iam.py +4 -14
  86. toil/test/lib/aws/test_utils.py +0 -3
  87. toil/test/lib/dockerTest.py +4 -4
  88. toil/test/lib/test_ec2.py +12 -17
  89. toil/test/mesos/helloWorld.py +4 -5
  90. toil/test/mesos/stress.py +1 -1
  91. toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
  92. toil/test/options/options.py +37 -0
  93. toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
  94. toil/test/provisioners/clusterScalerTest.py +6 -4
  95. toil/test/provisioners/clusterTest.py +23 -11
  96. toil/test/provisioners/gceProvisionerTest.py +0 -6
  97. toil/test/provisioners/restartScript.py +3 -2
  98. toil/test/server/serverTest.py +1 -1
  99. toil/test/sort/restart_sort.py +2 -1
  100. toil/test/sort/sort.py +2 -1
  101. toil/test/sort/sortTest.py +2 -13
  102. toil/test/src/autoDeploymentTest.py +45 -45
  103. toil/test/src/busTest.py +5 -5
  104. toil/test/src/checkpointTest.py +2 -2
  105. toil/test/src/deferredFunctionTest.py +1 -1
  106. toil/test/src/fileStoreTest.py +32 -16
  107. toil/test/src/helloWorldTest.py +1 -1
  108. toil/test/src/importExportFileTest.py +1 -1
  109. toil/test/src/jobDescriptionTest.py +2 -1
  110. toil/test/src/jobServiceTest.py +1 -1
  111. toil/test/src/jobTest.py +18 -18
  112. toil/test/src/miscTests.py +5 -3
  113. toil/test/src/promisedRequirementTest.py +3 -3
  114. toil/test/src/realtimeLoggerTest.py +1 -1
  115. toil/test/src/resourceTest.py +2 -2
  116. toil/test/src/restartDAGTest.py +1 -1
  117. toil/test/src/resumabilityTest.py +36 -2
  118. toil/test/src/retainTempDirTest.py +1 -1
  119. toil/test/src/systemTest.py +2 -2
  120. toil/test/src/toilContextManagerTest.py +2 -2
  121. toil/test/src/userDefinedJobArgTypeTest.py +1 -1
  122. toil/test/utils/toilDebugTest.py +98 -32
  123. toil/test/utils/toilKillTest.py +2 -2
  124. toil/test/utils/utilsTest.py +23 -3
  125. toil/test/wdl/wdltoil_test.py +223 -45
  126. toil/toilState.py +7 -6
  127. toil/utils/toilClean.py +1 -1
  128. toil/utils/toilConfig.py +36 -0
  129. toil/utils/toilDebugFile.py +60 -33
  130. toil/utils/toilDebugJob.py +39 -12
  131. toil/utils/toilDestroyCluster.py +1 -1
  132. toil/utils/toilKill.py +1 -1
  133. toil/utils/toilLaunchCluster.py +13 -2
  134. toil/utils/toilMain.py +3 -2
  135. toil/utils/toilRsyncCluster.py +1 -1
  136. toil/utils/toilSshCluster.py +1 -1
  137. toil/utils/toilStats.py +445 -305
  138. toil/utils/toilStatus.py +2 -5
  139. toil/version.py +10 -10
  140. toil/wdl/utils.py +2 -122
  141. toil/wdl/wdltoil.py +1257 -492
  142. toil/worker.py +55 -46
  143. toil-6.1.0.dist-info/METADATA +124 -0
  144. toil-6.1.0.dist-info/RECORD +241 -0
  145. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
  146. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
  147. toil/batchSystems/parasol.py +0 -379
  148. toil/batchSystems/tes.py +0 -459
  149. toil/test/batchSystems/parasolTestSupport.py +0 -117
  150. toil/test/wdl/builtinTest.py +0 -506
  151. toil/test/wdl/toilwdlTest.py +0 -522
  152. toil/wdl/toilwdl.py +0 -141
  153. toil/wdl/versions/dev.py +0 -107
  154. toil/wdl/versions/draft2.py +0 -980
  155. toil/wdl/versions/v1.py +0 -794
  156. toil/wdl/wdl_analysis.py +0 -116
  157. toil/wdl/wdl_functions.py +0 -997
  158. toil/wdl/wdl_synthesis.py +0 -1011
  159. toil/wdl/wdl_types.py +0 -243
  160. toil-5.12.0.dist-info/METADATA +0 -118
  161. toil-5.12.0.dist-info/RECORD +0 -244
  162. /toil/{wdl/versions → options}/__init__.py +0 -0
  163. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
  164. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/job.py CHANGED
@@ -43,12 +43,16 @@ from typing import (TYPE_CHECKING,
                     cast,
                     overload)
 
+from configargparse import ArgParser
+
+from toil.bus import Names
 from toil.lib.compatibility import deprecated
 
 if sys.version_info >= (3, 8):
     from typing import TypedDict
 else:
     from typing_extensions import TypedDict
+
 import dill
 # TODO: When this gets into the standard library, get it from there and drop
 # typing-extensions dependency on Pythons that are new enough.
@@ -70,10 +74,11 @@ from toil.resource import ModuleDescriptor
 from toil.statsAndLogging import set_logging_from_options
 
 if TYPE_CHECKING:
+    from optparse import OptionParser
+
     from toil.batchSystems.abstractBatchSystem import BatchJobExitReason
     from toil.fileStores.abstractFileStore import AbstractFileStore
     from toil.jobStores.abstractJobStore import AbstractJobStore
-    from optparse import OptionParser
 
 logger = logging.getLogger(__name__)
 
@@ -266,7 +271,8 @@ def parse_accelerator(spec: Union[int, str, Dict[str, Union[str, int]]]) -> Acce
         elif possible_description in APIS:
             parsed['api'] = possible_description
         else:
-            parsed['model'] = possible_description
+            if possible_description is not None:
+                parsed['model'] = possible_description
     elif isinstance(spec, dict):
         # It's a dict, so merge with the defaults.
         parsed.update(spec)
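
For context: the guarded branch above stops parse_accelerator from recording a missing spec part as a model. A minimal before/after sketch (the `parsed` defaults shown are assumptions; they are set earlier in the function, outside this hunk):

    parsed = {'count': 1, 'kind': 'gpu'}  # assumed defaults from earlier in parse_accelerator
    possible_description = None           # a spec part that matched neither a kind nor an API

    # 5.12.0 stored the part unconditionally, so a None part yielded {'model': None};
    # 6.1.0 only records an actual string.
    if possible_description is not None:
        parsed['model'] = possible_description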
@@ -423,6 +429,7 @@ class Requirer:
             raise RuntimeError(f"Config assigned multiple times to {self}")
         self._config = config
 
+
     def __getstate__(self) -> Dict[str, Any]:
         """Return the dict to use as the instance's __dict__ when pickling."""
         # We want to exclude the config from pickling.
@@ -449,19 +456,15 @@ class Requirer:
 
     def __deepcopy__(self, memo: Any) -> "Requirer":
         """Return a semantically-deep copy of the object, for :meth:`copy.deepcopy`."""
-        # See https://stackoverflow.com/a/40484215 for how to do an override
-        # that uses the base implementation
-
-        # Hide this override
-        implementation = self.__deepcopy__
-        self.__deepcopy__ = None  # type: ignore[assignment]
-
-        # Do the deepcopy which omits the config via __getstate__ override
-        clone = copy.deepcopy(self, memo)
+        # We used to use <https://stackoverflow.com/a/40484215> and
+        # <https://stackoverflow.com/a/71125311> but that would result in
+        # copies sometimes resurrecting weirdly old job versions. So now we
+        # just actually implement __deepcopy__.
 
-        # Put back the override on us and the copy
-        self.__deepcopy__ = implementation  # type: ignore[assignment]
-        clone.__deepcopy__ = implementation  # type: ignore[assignment]
+        clone = type(self).__new__(self.__class__)
+        state = self.__getstate__()
+        clone_state = copy.deepcopy(state, memo)
+        clone.__dict__.update(clone_state)
 
         if self._config is not None:
             # Share a config reference
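
The new __deepcopy__ drops the old hide-the-override trick in favor of copying the __getstate__ dict directly. The same pattern, as a standalone runnable sketch for any class that excludes a shared field from its pickled state (class and field names here are illustrative, not Toil's):

    import copy

    class StateCopied:
        """Deep-copies via __getstate__ so excluded fields never leak into clones."""

        def __init__(self, data, session=None):
            self.data = data
            self._session = session  # shared/unpicklable, like Requirer._config

        def __getstate__(self):
            state = self.__dict__.copy()
            state['_session'] = None  # exclude the shared field from copies
            return state

        def __deepcopy__(self, memo):
            # Build an empty instance, then deep-copy only the filtered state.
            clone = type(self).__new__(self.__class__)
            clone.__dict__.update(copy.deepcopy(self.__getstate__(), memo))
            return clone

    original = StateCopied({'a': [1, 2]}, session=object())
    duplicate = copy.deepcopy(original)
    assert duplicate.data == original.data and duplicate._session is None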
@@ -598,7 +601,8 @@ class Requirer:
             )
             return value
         elif self._config is not None:
-            value = getattr(self._config, 'default' + requirement.capitalize())
+            values = [getattr(self._config, 'default_' + requirement, None), getattr(self._config, 'default' + requirement.capitalize(), None)]
+            value = values[0] if values[0] is not None else values[1]
             if value is None:
                 raise AttributeError(
                     f"Encountered None for default '{requirement}' requirement "
@@ -707,7 +711,6 @@ class Requirer:
             parts = ['no requirements']
         return ', '.join(parts)
 
-
 class JobDescription(Requirer):
     """
     Stores all the information that the Toil Leader ever needs to know about a Job.
@@ -795,15 +798,30 @@ class JobDescription(Requirer):
         # default value for this workflow execution.
         self._remainingTryCount = None
 
-        # Holds FileStore FileIDs of the files that this job has deleted. Used
-        # to journal deletions of files and recover from a worker crash between
-        # committing a JobDescription update and actually executing the
-        # requested deletions.
+        # Holds FileStore FileIDs of the files that should be seen as deleted,
+        # as part of a transaction with the writing of this version of the job
+        # to the job store. Used to journal deletions of files and recover from
+        # a worker crash between committing a JobDescription update (for
+        # example, severing the body of a completed job from the
+        # JobDescription) and actually executing the requested deletions (i.e.
+        # the deletions made by executing the body).
+        #
+        # Since the files being deleted might be required to execute the job
+        # body, we can't delete them first, but we also don't want to leave
+        # them behind if we die right after saving the JobDescription.
+        #
+        # This will be empty at all times except when a new version of a job is
+        # in the process of being committed.
         self.filesToDelete = []
 
-        # Holds JobStore Job IDs of the jobs that have been chained into this
-        # job, and which should be deleted when this job finally is deleted.
-        self.jobsToDelete = []
+        # Holds job names and IDs of the jobs that have been chained into this
+        # job, and which should be deleted when this job finally is deleted
+        # (but not before). The successor relationships with them will have
+        # been cut, so we need to hold onto them somehow. Includes each
+        # chained-in job with its original ID, and also this job's ID with its
+        # original names, or is empty if no chaining has happened.
+        # The first job in the chain comes first in the list.
+        self._merged_job_names: List[Names] = []
 
         # The number of direct predecessors of the job. Needs to be stored at
         # the JobDescription to support dynamically-created jobs with multiple
@@ -849,10 +867,29 @@ class JobDescription(Requirer):
         # Every time we update a job description in place in the job store, we
         # increment this.
         self._job_version = 0
+        # And we log who made the version (by PID)
+        self._job_version_writer = 0
+
+    def get_names(self) -> Names:
+        """
+        Get the names and ID of this job as a named tuple.
+        """
+        return Names(self.jobName, self.unitName, self.displayName, self.displayName, str(self.jobStoreID))
 
-        # Human-readable names of jobs that were run as part of this job's
-        # invocation, starting with this job
-        self.chainedJobs = []
+    def get_chain(self) -> List[Names]:
+        """
+        Get all the jobs that executed in this job's chain, in order.
+
+        For each job, produces a named tuple with its various names and its
+        original job store ID. The jobs in the chain are in execution order.
+
+        If the job hasn't run yet or it didn't chain, produces a one-item list.
+        """
+        if len(self._merged_job_names) == 0:
+            # We haven't merged so we're just ourselves.
+            return [self.get_names()]
+        else:
+            return list(self._merged_job_names)
 
     def serviceHostIDsInBatches(self) -> Iterator[List[str]]:
         """
@@ -1027,17 +1064,40 @@ class JobDescription(Requirer):
         logger.debug('%s is adopting successor phases from %s of: %s', self, other, old_phases)
         self.successor_phases = old_phases + self.successor_phases
 
-        # TODO: also be able to take on the successors of the other job, under
-        # ours on the stack, somehow.
-
+        # When deleting, we need to delete the files for our old ID, and also
+        # anything that needed to be deleted for the job we are replacing. And
+        # we need to keep track of all the names of jobs involved for logging.
+
+        # We need first the job we are merging into if nothing has merged into
+        # it yet, then anything that already merged into it (including it),
+        # then us if nothing has yet merged into us, then anything that merged
+        # into us (inclusing us)
+        _merged_job_names = []
+        if len(other._merged_job_names) == 0:
+            _merged_job_names.append(other.get_names())
+        _merged_job_names += other._merged_job_names
+        if len(self._merged_job_names) == 0:
+            _merged_job_names.append(self.get_names())
+        _merged_job_names += self._merged_job_names
+        self._merged_job_names = _merged_job_names
+
+        # Now steal its ID.
         self.jobStoreID = other.jobStoreID
 
-        # Save files and jobs to delete from the job we replaced, so we can
-        # roll up a whole chain of jobs and delete them when they're all done.
-        self.filesToDelete += other.filesToDelete
-        self.jobsToDelete += other.jobsToDelete
+        if len(other.filesToDelete) > 0:
+            raise RuntimeError("Trying to take on the ID of a job that is in the process of being committed!")
+        if len(self.filesToDelete) > 0:
+            raise RuntimeError("Trying to take on the ID of anothe job while in the process of being committed!")
 
         self._job_version = other._job_version
+        self._job_version_writer = os.getpid()
+
+    def check_new_version(self, other: "JobDescription") -> None:
+        """
+        Make sure a prospective new version of the JobDescription is actually moving forward in time and not backward.
+        """
+        if other._job_version < self._job_version:
+            raise RuntimeError(f"Cannot replace {self} from PID {self._job_version_writer} with older version {other} from PID {other._job_version_writer}")
 
     def addChild(self, childID: str) -> None:
         """Make the job with the given ID a child of the described job."""
@@ -1055,7 +1115,8 @@ class JobDescription(Requirer):
         first, and must have already been added.
         """
         # Make sure we aren't clobbering something
-        assert serviceID not in self.serviceTree
+        if serviceID in self.serviceTree:
+            raise RuntimeError("Job is already in the service tree.")
         self.serviceTree[serviceID] = []
         if parentServiceID is not None:
             self.serviceTree[parentServiceID].append(serviceID)
@@ -1124,9 +1185,11 @@ class JobDescription(Requirer):
         from toil.batchSystems.abstractBatchSystem import BatchJobExitReason
 
         # Old version of this function used to take a config. Make sure that isn't happening.
-        assert not isinstance(exit_status, Config), "Passing a Config as an exit status"
+        if isinstance(exit_status, Config):
+            raise RuntimeError("Passing a Config as an exit status.")
         # Make sure we have an assigned config.
-        assert self._config is not None
+        if self._config is None:
+            raise RuntimeError("The job's config is not assigned.")
 
         if self._config.enableUnlimitedPreemptibleRetries and exit_reason == BatchJobExitReason.LOST:
             logger.info("*Not* reducing try count (%s) of job %s with ID %s",
@@ -1217,6 +1280,14 @@ class JobDescription(Requirer):
     def __repr__(self):
         return f'{self.__class__.__name__}( **{self.__dict__!r} )'
 
+    def reserve_versions(self, count: int) -> None:
+        """
+        Reserve a job version number for later, for journaling asynchronously.
+        """
+        self._job_version += count
+        self._job_version_writer = os.getpid()
+        logger.debug("Skip ahead to job version: %s", self)
+
     def pre_update_hook(self) -> None:
         """
         Run before pickling and saving a created or updated version of this job.
@@ -1224,28 +1295,9 @@ class JobDescription(Requirer):
         Called by the job store.
         """
         self._job_version += 1
+        self._job_version_writer = os.getpid()
         logger.debug("New job version: %s", self)
 
-    def get_job_kind(self) -> str:
-        """
-        Return an identifying string for the job.
-
-        The result may contain spaces.
-
-        Returns: Either the unit name, job name, or display name, which identifies
-                 the kind of job it is to toil.
-                 Otherwise "Unknown Job" in case no identifier is available
-        """
-        if self.unitName:
-            return self.unitName
-        elif self.jobName:
-            return self.jobName
-        elif self.displayName:
-            return self.displayName
-        else:
-            return "Unknown Job"
-
-
 class ServiceJobDescription(JobDescription):
     """A description of a job that hosts a service."""
 
@@ -1309,12 +1361,14 @@ class CheckpointJobDescription(JobDescription):
 
         Returns a list with the IDs of any successors deleted.
         """
-        assert self.checkpoint is not None
+        if self.checkpoint is None:
+            raise RuntimeError("Cannot restart a checkpoint job. The checkpoint was never set.")
         successorsDeleted = []
         all_successors = list(self.allSuccessors())
         if len(all_successors) > 0 or self.serviceTree or self.command is not None:
             if self.command is not None:
-                assert self.command == self.checkpoint
+                if self.command != self.checkpoint:
+                    raise RuntimeError("The command and checkpoint are not the same.")
                 logger.debug("Checkpoint job already has command set to run")
             else:
                 self.command = self.checkpoint
@@ -1600,8 +1654,8 @@ class Job:
 
         :return: childJob: for call chaining
         """
-        assert isinstance(childJob, Job)
-
+        if not isinstance(childJob, Job):
+            raise RuntimeError("The type of the child job is not a job.")
         # Join the job graphs
         self._jobGraphsJoined(childJob)
         # Remember the child relationship
@@ -1627,8 +1681,8 @@ class Job:
 
         :return: followOnJob for call chaining
         """
-        assert isinstance(followOnJob, Job)
-
+        if not isinstance(followOnJob, Job):
+            raise RuntimeError("The type of the follow-on job is not a job.")
         # Join the job graphs
         self._jobGraphsJoined(followOnJob)
         # Remember the follow-on relationship
@@ -1651,7 +1705,7 @@ class Job:
         return self._description.hasChild(followOnJob.jobStoreID)
 
     def addService(
-        self, service: "Service", parentService: Optional["Service"] = None
+        self, service: "Job.Service", parentService: Optional["Job.Service"] = None
     ) -> "Promise":
         """
         Add a service.
@@ -1698,7 +1752,7 @@ class Job:
         # Return the promise for the service's startup result
         return hostingJob.rv()
 
-    def hasService(self, service: "Service") -> bool:
+    def hasService(self, service: "Job.Service") -> bool:
         """Return True if the given Service is a service of this job, and False otherwise."""
         return service.hostID is None or self._description.hasServiceHostJob(service.hostID)
 
@@ -1781,8 +1835,8 @@ class Job:
         return self._tempDir
 
     def log(self, text: str, level=logging.INFO) -> None:
-        """Log using :func:`fileStore.logToMaster`."""
-        self._fileStore.logToMaster(text, level)
+        """Log using :func:`fileStore.log_to_leader`."""
+        self._fileStore.log_to_leader(text, level)
 
     @staticmethod
     def wrapFn(fn, *args, **kwargs) -> "FunctionWrappingJob":
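
Caller-facing behavior of Job.log is unchanged by the rename; a minimal sketch of a job body using it (the job class itself is illustrative):

    class ExampleJob(Job):
        def run(self, fileStore):
            # Forwards to the renamed fileStore.log_to_leader
            # (formerly fileStore.logToMaster).
            self.log("starting work")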
@@ -1991,7 +2045,8 @@ class Job:
         for successor in [self._registry[jID] for jID in self.description.allSuccessors() if jID in self._registry] + extraEdges[self]:
             # Grab all the successors in the current registry (i.e. added form this node) and look at them.
             successor._checkJobGraphAcylicDFS(stack, visited, extraEdges)
-        assert stack.pop() == self
+        if stack.pop() != self:
+            raise RuntimeError("The stack ordering/elements was changed.")
         if self in stack:
             stack.append(self)
             raise JobGraphDeadlockException("A cycle of job dependencies has been detected '%s'" % stack)
@@ -2109,37 +2164,49 @@ class Job:
         """Used to setup and run Toil workflow."""
 
         @staticmethod
-        def getDefaultArgumentParser() -> ArgumentParser:
+        def getDefaultArgumentParser(jobstore_as_flag: bool = False) -> ArgumentParser:
             """
             Get argument parser with added toil workflow options.
 
+            :param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
             :returns: The argument parser used by a toil workflow with added Toil options.
             """
-            parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
-            Job.Runner.addToilOptions(parser)
+            parser = ArgParser(formatter_class=ArgumentDefaultsHelpFormatter)
+            Job.Runner.addToilOptions(parser, jobstore_as_flag=jobstore_as_flag)
             return parser
 
         @staticmethod
-        def getDefaultOptions(jobStore: str) -> Namespace:
+        def getDefaultOptions(jobStore: Optional[str] = None, jobstore_as_flag: bool = False) -> Namespace:
             """
             Get default options for a toil workflow.
 
             :param jobStore: A string describing the jobStore \
                    for the workflow.
+            :param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
             :returns: The options used by a toil workflow.
             """
-            parser = Job.Runner.getDefaultArgumentParser()
-            return parser.parse_args(args=[jobStore])
+            # setting jobstore_as_flag to True allows the user to declare the jobstore in the config file instead
+            if not jobstore_as_flag and jobStore is None:
+                raise RuntimeError("The jobstore argument cannot be missing if the jobstore_as_flag argument is set "
+                                   "to False!")
+            parser = Job.Runner.getDefaultArgumentParser(jobstore_as_flag=jobstore_as_flag)
+            arguments = []
+            if jobstore_as_flag and jobStore is not None:
+                arguments = ["--jobstore", jobStore]
+            if not jobstore_as_flag and jobStore is not None:
+                arguments = [jobStore]
+            return parser.parse_args(args=arguments)
 
         @staticmethod
-        def addToilOptions(parser: Union["OptionParser", ArgumentParser]) -> None:
+        def addToilOptions(parser: Union["OptionParser", ArgumentParser], jobstore_as_flag: bool = False) -> None:
             """
             Adds the default toil options to an :mod:`optparse` or :mod:`argparse`
             parser object.
 
             :param parser: Options object to add toil options to.
+            :param jobstore_as_flag: make the job store option a --jobStore flag instead of a required jobStore positional argument.
             """
-            addOptions(parser)
+            addOptions(parser, jobstore_as_flag=jobstore_as_flag)
 
         @staticmethod
         def startToil(job: "Job", options) -> Any:
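
A hedged sketch of the calling patterns the new jobstore_as_flag parameter allows (paths are illustrative):

    # As before: job store as a required positional argument.
    options = Job.Runner.getDefaultOptions("./example-jobstore")

    # New: job store as a --jobstore flag, so it can also come from a
    # config file rather than the command line.
    options = Job.Runner.getDefaultOptions("./example-jobstore", jobstore_as_flag=True)
    options = Job.Runner.getDefaultOptions(jobstore_as_flag=True)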
@@ -2279,8 +2346,8 @@ class Job:
         unpickler = FilteredUnpickler(fileHandle)
 
         runnable = unpickler.load()
-        if requireInstanceOf is not None:
-            assert isinstance(runnable, requireInstanceOf), f"Did not find a {requireInstanceOf} when expected"
+        if requireInstanceOf is not None and not isinstance(runnable, requireInstanceOf):
+            raise RuntimeError(f"Did not find a {requireInstanceOf} when expected")
 
         return runnable
 
@@ -2450,7 +2517,8 @@ class Job:
 
         # We can't save the job in the right place for cleanup unless the
         # description has a real ID.
-        assert not isinstance(self.jobStoreID, TemporaryID), f"Tried to save job {self} without ID assigned!"
+        if isinstance(self.jobStoreID, TemporaryID):
+            raise RuntimeError(f"Tried to save job {self} without ID assigned!")
 
         # Note that we can't accept any more requests for our return value
         self._disablePromiseRegistration()
@@ -2553,10 +2621,11 @@ class Job:
         # Set up to save last job first, so promises flow the right way
         ordering.reverse()
 
-        logger.info("Saving graph of %d jobs, %d non-service, %d new", len(allJobs), len(ordering), len(fakeToReal))
+        logger.debug("Saving graph of %d jobs, %d non-service, %d new", len(allJobs), len(ordering), len(fakeToReal))
 
         # Make sure we're the root
-        assert ordering[-1] == self
+        if ordering[-1] != self:
+            raise RuntimeError("The current job is not the root.")
 
         # Don't verify the ordering length: it excludes service host jobs.
         ordered_ids = {o.jobStoreID for o in ordering}
@@ -2572,17 +2641,17 @@ class Job:
         self._fulfillPromises(returnValues, jobStore)
 
         for job in ordering:
-            logger.info("Processing job %s", job.description)
+            logger.debug("Processing job %s", job.description)
             for serviceBatch in reversed(list(job.description.serviceHostIDsInBatches())):
                 # For each batch of service host jobs in reverse order they start
                 for serviceID in serviceBatch:
-                    logger.info("Processing service %s", serviceID)
+                    logger.debug("Processing service %s", serviceID)
                     if serviceID in self._registry:
                         # It's a new service
 
                         # Find the actual job
                         serviceJob = self._registry[serviceID]
-                        logger.info("Saving service %s", serviceJob.description)
+                        logger.debug("Saving service %s", serviceJob.description)
                         # Pickle the service body, which triggers all the promise stuff
                         serviceJob.saveBody(jobStore)
             if job != self or saveSelf:
@@ -2641,7 +2710,8 @@ class Job:
         command = jobDescription.command
 
         commandTokens = command.split()
-        assert "_toil" == commandTokens[0]
+        if "_toil" != commandTokens[0]:
+            raise RuntimeError("An invalid command was passed into the job.")
         userModule = ModuleDescriptor.fromCommand(commandTokens[2:])
         logger.debug('Loading user module %s.', userModule)
         userModule = cls._loadUserModule(userModule)
@@ -2732,7 +2802,8 @@ class Job:
                 clock=str(totalCpuTime - startClock),
                 class_name=self._jobName(),
                 memory=str(totalMemoryUsage),
-                requested_cores=str(self.cores)
+                requested_cores=str(self.cores),
+                disk=str(fileStore.get_disk_usage())
             )
         )
 
@@ -3025,22 +3096,23 @@ class EncapsulatedJob(Job):
         self.encapsulatedFollowOn = None
 
     def addChild(self, childJob):
-        assert self.encapsulatedFollowOn is not None, \
-            "Children cannot be added to EncapsulatedJob while it is running"
+        if self.encapsulatedFollowOn is None:
+            raise RuntimeError("Children cannot be added to EncapsulatedJob while it is running")
         return Job.addChild(self.encapsulatedFollowOn, childJob)
 
     def addService(self, service, parentService=None):
-        assert self.encapsulatedFollowOn is not None, \
-            "Services cannot be added to EncapsulatedJob while it is running"
+        if self.encapsulatedFollowOn is None:
+            raise RuntimeError("Services cannot be added to EncapsulatedJob while it is running")
         return Job.addService(self.encapsulatedFollowOn, service, parentService=parentService)
 
     def addFollowOn(self, followOnJob):
-        assert self.encapsulatedFollowOn is not None, \
-            "Follow-ons cannot be added to EncapsulatedJob while it is running"
+        if self.encapsulatedFollowOn is None:
+            raise RuntimeError("Follow-ons cannot be added to EncapsulatedJob while it is running")
         return Job.addFollowOn(self.encapsulatedFollowOn, followOnJob)
 
     def rv(self, *path) -> "Promise":
-        assert self.encapsulatedJob is not None
+        if self.encapsulatedJob is None:
+            raise RuntimeError("The encapsulated job was not set.")
         return self.encapsulatedJob.rv(*path)
 
     def prepareForPromiseRegistration(self, jobStore):
@@ -3052,7 +3124,8 @@ class EncapsulatedJob(Job):
         self.encapsulatedJob.prepareForPromiseRegistration(jobStore)
 
     def _disablePromiseRegistration(self):
-        assert self.encapsulatedJob is not None
+        if self.encapsulatedJob is None:
+            raise RuntimeError("The encapsulated job was not set.")
         super()._disablePromiseRegistration()
         self.encapsulatedJob._disablePromiseRegistration()
 
@@ -3068,7 +3141,8 @@ class EncapsulatedJob(Job):
         return self.__class__, (None,)
 
     def getUserScript(self):
-        assert self.encapsulatedJob is not None
+        if self.encapsulatedJob is None:
+            raise RuntimeError("The encapsulated job was not set.")
         return self.encapsulatedJob.getUserScript()
 
 
@@ -3085,7 +3159,8 @@ class ServiceHostJob(Job):
         """
 
         # Make sure the service hasn't been given a host already.
-        assert service.hostID is None
+        if service.hostID is not None:
+            raise RuntimeError("Cannot set the host. The service has already been given a host.")
 
         # Make ourselves with name info from the Service and a
         # ServiceJobDescription that has the service control flags.
@@ -3172,14 +3247,17 @@ class ServiceHostJob(Job):
 
         #Now flag that the service is running jobs can connect to it
         logger.debug("Removing the start jobStoreID to indicate that establishment of the service")
-        assert self.description.startJobStoreID != None
+        if self.description.startJobStoreID is None:
+            raise RuntimeError("No start jobStoreID to remove.")
         if fileStore.jobStore.file_exists(self.description.startJobStoreID):
             fileStore.jobStore.delete_file(self.description.startJobStoreID)
-        assert not fileStore.jobStore.file_exists(self.description.startJobStoreID)
+        if fileStore.jobStore.file_exists(self.description.startJobStoreID):
+            raise RuntimeError("The start jobStoreID is not a file.")
 
         #Now block until we are told to stop, which is indicated by the removal
         #of a file
-        assert self.description.terminateJobStoreID != None
+        if self.description.terminateJobStoreID is None:
+            raise RuntimeError("No terminate jobStoreID to use.")
         while True:
             # Check for the terminate signal
             if not fileStore.jobStore.file_exists(self.description.terminateJobStoreID):
@@ -3273,7 +3351,8 @@ class Promise:
     @staticmethod
    def __new__(cls, *args) -> "Promise":
         """Instantiate this Promise."""
-        assert len(args) == 2
+        if len(args) != 2:
+            raise RuntimeError("Cannot instantiate promise. Invalid number of arguments given (Expected 2).")
         if isinstance(args[0], Job):
             # Regular instantiation when promise is created, before it is being pickled
             return super().__new__(cls)
@@ -3357,10 +3436,12 @@ class PromisedRequirement:
         :type args: int or .Promise
         """
         if hasattr(valueOrCallable, '__call__'):
-            assert len(args) != 0, 'Need parameters for PromisedRequirement function.'
+            if len(args) == 0:
+                raise RuntimeError('Need parameters for PromisedRequirement function.')
             func = valueOrCallable
         else:
-            assert len(args) == 0, 'Define a PromisedRequirement function to handle multiple arguments.'
+            if len(args) != 0:
+                raise RuntimeError('Define a PromisedRequirement function to handle multiple arguments.')
             func = lambda x: x
             args = [valueOrCallable]
 