toil 6.1.0__py3-none-any.whl → 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. toil/__init__.py +1 -232
  2. toil/batchSystems/abstractBatchSystem.py +22 -13
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +59 -45
  4. toil/batchSystems/awsBatch.py +8 -8
  5. toil/batchSystems/contained_executor.py +4 -5
  6. toil/batchSystems/gridengine.py +1 -1
  7. toil/batchSystems/htcondor.py +5 -5
  8. toil/batchSystems/kubernetes.py +25 -11
  9. toil/batchSystems/local_support.py +3 -3
  10. toil/batchSystems/lsf.py +2 -2
  11. toil/batchSystems/mesos/batchSystem.py +4 -4
  12. toil/batchSystems/mesos/executor.py +3 -2
  13. toil/batchSystems/options.py +9 -0
  14. toil/batchSystems/singleMachine.py +11 -10
  15. toil/batchSystems/slurm.py +64 -22
  16. toil/batchSystems/torque.py +1 -1
  17. toil/bus.py +7 -3
  18. toil/common.py +36 -13
  19. toil/cwl/cwltoil.py +365 -312
  20. toil/deferred.py +1 -1
  21. toil/fileStores/abstractFileStore.py +17 -17
  22. toil/fileStores/cachingFileStore.py +2 -2
  23. toil/fileStores/nonCachingFileStore.py +1 -1
  24. toil/job.py +228 -60
  25. toil/jobStores/abstractJobStore.py +18 -10
  26. toil/jobStores/aws/jobStore.py +280 -218
  27. toil/jobStores/aws/utils.py +57 -29
  28. toil/jobStores/conftest.py +2 -2
  29. toil/jobStores/fileJobStore.py +2 -2
  30. toil/jobStores/googleJobStore.py +3 -4
  31. toil/leader.py +72 -24
  32. toil/lib/aws/__init__.py +26 -10
  33. toil/lib/aws/iam.py +2 -2
  34. toil/lib/aws/session.py +62 -22
  35. toil/lib/aws/utils.py +73 -37
  36. toil/lib/conversions.py +5 -1
  37. toil/lib/ec2.py +118 -69
  38. toil/lib/expando.py +1 -1
  39. toil/lib/io.py +14 -2
  40. toil/lib/misc.py +1 -3
  41. toil/lib/resources.py +55 -21
  42. toil/lib/retry.py +12 -5
  43. toil/lib/threading.py +2 -2
  44. toil/lib/throttle.py +1 -1
  45. toil/options/common.py +27 -24
  46. toil/provisioners/__init__.py +9 -3
  47. toil/provisioners/abstractProvisioner.py +9 -7
  48. toil/provisioners/aws/__init__.py +20 -15
  49. toil/provisioners/aws/awsProvisioner.py +406 -329
  50. toil/provisioners/gceProvisioner.py +2 -2
  51. toil/provisioners/node.py +13 -5
  52. toil/server/app.py +1 -1
  53. toil/statsAndLogging.py +58 -16
  54. toil/test/__init__.py +27 -12
  55. toil/test/batchSystems/batchSystemTest.py +40 -33
  56. toil/test/batchSystems/batch_system_plugin_test.py +79 -0
  57. toil/test/batchSystems/test_slurm.py +1 -1
  58. toil/test/cwl/cwlTest.py +8 -91
  59. toil/test/cwl/seqtk_seq.cwl +1 -1
  60. toil/test/docs/scriptsTest.py +10 -13
  61. toil/test/jobStores/jobStoreTest.py +33 -49
  62. toil/test/lib/aws/test_iam.py +2 -2
  63. toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
  64. toil/test/provisioners/clusterTest.py +90 -8
  65. toil/test/server/serverTest.py +2 -2
  66. toil/test/src/autoDeploymentTest.py +1 -1
  67. toil/test/src/dockerCheckTest.py +2 -1
  68. toil/test/src/environmentTest.py +125 -0
  69. toil/test/src/fileStoreTest.py +1 -1
  70. toil/test/src/jobDescriptionTest.py +18 -8
  71. toil/test/src/jobTest.py +1 -1
  72. toil/test/src/realtimeLoggerTest.py +4 -0
  73. toil/test/src/workerTest.py +52 -19
  74. toil/test/utils/toilDebugTest.py +61 -3
  75. toil/test/utils/utilsTest.py +20 -18
  76. toil/test/wdl/wdltoil_test.py +24 -71
  77. toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
  78. toil/toilState.py +68 -9
  79. toil/utils/toilDebugJob.py +153 -26
  80. toil/utils/toilLaunchCluster.py +12 -2
  81. toil/utils/toilRsyncCluster.py +7 -2
  82. toil/utils/toilSshCluster.py +7 -3
  83. toil/utils/toilStats.py +2 -1
  84. toil/utils/toilStatus.py +97 -51
  85. toil/version.py +10 -10
  86. toil/wdl/wdltoil.py +318 -51
  87. toil/worker.py +96 -69
  88. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
  89. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/METADATA +55 -21
  90. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/RECORD +93 -90
  91. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
  92. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
  93. {toil-6.1.0.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/test/utils/utilsTest.py CHANGED
@@ -56,6 +56,7 @@ class UtilsTest(ToilTest):
  super().setUp()
  self.tempDir = self._createTempDir()
  self.tempFile = get_temp_file(rootDir=self.tempDir)
+ self.outputFile = get_temp_file(rootDir=self.tempDir)
  self.outputFile = 'someSortedStuff.txt'
  self.toilDir = os.path.join(self.tempDir, "jobstore")
  self.assertFalse(os.path.exists(self.toilDir))
@@ -73,9 +74,9 @@ class UtilsTest(ToilTest):
  '-m',
  'toil.test.sort.sort',
  f'file:{self.toilDir}',
+ f'--fileToSort={self.tempFile}',
+ f'--outputFile={self.outputFile}',
  '--clean=never',
- '--numLines=1',
- '--lineLength=1'
  ]

  self.restart_sort_workflow_cmd = [
@@ -91,7 +92,7 @@ class UtilsTest(ToilTest):
  if os.path.exists(self.toilDir):
  shutil.rmtree(self.toilDir)

- for f in ['fileToSort.txt', 'sortedFile.txt', 'output.txt']:
+ for f in [self.tempFile, self.outputFile, os.path.join(self.tempDir, "output.txt")]:
  if os.path.exists(f):
  os.remove(f)

@@ -314,14 +315,14 @@ class UtilsTest(ToilTest):
  def testGetPIDStatus(self):
  """Test that ToilStatus.getPIDStatus() behaves as expected."""
  wf = subprocess.Popen(self.sort_workflow_cmd)
- self.check_status('RUNNING', status_fn=ToilStatus.getPIDStatus, seconds=20)
+ self.check_status('RUNNING', status_fn=ToilStatus.getPIDStatus, seconds=60)
  wf.wait()
- self.check_status('COMPLETED', status_fn=ToilStatus.getPIDStatus)
+ self.check_status('COMPLETED', status_fn=ToilStatus.getPIDStatus, seconds=60)

  # TODO: we need to reach into the FileJobStore's files and delete this
  # shared file. We assume we know its internal layout.
  os.remove(os.path.join(self.toilDir, 'files/shared/pid.log'))
- self.check_status('QUEUED', status_fn=ToilStatus.getPIDStatus)
+ self.check_status('QUEUED', status_fn=ToilStatus.getPIDStatus, seconds=60)

  def testGetStatusFailedToilWF(self):
  """
@@ -331,9 +332,9 @@ class UtilsTest(ToilTest):
  """
  # --badWorker is set to force failure.
  wf = subprocess.Popen(self.sort_workflow_cmd + ['--badWorker=1'])
- self.check_status('RUNNING', status_fn=ToilStatus.getStatus)
+ self.check_status('RUNNING', status_fn=ToilStatus.getStatus, seconds=60)
  wf.wait()
- self.check_status('ERROR', status_fn=ToilStatus.getStatus)
+ self.check_status('ERROR', status_fn=ToilStatus.getStatus, seconds=60)

  @needs_cwl
  @needs_docker
@@ -341,22 +342,22 @@ class UtilsTest(ToilTest):
  """Test that ToilStatus.getStatus() behaves as expected with a failing CWL workflow."""
  # --badWorker is set to force failure.
  cmd = ['toil-cwl-runner', '--jobStore', self.toilDir, '--clean=never', '--badWorker=1',
- 'src/toil/test/cwl/sorttool.cwl', '--reverse', '--input', 'src/toil/test/cwl/whale.txt']
+ 'src/toil/test/cwl/sorttool.cwl', '--reverse', '--input', 'src/toil/test/cwl/whale.txt', f'--outdir={self.tempDir}']
  wf = subprocess.Popen(cmd)
- self.check_status('RUNNING', status_fn=ToilStatus.getStatus)
+ self.check_status('RUNNING', status_fn=ToilStatus.getStatus, seconds=60)
  wf.wait()
- self.check_status('ERROR', status_fn=ToilStatus.getStatus)
+ self.check_status('ERROR', status_fn=ToilStatus.getStatus, seconds=60)

  @needs_cwl
  @needs_docker
  def testGetStatusSuccessfulCWLWF(self):
  """Test that ToilStatus.getStatus() behaves as expected with a successful CWL workflow."""
  cmd = ['toil-cwl-runner', '--jobStore', self.toilDir, '--clean=never',
- 'src/toil/test/cwl/sorttool.cwl', '--reverse', '--input', 'src/toil/test/cwl/whale.txt']
+ 'src/toil/test/cwl/sorttool.cwl', '--reverse', '--input', 'src/toil/test/cwl/whale.txt', f'--outdir={self.tempDir}']
  wf = subprocess.Popen(cmd)
- self.check_status('RUNNING', status_fn=ToilStatus.getStatus, seconds=20)
+ self.check_status('RUNNING', status_fn=ToilStatus.getStatus, seconds=60)
  wf.wait()
- self.check_status('COMPLETED', status_fn=ToilStatus.getStatus)
+ self.check_status('COMPLETED', status_fn=ToilStatus.getStatus, seconds=60)

  @needs_cwl
  @patch('builtins.print')
@@ -375,23 +376,24 @@ class UtilsTest(ToilTest):
  args, kwargs = mock_print.call_args
  self.assertIn('invalidcommand', args[0])

+ @pytest.mark.timeout(1200)
  def testRestartAttribute(self):
  """
- Test that the job store is only destroyed when we observe a succcessful workflow run.
+ Test that the job store is only destroyed when we observe a successful workflow run.
  The following simulates a failing workflow that attempts to resume without restart().
  In this case, the job store should not be destroyed until restart() is called.
  """
  # Run a workflow that will always fail
- cmd = self.restart_sort_workflow_cmd + ['--badWorker=1']
+ cmd = self.restart_sort_workflow_cmd + ['--badWorker=1', '--logDebug']
  subprocess.run(cmd)

- restart_cmd = self.restart_sort_workflow_cmd + ['--badWorker=0', '--restart']
+ restart_cmd = self.restart_sort_workflow_cmd + ['--badWorker=0', '--logDebug', '--restart']
  subprocess.run(restart_cmd)

  # Check the job store exists after restart attempt
  self.assertTrue(os.path.exists(self.toilDir))

- successful_cmd = [python, '-m', 'toil.test.sort.sort', 'file:' + self.toilDir,
+ successful_cmd = [python, '-m', 'toil.test.sort.sort', '--logDebug', 'file:' + self.toilDir,
  '--restart']
  subprocess.run(successful_cmd)

toil/test/wdl/wdltoil_test.py CHANGED
@@ -16,12 +16,12 @@ from toil.test import (ToilTest,
  needs_docker_cuda,
  needs_google_storage,
  needs_singularity_or_docker,
+ needs_wdl,
  slow, integrative)
- from toil.test.provisioners.clusterTest import AbstractClusterTest
  from toil.version import exactPython
  from toil.wdl.wdltoil import WDLSectionJob, WDLWorkflowGraph

-
+ @needs_wdl
  class BaseWDLTest(ToilTest):
  """Base test class for WDL tests."""

@@ -45,7 +45,7 @@ class WDLConformanceTests(BaseWDLTest):
  def setUpClass(cls) -> None:

  url = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
- commit = "032fb99a1458d456b6d5f17d27928469ec1a1c68"
+ commit = "c87b62b4f460e009fd42edec13669c4db14cf90c"

  p = subprocess.Popen(
  f"git clone {url} {cls.wdl_dir} && cd {cls.wdl_dir} && git checkout {commit}",
@@ -64,7 +64,7 @@ class WDLConformanceTests(BaseWDLTest):
  # estimated running time: 2 minutes
  @slow
  def test_conformance_tests_v10(self):
- tests_to_run = "0,1,5-7,9-15,17,22-24,26,28-30,32-40,53,57-59,62,67-69"
+ tests_to_run = "0-15,17-20,22-71,73-77"
  p = subprocess.run(self.base_command + ["-v", "1.0", "-n", tests_to_run], capture_output=True)

  if p.returncode != 0:
@@ -75,7 +75,7 @@ class WDLConformanceTests(BaseWDLTest):
  # estimated running time: 2 minutes
  @slow
  def test_conformance_tests_v11(self):
- tests_to_run = "2-11,13-15,17-20,22-24,26,29,30,32-40,53,57-59,62,67-69"
+ tests_to_run = "1-63,65-71,73-75,77"
  p = subprocess.run(self.base_command + ["-v", "1.1", "-n", tests_to_run], capture_output=True)

  if p.returncode != 0:
@@ -83,6 +83,16 @@ class WDLConformanceTests(BaseWDLTest):

  p.check_returncode()

+ @slow
+ def test_conformance_tests_integration(self):
+ ids_to_run = "encode,tut01,tut02,tut03,tut04"
+ p = subprocess.run(self.base_command + ["-v", "1.0", "--id", ids_to_run], capture_output=True)
+
+ if p.returncode != 0:
+ print(p.stdout.decode('utf-8', errors='replace'))
+
+ p.check_returncode()
+
  @classmethod
  def tearDownClass(cls) -> None:
  upper_dir = os.path.dirname(os.getcwd())
@@ -116,6 +126,14 @@ class WDLTests(BaseWDLTest):
  assert os.path.exists(result['ga4ghMd5.value'])
  assert os.path.basename(result['ga4ghMd5.value']) == 'md5sum.txt'

+ def test_missing_output_directory(self):
+ """
+ Test if Toil can run a WDL workflow into a new directory.
+ """
+ wdl = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.1.0.wdl')
+ json_file = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.json')
+ subprocess.check_call(self.base_command + [wdl, json_file, '-o', os.path.join(self.output_dir, "does", "not", "exist"), '--logDebug', '--retryCount=0'])
+
  @needs_singularity_or_docker
  def test_miniwdl_self_test(self, extra_args: Optional[List[str]] = None) -> None:
  """Test if the MiniWDL self test runs and produces the expected output."""
@@ -141,7 +159,7 @@ class WDLTests(BaseWDLTest):
  assert isinstance(outputs['hello_caller.message_files'], list)
  assert len(outputs['hello_caller.message_files']) == 2
  for item in outputs['hello_caller.message_files']:
- # All the files should be strings in the "out" direcotry
+ # All the files should be strings in the "out" directory
  assert isinstance(item, str)
  assert item.startswith(out_dir)

@@ -349,70 +367,5 @@ class WDLTests(BaseWDLTest):
  assert "successor" in result[1]


- @integrative
- @slow
- @pytest.mark.timeout(600)
- class WDLKubernetesClusterTest(AbstractClusterTest):
- """
- Ensure WDL works on the Kubernetes batchsystem.
- """
-
- def __init__(self, name):
- super().__init__(name)
- self.clusterName = 'wdl-integration-test-' + str(uuid4())
- # t2.medium is the minimum t2 instance that permits Kubernetes
- self.leaderNodeType = "t2.medium"
- self.instanceTypes = ["t2.medium"]
- self.clusterType = "kubernetes"
-
- def setUp(self) -> None:
- super().setUp()
- self.jobStore = f'aws:{self.awsRegion()}:wdl-test-{uuid4()}'
-
- def launchCluster(self) -> None:
- self.createClusterUtil(args=['--leaderStorage', str(self.requestedLeaderStorage),
- '--nodeTypes', ",".join(self.instanceTypes),
- '-w', ",".join(self.numWorkers),
- '--nodeStorage', str(self.requestedLeaderStorage)])
-
- def test_wdl_kubernetes_cluster(self):
- """
- Test that a wdl workflow works on a kubernetes cluster. Launches a cluster with 1 worker. This runs a wdl
- workflow that performs an image pull on the worker.
- :return:
- """
- self.numWorkers = "1"
- self.requestedLeaderStorage = 30
- # create the cluster
- self.launchCluster()
- # get leader
- self.cluster = cluster_factory(
- provisioner="aws", zone=self.zone, clusterName=self.clusterName
- )
- self.leader = self.cluster.getLeader()
-
- url = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
- commit = "09b9659cd01473e836738a2e0dd205df0adb49c5"
- wdl_dir = "wdl_conformance_tests"
-
- # get the wdl-conformance-tests repo to get WDL tasks to run
- self.sshUtil([
- "bash",
- "-c",
- f"git clone {url} {wdl_dir} && cd {wdl_dir} && git checkout {commit}"
- ])
-
- # run on kubernetes batchsystem
- toil_options = ['--batchSystem=kubernetes',
- f"--jobstore={self.jobStore}"]
-
- # run WDL workflow that will run singularity
- test_options = [f"tests/md5sum/md5sum.wdl", f"tests/md5sum/md5sum.json"]
- self.sshUtil([
- "bash",
- "-c",
- f"cd {wdl_dir} && toil-wdl-runner {' '.join(test_options)} {' '.join(toil_options)}"])
-
-
  if __name__ == "__main__":
  unittest.main() # run all tests
toil/test/wdl/wdltoil_test_kubernetes.py ADDED
@@ -0,0 +1,77 @@
+ import unittest
+
+ from toil.test.provisioners.clusterTest import AbstractClusterTest
+ from uuid import uuid4
+
+ import pytest
+
+ from toil.provisioners import cluster_factory
+ from toil.test import (slow, integrative)
+
+ @integrative
+ @slow
+ @pytest.mark.timeout(600)
+ class WDLKubernetesClusterTest(AbstractClusterTest):
+ """
+ Ensure WDL works on the Kubernetes batchsystem.
+ """
+
+ def __init__(self, name):
+ super().__init__(name)
+ self.clusterName = 'wdl-integration-test-' + str(uuid4())
+ # t2.medium is the minimum t2 instance that permits Kubernetes
+ self.leaderNodeType = "t2.medium"
+ self.instanceTypes = ["t2.medium"]
+ self.clusterType = "kubernetes"
+
+ def setUp(self) -> None:
+ super().setUp()
+ self.jobStore = f'aws:{self.awsRegion()}:wdl-test-{uuid4()}'
+
+ def launchCluster(self) -> None:
+ self.createClusterUtil(args=['--leaderStorage', str(self.requestedLeaderStorage),
+ '--nodeTypes', ",".join(self.instanceTypes),
+ '-w', ",".join(self.numWorkers),
+ '--nodeStorage', str(self.requestedLeaderStorage)])
+
+ def test_wdl_kubernetes_cluster(self):
+ """
+ Test that a wdl workflow works on a kubernetes cluster. Launches a cluster with 1 worker. This runs a wdl
+ workflow that performs an image pull on the worker.
+ :return:
+ """
+ self.numWorkers = "1"
+ self.requestedLeaderStorage = 30
+ # create the cluster
+ self.launchCluster()
+ # get leader
+ self.cluster = cluster_factory(
+ provisioner="aws", zone=self.zone, clusterName=self.clusterName
+ )
+ self.leader = self.cluster.getLeader()
+
+ url = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
+ commit = "09b9659cd01473e836738a2e0dd205df0adb49c5"
+ wdl_dir = "wdl_conformance_tests"
+
+ # get the wdl-conformance-tests repo to get WDL tasks to run
+ self.sshUtil([
+ "bash",
+ "-c",
+ f"git clone {url} {wdl_dir} && cd {wdl_dir} && git checkout {commit}"
+ ])
+
+ # run on kubernetes batchsystem
+ toil_options = ['--batchSystem=kubernetes',
+ f"--jobstore={self.jobStore}"]
+
+ # run WDL workflow that will run singularity
+ test_options = [f"tests/md5sum/md5sum.wdl", f"tests/md5sum/md5sum.json"]
+ self.sshUtil([
+ "bash",
+ "-c",
+ f"cd {wdl_dir} && toil-wdl-runner {' '.join(test_options)} {' '.join(toil_options)}"])
+
+
+ if __name__ == "__main__":
+ unittest.main() # run all tests
toil/toilState.py CHANGED
@@ -12,6 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  import logging
+ import time
  from typing import Dict, Optional, Set

  from toil.bus import JobUpdatedMessage, MessageBus
@@ -183,12 +184,70 @@ class ToilState:
  if job_id in self.__job_database:
  # Update the one true copy in place
  old_truth = self.__job_database[job_id]
- old_truth.check_new_version(new_truth)
+ old_truth.assert_is_not_newer_than(new_truth)
  old_truth.__dict__.update(new_truth.__dict__)
  else:
  # Just keep the new one
  self.__job_database[job_id] = new_truth

+ def reset_job_expecting_change(self, job_id: str, timeout: float) -> bool:
+ """
+ Discard any local modifications to a JobDescription.
+
+ Will make modifications from other hosts visible.
+
+ Will wait for up to timeout seconds for a modification (or deletion)
+ from another host to actually be visible.
+
+ Always replaces the JobDescription with what is stored in the job
+ store, even if no modification ends up being visible.
+
+ Returns True if an update was detected in time, and False otherwise.
+ """
+
+ start_time = time.time()
+ wait_time = 0.1
+ initially_known = job_id in self.__job_database
+ new_truth: Optional[JobDescription] = None
+ while True:
+ try:
+ new_truth = self.__job_store.load_job(job_id)
+ except NoSuchJobException:
+ # The job is gone now.
+ if job_id in self.__job_database:
+ # So forget about it
+ del self.__job_database[job_id]
+ # TODO: Other collections may still reference it.
+ if initially_known:
+ # Job was deleted, that's an update
+ return True
+ else:
+ if job_id in self.__job_database:
+ # We have an old version to compare against
+ old_truth = self.__job_database[job_id]
+ old_truth.assert_is_not_newer_than(new_truth)
+ if old_truth.is_updated_by(new_truth):
+ # Do the update
+ old_truth.__dict__.update(new_truth.__dict__)
+ return True
+ else:
+ # Just keep the new one. That's an update.
+ self.__job_database[job_id] = new_truth
+ return True
+ # We looked but didn't get a good update
+ time_elapsed = time.time() - start_time
+ if time_elapsed >= timeout:
+ # We're out of time to check.
+ if new_truth is not None:
+ # Commit whatever we managed to load to accomplish a real
+ # reset.
+ old_truth.__dict__.update(new_truth.__dict__)
+ return False
+ # Wait a little and poll again
+ time.sleep(min(timeout - time_elapsed, wait_time))
+ # Using exponential backoff
+ wait_time *= 2
+
  # The next 3 functions provide tracking of how many successor jobs a given job
  # is waiting on, exposing only legit operations.
  # TODO: turn these into messages?
@@ -247,10 +306,10 @@

  :param jobDesc: The description for the root job of the workflow being run.
  """
- # If the job description has a command, is a checkpoint, has services
+ # If the job description has a body, is a checkpoint, has services
  # or is ready to be deleted it is ready to be processed (i.e. it is updated)
  if (
- jobDesc.command is not None
+ jobDesc.has_body()
  or (
  isinstance(jobDesc, CheckpointJobDescription)
  and jobDesc.checkpoint is not None
@@ -259,10 +318,10 @@
  or jobDesc.nextSuccessors() is None
  ):
  logger.debug(
- "Found job to run: %s, with command: %s, with checkpoint: %s, with "
+ "Found job to run: %s, with body: %s, with checkpoint: %s, with "
  "services: %s, with no next successors: %s",
  jobDesc.jobStoreID,
- jobDesc.command is not None,
+ jobDesc.has_body(),
  isinstance(jobDesc, CheckpointJobDescription)
  and jobDesc.checkpoint is not None,
  len(jobDesc.services) > 0,
@@ -272,18 +331,18 @@
  self.bus.publish(JobUpdatedMessage(str(jobDesc.jobStoreID), 0))

  if isinstance(jobDesc, CheckpointJobDescription) and jobDesc.checkpoint is not None:
- jobDesc.command = jobDesc.checkpoint
+ jobDesc.restore_checkpoint()

  else: # There exist successors
  logger.debug(
  "Adding job: %s to the state with %s successors",
  jobDesc.jobStoreID,
- len(jobDesc.nextSuccessors()),
+ len(jobDesc.nextSuccessors() or set()),
  )

  # Record the number of successors
  self.successorCounts[str(jobDesc.jobStoreID)] = len(
- jobDesc.nextSuccessors()
+ jobDesc.nextSuccessors() or set()
  )

  def processSuccessorWithMultiplePredecessors(successor: JobDescription) -> None:
@@ -305,7 +364,7 @@
  self._buildToilState(successor)

  # For each successor
- for successorJobStoreID in jobDesc.nextSuccessors():
+ for successorJobStoreID in jobDesc.nextSuccessors() or set():

  # If the successor does not yet point back at a
  # predecessor we have not yet considered it