toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. toil/__init__.py +124 -86
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +137 -77
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
  5. toil/batchSystems/awsBatch.py +237 -128
  6. toil/batchSystems/cleanup_support.py +22 -16
  7. toil/batchSystems/contained_executor.py +30 -26
  8. toil/batchSystems/gridengine.py +85 -49
  9. toil/batchSystems/htcondor.py +164 -87
  10. toil/batchSystems/kubernetes.py +622 -386
  11. toil/batchSystems/local_support.py +17 -12
  12. toil/batchSystems/lsf.py +132 -79
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +288 -149
  16. toil/batchSystems/mesos/executor.py +77 -49
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +39 -29
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +293 -123
  21. toil/batchSystems/slurm.py +651 -155
  22. toil/batchSystems/torque.py +46 -32
  23. toil/bus.py +141 -73
  24. toil/common.py +784 -397
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1137 -534
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +62 -41
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +88 -57
  32. toil/fileStores/cachingFileStore.py +711 -247
  33. toil/fileStores/nonCachingFileStore.py +113 -75
  34. toil/job.py +1031 -349
  35. toil/jobStores/abstractJobStore.py +387 -243
  36. toil/jobStores/aws/jobStore.py +772 -412
  37. toil/jobStores/aws/utils.py +161 -109
  38. toil/jobStores/conftest.py +1 -0
  39. toil/jobStores/fileJobStore.py +289 -151
  40. toil/jobStores/googleJobStore.py +137 -70
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +614 -269
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +55 -28
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +204 -58
  49. toil/lib/aws/utils.py +290 -213
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +83 -49
  53. toil/lib/docker.py +131 -103
  54. toil/lib/dockstore.py +379 -0
  55. toil/lib/ec2.py +322 -209
  56. toil/lib/ec2nodes.py +174 -105
  57. toil/lib/encryption/_dummy.py +5 -3
  58. toil/lib/encryption/_nacl.py +10 -6
  59. toil/lib/encryption/conftest.py +1 -0
  60. toil/lib/exceptions.py +26 -7
  61. toil/lib/expando.py +4 -2
  62. toil/lib/ftp_utils.py +217 -0
  63. toil/lib/generatedEC2Lists.py +127 -19
  64. toil/lib/history.py +1271 -0
  65. toil/lib/history_submission.py +681 -0
  66. toil/lib/humanize.py +6 -2
  67. toil/lib/io.py +121 -12
  68. toil/lib/iterables.py +4 -2
  69. toil/lib/memoize.py +12 -8
  70. toil/lib/misc.py +83 -18
  71. toil/lib/objects.py +2 -2
  72. toil/lib/resources.py +19 -7
  73. toil/lib/retry.py +125 -87
  74. toil/lib/threading.py +282 -80
  75. toil/lib/throttle.py +15 -14
  76. toil/lib/trs.py +390 -0
  77. toil/lib/web.py +38 -0
  78. toil/options/common.py +850 -402
  79. toil/options/cwl.py +185 -90
  80. toil/options/runner.py +50 -0
  81. toil/options/wdl.py +70 -19
  82. toil/provisioners/__init__.py +111 -46
  83. toil/provisioners/abstractProvisioner.py +322 -157
  84. toil/provisioners/aws/__init__.py +62 -30
  85. toil/provisioners/aws/awsProvisioner.py +980 -627
  86. toil/provisioners/clusterScaler.py +541 -279
  87. toil/provisioners/gceProvisioner.py +283 -180
  88. toil/provisioners/node.py +147 -79
  89. toil/realtimeLogger.py +34 -22
  90. toil/resource.py +137 -75
  91. toil/server/app.py +127 -61
  92. toil/server/celery_app.py +3 -1
  93. toil/server/cli/wes_cwl_runner.py +84 -55
  94. toil/server/utils.py +56 -31
  95. toil/server/wes/abstract_backend.py +64 -26
  96. toil/server/wes/amazon_wes_utils.py +21 -15
  97. toil/server/wes/tasks.py +121 -63
  98. toil/server/wes/toil_backend.py +142 -107
  99. toil/server/wsgi_app.py +4 -3
  100. toil/serviceManager.py +58 -22
  101. toil/statsAndLogging.py +183 -65
  102. toil/test/__init__.py +263 -179
  103. toil/test/batchSystems/batchSystemTest.py +438 -195
  104. toil/test/batchSystems/batch_system_plugin_test.py +18 -7
  105. toil/test/batchSystems/test_gridengine.py +173 -0
  106. toil/test/batchSystems/test_lsf_helper.py +67 -58
  107. toil/test/batchSystems/test_slurm.py +265 -49
  108. toil/test/cactus/test_cactus_integration.py +20 -22
  109. toil/test/cwl/conftest.py +39 -0
  110. toil/test/cwl/cwlTest.py +375 -72
  111. toil/test/cwl/measure_default_memory.cwl +12 -0
  112. toil/test/cwl/not_run_required_input.cwl +29 -0
  113. toil/test/cwl/optional-file.cwl +18 -0
  114. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  115. toil/test/docs/scriptsTest.py +60 -34
  116. toil/test/jobStores/jobStoreTest.py +412 -235
  117. toil/test/lib/aws/test_iam.py +116 -48
  118. toil/test/lib/aws/test_s3.py +16 -9
  119. toil/test/lib/aws/test_utils.py +5 -6
  120. toil/test/lib/dockerTest.py +118 -141
  121. toil/test/lib/test_conversions.py +113 -115
  122. toil/test/lib/test_ec2.py +57 -49
  123. toil/test/lib/test_history.py +212 -0
  124. toil/test/lib/test_misc.py +12 -5
  125. toil/test/lib/test_trs.py +161 -0
  126. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  127. toil/test/mesos/helloWorld.py +7 -6
  128. toil/test/mesos/stress.py +25 -20
  129. toil/test/options/options.py +7 -2
  130. toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
  131. toil/test/provisioners/clusterScalerTest.py +440 -250
  132. toil/test/provisioners/clusterTest.py +81 -42
  133. toil/test/provisioners/gceProvisionerTest.py +174 -100
  134. toil/test/provisioners/provisionerTest.py +25 -13
  135. toil/test/provisioners/restartScript.py +5 -4
  136. toil/test/server/serverTest.py +188 -141
  137. toil/test/sort/restart_sort.py +137 -68
  138. toil/test/sort/sort.py +134 -66
  139. toil/test/sort/sortTest.py +91 -49
  140. toil/test/src/autoDeploymentTest.py +140 -100
  141. toil/test/src/busTest.py +20 -18
  142. toil/test/src/checkpointTest.py +8 -2
  143. toil/test/src/deferredFunctionTest.py +49 -35
  144. toil/test/src/dockerCheckTest.py +33 -26
  145. toil/test/src/environmentTest.py +20 -10
  146. toil/test/src/fileStoreTest.py +538 -271
  147. toil/test/src/helloWorldTest.py +7 -4
  148. toil/test/src/importExportFileTest.py +61 -31
  149. toil/test/src/jobDescriptionTest.py +32 -17
  150. toil/test/src/jobEncapsulationTest.py +2 -0
  151. toil/test/src/jobFileStoreTest.py +74 -50
  152. toil/test/src/jobServiceTest.py +187 -73
  153. toil/test/src/jobTest.py +120 -70
  154. toil/test/src/miscTests.py +19 -18
  155. toil/test/src/promisedRequirementTest.py +82 -36
  156. toil/test/src/promisesTest.py +7 -6
  157. toil/test/src/realtimeLoggerTest.py +6 -6
  158. toil/test/src/regularLogTest.py +71 -37
  159. toil/test/src/resourceTest.py +80 -49
  160. toil/test/src/restartDAGTest.py +36 -22
  161. toil/test/src/resumabilityTest.py +9 -2
  162. toil/test/src/retainTempDirTest.py +45 -14
  163. toil/test/src/systemTest.py +12 -8
  164. toil/test/src/threadingTest.py +44 -25
  165. toil/test/src/toilContextManagerTest.py +10 -7
  166. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  167. toil/test/src/workerTest.py +33 -16
  168. toil/test/utils/toilDebugTest.py +70 -58
  169. toil/test/utils/toilKillTest.py +4 -5
  170. toil/test/utils/utilsTest.py +239 -102
  171. toil/test/wdl/wdltoil_test.py +789 -148
  172. toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
  173. toil/toilState.py +52 -26
  174. toil/utils/toilConfig.py +13 -4
  175. toil/utils/toilDebugFile.py +44 -27
  176. toil/utils/toilDebugJob.py +85 -25
  177. toil/utils/toilDestroyCluster.py +11 -6
  178. toil/utils/toilKill.py +8 -3
  179. toil/utils/toilLaunchCluster.py +251 -145
  180. toil/utils/toilMain.py +37 -16
  181. toil/utils/toilRsyncCluster.py +27 -14
  182. toil/utils/toilSshCluster.py +45 -22
  183. toil/utils/toilStats.py +75 -36
  184. toil/utils/toilStatus.py +226 -119
  185. toil/utils/toilUpdateEC2Instances.py +3 -1
  186. toil/version.py +6 -6
  187. toil/wdl/utils.py +5 -5
  188. toil/wdl/wdltoil.py +3528 -1053
  189. toil/worker.py +370 -149
  190. toil-8.1.0b1.dist-info/METADATA +178 -0
  191. toil-8.1.0b1.dist-info/RECORD +259 -0
  192. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
  193. toil-7.0.0.dist-info/METADATA +0 -158
  194. toil-7.0.0.dist-info/RECORD +0 -244
  195. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
  196. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
  197. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/test/cwl/cwlTest.py CHANGED
@@ -23,16 +23,10 @@ import sys
23
23
  import unittest
24
24
  import uuid
25
25
  import zipfile
26
-
27
26
  from functools import partial
28
27
  from io import StringIO
29
28
  from pathlib import Path
30
- from typing import (TYPE_CHECKING,
31
- Callable,
32
- Dict,
33
- List,
34
- Optional,
35
- cast)
29
+ from typing import TYPE_CHECKING, Callable, Optional, cast
36
30
  from unittest.mock import Mock, call
37
31
  from urllib.request import urlretrieve
38
32
 
@@ -46,32 +40,33 @@ sys.path.insert(0, pkg_root) # noqa
46
40
 
47
41
  from schema_salad.exceptions import ValidationException
48
42
 
49
- from toil.cwl.utils import (DirectoryStructure,
50
- download_structure,
51
- visit_cwl_class_and_reduce,
52
- visit_top_cwl_class)
43
+ from toil.cwl.utils import (
44
+ DirectoryStructure,
45
+ download_structure,
46
+ visit_cwl_class_and_reduce,
47
+ visit_top_cwl_class,
48
+ )
53
49
  from toil.fileStores import FileID
54
50
  from toil.fileStores.abstractFileStore import AbstractFileStore
55
51
  from toil.lib.threading import cpu_count
56
- from toil.provisioners import cluster_factory
57
- from toil.test import (ToilTest,
58
- needs_aws_ec2,
59
- needs_aws_s3,
60
- needs_cwl,
61
- needs_docker,
62
- needs_docker_cuda,
63
- needs_env_var,
64
- needs_fetchable_appliance,
65
- needs_gridengine,
66
- needs_kubernetes,
67
- needs_local_cuda,
68
- needs_lsf,
69
- needs_mesos,
70
- needs_online,
71
- needs_slurm,
72
- needs_torque,
73
- needs_wes_server,
74
- slow)
52
+ from toil.test import (
53
+ ToilTest,
54
+ needs_aws_s3,
55
+ needs_cwl,
56
+ needs_docker,
57
+ needs_docker_cuda,
58
+ needs_gridengine,
59
+ needs_kubernetes,
60
+ needs_local_cuda,
61
+ needs_lsf,
62
+ needs_mesos,
63
+ needs_online,
64
+ needs_singularity_or_docker,
65
+ needs_slurm,
66
+ needs_torque,
67
+ needs_wes_server,
68
+ slow,
69
+ )
75
70
 
76
71
  log = logging.getLogger(__name__)
77
72
  CONFORMANCE_TEST_TIMEOUT = 10000
@@ -86,7 +81,7 @@ def run_conformance_tests(
86
81
  selected_tests: Optional[str] = None,
87
82
  selected_tags: Optional[str] = None,
88
83
  skipped_tests: Optional[str] = None,
89
- extra_args: Optional[List[str]] = None,
84
+ extra_args: Optional[list[str]] = None,
90
85
  must_support_all_features: bool = False,
91
86
  junit_file: Optional[str] = None,
92
87
  ) -> None:
@@ -147,7 +142,7 @@ def run_conformance_tests(
147
142
  "--relax-path-checks",
148
143
  # Defaults to 20s but we can't start hundreds of nodejs processes that fast on our CI potatoes
149
144
  "--eval-timeout=600",
150
- f"--caching={caching}"
145
+ f"--caching={caching}",
151
146
  ]
152
147
 
153
148
  if extra_args:
@@ -181,34 +176,55 @@ def run_conformance_tests(
181
176
  cmd.extend(["--"] + args_passed_directly_to_runner)
182
177
 
183
178
  log.info("Running: '%s'", "' '".join(cmd))
179
+ output_lines: list[str] = []
184
180
  try:
185
- output = subprocess.check_output(cmd, cwd=workDir, stderr=subprocess.STDOUT)
181
+ child = subprocess.Popen(
182
+ cmd, cwd=workDir, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
183
+ )
184
+
185
+ if child.stdout is not None:
186
+ for line_bytes in child.stdout:
187
+ # Pass through all the logs
188
+ line_text = line_bytes.decode("utf-8", errors="replace").rstrip()
189
+ output_lines.append(line_text)
190
+ log.info(line_text)
191
+
192
+ # Once it's done writing, make sure it succeeded.
193
+ child.wait()
194
+ log.info("CWL tests finished with exit code %s", child.returncode)
195
+ if child.returncode != 0:
196
+ # Act like check_output and raise an error.
197
+ raise subprocess.CalledProcessError(child.returncode, " ".join(cmd))
186
198
  finally:
187
199
  if job_store_override:
188
200
  # Clean up the job store we used for all the tests, if it is still there.
189
201
  subprocess.run(["toil", "clean", job_store_override])
190
202
 
191
203
  except subprocess.CalledProcessError as e:
204
+ log.info("CWL test runner return code was unsuccessful")
192
205
  only_unsupported = False
193
206
  # check output -- if we failed but only have unsupported features, we're okay
194
207
  p = re.compile(
195
208
  r"(?P<failures>\d+) failures, (?P<unsupported>\d+) unsupported features"
196
209
  )
197
210
 
198
- error_log = e.output.decode("utf-8")
199
- for line in error_log.split("\n"):
200
- m = p.search(line)
211
+ for line_text in output_lines:
212
+ m = p.search(line_text)
201
213
  if m:
202
214
  if int(m.group("failures")) == 0 and int(m.group("unsupported")) > 0:
203
215
  only_unsupported = True
204
216
  break
205
217
  if (not only_unsupported) or must_support_all_features:
206
- print(error_log)
218
+ log.error(
219
+ "CWL tests gave unacceptable output:\n%s", "\n".join(output_lines)
220
+ )
207
221
  raise e
222
+ log.info("Unsuccessful return code is OK")
208
223
 
209
224
 
210
225
  TesterFuncType = Callable[[str, str, "CWLObjectType"], None]
211
226
 
227
+
212
228
  @needs_cwl
213
229
  class CWLWorkflowTest(ToilTest):
214
230
  """
@@ -222,11 +238,14 @@ class CWLWorkflowTest(ToilTest):
222
238
  self.outDir = f"/tmp/toil-cwl-test-{str(uuid.uuid4())}"
223
239
  os.makedirs(self.outDir)
224
240
  self.rootDir = self._projectRootPath()
241
+ self.jobStoreDir = f"./jobstore-{str(uuid.uuid4())}"
225
242
 
226
243
  def tearDown(self) -> None:
227
244
  """Clean up outputs."""
228
245
  if os.path.exists(self.outDir):
229
246
  shutil.rmtree(self.outDir)
247
+ if os.path.exists(self.jobStoreDir):
248
+ shutil.rmtree(self.jobStoreDir)
230
249
  unittest.TestCase.tearDown(self)
231
250
 
232
251
  def test_cwl_cmdline_input(self) -> None:
@@ -234,6 +253,7 @@ class CWLWorkflowTest(ToilTest):
234
253
  Test that running a CWL workflow with inputs specified on the command line passes.
235
254
  """
236
255
  from toil.cwl import cwltoil
256
+
237
257
  cwlfile = "src/toil/test/cwl/conditional_wf.cwl"
238
258
  args = [cwlfile, "--message", "str", "--sleep", "2"]
239
259
  st = StringIO()
@@ -245,7 +265,7 @@ class CWLWorkflowTest(ToilTest):
245
265
  cwlfile: str,
246
266
  jobfile: str,
247
267
  expect: "CWLObjectType",
248
- main_args: List[str] = [],
268
+ main_args: list[str] = [],
249
269
  out_name: str = "output",
250
270
  output_here: bool = False,
251
271
  ) -> None:
@@ -255,13 +275,7 @@ class CWLWorkflowTest(ToilTest):
255
275
  main_args = main_args[:]
256
276
  if not output_here:
257
277
  # Don't just dump output in the working directory.
258
- main_args.extend(
259
- [
260
- "--logDebug",
261
- "--outdir",
262
- self.outDir
263
- ]
264
- )
278
+ main_args.extend(["--logDebug", "--outdir", self.outDir])
265
279
  main_args.extend(
266
280
  [
267
281
  os.path.join(self.rootDir, cwlfile),
@@ -276,7 +290,12 @@ class CWLWorkflowTest(ToilTest):
276
290
  self.assertEqual(out, expect)
277
291
 
278
292
  for k, v in expect.items():
279
- if isinstance(v, dict) and "class" in v and v["class"] == "File" and "path" in v:
293
+ if (
294
+ isinstance(v, dict)
295
+ and "class" in v
296
+ and v["class"] == "File"
297
+ and "path" in v
298
+ ):
280
299
  # This is a top-level output file.
281
300
  # None of our output files should be executable.
282
301
  self.assertTrue(os.path.exists(v["path"]))
@@ -402,6 +421,11 @@ class CWLWorkflowTest(ToilTest):
402
421
  "revsort.cwl", partial(self._tester, main_args=["--no-compute-checksum"])
403
422
  )
404
423
 
424
+ def test_run_revsort_no_container(self) -> None:
425
+ self.revsort(
426
+ "revsort.cwl", partial(self._tester, main_args=["--no-container"])
427
+ )
428
+
405
429
  def test_run_revsort2(self) -> None:
406
430
  self.revsort("revsort2.cwl", self._tester)
407
431
 
@@ -415,6 +439,24 @@ class CWLWorkflowTest(ToilTest):
415
439
  self._expected_colon_output(self.outDir),
416
440
  out_name="result",
417
441
  )
442
+
443
+ @pytest.mark.integrative
444
+ @needs_singularity_or_docker
445
+ def test_run_dockstore_trs(self) -> None:
446
+ from toil.cwl import cwltoil
447
+
448
+ stdout = StringIO()
449
+ main_args = [
450
+ "--outdir",
451
+ self.outDir,
452
+ "#workflow/github.com/dockstore-testing/md5sum-checker:master",
453
+ "https://raw.githubusercontent.com/dockstore-testing/md5sum-checker/refs/heads/master/md5sum/md5sum-input-cwl.json"
454
+ ]
455
+ cwltoil.main(main_args, stdout=stdout)
456
+ out = json.loads(stdout.getvalue())
457
+ with open(out.get("output_file", {}).get("location")[len("file://") :]) as f:
458
+ computed_hash = f.read().strip()
459
+ self.assertEqual(computed_hash, "00579a00e3e7fa0674428ac7049423e2")
418
460
 
419
461
  def test_glob_dir_bypass_file_store(self) -> None:
420
462
  self.maxDiff = 1000
@@ -426,7 +468,7 @@ class CWLWorkflowTest(ToilTest):
426
468
  "src/toil/test/cwl/empty.json",
427
469
  self._expected_glob_dir_output(os.getcwd()),
428
470
  main_args=["--bypass-file-store"],
429
- output_here=True
471
+ output_here=True,
430
472
  )
431
473
  finally:
432
474
  # Clean up anything we made in the current directory.
@@ -435,6 +477,69 @@ class CWLWorkflowTest(ToilTest):
435
477
  except FileNotFoundError:
436
478
  pass
437
479
 
480
+ def test_required_input_condition_protection(self) -> None:
481
+ # This doesn't run containerized
482
+ self._tester(
483
+ "src/toil/test/cwl/not_run_required_input.cwl",
484
+ "src/toil/test/cwl/empty.json",
485
+ {},
486
+ )
487
+
488
+ @needs_slurm
489
+ def test_slurm_node_memory(self) -> None:
490
+ pass
491
+
492
+ # Run the workflow. This will either finish quickly and tell us the
493
+ # memory we got, or take a long time because it requested a whole
494
+ # node's worth of memory and no nodes are free right now. We need to
495
+ # support both.
496
+
497
+ # And if we run out of time we need to stop the workflow gracefully and
498
+ # cancel the Slurm jobs.
499
+
500
+ main_args = [
501
+ f"--jobStore={self.jobStoreDir}",
502
+ # Avoid racing to toil kill before the jobstore is removed
503
+ "--clean=never",
504
+ "--batchSystem=slurm",
505
+ "--no-cwl-default-ram",
506
+ "--slurmDefaultAllMem=True",
507
+ "--outdir",
508
+ self.outDir,
509
+ os.path.join(self.rootDir, "src/toil/test/cwl/measure_default_memory.cwl"),
510
+ ]
511
+ try:
512
+ log.debug("Start test workflow")
513
+ child = subprocess.Popen(
514
+ ["toil-cwl-runner"] + main_args, stdout=subprocess.PIPE
515
+ )
516
+ output, _ = child.communicate(timeout=60)
517
+ except subprocess.TimeoutExpired:
518
+ # The job didn't finish quickly; presumably waiting for a full node.
519
+ # Stop the workflow
520
+ log.debug("Workflow might be waiting for a full node. Stop it.")
521
+ subprocess.check_call(["toil", "kill", self.jobStoreDir])
522
+ # Wait another little bit for it to clean up, making sure to collect output in case it is blocked on writing
523
+ child.communicate(timeout=20)
524
+ # Kill it off in case it is still running
525
+ child.kill()
526
+ # Reap it
527
+ child.wait()
528
+ # The test passes
529
+ else:
530
+ out = json.loads(output)
531
+ log.debug("Workflow output: %s", out)
532
+ memory_string = out["memory"]
533
+ log.debug("Observed memory: %s", memory_string)
534
+ # If there's no memory limit enforced, Slurm will return "unlimited".
535
+ # Set result to something sensible.
536
+ if memory_string.strip() == "unlimited":
537
+ result = 4 * 1024 * 1024
538
+ else:
539
+ result = int(memory_string)
540
+ # We should see more than the CWL default or the Toil default, assuming Slurm nodes of reasonable size (3 GiB).
541
+ self.assertGreater(result, 3 * 1024 * 1024)
542
+
438
543
  @needs_aws_s3
439
544
  def test_download_s3(self) -> None:
440
545
  self.download("download_s3.json", self._tester)
@@ -446,7 +551,10 @@ class CWLWorkflowTest(ToilTest):
446
551
  self.download("download_https.json", self._tester)
447
552
 
448
553
  def test_download_https_reference(self) -> None:
449
- self.download("download_https.json", partial(self._tester, main_args=["--reference-inputs"]))
554
+ self.download(
555
+ "download_https.json",
556
+ partial(self._tester, main_args=["--reference-inputs"]),
557
+ )
450
558
 
451
559
  def test_download_file(self) -> None:
452
560
  self.download("download_file.json", self._tester)
@@ -457,7 +565,10 @@ class CWLWorkflowTest(ToilTest):
457
565
 
458
566
  @needs_aws_s3
459
567
  def test_download_directory_s3_reference(self) -> None:
460
- self.download_directory("download_directory_s3.json", partial(self._tester, main_args=["--reference-inputs"]))
568
+ self.download_directory(
569
+ "download_directory_s3.json",
570
+ partial(self._tester, main_args=["--reference-inputs"]),
571
+ )
461
572
 
462
573
  def test_download_directory_file(self) -> None:
463
574
  self.download_directory("download_directory_file.json", self._tester)
@@ -502,7 +613,10 @@ class CWLWorkflowTest(ToilTest):
502
613
  "src/toil/test/cwl/seqtk_seq.cwl",
503
614
  "src/toil/test/cwl/seqtk_seq_job.json",
504
615
  self._expected_seqtk_output(self.outDir),
505
- main_args=["--default-container", "quay.io/biocontainers/seqtk:1.4--he4a0461_1"],
616
+ main_args=[
617
+ "--default-container",
618
+ "quay.io/biocontainers/seqtk:1.4--he4a0461_1",
619
+ ],
506
620
  out_name="output1",
507
621
  )
508
622
 
@@ -566,7 +680,11 @@ class CWLWorkflowTest(ToilTest):
566
680
  # Force a failure by trying to use an incorrect version of `rev` from the PATH
567
681
  os.environ["PATH"] = path_with_bogus_rev()
568
682
  try:
569
- subprocess.check_output(["toil-cwl-runner"] + cmd, env=os.environ.copy(), stderr=subprocess.STDOUT)
683
+ subprocess.check_output(
684
+ ["toil-cwl-runner"] + cmd,
685
+ env=os.environ.copy(),
686
+ stderr=subprocess.STDOUT,
687
+ )
570
688
  self.fail("Expected problem job with incorrect PATH did not fail")
571
689
  except subprocess.CalledProcessError:
572
690
  pass
@@ -576,13 +694,61 @@ class CWLWorkflowTest(ToilTest):
576
694
  cwltoil.main(cmd)
577
695
  # Should fail because previous job completed successfully
578
696
  try:
579
- subprocess.check_output(["toil-cwl-runner"] + cmd, env=os.environ.copy(), stderr=subprocess.STDOUT)
697
+ subprocess.check_output(
698
+ ["toil-cwl-runner"] + cmd,
699
+ env=os.environ.copy(),
700
+ stderr=subprocess.STDOUT,
701
+ )
580
702
  self.fail("Restart with missing directory did not fail")
581
703
  except subprocess.CalledProcessError:
582
704
  pass
583
705
 
706
+ def test_caching(self) -> None:
707
+ log.info("Running CWL caching test.")
708
+ from toil.cwl import cwltoil
709
+
710
+ outDir = self._createTempDir()
711
+ cacheDir = self._createTempDir()
712
+
713
+ cwlDir = os.path.join(self._projectRootPath(), "src", "toil", "test", "cwl")
714
+ log_path = os.path.join(outDir, "log")
715
+ cmd = [
716
+ "--outdir",
717
+ outDir,
718
+ "--jobStore",
719
+ os.path.join(outDir, "jobStore"),
720
+ "--clean=always",
721
+ "--no-container",
722
+ "--cachedir",
723
+ cacheDir,
724
+ os.path.join(cwlDir, "revsort.cwl"),
725
+ os.path.join(cwlDir, "revsort-job.json"),
726
+ ]
727
+ st = StringIO()
728
+ ret = cwltoil.main(cmd, stdout=st)
729
+ assert ret == 0
730
+ # cwltool hashes certain steps into directories, ensure it exists
731
+ # since cwltool caches per task and revsort has 2 cwl tasks, there should be 2 directories and 2 status files
732
+ assert (len(os.listdir(cacheDir)) == 4)
733
+
734
+ # Rerun the workflow to ensure there is a cache hit and that we don't rerun the tools
735
+ st = StringIO()
736
+ cmd = [
737
+ "--writeLogsFromAllJobs=True",
738
+ "--writeLogs",
739
+ log_path
740
+ ] + cmd
741
+ ret = cwltoil.main(cmd, stdout=st)
742
+ assert ret == 0
743
+
744
+ # Ensure all of the worker logs are using their cached outputs
745
+ for file in os.listdir(log_path):
746
+ assert "Using cached output" in open(os.path.join(log_path, file), encoding="utf-8").read()
747
+
748
+
749
+
584
750
  @needs_aws_s3
585
- def test_streamable(self, extra_args: Optional[List[str]] = None) -> None:
751
+ def test_streamable(self, extra_args: Optional[list[str]] = None) -> None:
586
752
  """
587
753
  Test that a file with 'streamable'=True is a named pipe.
588
754
  This is a CWL1.2 feature.
@@ -666,7 +832,6 @@ class CWLWorkflowTest(ToilTest):
666
832
  except ValidationException as e:
667
833
  # Make sure we chastise the user appropriately.
668
834
  assert "expressions are not allowed" in str(e)
669
-
670
835
 
671
836
  @staticmethod
672
837
  def _expected_seqtk_output(outDir: str) -> "CWLObjectType":
@@ -723,7 +888,7 @@ class CWLWorkflowTest(ToilTest):
723
888
  "size": 0,
724
889
  "class": "File",
725
890
  "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709",
726
- "path": path
891
+ "path": path,
727
892
  }
728
893
  }
729
894
 
@@ -750,9 +915,9 @@ class CWLWorkflowTest(ToilTest):
750
915
  "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709",
751
916
  "size": 0,
752
917
  "nameroot": "test",
753
- "nameext": ".txt"
918
+ "nameext": ".txt",
754
919
  }
755
- ]
920
+ ],
756
921
  }
757
922
  }
758
923
 
@@ -785,7 +950,7 @@ class CWLWorkflowTest(ToilTest):
785
950
  "size": 1111,
786
951
  "nameroot": "whale",
787
952
  "nameext": ".txt",
788
- "path": f"{path}/whale.txt"
953
+ "path": f"{path}/whale.txt",
789
954
  }
790
955
  ],
791
956
  }
@@ -805,6 +970,58 @@ class CWLWorkflowTest(ToilTest):
805
970
  }
806
971
  }
807
972
 
973
+ def test_missing_import(self) -> None:
974
+ tmp_path = self._createTempDir()
975
+ out_dir = os.path.join(tmp_path, "cwl-out-dir")
976
+ toil = "toil-cwl-runner"
977
+ options = [
978
+ f"--outdir={out_dir}",
979
+ "--clean=always",
980
+ ]
981
+ cmd = [toil] + options + ["src/toil/test/cwl/revsort.cwl", "src/toil/test/cwl/revsort-job-missing.json"]
982
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
983
+ stdout, stderr = p.communicate()
984
+ # Make sure that the missing file is mentioned in the log so the user knows
985
+ assert b"missing.txt" in stderr
986
+ assert p.returncode == 1
987
+
988
+ @needs_aws_s3
989
+ def test_optional_secondary_files_exists(self) -> None:
990
+ tmp_path = self._createTempDir()
991
+ out_dir = os.path.join(tmp_path, "cwl-out-dir")
992
+
993
+ cwlfile = "src/toil/test/cwl/optional-file.cwl"
994
+ jobfile = "src/toil/test/cwl/optional-file-exists.json"
995
+
996
+ args = [
997
+ os.path.join(self.rootDir, cwlfile),
998
+ os.path.join(self.rootDir, jobfile),
999
+ f"--outdir={out_dir}"
1000
+ ]
1001
+ from toil.cwl import cwltoil
1002
+
1003
+ ret = cwltoil.main(args)
1004
+ assert ret == 0
1005
+ assert os.path.exists(os.path.join(out_dir, "wdl_templates_old.zip"))
1006
+
1007
+ @needs_aws_s3
1008
+ def test_optional_secondary_files_missing(self) -> None:
1009
+ tmp_path = self._createTempDir()
1010
+ out_dir = os.path.join(tmp_path, "cwl-out-dir")
1011
+
1012
+ cwlfile = "src/toil/test/cwl/optional-file.cwl"
1013
+ jobfile = "src/toil/test/cwl/optional-file-missing.json"
1014
+
1015
+ args = [
1016
+ os.path.join(self.rootDir, cwlfile),
1017
+ os.path.join(self.rootDir, jobfile),
1018
+ f"--outdir={out_dir}"
1019
+ ]
1020
+ from toil.cwl import cwltoil
1021
+
1022
+ ret = cwltoil.main(args)
1023
+ assert ret == 0
1024
+ assert not os.path.exists(os.path.join(out_dir, "hello_old.zip"))
808
1025
 
809
1026
  @needs_cwl
810
1027
  @needs_online
@@ -853,7 +1070,7 @@ class CWLv10Test(ToilTest):
853
1070
  caching: bool = False,
854
1071
  selected_tests: Optional[str] = None,
855
1072
  skipped_tests: Optional[str] = None,
856
- extra_args: Optional[List[str]] = None,
1073
+ extra_args: Optional[list[str]] = None,
857
1074
  ) -> None:
858
1075
  run_conformance_tests(
859
1076
  workDir=self.workDir,
@@ -981,7 +1198,7 @@ class CWLv11Test(ToilTest):
981
1198
  caching: bool = False,
982
1199
  batchSystem: Optional[str] = None,
983
1200
  skipped_tests: Optional[str] = None,
984
- extra_args: Optional[List[str]] = None,
1201
+ extra_args: Optional[list[str]] = None,
985
1202
  ) -> None:
986
1203
  run_conformance_tests(
987
1204
  workDir=self.cwlSpec,
@@ -1055,7 +1272,7 @@ class CWLv12Test(ToilTest):
1055
1272
  batchSystem: Optional[str] = None,
1056
1273
  selected_tests: Optional[str] = None,
1057
1274
  skipped_tests: Optional[str] = None,
1058
- extra_args: Optional[List[str]] = None,
1275
+ extra_args: Optional[list[str]] = None,
1059
1276
  must_support_all_features: bool = False,
1060
1277
  junit_file: Optional[str] = None,
1061
1278
  ) -> None:
@@ -1073,15 +1290,20 @@ class CWLv12Test(ToilTest):
1073
1290
  must_support_all_features=must_support_all_features,
1074
1291
  junit_file=junit_file,
1075
1292
  )
1076
-
1077
1293
  @slow
1078
1294
  @pytest.mark.timeout(CONFORMANCE_TEST_TIMEOUT)
1079
1295
  def test_run_conformance_with_caching(self) -> None:
1080
1296
  self.test_run_conformance(
1081
1297
  caching=True,
1082
- junit_file = os.path.join(
1083
- self.rootDir, "caching-conformance-1.2.junit.xml"
1084
- )
1298
+ junit_file=os.path.join(self.rootDir, "caching-conformance-1.2.junit.xml"),
1299
+ )
1300
+
1301
+ @slow
1302
+ @pytest.mark.timeout(CONFORMANCE_TEST_TIMEOUT)
1303
+ def test_run_conformance_with_task_caching(self) -> None:
1304
+ self.test_run_conformance(
1305
+ junit_file=os.path.join(self.rootDir, "task-caching-conformance-1.2.junit.xml"),
1306
+ extra_args=["--cachedir", self._createTempDir("task_cache")]
1085
1307
  )
1086
1308
 
1087
1309
  @slow
@@ -1093,10 +1315,11 @@ class CWLv12Test(ToilTest):
1093
1315
  features.
1094
1316
  """
1095
1317
  self.test_run_conformance(
1096
- extra_args=["--bypass-file-store"], must_support_all_features=True,
1097
- junit_file = os.path.join(
1318
+ extra_args=["--bypass-file-store"],
1319
+ must_support_all_features=True,
1320
+ junit_file=os.path.join(
1098
1321
  self.rootDir, "in-place-update-conformance-1.2.junit.xml"
1099
- )
1322
+ ),
1100
1323
  )
1101
1324
 
1102
1325
  @slow
@@ -1272,6 +1495,9 @@ def test_log_dir_echo_stderr(tmp_path: Path) -> None:
1272
1495
  assert output == "hello\n"
1273
1496
 
1274
1497
 
1498
+ # TODO: It's not clear how this test tests filename conflict resolution; it
1499
+ # seems like it runs a python script to copy some files and makes sure the
1500
+ # workflow doesn't fail.
1275
1501
  @needs_cwl
1276
1502
  @pytest.mark.cwl_small_log_dir
1277
1503
  def test_filename_conflict_resolution(tmp_path: Path) -> None:
@@ -1294,6 +1520,27 @@ def test_filename_conflict_resolution(tmp_path: Path) -> None:
1294
1520
  assert b"Finished toil run successfully" in stderr
1295
1521
  assert p.returncode == 0
1296
1522
 
1523
+
1524
+ @needs_cwl
1525
+ @pytest.mark.cwl_small_log_dir
1526
+ def test_filename_conflict_resolution_3_or_more(tmp_path: Path) -> None:
1527
+ out_dir = tmp_path / "cwl-out-dir"
1528
+ toil = "toil-cwl-runner"
1529
+ options = [
1530
+ f"--outdir={out_dir}",
1531
+ "--clean=always",
1532
+ ]
1533
+ cwl = os.path.join(os.path.dirname(__file__), "scatter_duplicate_outputs.cwl")
1534
+ cmd = [toil] + options + [cwl]
1535
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1536
+ stdout, stderr = p.communicate()
1537
+ assert b"Finished toil run successfully" in stderr
1538
+ assert p.returncode == 0
1539
+ assert (
1540
+ len(os.listdir(out_dir)) == 9
1541
+ ), "All 9 files made by the scatter should be in the directory"
1542
+
1543
+
1297
1544
  @needs_cwl
1298
1545
  @needs_docker
1299
1546
  @pytest.mark.cwl_small_log_dir
@@ -1316,6 +1563,7 @@ def test_filename_conflict_detection(tmp_path: Path) -> None:
1316
1563
  assert b"File staging conflict" in stderr
1317
1564
  assert p.returncode != 0
1318
1565
 
1566
+
1319
1567
  @needs_cwl
1320
1568
  @needs_docker
1321
1569
  @pytest.mark.cwl_small_log_dir
@@ -1357,7 +1605,10 @@ def test_pick_value_with_one_null_value(caplog: pytest.LogCaptureFixture) -> Non
1357
1605
  with caplog.at_level(logging.WARNING, logger="toil.cwl.cwltoil"):
1358
1606
  cwltoil.main(args)
1359
1607
  for line in caplog.messages:
1360
- assert "You had a conditional step that did not run, but you did not use pickValue to handle the skipped input." not in line
1608
+ assert (
1609
+ "You had a conditional step that did not run, but you did not use pickValue to handle the skipped input."
1610
+ not in line
1611
+ )
1361
1612
 
1362
1613
 
1363
1614
  @needs_cwl
@@ -1395,6 +1646,8 @@ def test_workflow_echo_string_scatter_capture_stdout() -> None:
1395
1646
  cmd = [toil, jobstore, option_1, option_2, option_3, cwl]
1396
1647
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1397
1648
  stdout, stderr = p.communicate()
1649
+ log.debug("Workflow standard output: %s", stdout)
1650
+ assert len(stdout) > 0
1398
1651
  outputs = json.loads(stdout)
1399
1652
  out_list = outputs["list_out"]
1400
1653
  assert len(out_list) == 2, f"outList shoud have two file elements {out_list}"
@@ -1505,7 +1758,7 @@ def test_visit_cwl_class_and_reduce() -> None:
1505
1758
  up_count = 0
1506
1759
  up_child_count = 0
1507
1760
 
1508
- def op_up(thing: "CWLObjectType", down_value: int, child_results: List[str]) -> str:
1761
+ def op_up(thing: "CWLObjectType", down_value: int, child_results: list[str]) -> str:
1509
1762
  """
1510
1763
  Check the down return value and the up return values, and count
1511
1764
  what we visit going up and what child relationships we have.
@@ -1559,9 +1812,9 @@ def test_download_structure(tmp_path: Path) -> None:
1559
1812
  # These will be populated.
1560
1813
  # TODO: This cache seems unused. Remove it?
1561
1814
  # This maps filesystem path to CWL URI
1562
- index: Dict[str, str] = {}
1815
+ index: dict[str, str] = {}
1563
1816
  # This maps CWL URI to filesystem path
1564
- existing: Dict[str, str] = {}
1817
+ existing: dict[str, str] = {}
1565
1818
 
1566
1819
  # Do the download
1567
1820
  download_structure(file_store, index, existing, structure, to_dir)
@@ -1614,3 +1867,53 @@ def test_download_structure(tmp_path: Path) -> None:
1614
1867
  ],
1615
1868
  any_order=True,
1616
1869
  )
1870
+
1871
+
1872
@needs_cwl
@pytest.mark.timeout(300)
def test_import_on_workers() -> None:
    """
    Check that ``--runImportsOnWorkers`` issues a ``CWLImportJob``.

    A log-message detector is attached to the root logger while the workflow
    runs. The detector is always detached afterwards, even if the workflow
    raises, so it cannot leak into other tests sharing the process.
    """
    args = [
        "src/toil/test/cwl/download.cwl",
        "src/toil/test/cwl/download_file.json",
        "--runImportsOnWorkers",
        "--importWorkersDisk=10MiB",
        "--realTimeLogging=True",
        "--logLevel=INFO",
        "--logColors=False",
    ]
    from toil.cwl import cwltoil

    detector = ImportWorkersMessageHandler()

    # Attach the log message detector to the root logger for the duration of
    # the workflow run only.
    root_logger = logging.getLogger()
    root_logger.addHandler(detector)
    try:
        cwltoil.main(args)
    finally:
        # Leaving the handler installed would make it observe (and possibly
        # break on) log records from every later test.
        root_logger.removeHandler(detector)

    assert detector.detected is True
1894
+
1895
+
1896
# StreamHandler is generic in typeshed but not subscriptable at runtime, and
# _typeshed does not exist at runtime either. Use the parameterized type only
# while type checking. See https://github.com/python/typeshed/issues/5680
if TYPE_CHECKING:
    from _typeshed import SupportsWrite

    _stream_handler = logging.StreamHandler[SupportsWrite[str]]
else:
    _stream_handler = logging.StreamHandler


class ImportWorkersMessageHandler(_stream_handler):
    """
    Detect the import workers log message and set a flag.

    The handler never writes to its stream; it only inspects each record's
    formatted message and sets ``detected`` once a message starting with the
    expected prefix has been seen.
    """

    def __init__(self) -> None:
        super().__init__(sys.stderr)

        self.detected = False  # Have we seen the message we want?

    def emit(self, record: logging.LogRecord) -> None:
        """
        Set ``detected`` if the record's message announces a CWLImportJob.

        :param record: The log record to inspect.
        """
        try:
            # getMessage() copes with records that have no args, with
            # non-string messages, and with literal "%" in argument-less
            # messages, all of which break a bare ``msg % args``.
            message = record.getMessage()
        except Exception:
            # A handler must not raise into the code that logged; report the
            # malformed record the way standard handlers do.
            self.handleError(record)
            return

        if message.startswith("Issued job 'CWLImportJob' CWLImportJob"):
            self.detected = True