toil 7.0.0__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. toil/__init__.py +121 -83
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +137 -77
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
  5. toil/batchSystems/awsBatch.py +237 -128
  6. toil/batchSystems/cleanup_support.py +22 -16
  7. toil/batchSystems/contained_executor.py +30 -26
  8. toil/batchSystems/gridengine.py +85 -49
  9. toil/batchSystems/htcondor.py +164 -87
  10. toil/batchSystems/kubernetes.py +622 -386
  11. toil/batchSystems/local_support.py +17 -12
  12. toil/batchSystems/lsf.py +132 -79
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +288 -149
  16. toil/batchSystems/mesos/executor.py +77 -49
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +38 -29
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +293 -123
  21. toil/batchSystems/slurm.py +489 -137
  22. toil/batchSystems/torque.py +46 -32
  23. toil/bus.py +141 -73
  24. toil/common.py +630 -359
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1114 -532
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +62 -41
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +88 -57
  32. toil/fileStores/cachingFileStore.py +711 -247
  33. toil/fileStores/nonCachingFileStore.py +113 -75
  34. toil/job.py +988 -315
  35. toil/jobStores/abstractJobStore.py +387 -243
  36. toil/jobStores/aws/jobStore.py +727 -403
  37. toil/jobStores/aws/utils.py +161 -109
  38. toil/jobStores/conftest.py +1 -0
  39. toil/jobStores/fileJobStore.py +289 -151
  40. toil/jobStores/googleJobStore.py +137 -70
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +614 -269
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +55 -28
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +193 -58
  49. toil/lib/aws/utils.py +238 -218
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +83 -49
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +322 -209
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +4 -2
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +99 -11
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +65 -18
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +19 -7
  71. toil/lib/retry.py +115 -77
  72. toil/lib/threading.py +282 -80
  73. toil/lib/throttle.py +15 -14
  74. toil/options/common.py +834 -401
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +70 -19
  78. toil/provisioners/__init__.py +111 -46
  79. toil/provisioners/abstractProvisioner.py +322 -157
  80. toil/provisioners/aws/__init__.py +62 -30
  81. toil/provisioners/aws/awsProvisioner.py +980 -627
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +147 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +127 -61
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +148 -64
  98. toil/test/__init__.py +263 -179
  99. toil/test/batchSystems/batchSystemTest.py +438 -195
  100. toil/test/batchSystems/batch_system_plugin_test.py +18 -7
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +93 -47
  104. toil/test/cactus/test_cactus_integration.py +20 -22
  105. toil/test/cwl/cwlTest.py +271 -71
  106. toil/test/cwl/measure_default_memory.cwl +12 -0
  107. toil/test/cwl/not_run_required_input.cwl +29 -0
  108. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  109. toil/test/docs/scriptsTest.py +60 -34
  110. toil/test/jobStores/jobStoreTest.py +412 -235
  111. toil/test/lib/aws/test_iam.py +116 -48
  112. toil/test/lib/aws/test_s3.py +16 -9
  113. toil/test/lib/aws/test_utils.py +5 -6
  114. toil/test/lib/dockerTest.py +118 -141
  115. toil/test/lib/test_conversions.py +113 -115
  116. toil/test/lib/test_ec2.py +57 -49
  117. toil/test/lib/test_integration.py +104 -0
  118. toil/test/lib/test_misc.py +12 -5
  119. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  120. toil/test/mesos/helloWorld.py +7 -6
  121. toil/test/mesos/stress.py +25 -20
  122. toil/test/options/options.py +7 -2
  123. toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
  124. toil/test/provisioners/clusterScalerTest.py +440 -250
  125. toil/test/provisioners/clusterTest.py +81 -42
  126. toil/test/provisioners/gceProvisionerTest.py +174 -100
  127. toil/test/provisioners/provisionerTest.py +25 -13
  128. toil/test/provisioners/restartScript.py +5 -4
  129. toil/test/server/serverTest.py +188 -141
  130. toil/test/sort/restart_sort.py +137 -68
  131. toil/test/sort/sort.py +134 -66
  132. toil/test/sort/sortTest.py +91 -49
  133. toil/test/src/autoDeploymentTest.py +140 -100
  134. toil/test/src/busTest.py +20 -18
  135. toil/test/src/checkpointTest.py +8 -2
  136. toil/test/src/deferredFunctionTest.py +49 -35
  137. toil/test/src/dockerCheckTest.py +33 -26
  138. toil/test/src/environmentTest.py +20 -10
  139. toil/test/src/fileStoreTest.py +538 -271
  140. toil/test/src/helloWorldTest.py +7 -4
  141. toil/test/src/importExportFileTest.py +61 -31
  142. toil/test/src/jobDescriptionTest.py +32 -17
  143. toil/test/src/jobEncapsulationTest.py +2 -0
  144. toil/test/src/jobFileStoreTest.py +74 -50
  145. toil/test/src/jobServiceTest.py +187 -73
  146. toil/test/src/jobTest.py +120 -70
  147. toil/test/src/miscTests.py +19 -18
  148. toil/test/src/promisedRequirementTest.py +82 -36
  149. toil/test/src/promisesTest.py +7 -6
  150. toil/test/src/realtimeLoggerTest.py +6 -6
  151. toil/test/src/regularLogTest.py +71 -37
  152. toil/test/src/resourceTest.py +80 -49
  153. toil/test/src/restartDAGTest.py +36 -22
  154. toil/test/src/resumabilityTest.py +9 -2
  155. toil/test/src/retainTempDirTest.py +45 -14
  156. toil/test/src/systemTest.py +12 -8
  157. toil/test/src/threadingTest.py +44 -25
  158. toil/test/src/toilContextManagerTest.py +10 -7
  159. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  160. toil/test/src/workerTest.py +33 -16
  161. toil/test/utils/toilDebugTest.py +70 -58
  162. toil/test/utils/toilKillTest.py +4 -5
  163. toil/test/utils/utilsTest.py +239 -102
  164. toil/test/wdl/wdltoil_test.py +789 -148
  165. toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
  166. toil/toilState.py +52 -26
  167. toil/utils/toilConfig.py +13 -4
  168. toil/utils/toilDebugFile.py +44 -27
  169. toil/utils/toilDebugJob.py +85 -25
  170. toil/utils/toilDestroyCluster.py +11 -6
  171. toil/utils/toilKill.py +8 -3
  172. toil/utils/toilLaunchCluster.py +251 -145
  173. toil/utils/toilMain.py +37 -16
  174. toil/utils/toilRsyncCluster.py +27 -14
  175. toil/utils/toilSshCluster.py +45 -22
  176. toil/utils/toilStats.py +75 -36
  177. toil/utils/toilStatus.py +226 -119
  178. toil/utils/toilUpdateEC2Instances.py +3 -1
  179. toil/version.py +11 -11
  180. toil/wdl/utils.py +5 -5
  181. toil/wdl/wdltoil.py +3513 -1052
  182. toil/worker.py +269 -128
  183. toil-8.0.0.dist-info/METADATA +173 -0
  184. toil-8.0.0.dist-info/RECORD +253 -0
  185. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  186. toil-7.0.0.dist-info/METADATA +0 -158
  187. toil-7.0.0.dist-info/RECORD +0 -244
  188. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/LICENSE +0 -0
  189. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  190. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/test/cwl/cwlTest.py CHANGED
@@ -23,16 +23,10 @@ import sys
23
23
  import unittest
24
24
  import uuid
25
25
  import zipfile
26
-
27
26
  from functools import partial
28
27
  from io import StringIO
29
28
  from pathlib import Path
30
- from typing import (TYPE_CHECKING,
31
- Callable,
32
- Dict,
33
- List,
34
- Optional,
35
- cast)
29
+ from typing import TYPE_CHECKING, Callable, Optional, cast
36
30
  from unittest.mock import Mock, call
37
31
  from urllib.request import urlretrieve
38
32
 
@@ -46,32 +40,33 @@ sys.path.insert(0, pkg_root) # noqa
46
40
 
47
41
  from schema_salad.exceptions import ValidationException
48
42
 
49
- from toil.cwl.utils import (DirectoryStructure,
50
- download_structure,
51
- visit_cwl_class_and_reduce,
52
- visit_top_cwl_class)
43
+ from toil.cwl.utils import (
44
+ DirectoryStructure,
45
+ download_structure,
46
+ visit_cwl_class_and_reduce,
47
+ visit_top_cwl_class,
48
+ )
53
49
  from toil.fileStores import FileID
54
50
  from toil.fileStores.abstractFileStore import AbstractFileStore
55
51
  from toil.lib.threading import cpu_count
56
- from toil.provisioners import cluster_factory
57
- from toil.test import (ToilTest,
58
- needs_aws_ec2,
59
- needs_aws_s3,
60
- needs_cwl,
61
- needs_docker,
62
- needs_docker_cuda,
63
- needs_env_var,
64
- needs_fetchable_appliance,
65
- needs_gridengine,
66
- needs_kubernetes,
67
- needs_local_cuda,
68
- needs_lsf,
69
- needs_mesos,
70
- needs_online,
71
- needs_slurm,
72
- needs_torque,
73
- needs_wes_server,
74
- slow)
52
+ from toil.test import (
53
+ ToilTest,
54
+ needs_aws_s3,
55
+ needs_cwl,
56
+ needs_docker,
57
+ needs_docker_cuda,
58
+ needs_gridengine,
59
+ needs_kubernetes,
60
+ needs_local_cuda,
61
+ needs_lsf,
62
+ needs_mesos,
63
+ needs_online,
64
+ needs_singularity_or_docker,
65
+ needs_slurm,
66
+ needs_torque,
67
+ needs_wes_server,
68
+ slow,
69
+ )
75
70
 
76
71
  log = logging.getLogger(__name__)
77
72
  CONFORMANCE_TEST_TIMEOUT = 10000
@@ -86,7 +81,7 @@ def run_conformance_tests(
86
81
  selected_tests: Optional[str] = None,
87
82
  selected_tags: Optional[str] = None,
88
83
  skipped_tests: Optional[str] = None,
89
- extra_args: Optional[List[str]] = None,
84
+ extra_args: Optional[list[str]] = None,
90
85
  must_support_all_features: bool = False,
91
86
  junit_file: Optional[str] = None,
92
87
  ) -> None:
@@ -147,7 +142,7 @@ def run_conformance_tests(
147
142
  "--relax-path-checks",
148
143
  # Defaults to 20s but we can't start hundreds of nodejs processes that fast on our CI potatoes
149
144
  "--eval-timeout=600",
150
- f"--caching={caching}"
145
+ f"--caching={caching}",
151
146
  ]
152
147
 
153
148
  if extra_args:
@@ -181,34 +176,55 @@ def run_conformance_tests(
181
176
  cmd.extend(["--"] + args_passed_directly_to_runner)
182
177
 
183
178
  log.info("Running: '%s'", "' '".join(cmd))
179
+ output_lines: list[str] = []
184
180
  try:
185
- output = subprocess.check_output(cmd, cwd=workDir, stderr=subprocess.STDOUT)
181
+ child = subprocess.Popen(
182
+ cmd, cwd=workDir, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
183
+ )
184
+
185
+ if child.stdout is not None:
186
+ for line_bytes in child.stdout:
187
+ # Pass through all the logs
188
+ line_text = line_bytes.decode("utf-8", errors="replace").rstrip()
189
+ output_lines.append(line_text)
190
+ log.info(line_text)
191
+
192
+ # Once it's done writing, make sure it succeeded.
193
+ child.wait()
194
+ log.info("CWL tests finished with exit code %s", child.returncode)
195
+ if child.returncode != 0:
196
+ # Act like check_output and raise an error.
197
+ raise subprocess.CalledProcessError(child.returncode, " ".join(cmd))
186
198
  finally:
187
199
  if job_store_override:
188
200
  # Clean up the job store we used for all the tests, if it is still there.
189
201
  subprocess.run(["toil", "clean", job_store_override])
190
202
 
191
203
  except subprocess.CalledProcessError as e:
204
+ log.info("CWL test runner return code was unsuccessful")
192
205
  only_unsupported = False
193
206
  # check output -- if we failed but only have unsupported features, we're okay
194
207
  p = re.compile(
195
208
  r"(?P<failures>\d+) failures, (?P<unsupported>\d+) unsupported features"
196
209
  )
197
210
 
198
- error_log = e.output.decode("utf-8")
199
- for line in error_log.split("\n"):
200
- m = p.search(line)
211
+ for line_text in output_lines:
212
+ m = p.search(line_text)
201
213
  if m:
202
214
  if int(m.group("failures")) == 0 and int(m.group("unsupported")) > 0:
203
215
  only_unsupported = True
204
216
  break
205
217
  if (not only_unsupported) or must_support_all_features:
206
- print(error_log)
218
+ log.error(
219
+ "CWL tests gave unacceptable output:\n%s", "\n".join(output_lines)
220
+ )
207
221
  raise e
222
+ log.info("Unsuccessful return code is OK")
208
223
 
209
224
 
210
225
  TesterFuncType = Callable[[str, str, "CWLObjectType"], None]
211
226
 
227
+
212
228
  @needs_cwl
213
229
  class CWLWorkflowTest(ToilTest):
214
230
  """
@@ -222,11 +238,14 @@ class CWLWorkflowTest(ToilTest):
222
238
  self.outDir = f"/tmp/toil-cwl-test-{str(uuid.uuid4())}"
223
239
  os.makedirs(self.outDir)
224
240
  self.rootDir = self._projectRootPath()
241
+ self.jobStoreDir = f"./jobstore-{str(uuid.uuid4())}"
225
242
 
226
243
  def tearDown(self) -> None:
227
244
  """Clean up outputs."""
228
245
  if os.path.exists(self.outDir):
229
246
  shutil.rmtree(self.outDir)
247
+ if os.path.exists(self.jobStoreDir):
248
+ shutil.rmtree(self.jobStoreDir)
230
249
  unittest.TestCase.tearDown(self)
231
250
 
232
251
  def test_cwl_cmdline_input(self) -> None:
@@ -234,6 +253,7 @@ class CWLWorkflowTest(ToilTest):
234
253
  Test that running a CWL workflow with inputs specified on the command line passes.
235
254
  """
236
255
  from toil.cwl import cwltoil
256
+
237
257
  cwlfile = "src/toil/test/cwl/conditional_wf.cwl"
238
258
  args = [cwlfile, "--message", "str", "--sleep", "2"]
239
259
  st = StringIO()
@@ -245,7 +265,7 @@ class CWLWorkflowTest(ToilTest):
245
265
  cwlfile: str,
246
266
  jobfile: str,
247
267
  expect: "CWLObjectType",
248
- main_args: List[str] = [],
268
+ main_args: list[str] = [],
249
269
  out_name: str = "output",
250
270
  output_here: bool = False,
251
271
  ) -> None:
@@ -255,13 +275,7 @@ class CWLWorkflowTest(ToilTest):
255
275
  main_args = main_args[:]
256
276
  if not output_here:
257
277
  # Don't just dump output in the working directory.
258
- main_args.extend(
259
- [
260
- "--logDebug",
261
- "--outdir",
262
- self.outDir
263
- ]
264
- )
278
+ main_args.extend(["--logDebug", "--outdir", self.outDir])
265
279
  main_args.extend(
266
280
  [
267
281
  os.path.join(self.rootDir, cwlfile),
@@ -276,7 +290,12 @@ class CWLWorkflowTest(ToilTest):
276
290
  self.assertEqual(out, expect)
277
291
 
278
292
  for k, v in expect.items():
279
- if isinstance(v, dict) and "class" in v and v["class"] == "File" and "path" in v:
293
+ if (
294
+ isinstance(v, dict)
295
+ and "class" in v
296
+ and v["class"] == "File"
297
+ and "path" in v
298
+ ):
280
299
  # This is a top-level output file.
281
300
  # None of our output files should be executable.
282
301
  self.assertTrue(os.path.exists(v["path"]))
@@ -402,6 +421,11 @@ class CWLWorkflowTest(ToilTest):
402
421
  "revsort.cwl", partial(self._tester, main_args=["--no-compute-checksum"])
403
422
  )
404
423
 
424
+ def test_run_revsort_no_container(self) -> None:
425
+ self.revsort(
426
+ "revsort.cwl", partial(self._tester, main_args=["--no-container"])
427
+ )
428
+
405
429
  def test_run_revsort2(self) -> None:
406
430
  self.revsort("revsort2.cwl", self._tester)
407
431
 
@@ -415,6 +439,24 @@ class CWLWorkflowTest(ToilTest):
415
439
  self._expected_colon_output(self.outDir),
416
440
  out_name="result",
417
441
  )
442
+
443
+ @pytest.mark.integrative
444
+ @needs_singularity_or_docker
445
+ def test_run_dockstore_trs(self) -> None:
446
+ from toil.cwl import cwltoil
447
+
448
+ stdout = StringIO()
449
+ main_args = [
450
+ "--outdir",
451
+ self.outDir,
452
+ "#workflow/github.com/dockstore-testing/md5sum-checker",
453
+ "https://raw.githubusercontent.com/dockstore-testing/md5sum-checker/refs/heads/master/md5sum/md5sum-input-cwl.json"
454
+ ]
455
+ cwltoil.main(main_args, stdout=stdout)
456
+ out = json.loads(stdout.getvalue())
457
+ with open(out.get("output_file", {}).get("location")[len("file://") :]) as f:
458
+ computed_hash = f.read().strip()
459
+ self.assertEqual(computed_hash, "00579a00e3e7fa0674428ac7049423e2")
418
460
 
419
461
  def test_glob_dir_bypass_file_store(self) -> None:
420
462
  self.maxDiff = 1000
@@ -426,7 +468,7 @@ class CWLWorkflowTest(ToilTest):
426
468
  "src/toil/test/cwl/empty.json",
427
469
  self._expected_glob_dir_output(os.getcwd()),
428
470
  main_args=["--bypass-file-store"],
429
- output_here=True
471
+ output_here=True,
430
472
  )
431
473
  finally:
432
474
  # Clean up anything we made in the current directory.
@@ -435,6 +477,69 @@ class CWLWorkflowTest(ToilTest):
435
477
  except FileNotFoundError:
436
478
  pass
437
479
 
480
+ def test_required_input_condition_protection(self) -> None:
481
+ # This doesn't run containerized
482
+ self._tester(
483
+ "src/toil/test/cwl/not_run_required_input.cwl",
484
+ "src/toil/test/cwl/empty.json",
485
+ {},
486
+ )
487
+
488
+ @needs_slurm
489
+ def test_slurm_node_memory(self) -> None:
490
+ pass
491
+
492
+ # Run the workflow. This will either finish quickly and tell us the
493
+ # memory we got, or take a long time because it requested a whole
494
+ # node's worth of memory and no nodes are free right now. We need to
495
+ # support both.
496
+
497
+ # And if we run out of time we need to stop the workflow gracefully and
498
+ # cancel the Slurm jobs.
499
+
500
+ main_args = [
501
+ f"--jobStore={self.jobStoreDir}",
502
+ # Avoid racing to toil kill before the jobstore is removed
503
+ "--clean=never",
504
+ "--batchSystem=slurm",
505
+ "--no-cwl-default-ram",
506
+ "--slurmDefaultAllMem=True",
507
+ "--outdir",
508
+ self.outDir,
509
+ os.path.join(self.rootDir, "src/toil/test/cwl/measure_default_memory.cwl"),
510
+ ]
511
+ try:
512
+ log.debug("Start test workflow")
513
+ child = subprocess.Popen(
514
+ ["toil-cwl-runner"] + main_args, stdout=subprocess.PIPE
515
+ )
516
+ output, _ = child.communicate(timeout=60)
517
+ except subprocess.TimeoutExpired:
518
+ # The job didn't finish quickly; presumably waiting for a full node.
519
+ # Stop the workflow
520
+ log.debug("Workflow might be waiting for a full node. Stop it.")
521
+ subprocess.check_call(["toil", "kill", self.jobStoreDir])
522
+ # Wait another little bit for it to clean up, making sure to collect output in case it is blocked on writing
523
+ child.communicate(timeout=20)
524
+ # Kill it off in case it is still running
525
+ child.kill()
526
+ # Reap it
527
+ child.wait()
528
+ # The test passes
529
+ else:
530
+ out = json.loads(output)
531
+ log.debug("Workflow output: %s", out)
532
+ memory_string = out["memory"]
533
+ log.debug("Observed memory: %s", memory_string)
534
+ # If there's no memory limit enforced, Slurm will return "unlimited".
535
+ # Set result to something sensible.
536
+ if memory_string.strip() == "unlimited":
537
+ result = 4 * 1024 * 1024
538
+ else:
539
+ result = int(memory_string)
540
+ # We should see more than the CWL default or the Toil default, assuming Slurm nodes of reasonable size (3 GiB).
541
+ self.assertGreater(result, 3 * 1024 * 1024)
542
+
438
543
  @needs_aws_s3
439
544
  def test_download_s3(self) -> None:
440
545
  self.download("download_s3.json", self._tester)
@@ -446,7 +551,10 @@ class CWLWorkflowTest(ToilTest):
446
551
  self.download("download_https.json", self._tester)
447
552
 
448
553
  def test_download_https_reference(self) -> None:
449
- self.download("download_https.json", partial(self._tester, main_args=["--reference-inputs"]))
554
+ self.download(
555
+ "download_https.json",
556
+ partial(self._tester, main_args=["--reference-inputs"]),
557
+ )
450
558
 
451
559
  def test_download_file(self) -> None:
452
560
  self.download("download_file.json", self._tester)
@@ -457,7 +565,10 @@ class CWLWorkflowTest(ToilTest):
457
565
 
458
566
  @needs_aws_s3
459
567
  def test_download_directory_s3_reference(self) -> None:
460
- self.download_directory("download_directory_s3.json", partial(self._tester, main_args=["--reference-inputs"]))
568
+ self.download_directory(
569
+ "download_directory_s3.json",
570
+ partial(self._tester, main_args=["--reference-inputs"]),
571
+ )
461
572
 
462
573
  def test_download_directory_file(self) -> None:
463
574
  self.download_directory("download_directory_file.json", self._tester)
@@ -502,7 +613,10 @@ class CWLWorkflowTest(ToilTest):
502
613
  "src/toil/test/cwl/seqtk_seq.cwl",
503
614
  "src/toil/test/cwl/seqtk_seq_job.json",
504
615
  self._expected_seqtk_output(self.outDir),
505
- main_args=["--default-container", "quay.io/biocontainers/seqtk:1.4--he4a0461_1"],
616
+ main_args=[
617
+ "--default-container",
618
+ "quay.io/biocontainers/seqtk:1.4--he4a0461_1",
619
+ ],
506
620
  out_name="output1",
507
621
  )
508
622
 
@@ -566,7 +680,11 @@ class CWLWorkflowTest(ToilTest):
566
680
  # Force a failure by trying to use an incorrect version of `rev` from the PATH
567
681
  os.environ["PATH"] = path_with_bogus_rev()
568
682
  try:
569
- subprocess.check_output(["toil-cwl-runner"] + cmd, env=os.environ.copy(), stderr=subprocess.STDOUT)
683
+ subprocess.check_output(
684
+ ["toil-cwl-runner"] + cmd,
685
+ env=os.environ.copy(),
686
+ stderr=subprocess.STDOUT,
687
+ )
570
688
  self.fail("Expected problem job with incorrect PATH did not fail")
571
689
  except subprocess.CalledProcessError:
572
690
  pass
@@ -576,13 +694,17 @@ class CWLWorkflowTest(ToilTest):
576
694
  cwltoil.main(cmd)
577
695
  # Should fail because previous job completed successfully
578
696
  try:
579
- subprocess.check_output(["toil-cwl-runner"] + cmd, env=os.environ.copy(), stderr=subprocess.STDOUT)
697
+ subprocess.check_output(
698
+ ["toil-cwl-runner"] + cmd,
699
+ env=os.environ.copy(),
700
+ stderr=subprocess.STDOUT,
701
+ )
580
702
  self.fail("Restart with missing directory did not fail")
581
703
  except subprocess.CalledProcessError:
582
704
  pass
583
705
 
584
706
  @needs_aws_s3
585
- def test_streamable(self, extra_args: Optional[List[str]] = None) -> None:
707
+ def test_streamable(self, extra_args: Optional[list[str]] = None) -> None:
586
708
  """
587
709
  Test that a file with 'streamable'=True is a named pipe.
588
710
  This is a CWL1.2 feature.
@@ -666,7 +788,6 @@ class CWLWorkflowTest(ToilTest):
666
788
  except ValidationException as e:
667
789
  # Make sure we chastise the user appropriately.
668
790
  assert "expressions are not allowed" in str(e)
669
-
670
791
 
671
792
  @staticmethod
672
793
  def _expected_seqtk_output(outDir: str) -> "CWLObjectType":
@@ -723,7 +844,7 @@ class CWLWorkflowTest(ToilTest):
723
844
  "size": 0,
724
845
  "class": "File",
725
846
  "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709",
726
- "path": path
847
+ "path": path,
727
848
  }
728
849
  }
729
850
 
@@ -750,9 +871,9 @@ class CWLWorkflowTest(ToilTest):
750
871
  "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709",
751
872
  "size": 0,
752
873
  "nameroot": "test",
753
- "nameext": ".txt"
874
+ "nameext": ".txt",
754
875
  }
755
- ]
876
+ ],
756
877
  }
757
878
  }
758
879
 
@@ -785,7 +906,7 @@ class CWLWorkflowTest(ToilTest):
785
906
  "size": 1111,
786
907
  "nameroot": "whale",
787
908
  "nameext": ".txt",
788
- "path": f"{path}/whale.txt"
909
+ "path": f"{path}/whale.txt",
789
910
  }
790
911
  ],
791
912
  }
@@ -853,7 +974,7 @@ class CWLv10Test(ToilTest):
853
974
  caching: bool = False,
854
975
  selected_tests: Optional[str] = None,
855
976
  skipped_tests: Optional[str] = None,
856
- extra_args: Optional[List[str]] = None,
977
+ extra_args: Optional[list[str]] = None,
857
978
  ) -> None:
858
979
  run_conformance_tests(
859
980
  workDir=self.workDir,
@@ -981,7 +1102,7 @@ class CWLv11Test(ToilTest):
981
1102
  caching: bool = False,
982
1103
  batchSystem: Optional[str] = None,
983
1104
  skipped_tests: Optional[str] = None,
984
- extra_args: Optional[List[str]] = None,
1105
+ extra_args: Optional[list[str]] = None,
985
1106
  ) -> None:
986
1107
  run_conformance_tests(
987
1108
  workDir=self.cwlSpec,
@@ -1055,7 +1176,7 @@ class CWLv12Test(ToilTest):
1055
1176
  batchSystem: Optional[str] = None,
1056
1177
  selected_tests: Optional[str] = None,
1057
1178
  skipped_tests: Optional[str] = None,
1058
- extra_args: Optional[List[str]] = None,
1179
+ extra_args: Optional[list[str]] = None,
1059
1180
  must_support_all_features: bool = False,
1060
1181
  junit_file: Optional[str] = None,
1061
1182
  ) -> None:
@@ -1079,9 +1200,7 @@ class CWLv12Test(ToilTest):
1079
1200
  def test_run_conformance_with_caching(self) -> None:
1080
1201
  self.test_run_conformance(
1081
1202
  caching=True,
1082
- junit_file = os.path.join(
1083
- self.rootDir, "caching-conformance-1.2.junit.xml"
1084
- )
1203
+ junit_file=os.path.join(self.rootDir, "caching-conformance-1.2.junit.xml"),
1085
1204
  )
1086
1205
 
1087
1206
  @slow
@@ -1093,10 +1212,11 @@ class CWLv12Test(ToilTest):
1093
1212
  features.
1094
1213
  """
1095
1214
  self.test_run_conformance(
1096
- extra_args=["--bypass-file-store"], must_support_all_features=True,
1097
- junit_file = os.path.join(
1215
+ extra_args=["--bypass-file-store"],
1216
+ must_support_all_features=True,
1217
+ junit_file=os.path.join(
1098
1218
  self.rootDir, "in-place-update-conformance-1.2.junit.xml"
1099
- )
1219
+ ),
1100
1220
  )
1101
1221
 
1102
1222
  @slow
@@ -1272,6 +1392,9 @@ def test_log_dir_echo_stderr(tmp_path: Path) -> None:
1272
1392
  assert output == "hello\n"
1273
1393
 
1274
1394
 
1395
+ # TODO: It's not clear how this test tests filename conflict resolution; it
1396
+ # seems like it runs a python script to copy some files and makes sure the
1397
+ # workflow doesn't fail.
1275
1398
  @needs_cwl
1276
1399
  @pytest.mark.cwl_small_log_dir
1277
1400
  def test_filename_conflict_resolution(tmp_path: Path) -> None:
@@ -1294,6 +1417,27 @@ def test_filename_conflict_resolution(tmp_path: Path) -> None:
1294
1417
  assert b"Finished toil run successfully" in stderr
1295
1418
  assert p.returncode == 0
1296
1419
 
1420
+
1421
+ @needs_cwl
1422
+ @pytest.mark.cwl_small_log_dir
1423
+ def test_filename_conflict_resolution_3_or_more(tmp_path: Path) -> None:
1424
+ out_dir = tmp_path / "cwl-out-dir"
1425
+ toil = "toil-cwl-runner"
1426
+ options = [
1427
+ f"--outdir={out_dir}",
1428
+ "--clean=always",
1429
+ ]
1430
+ cwl = os.path.join(os.path.dirname(__file__), "scatter_duplicate_outputs.cwl")
1431
+ cmd = [toil] + options + [cwl]
1432
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1433
+ stdout, stderr = p.communicate()
1434
+ assert b"Finished toil run successfully" in stderr
1435
+ assert p.returncode == 0
1436
+ assert (
1437
+ len(os.listdir(out_dir)) == 9
1438
+ ), "All 9 files made by the scatter should be in the directory"
1439
+
1440
+
1297
1441
  @needs_cwl
1298
1442
  @needs_docker
1299
1443
  @pytest.mark.cwl_small_log_dir
@@ -1316,6 +1460,7 @@ def test_filename_conflict_detection(tmp_path: Path) -> None:
1316
1460
  assert b"File staging conflict" in stderr
1317
1461
  assert p.returncode != 0
1318
1462
 
1463
+
1319
1464
  @needs_cwl
1320
1465
  @needs_docker
1321
1466
  @pytest.mark.cwl_small_log_dir
@@ -1357,7 +1502,10 @@ def test_pick_value_with_one_null_value(caplog: pytest.LogCaptureFixture) -> Non
1357
1502
  with caplog.at_level(logging.WARNING, logger="toil.cwl.cwltoil"):
1358
1503
  cwltoil.main(args)
1359
1504
  for line in caplog.messages:
1360
- assert "You had a conditional step that did not run, but you did not use pickValue to handle the skipped input." not in line
1505
+ assert (
1506
+ "You had a conditional step that did not run, but you did not use pickValue to handle the skipped input."
1507
+ not in line
1508
+ )
1361
1509
 
1362
1510
 
1363
1511
  @needs_cwl
@@ -1395,6 +1543,8 @@ def test_workflow_echo_string_scatter_capture_stdout() -> None:
1395
1543
  cmd = [toil, jobstore, option_1, option_2, option_3, cwl]
1396
1544
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1397
1545
  stdout, stderr = p.communicate()
1546
+ log.debug("Workflow standard output: %s", stdout)
1547
+ assert len(stdout) > 0
1398
1548
  outputs = json.loads(stdout)
1399
1549
  out_list = outputs["list_out"]
1400
1550
  assert len(out_list) == 2, f"outList shoud have two file elements {out_list}"
@@ -1505,7 +1655,7 @@ def test_visit_cwl_class_and_reduce() -> None:
1505
1655
  up_count = 0
1506
1656
  up_child_count = 0
1507
1657
 
1508
- def op_up(thing: "CWLObjectType", down_value: int, child_results: List[str]) -> str:
1658
+ def op_up(thing: "CWLObjectType", down_value: int, child_results: list[str]) -> str:
1509
1659
  """
1510
1660
  Check the down return value and the up return values, and count
1511
1661
  what we visit going up and what child relationships we have.
@@ -1559,9 +1709,9 @@ def test_download_structure(tmp_path: Path) -> None:
1559
1709
  # These will be populated.
1560
1710
  # TODO: This cache seems unused. Remove it?
1561
1711
  # This maps filesystem path to CWL URI
1562
- index: Dict[str, str] = {}
1712
+ index: dict[str, str] = {}
1563
1713
  # This maps CWL URI to filesystem path
1564
- existing: Dict[str, str] = {}
1714
+ existing: dict[str, str] = {}
1565
1715
 
1566
1716
  # Do the download
1567
1717
  download_structure(file_store, index, existing, structure, to_dir)
@@ -1614,3 +1764,53 @@ def test_download_structure(tmp_path: Path) -> None:
1614
1764
  ],
1615
1765
  any_order=True,
1616
1766
  )
1767
+
1768
+
1769
+ @needs_cwl
1770
+ @pytest.mark.timeout(300)
1771
+ def test_import_on_workers() -> None:
1772
+ args = [
1773
+ "src/toil/test/cwl/download.cwl",
1774
+ "src/toil/test/cwl/download_file.json",
1775
+ "--runImportsOnWorkers",
1776
+ "--importWorkersDisk=10MiB",
1777
+ "--realTimeLogging=True",
1778
+ "--logLevel=INFO",
1779
+ "--logColors=False",
1780
+ ]
1781
+ from toil.cwl import cwltoil
1782
+
1783
+ detector = ImportWorkersMessageHandler()
1784
+
1785
+ # Set up a log message detector to the root logger
1786
+ logging.getLogger().addHandler(detector)
1787
+
1788
+ cwltoil.main(args)
1789
+
1790
+ assert detector.detected is True
1791
+
1792
+
1793
+ # StreamHandler is generic, _typeshed doesn't exist at runtime, do a bit of typing trickery, see https://github.com/python/typeshed/issues/5680
1794
+ if TYPE_CHECKING:
1795
+ from _typeshed import SupportsWrite
1796
+
1797
+ _stream_handler = logging.StreamHandler[SupportsWrite[str]]
1798
+ else:
1799
+ _stream_handler = logging.StreamHandler
1800
+
1801
+
1802
+ class ImportWorkersMessageHandler(_stream_handler):
1803
+ """
1804
+ Detect the import workers log message and set a flag.
1805
+ """
1806
+
1807
+ def __init__(self) -> None:
1808
+ self.detected = False # Have we seen the message we want?
1809
+
1810
+ super().__init__(sys.stderr)
1811
+
1812
+ def emit(self, record: logging.LogRecord) -> None:
1813
+ if (record.msg % record.args).startswith(
1814
+ "Issued job 'CWLImportJob' CWLImportJob"
1815
+ ):
1816
+ self.detected = True
@@ -0,0 +1,12 @@
1
+ cwlVersion: v1.2
2
+ class: CommandLineTool
3
+ inputs: []
4
+ baseCommand: ["bash", "-c", "ulimit -m"]
5
+ stdout: memory.txt
6
+ outputs:
7
+ memory:
8
+ type: string
9
+ outputBinding:
10
+ glob: memory.txt
11
+ loadContents: True
12
+ outputEval: $(self[0].contents)
@@ -0,0 +1,29 @@
1
+ # This workflow fills in a required int from an optional int, but only when the
2
+ # int is really present. But it also uses the value to compute the conditional
3
+ # task's resource requirements, so Toil can't just schedule the task and then
4
+ # check the condition.
5
+ # See <https://github.com/DataBiosphere/toil/issues/4930#issue-2297563321>
6
+ cwlVersion: v1.2
7
+ class: Workflow
8
+ requirements:
9
+ InlineJavascriptRequirement: {}
10
+ inputs:
11
+ optional_input: int?
12
+ steps:
13
+ the_step:
14
+ in:
15
+ required_input:
16
+ source: optional_input
17
+ when: $(inputs.required_input != null)
18
+ run:
19
+ cwlVersion: v1.2
20
+ class: CommandLineTool
21
+ inputs:
22
+ required_input: int
23
+ requirements:
24
+ ResourceRequirement:
25
+ coresMax: $(inputs.required_input)
26
+ baseCommand: "nproc"
27
+ outputs: []
28
+ out: []
29
+ outputs: []