toil-6.1.0a1-py3-none-any.whl → toil-8.0.0-py3-none-any.whl

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (193)
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
@@ -1,29 +1,47 @@
1
1
  import json
2
+ import logging
2
3
  import os
4
+ import pytest
5
+ import re
3
6
  import shutil
7
+ import string
4
8
  import subprocess
5
9
  import unittest
6
- import uuid
7
- from typing import Any, Dict, List, Optional, Set
10
+ from typing import Any, Optional, Union
8
11
  from unittest.mock import patch
9
-
10
- from unittest.mock import patch
11
- from typing import Any, Dict, List, Set
12
-
13
- from toil.test import (ToilTest,
14
- needs_docker_cuda,
15
- needs_google_storage,
16
- needs_singularity_or_docker,
17
- slow)
12
+ from uuid import uuid4
13
+
14
+ import WDL.Error
15
+ import WDL.Expr
16
+
17
+ from toil.fileStores import FileID
18
+ from toil.test import (
19
+ ToilTest,
20
+ needs_docker,
21
+ needs_docker_cuda,
22
+ needs_google_storage,
23
+ needs_singularity_or_docker,
24
+ needs_wdl,
25
+ slow,
26
+ )
18
27
  from toil.version import exactPython
19
- from toil.wdl.wdltoil import WDLSectionJob, WDLWorkflowGraph
28
+ from toil.wdl.wdltoil import (
29
+ WDLSectionJob,
30
+ WDLWorkflowGraph,
31
+ parse_disks,
32
+ remove_common_leading_whitespace,
33
+ )
20
34
 
35
+ logger = logging.getLogger(__name__)
21
36
 
37
+
38
+ @needs_wdl
22
39
  class BaseWDLTest(ToilTest):
23
40
  """Base test class for WDL tests."""
41
+
24
42
  def setUp(self) -> None:
25
43
  """Runs anew before each test to create farm fresh temp dirs."""
26
- self.output_dir = os.path.join('/tmp/', 'toil-wdl-test-' + str(uuid.uuid4()))
44
+ self.output_dir = os.path.join("/tmp/", "toil-wdl-test-" + str(uuid4()))
27
45
  os.makedirs(self.output_dir)
28
46
 
29
47
  def tearDown(self) -> None:
@@ -31,52 +49,138 @@ class BaseWDLTest(ToilTest):
31
49
  shutil.rmtree(self.output_dir)
32
50
 
33
51
 
52
+ WDL_CONFORMANCE_TEST_REPO = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
53
+ WDL_CONFORMANCE_TEST_COMMIT = "baf44bcc7e6f6927540adf77d91b26a5558ae4b7"
54
+ # These tests are known to require things not implemented by
55
+ # Toil and will not be run in CI.
56
+ WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL = [
57
+ 16, # Basic object test (deprecated and removed in 1.1); MiniWDL and toil-wdl-runner do not support Objects, so this will fail if ran by them
58
+ 21, # Parser: expression placeholders in strings in conditional expressions in 1.0, Cromwell style; Fails with MiniWDL and toil-wdl-runner
59
+ 64, # Legacy test for as_map_as_input; It looks like MiniWDL does not have the function as_map()
60
+ 77, # Test that array cannot coerce to a string. WDL 1.1 does not allow compound types to coerce into a string. This should return a TypeError.
61
+ ]
62
+ WDL_UNIT_TESTS_UNSUPPORTED_BY_TOIL = [
63
+ 14, # test_object, Objects are not supported
64
+ 19, # map_to_struct, miniwdl cannot coerce map to struct, https://github.com/chanzuckerberg/miniwdl/issues/712
65
+ 52, # relative_and_absolute, needs root to run
66
+ 58, # test_gpu, needs gpu to run, else warning
67
+ 59, # will be fixed in #5001
68
+ 66, # This needs way too many resources (and actually doesn't work?), see https://github.com/DataBiosphere/wdl-conformance-tests/blob/2d617b703a33791f75f30a9db43c3740a499cd89/README_UNIT.md?plain=1#L8
69
+ 67, # same as above
70
+ 68, # Bug, see #https://github.com/DataBiosphere/toil/issues/4993
71
+ 69, # Same as 68
72
+ 87, # MiniWDL does not handle metacharacters properly when running regex, https://github.com/chanzuckerberg/miniwdl/issues/709
73
+ 97, # miniwdl bug, see https://github.com/chanzuckerberg/miniwdl/issues/701
74
+ 105, # miniwdl (and toil) bug, unserializable json is serialized, see https://github.com/chanzuckerberg/miniwdl/issues/702
75
+ 107, # object not supported
76
+ 108, # object not supported
77
+ 109, # object not supported
78
+ 110, # object not supported
79
+ 120, # miniwdl bug, see https://github.com/chanzuckerberg/miniwdl/issues/699
80
+ 131, # miniwdl bug, evalerror, see https://github.com/chanzuckerberg/miniwdl/issues/700
81
+ 134, # same as 131
82
+ 144 # miniwdl and toil bug
83
+ ]
84
+
85
+
86
+
34
87
  class WDLConformanceTests(BaseWDLTest):
35
88
  """
36
89
  WDL conformance tests for Toil.
37
90
  """
91
+
38
92
  wdl_dir = "wdl-conformance-tests"
93
+
39
94
  @classmethod
40
95
  def setUpClass(cls) -> None:
41
96
 
42
- url = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
43
- commit = "032fb99a1458d456b6d5f17d27928469ec1a1c68"
44
-
45
97
  p = subprocess.Popen(
46
- f"git clone {url} {cls.wdl_dir} && cd {cls.wdl_dir} && git checkout {commit}",
98
+ f"git clone {WDL_CONFORMANCE_TEST_REPO} {cls.wdl_dir} && cd {cls.wdl_dir} && git checkout {WDL_CONFORMANCE_TEST_COMMIT}",
47
99
  shell=True,
48
100
  )
49
101
 
50
102
  p.communicate()
51
103
 
52
104
  if p.returncode > 0:
53
- raise RuntimeError
105
+ raise RuntimeError("Could not clone WDL conformance tests")
54
106
 
55
107
  os.chdir(cls.wdl_dir)
56
108
 
57
109
  cls.base_command = [exactPython, "run.py", "--runner", "toil-wdl-runner"]
58
110
 
59
- # estimated running time: 2 minutes
60
- @slow
61
- def test_conformance_tests_v10(self):
62
- tests_to_run = "0,1,5-7,9-15,17,22-24,26,28-30,32-40,53,57-59,62,67-69"
63
- p = subprocess.run(self.base_command + ["-v", "1.0", "-n", tests_to_run], capture_output=True)
111
+ def check(self, p: subprocess.CompletedProcess) -> None:
112
+ """
113
+ Make sure a call completed or explain why it failed.
114
+ """
64
115
 
65
116
  if p.returncode != 0:
66
- print(p.stdout.decode('utf-8', errors='replace'))
117
+ logger.error(
118
+ "Failed process standard output: %s",
119
+ p.stdout.decode("utf-8", errors="replace"),
120
+ )
121
+ logger.error(
122
+ "Failed process standard error: %s",
123
+ p.stderr.decode("utf-8", errors="replace"),
124
+ )
67
125
 
68
126
  p.check_returncode()
69
127
 
70
- # estimated running time: 2 minutes
128
+ @slow
129
+ def test_unit_tests_v11(self):
130
+ # There are still some bugs with the WDL spec, use a fixed version until
131
+ # See comments of https://github.com/openwdl/wdl/pull/669
132
+ repo_url = "https://github.com/stxue1/wdl.git"
133
+ repo_branch = "wdl-1.1.3-fixes"
134
+ command = f"{exactPython} setup_unit_tests.py -v 1.1 --extra-patch-data unit_tests_patch_data.yaml --repo {repo_url} --branch {repo_branch} --force-pull"
135
+ p = subprocess.run(command.split(" "), capture_output=True)
136
+ self.check(p)
137
+ command = f"{exactPython} run_unit.py -r toil-wdl-runner -v 1.1 --progress --exclude-numbers {','.join([str(t) for t in WDL_UNIT_TESTS_UNSUPPORTED_BY_TOIL])}"
138
+ p = subprocess.run(command.split(" "), capture_output=True)
139
+ self.check(p)
140
+
141
+ # estimated running time: 10 minutes
142
+ @slow
143
+ def test_conformance_tests_v10(self):
144
+ command = self.base_command + ["-v", "1.0"]
145
+ if WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL:
146
+ command.append("--exclude-numbers")
147
+ command.append(
148
+ ",".join([str(t) for t in WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL])
149
+ )
150
+ p = subprocess.run(command, capture_output=True)
151
+
152
+ self.check(p)
153
+
154
+ # estimated running time: 10 minutes
71
155
  @slow
72
156
  def test_conformance_tests_v11(self):
73
- tests_to_run = "2-11,13-15,17-20,22-24,26,29,30,32-40,53,57-59,62,67-69"
74
- p = subprocess.run(self.base_command + ["-v", "1.1", "-n", tests_to_run], capture_output=True)
157
+ command = self.base_command + ["-v", "1.1"]
158
+ if WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL:
159
+ command.append("--exclude-numbers")
160
+ command.append(
161
+ ",".join([str(t) for t in WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL])
162
+ )
163
+ p = subprocess.run(command, capture_output=True)
75
164
 
76
- if p.returncode != 0:
77
- print(p.stdout.decode('utf-8', errors='replace'))
165
+ self.check(p)
78
166
 
79
- p.check_returncode()
167
+ @slow
168
+ def test_conformance_tests_integration(self):
169
+ ids_to_run = "encode,tut01,tut02,tut03,tut04"
170
+ p = subprocess.run(
171
+ self.base_command
172
+ + [
173
+ "-v",
174
+ "1.0",
175
+ "--conformance-file",
176
+ "integration.yaml",
177
+ "--id",
178
+ ids_to_run,
179
+ ],
180
+ capture_output=True,
181
+ )
182
+
183
+ self.check(p)
80
184
 
81
185
  @classmethod
82
186
  def tearDownClass(cls) -> None:
@@ -87,10 +191,11 @@ class WDLConformanceTests(BaseWDLTest):
87
191
 
88
192
  class WDLTests(BaseWDLTest):
89
193
  """Tests for Toil's MiniWDL-based implementation."""
194
+
90
195
  @classmethod
91
196
  def setUpClass(cls) -> None:
92
197
  """Runs once for all tests."""
93
- cls.base_command = [exactPython, '-m', 'toil.wdl.wdltoil']
198
+ cls.base_command = [exactPython, "-m", "toil.wdl.wdltoil"]
94
199
 
95
200
  # We inherit a testMD5sum but it is going to need Singularity or Docker
96
201
  # now. And also needs to have a WDL 1.0+ WDL file. So we replace it.
@@ -98,46 +203,338 @@ class WDLTests(BaseWDLTest):
98
203
  def test_MD5sum(self):
99
204
  """Test if Toil produces the same outputs as known good outputs for WDL's
100
205
  GATK tutorial #1."""
101
- wdl = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.1.0.wdl')
102
- json_file = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.json')
206
+ wdl = os.path.abspath("src/toil/test/wdl/md5sum/md5sum.1.0.wdl")
207
+ json_file = os.path.abspath("src/toil/test/wdl/md5sum/md5sum.json")
208
+
209
+ result_json = subprocess.check_output(
210
+ self.base_command
211
+ + [wdl, json_file, "-o", self.output_dir, "--logDebug", "--retryCount=0"]
212
+ )
213
+ result = json.loads(result_json)
214
+
215
+ assert "ga4ghMd5.value" in result
216
+ assert isinstance(result["ga4ghMd5.value"], str)
217
+ assert os.path.exists(result["ga4ghMd5.value"])
218
+ assert os.path.basename(result["ga4ghMd5.value"]) == "md5sum.txt"
219
+
220
+ def test_url_to_file(self):
221
+ """
222
+ Test if web URL strings can be coerced to usable Files.
223
+ """
224
+ wdl = os.path.abspath("src/toil/test/wdl/testfiles/url_to_file.wdl")
225
+
226
+ result_json = subprocess.check_output(
227
+ self.base_command
228
+ + [wdl, "-o", self.output_dir, "--logInfo", "--retryCount=0"]
229
+ )
230
+ result = json.loads(result_json)
231
+
232
+ assert "url_to_file.first_line" in result
233
+ assert isinstance(result["url_to_file.first_line"], str)
234
+ self.assertEqual(result["url_to_file.first_line"], "chr1\t248387328")
103
235
 
104
- result_json = subprocess.check_output(self.base_command + [wdl, json_file, '-o', self.output_dir, '--logDebug', '--retryCount=0'])
236
+ @needs_docker
237
+ def test_wait(self):
238
+ """
239
+ Test if Bash "wait" works in WDL scripts.
240
+ """
241
+ wdl = os.path.abspath("src/toil/test/wdl/testfiles/wait.wdl")
242
+
243
+ result_json = subprocess.check_output(
244
+ self.base_command
245
+ + [
246
+ wdl,
247
+ "-o",
248
+ self.output_dir,
249
+ "--logInfo",
250
+ "--retryCount=0",
251
+ "--wdlContainer=docker",
252
+ ]
253
+ )
105
254
  result = json.loads(result_json)
106
255
 
107
- assert 'ga4ghMd5.value' in result
108
- assert isinstance(result['ga4ghMd5.value'], str)
109
- assert os.path.exists(result['ga4ghMd5.value'])
110
- assert os.path.basename(result['ga4ghMd5.value']) == 'md5sum.txt'
256
+ assert "wait.result" in result
257
+ assert isinstance(result["wait.result"], str)
258
+ self.assertEqual(result["wait.result"], "waited")
111
259
 
112
260
  @needs_singularity_or_docker
113
- def test_miniwdl_self_test(self, extra_args: Optional[List[str]] = None) -> None:
114
- """Test if the MiniWDL self test runs and produces the expected output."""
115
- wdl_file = os.path.abspath('src/toil/test/wdl/miniwdl_self_test/self_test.wdl')
116
- json_file = os.path.abspath('src/toil/test/wdl/miniwdl_self_test/inputs.json')
261
+ def test_all_call_outputs(self):
262
+ """
263
+ Test if Toil can collect all call outputs from a workflow that doesn't expose them.
264
+ """
265
+ wdl = os.path.abspath("src/toil/test/wdl/testfiles/not_enough_outputs.wdl")
117
266
 
118
- result_json = subprocess.check_output(self.base_command + [wdl_file, json_file, '--logDebug', '-o', self.output_dir, '--outputDialect', 'miniwdl'] + (extra_args or []))
267
+ # With no flag we don't include the call outputs
268
+ result_json = subprocess.check_output(
269
+ self.base_command
270
+ + [wdl, "-o", self.output_dir, "--logInfo", "--retryCount=0"]
271
+ )
119
272
  result = json.loads(result_json)
120
273
 
121
- # Expect MiniWDL-style output with a designated "dir"
274
+ assert "wf.only_result" in result
275
+ assert "wf.do_math.square" not in result
276
+ assert "wf.do_math.cube" not in result
277
+ assert "wf.should_never_output" not in result
278
+
279
+ # With flag off we don't include the call outputs
280
+ result_json = subprocess.check_output(
281
+ self.base_command
282
+ + [
283
+ wdl,
284
+ "-o",
285
+ self.output_dir,
286
+ "--logInfo",
287
+ "--retryCount=0",
288
+ "--allCallOutputs=false",
289
+ ]
290
+ )
291
+ result = json.loads(result_json)
292
+
293
+ assert "wf.only_result" in result
294
+ assert "wf.do_math.square" not in result
295
+ assert "wf.do_math.cube" not in result
296
+ assert "wf.should_never_output" not in result
297
+
298
+ # With flag on we do include the call outputs
299
+ result_json = subprocess.check_output(
300
+ self.base_command
301
+ + [
302
+ wdl,
303
+ "-o",
304
+ self.output_dir,
305
+ "--logInfo",
306
+ "--retryCount=0",
307
+ "--allCallOutputs=on",
308
+ ]
309
+ )
310
+ result = json.loads(result_json)
311
+
312
+ assert "wf.only_result" in result
313
+ assert "wf.do_math.square" in result
314
+ assert "wf.do_math.cube" in result
315
+ assert "wf.should_never_output" not in result
316
+
317
+ @needs_singularity_or_docker
318
+ def test_croo_detection(self):
319
+ """
320
+ Test if Toil can detect and do something sensible with Cromwell Output Organizer workflows.
321
+ """
322
+ wdl = os.path.abspath("src/toil/test/wdl/testfiles/croo.wdl")
323
+
324
+ # With no flag we should include all task outputs
325
+ result_json = subprocess.check_output(
326
+ self.base_command
327
+ + [wdl, "-o", self.output_dir, "--logInfo", "--retryCount=0"]
328
+ )
329
+ result = json.loads(result_json)
330
+
331
+ assert "wf.only_result" in result
332
+ assert "wf.do_math.square" in result
333
+ assert "wf.do_math.cube" in result
334
+ assert "wf.should_never_output" not in result
335
+
336
+ # With flag off we obey the WDL spec even if we're suspicious
337
+ result_json = subprocess.check_output(
338
+ self.base_command
339
+ + [
340
+ wdl,
341
+ "-o",
342
+ self.output_dir,
343
+ "--logInfo",
344
+ "--retryCount=0",
345
+ "--allCallOutputs=off",
346
+ ]
347
+ )
348
+ result = json.loads(result_json)
349
+
350
+ assert "wf.only_result" in result
351
+ assert "wf.do_math.square" not in result
352
+ assert "wf.do_math.cube" not in result
353
+ assert "wf.should_never_output" not in result
354
+
355
+ @needs_singularity_or_docker
356
+ def test_caching(self):
357
+ """
358
+ Test if Toil can cache task runs.
359
+ """
360
+ wdl = os.path.abspath('src/toil/test/wdl/testfiles/random.wdl')
361
+
362
+ caching_env = dict(os.environ)
363
+ caching_env["MINIWDL__CALL_CACHE__GET"] = "true"
364
+ caching_env["MINIWDL__CALL_CACHE__PUT"] = "true"
365
+ caching_env["MINIWDL__CALL_CACHE__DIR"] = self._createTempDir("cache")
366
+
367
+ result_json = subprocess.check_output(
368
+ self.base_command + [wdl, '-o', self.output_dir, '--logInfo', '--retryCount=0', '--inputs={"random.task_1_input": 1, "random.task_2_input": 1}'],
369
+ env=caching_env)
370
+ result_initial = json.loads(result_json)
371
+
372
+ assert 'random.value_seen' in result_initial
373
+ assert 'random.value_written' in result_initial
374
+
375
+ result_json = subprocess.check_output(
376
+ self.base_command + [wdl, '-o', self.output_dir, '--logInfo', '--retryCount=0', '--inputs={"random.task_1_input": 1, "random.task_2_input": 1}'],
377
+ env=caching_env)
378
+ result_cached = json.loads(result_json)
379
+
380
+ assert 'random.value_seen' in result_cached
381
+ assert 'random.value_written' in result_cached
382
+
383
+ assert result_cached['random.value_seen'] == result_initial['random.value_seen']
384
+ assert result_cached['random.value_written'] == result_initial['random.value_written']
385
+
386
+ result_json = subprocess.check_output(
387
+ self.base_command + [wdl, '-o', self.output_dir, '--logInfo', '--retryCount=0', '--inputs={"random.task_1_input": 2, "random.task_2_input": 1}'],
388
+ env=caching_env)
389
+ result_not_cached = json.loads(result_json)
390
+
391
+ assert 'random.value_seen' in result_not_cached
392
+ assert 'random.value_written' in result_not_cached
122
393
 
123
- assert 'dir' in result
124
- assert isinstance(result['dir'], str)
125
- out_dir = result['dir']
394
+ assert result_not_cached['random.value_seen'] != result_initial['random.value_seen']
395
+ assert result_not_cached['random.value_written'] != result_initial['random.value_written']
126
396
 
127
- assert 'outputs' in result
128
- assert isinstance(result['outputs'], dict)
129
- outputs = result['outputs']
397
+ result_json = subprocess.check_output(
398
+ self.base_command + [wdl, '-o', self.output_dir, '--logInfo', '--retryCount=0', '--inputs={"random.task_1_input": 1, "random.task_2_input": 2}'],
399
+ env=caching_env)
400
+ result_part_cached = json.loads(result_json)
130
401
 
131
- assert 'hello_caller.message_files' in outputs
132
- assert isinstance(outputs['hello_caller.message_files'], list)
133
- assert len(outputs['hello_caller.message_files']) == 2
134
- for item in outputs['hello_caller.message_files']:
135
- # All the files should be strings in the "out" direcotry
136
- assert isinstance(item, str)
137
- assert item.startswith(out_dir)
402
+ assert 'random.value_seen' in result_part_cached
403
+ assert 'random.value_written' in result_part_cached
138
404
 
139
- assert 'hello_caller.messages' in outputs
140
- assert outputs['hello_caller.messages'] == ["Hello, Alyssa P. Hacker!", "Hello, Ben Bitdiddle!"]
405
+ assert result_part_cached['random.value_seen'] == result_initial['random.value_seen']
406
+ assert result_part_cached['random.value_written'] != result_initial['random.value_written']
407
+ assert result_part_cached['random.value_written'] != result_not_cached['random.value_written']
408
+
409
+
410
+
411
+ def test_url_to_optional_file(self):
412
+ """
413
+ Test if missing and error-producing URLs are handled correctly for optional File? values.
414
+ """
415
+ wdl = os.path.abspath("src/toil/test/wdl/testfiles/url_to_optional_file.wdl")
416
+
417
+ def run_for_code(code: int) -> dict:
418
+ """
419
+ Run a workflow coercing URL to File? where the URL returns the given status code.
420
+
421
+ Return the parsed output.
422
+ """
423
+ logger.info("Test optional file with HTTP code %s", code)
424
+ json_value = '{"url_to_optional_file.http_code": %d}' % code
425
+ result_json = subprocess.check_output(
426
+ self.base_command
427
+ + [
428
+ wdl,
429
+ json_value,
430
+ "-o",
431
+ self.output_dir,
432
+ "--logInfo",
433
+ "--retryCount=0",
434
+ ]
435
+ )
436
+ result = json.loads(result_json)
437
+ return result
438
+
439
+ # Check files that exist
440
+ result = run_for_code(200)
441
+ assert "url_to_optional_file.out_file" in result
442
+ self.assertNotEqual(result["url_to_optional_file.out_file"], None)
443
+
444
+ for code in (404, 410):
445
+ # Check files that definitely don't
446
+ result = run_for_code(code)
447
+ assert "url_to_optional_file.out_file" in result
448
+ self.assertEqual(result["url_to_optional_file.out_file"], None)
449
+
450
+ for code in (402, 418, 500, 502):
451
+ # Check that cases where the server refuses to say if the file
452
+ # exists stop the workflow.
453
+ with self.assertRaises(subprocess.CalledProcessError):
454
+ run_for_code(code)
455
+
456
+ def test_missing_output_directory(self):
457
+ """
458
+ Test if Toil can run a WDL workflow into a new directory.
459
+ """
460
+ wdl = os.path.abspath("src/toil/test/wdl/md5sum/md5sum.1.0.wdl")
461
+ json_file = os.path.abspath("src/toil/test/wdl/md5sum/md5sum.json")
462
+ subprocess.check_call(
463
+ self.base_command
464
+ + [
465
+ wdl,
466
+ json_file,
467
+ "-o",
468
+ os.path.join(self.output_dir, "does", "not", "exist"),
469
+ "--logDebug",
470
+ "--retryCount=0",
471
+ ]
472
+ )
473
+
474
+ @needs_singularity_or_docker
475
+ def test_miniwdl_self_test(self, extra_args: Optional[list[str]] = None) -> None:
476
+ """Test if the MiniWDL self test runs and produces the expected output."""
477
+ wdl_file = os.path.abspath("src/toil/test/wdl/miniwdl_self_test/self_test.wdl")
478
+ json_file = os.path.abspath("src/toil/test/wdl/miniwdl_self_test/inputs.json")
479
+
480
+ result_json = subprocess.check_output(
481
+ self.base_command
482
+ + [
483
+ wdl_file,
484
+ json_file,
485
+ "--logDebug",
486
+ "-o",
487
+ self.output_dir,
488
+ "--outputDialect",
489
+ "miniwdl",
490
+ ]
491
+ + (extra_args or [])
492
+ )
493
+ result = json.loads(result_json)
494
+
495
+ # Expect MiniWDL-style output with a designated "dir"
496
+
497
+ assert "dir" in result
498
+ assert isinstance(result["dir"], str)
499
+ out_dir = result["dir"]
500
+
501
+ assert "outputs" in result
502
+ assert isinstance(result["outputs"], dict)
503
+ outputs = result["outputs"]
504
+
505
+ assert "hello_caller.message_files" in outputs
506
+ assert isinstance(outputs["hello_caller.message_files"], list)
507
+ assert len(outputs["hello_caller.message_files"]) == 2
508
+ for item in outputs["hello_caller.message_files"]:
509
+ # All the files should be strings in the "out" directory
510
+ assert isinstance(item, str), "File output must be a string"
511
+ assert item.startswith(
512
+ out_dir
513
+ ), "File output must be in the output directory"
514
+
515
+ # Look at the filename within that directory
516
+ name_in_out_dir = item[len(out_dir) :]
517
+
518
+ # Ity should contain the job name of "hello", so they are human-readable.
519
+ assert (
520
+ "hello" in name_in_out_dir
521
+ ), f"File output {name_in_out_dir} should have the originating task name in it"
522
+
523
+ # And it should not contain non-human-readable content.
524
+ #
525
+ # We use a threshold number of digits as a proxy for this, but
526
+ # don't try and get around this by just rolling other random
527
+ # strings; we want these outputs to be human-readable!!!
528
+ digit_count = len([c for c in name_in_out_dir if c in string.digits])
529
+ assert (
530
+ digit_count < 3
531
+ ), f"File output {name_in_out_dir} has {digit_count} digits, which is too many to be plausibly human-readable"
532
+
533
+ assert "hello_caller.messages" in outputs
534
+ assert outputs["hello_caller.messages"] == [
535
+ "Hello, Alyssa P. Hacker!",
536
+ "Hello, Ben Bitdiddle!",
537
+ ]
141
538
 
142
539
  @needs_singularity_or_docker
143
540
  def test_miniwdl_self_test_by_reference(self) -> None:
@@ -146,6 +543,23 @@ class WDLTests(BaseWDLTest):
146
543
  """
147
544
  self.test_miniwdl_self_test(extra_args=["--referenceInputs=True"])
148
545
 
546
+ @pytest.mark.integrative
547
+ @needs_singularity_or_docker
548
+ def test_dockstore_trs(self, extra_args: Optional[list[str]] = None) -> None:
549
+ wdl_file = "#workflow/github.com/dockstore/bcc2020-training/HelloWorld:master"
550
+ # Needs an input but doesn't provide a good one.
551
+ json_input = json.dumps({"hello_world.hello.myName": "https://raw.githubusercontent.com/dockstore/bcc2020-training/refs/heads/master/wdl-training/exercise1/name.txt"})
552
+
553
+ result_json = subprocess.check_output(
554
+ self.base_command + [wdl_file, json_input, '--logDebug', '-o', self.output_dir, '--outputDialect',
555
+ 'miniwdl'] + (extra_args or []))
556
+ result = json.loads(result_json)
557
+
558
+ with open(result.get("outputs", {}).get("hello_world.helloFile")) as f:
559
+ result_text = f.read().strip()
560
+
561
+ self.assertEqual(result_text, "Hello World!\nMy name is potato.")
562
+
149
563
  @slow
150
564
  @needs_docker_cuda
151
565
  def test_giraffe_deepvariant(self):
@@ -153,84 +567,111 @@ class WDLTests(BaseWDLTest):
153
567
  # TODO: enable test if nvidia-container-runtime and Singularity are installed but Docker isn't.
154
568
 
155
569
  json_dir = self._createTempDir()
156
- base_uri = 'https://raw.githubusercontent.com/vgteam/vg_wdl/65dd739aae765f5c4dedd14f2e42d5a263f9267a'
570
+ base_uri = "https://raw.githubusercontent.com/vgteam/vg_wdl/65dd739aae765f5c4dedd14f2e42d5a263f9267a"
157
571
 
158
572
  wdl_file = f"{base_uri}/workflows/giraffe_and_deepvariant.wdl"
159
- json_file = os.path.abspath(os.path.join(json_dir, 'inputs.json'))
160
- with open(json_file, 'w') as fp:
573
+ json_file = os.path.abspath(os.path.join(json_dir, "inputs.json"))
574
+ with open(json_file, "w") as fp:
161
575
  # Write some inputs. We need to override the example inputs to use a GPU container, but that means we need absolute input URLs.
162
- json.dump(fp, {
163
- "GiraffeDeepVariant.INPUT_READ_FILE_1": f"{base_uri}/tests/small_sim_graph/reads_1.fastq.gz",
164
- "GiraffeDeepVariant.INPUT_READ_FILE_2": f"{base_uri}/tests/small_sim_graph/reads_2.fastq.gz",
165
- "GiraffeDeepVariant.XG_FILE": f"{base_uri}/tests/small_sim_graph/graph.xg",
166
- "GiraffeDeepVariant.SAMPLE_NAME": "s0",
167
- "GiraffeDeepVariant.GBWT_FILE": f"{base_uri}/tests/small_sim_graph/graph.gbwt",
168
- "GiraffeDeepVariant.GGBWT_FILE": f"{base_uri}/tests/small_sim_graph/graph.gg",
169
- "GiraffeDeepVariant.MIN_FILE": f"{base_uri}/tests/small_sim_graph/graph.min",
170
- "GiraffeDeepVariant.DIST_FILE": f"{base_uri}/tests/small_sim_graph/graph.dist",
171
- "GiraffeDeepVariant.OUTPUT_GAF": True,
172
- "GiraffeDeepVariant.runDeepVariantCallVariants.in_dv_gpu_container": "google/deepvariant:1.3.0-gpu"
173
- })
174
-
175
- result_json = subprocess.check_output(self.base_command + [wdl_file, json_file, '-o', self.output_dir, '--outputDialect', 'miniwdl'])
576
+ json.dump(
577
+ fp,
578
+ {
579
+ "GiraffeDeepVariant.INPUT_READ_FILE_1": f"{base_uri}/tests/small_sim_graph/reads_1.fastq.gz",
580
+ "GiraffeDeepVariant.INPUT_READ_FILE_2": f"{base_uri}/tests/small_sim_graph/reads_2.fastq.gz",
581
+ "GiraffeDeepVariant.XG_FILE": f"{base_uri}/tests/small_sim_graph/graph.xg",
582
+ "GiraffeDeepVariant.SAMPLE_NAME": "s0",
583
+ "GiraffeDeepVariant.GBWT_FILE": f"{base_uri}/tests/small_sim_graph/graph.gbwt",
584
+ "GiraffeDeepVariant.GGBWT_FILE": f"{base_uri}/tests/small_sim_graph/graph.gg",
585
+ "GiraffeDeepVariant.MIN_FILE": f"{base_uri}/tests/small_sim_graph/graph.min",
586
+ "GiraffeDeepVariant.DIST_FILE": f"{base_uri}/tests/small_sim_graph/graph.dist",
587
+ "GiraffeDeepVariant.OUTPUT_GAF": True,
588
+ "GiraffeDeepVariant.runDeepVariantCallVariants.in_dv_gpu_container": "google/deepvariant:1.3.0-gpu",
589
+ },
590
+ )
591
+
592
+ result_json = subprocess.check_output(
593
+ self.base_command
594
+ + [wdl_file, json_file, "-o", self.output_dir, "--outputDialect", "miniwdl"]
595
+ )
176
596
  result = json.loads(result_json)
177
597
 
178
598
  # Expect MiniWDL-style output with a designated "dir"
179
- assert 'dir' in result
180
- assert isinstance(result['dir'], str)
181
- out_dir = result['dir']
599
+ assert "dir" in result
600
+ assert isinstance(result["dir"], str)
601
+ out_dir = result["dir"]
182
602
 
183
- assert 'outputs' in result
184
- assert isinstance(result['outputs'], dict)
185
- outputs = result['outputs']
603
+ assert "outputs" in result
604
+ assert isinstance(result["outputs"], dict)
605
+ outputs = result["outputs"]
186
606
 
187
607
  # Expect a VCF file to have been written
188
- assert 'GiraffeDeepVariant.output_vcf' in outputs
189
- assert isinstance(outputs['GiraffeDeepVariant.output_vcf'], str)
190
- assert os.path.exists(outputs['GiraffeDeepVariant.output_vcf'])
608
+ assert "GiraffeDeepVariant.output_vcf" in outputs
609
+ assert isinstance(outputs["GiraffeDeepVariant.output_vcf"], str)
610
+ assert os.path.exists(outputs["GiraffeDeepVariant.output_vcf"])
191
611
 
192
612
  @slow
193
613
  @needs_singularity_or_docker
194
614
  def test_giraffe(self):
195
615
  """Test if Giraffe runs. This could take 12 minutes. Also we scale it down but it still demands lots of memory."""
196
616
  # TODO: enable test if nvidia-container-runtime and Singularity are installed but Docker isn't.
617
+ # TODO: Reduce memory requests with custom/smaller inputs.
618
+ # TODO: Skip if node lacks enough memory.
197
619
 
198
620
  json_dir = self._createTempDir()
199
- base_uri = 'https://raw.githubusercontent.com/vgteam/vg_wdl/65dd739aae765f5c4dedd14f2e42d5a263f9267a'
621
+ base_uri = "https://raw.githubusercontent.com/vgteam/vg_wdl/65dd739aae765f5c4dedd14f2e42d5a263f9267a"
200
622
  wdl_file = f"{base_uri}/workflows/giraffe.wdl"
201
623
  json_file = f"{base_uri}/params/giraffe.json"
202
624
 
203
- result_json = subprocess.check_output(self.base_command + [wdl_file, json_file, '-o', self.output_dir, '--outputDialect', 'miniwdl', '--scale', '0.1'])
625
+ result_json = subprocess.check_output(
626
+ self.base_command
627
+ + [
628
+ wdl_file,
629
+ json_file,
630
+ "-o",
631
+ self.output_dir,
632
+ "--outputDialect",
633
+ "miniwdl",
634
+ "--scale",
635
+ "0.1",
636
+ "--logDebug",
637
+ ]
638
+ )
204
639
  result = json.loads(result_json)
205
640
 
206
641
  # Expect MiniWDL-style output with a designated "dir"
207
- assert 'dir' in result
208
- assert isinstance(result['dir'], str)
209
- out_dir = result['dir']
642
+ assert "dir" in result
643
+ assert isinstance(result["dir"], str)
644
+ out_dir = result["dir"]
210
645
 
211
- assert 'outputs' in result
212
- assert isinstance(result['outputs'], dict)
213
- outputs = result['outputs']
646
+ assert "outputs" in result
647
+ assert isinstance(result["outputs"], dict)
648
+ outputs = result["outputs"]
214
649
 
215
650
  # Expect a BAM file to have been written
216
- assert 'Giraffe.output_bam' in outputs
217
- assert isinstance(outputs['Giraffe.output_bam'], str)
218
- assert os.path.exists(outputs['Giraffe.output_bam'])
651
+ assert "Giraffe.output_bam" in outputs
652
+ assert isinstance(outputs["Giraffe.output_bam"], str)
653
+ assert os.path.exists(outputs["Giraffe.output_bam"])
219
654
 
220
655
  @needs_singularity_or_docker
221
656
  @needs_google_storage
222
657
  def test_gs_uri(self):
223
658
  """Test if Toil can access Google Storage URIs."""
224
- wdl = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.1.0.wdl')
225
- json_file = os.path.abspath('src/toil/test/wdl/md5sum/md5sum-gs.json')
659
+ wdl = os.path.abspath("src/toil/test/wdl/md5sum/md5sum.1.0.wdl")
660
+ json_file = os.path.abspath("src/toil/test/wdl/md5sum/md5sum-gs.json")
226
661
 
227
- result_json = subprocess.check_output(self.base_command + [wdl, json_file, '-o', self.output_dir, '--logDebug'])
662
+ result_json = subprocess.check_output(
663
+ self.base_command + [wdl, json_file, "-o", self.output_dir, "--logDebug"]
664
+ )
228
665
  result = json.loads(result_json)
229
666
 
230
- assert 'ga4ghMd5.value' in result
231
- assert isinstance(result['ga4ghMd5.value'], str)
232
- assert os.path.exists(result['ga4ghMd5.value'])
233
- assert os.path.basename(result['ga4ghMd5.value']) == 'md5sum.txt'
667
+ assert "ga4ghMd5.value" in result
668
+ assert isinstance(result["ga4ghMd5.value"], str)
669
+ assert os.path.exists(result["ga4ghMd5.value"])
670
+ assert os.path.basename(result["ga4ghMd5.value"]) == "md5sum.txt"
671
+
672
+
673
+ class WDLToilBenchTests(ToilTest):
674
+ """Tests for Toil's MiniWDL-based implementation that don't run workflows."""
234
675
 
235
676
  def test_coalesce(self):
236
677
  """
@@ -241,9 +682,9 @@ class WDLTests(BaseWDLTest):
241
682
 
242
683
  # Set up data structures for our fake workflow graph to pull from.
243
684
  # This has all decl-type nodes
244
- all_decls: Set[str] = set()
685
+ all_decls: set[str] = set()
245
686
  # And this has all transitive dependencies for all nodes.
246
- all_deps: Dict[str, Set[str]] = {}
687
+ all_deps: dict[str, set[str]] = {}
247
688
 
248
689
  def mock_is_decl(self: Any, node_id: str) -> bool:
249
690
  """
@@ -251,7 +692,7 @@ class WDLTests(BaseWDLTest):
251
692
  """
252
693
  return node_id in all_decls
253
694
 
254
- def mock_get_transitive_dependencies(self: Any, node_id: str) -> Set[str]:
695
+ def mock_get_transitive_dependencies(self: Any, node_id: str) -> set[str]:
255
696
  """
256
697
  Replacement function to get all the transitive dependencies of a node.
257
698
  """
@@ -263,19 +704,21 @@ class WDLTests(BaseWDLTest):
263
704
  #
264
705
  # If that changes, the test will need to change! Maybe then it will be
265
706
  # worth extracting a base type for this interface.
266
- with patch.object(WDLWorkflowGraph, 'is_decl', mock_is_decl):
267
- with patch.object(WDLWorkflowGraph, 'get_transitive_dependencies', mock_get_transitive_dependencies):
268
-
707
+ with patch.object(WDLWorkflowGraph, "is_decl", mock_is_decl):
708
+ with patch.object(
709
+ WDLWorkflowGraph,
710
+ "get_transitive_dependencies",
711
+ mock_get_transitive_dependencies,
712
+ ):
269
713
  with self.subTest(msg="Two unrelated decls can coalesce"):
270
714
  # Set up two unrelated decls
271
715
  all_decls = {"decl1", "decl2"}
272
- all_deps = {
273
- "decl1": set(),
274
- "decl2": set()
275
- }
716
+ all_deps = {"decl1": set(), "decl2": set()}
717
+
718
+ result = WDLSectionJob.coalesce_nodes(
719
+ ["decl1", "decl2"], WDLWorkflowGraph([])
720
+ )
276
721
 
277
- result = WDLSectionJob.coalesce_nodes(["decl1", "decl2"], WDLWorkflowGraph([]))
278
-
279
722
  # Make sure they coalesced
280
723
  assert len(result) == 1
281
724
  assert "decl1" in result[0]
@@ -283,61 +726,287 @@ class WDLTests(BaseWDLTest):
283
726
 
284
727
  with self.subTest(msg="A decl will not coalesce with a non-decl"):
285
728
  all_decls = {"decl"}
286
- all_deps = {
287
- "decl": set(),
288
- "nondecl": set()
289
- }
729
+ all_deps = {"decl": set(), "nondecl": set()}
730
+
731
+ result = WDLSectionJob.coalesce_nodes(
732
+ ["decl", "nondecl"], WDLWorkflowGraph([])
733
+ )
290
734
 
291
- result = WDLSectionJob.coalesce_nodes(["decl", "nondecl"], WDLWorkflowGraph([]))
292
-
293
735
  assert len(result) == 2
294
736
  assert len(result[0]) == 1
295
737
  assert len(result[1]) == 1
296
738
 
297
-
298
- with self.subTest(msg="Two adjacent decls with a common dependency can coalesce"):
739
+ with self.subTest(
740
+ msg="Two adjacent decls with a common dependency can coalesce"
741
+ ):
299
742
  all_decls = {"decl1", "decl2"}
300
- all_deps = {
301
- "decl1": {"base"},
302
- "decl2": {"base"},
303
- "base": set()
304
- }
305
-
306
- result = WDLSectionJob.coalesce_nodes(["base", "decl1", "decl2"], WDLWorkflowGraph([]))
307
-
743
+ all_deps = {"decl1": {"base"}, "decl2": {"base"}, "base": set()}
744
+
745
+ result = WDLSectionJob.coalesce_nodes(
746
+ ["base", "decl1", "decl2"], WDLWorkflowGraph([])
747
+ )
748
+
308
749
  assert len(result) == 2
309
750
  assert "base" in result[0]
310
751
  assert "decl1" in result[1]
311
752
  assert "decl2" in result[1]
312
753
 
313
- with self.subTest(msg="Two adjacent decls with different dependencies will not coalesce"):
754
+ with self.subTest(
755
+ msg="Two adjacent decls with different dependencies will not coalesce"
756
+ ):
314
757
  all_decls = {"decl1", "decl2"}
315
- all_deps = {
316
- "decl1": {"base"},
317
- "decl2": set(),
318
- "base": set()
319
- }
320
-
321
- result = WDLSectionJob.coalesce_nodes(["base", "decl1", "decl2"], WDLWorkflowGraph([]))
322
-
758
+ all_deps = {"decl1": {"base"}, "decl2": set(), "base": set()}
759
+
760
+ result = WDLSectionJob.coalesce_nodes(
761
+ ["base", "decl1", "decl2"], WDLWorkflowGraph([])
762
+ )
763
+
323
764
  assert len(result) == 3
324
765
  assert "base" in result[0]
325
766
 
326
- with self.subTest(msg="Two adjacent decls with different successors will coalesce"):
767
+ with self.subTest(
768
+ msg="Two adjacent decls with different successors will coalesce"
769
+ ):
327
770
  all_decls = {"decl1", "decl2"}
328
- all_deps = {
329
- "decl1": set(),
330
- "decl2": set(),
331
- "successor": {"decl2"}
332
- }
333
-
334
- result = WDLSectionJob.coalesce_nodes(["decl1", "decl2", "successor"], WDLWorkflowGraph([]))
335
-
771
+ all_deps = {"decl1": set(), "decl2": set(), "successor": {"decl2"}}
772
+
773
+ result = WDLSectionJob.coalesce_nodes(
774
+ ["decl1", "decl2", "successor"], WDLWorkflowGraph([])
775
+ )
776
+
336
777
  assert len(result) == 2
337
778
  assert "decl1" in result[0]
338
779
  assert "decl2" in result[0]
339
780
  assert "successor" in result[1]
340
781
 
782
+ def make_string_expr(self, to_parse: str) -> WDL.Expr.String:
783
+ """
784
+ Parse pseudo-WDL for testing whitespace removal.
785
+ """
786
+
787
+ pos = WDL.Error.SourcePosition("nowhere", "nowhere", 0, 0, 0, 0)
788
+
789
+ parts: list[Union[str, WDL.Expr.Placeholder]] = re.split("(~{[^}]*})", to_parse)
790
+ for i in range(1, len(parts), 2):
791
+ parts[i] = WDL.Expr.Placeholder(pos, {}, WDL.Expr.Null(pos))
792
+
793
+ return WDL.Expr.String(pos, parts)
794
+
795
+ def test_remove_common_leading_whitespace(self):
796
+ """
797
+ Make sure leading whitespace removal works properly.
798
+ """
799
+
800
+ # For a single line, we remove its leading whitespace
801
+ expr = self.make_string_expr(" a ~{b} c")
802
+ trimmed = remove_common_leading_whitespace(expr)
803
+ assert len(trimmed.parts) == 3
804
+ assert trimmed.parts[0] == "a "
805
+ assert trimmed.parts[2] == " c"
806
+
807
+ # Whitespace removed isn't affected by totally blank lines
808
+ expr = self.make_string_expr(" \n\n a\n ~{stuff}\n b\n\n")
809
+ trimmed = remove_common_leading_whitespace(expr)
810
+ assert len(trimmed.parts) == 3
811
+ assert trimmed.parts[0] == "\n\na\n"
812
+ assert trimmed.parts[2] == "\nb\n\n"
813
+
814
+ # Unless blank toleration is off
815
+ expr = self.make_string_expr(" \n\n a\n ~{stuff}\n b\n\n")
816
+ trimmed = remove_common_leading_whitespace(expr, tolerate_blanks=False)
817
+ assert len(trimmed.parts) == 3
818
+ assert trimmed.parts[0] == " \n\n a\n "
819
+ assert trimmed.parts[2] == "\n b\n\n"
820
+
821
+ # Whitespace is still removed if the first line doesn't have it before the newline
822
+ expr = self.make_string_expr("\n a\n ~{stuff}\n b\n")
823
+ trimmed = remove_common_leading_whitespace(expr)
824
+ assert len(trimmed.parts) == 3
825
+ assert trimmed.parts[0] == "\na\n"
826
+ assert trimmed.parts[2] == "\nb\n"
827
+
828
+ # Whitespace is not removed if actual content is dedented
829
+ expr = self.make_string_expr(" \n\n a\n ~{stuff}\nuhoh\n b\n\n")
830
+ trimmed = remove_common_leading_whitespace(expr)
831
+ assert len(trimmed.parts) == 3
832
+ assert trimmed.parts[0] == " \n\n a\n "
833
+ assert trimmed.parts[2] == "\nuhoh\n b\n\n"
834
+
835
+ # Unless dedents are tolerated
836
+ expr = self.make_string_expr(" \n\n a\n ~{stuff}\nuhoh\n b\n\n")
837
+ trimmed = remove_common_leading_whitespace(expr, tolerate_dedents=True)
838
+ assert len(trimmed.parts) == 3
839
+ assert trimmed.parts[0] == "\n\na\n"
840
+ assert trimmed.parts[2] == "\nuhoh\nb\n\n"
841
+
842
+ # Whitespace is still removed if all-whitespace lines have less of it
843
+ expr = self.make_string_expr("\n a\n ~{stuff}\n \n b\n")
844
+ trimmed = remove_common_leading_whitespace(expr)
845
+ assert len(trimmed.parts) == 3
846
+ assert trimmed.parts[0] == "\na\n"
847
+ assert trimmed.parts[2] == "\n\nb\n"
848
+
849
+ # Unless all-whitespace lines are not tolerated
850
+ expr = self.make_string_expr("\n a\n ~{stuff}\n \n b\n")
851
+ trimmed = remove_common_leading_whitespace(expr, tolerate_all_whitespace=False)
852
+ assert len(trimmed.parts) == 3
853
+ assert trimmed.parts[0] == "\n a\n "
854
+ assert trimmed.parts[2] == "\n\n b\n"
855
+
856
+ # When mixed tabs and spaces are detected, nothing is changed.
857
+ expr = self.make_string_expr("\n a\n\t~{stuff}\n b\n")
858
+ trimmed = remove_common_leading_whitespace(expr)
859
+ assert len(trimmed.parts) == 3
860
+ assert trimmed.parts[0] == "\n a\n\t"
861
+ assert trimmed.parts[2] == "\n b\n"
862
+
863
+ # When mixed tabs and spaces are not in the prefix, whitespace is removed.
864
+ expr = self.make_string_expr("\n\ta\n\t~{stuff} \n\tb\n")
865
+ trimmed = remove_common_leading_whitespace(expr)
866
+ assert len(trimmed.parts) == 3
867
+ assert trimmed.parts[0] == "\na\n"
868
+ assert trimmed.parts[2] == " \nb\n"
869
+
870
+ # An empty string works
871
+ expr = self.make_string_expr("")
872
+ trimmed = remove_common_leading_whitespace(expr)
873
+ assert len(trimmed.parts) == 1
874
+ assert trimmed.parts[0] == ""
875
+
876
+ # A string of only whitespace is preserved as an all-whitespece line
877
+ expr = self.make_string_expr("\t\t\t")
878
+ trimmed = remove_common_leading_whitespace(expr)
879
+ assert len(trimmed.parts) == 1
880
+ assert trimmed.parts[0] == "\t\t\t"
881
+
882
+ # A string of only whitespace is trimmed when all-whitespace lines are not tolerated
883
+ expr = self.make_string_expr("\t\t\t")
884
+ trimmed = remove_common_leading_whitespace(expr, tolerate_all_whitespace=False)
885
+ assert len(trimmed.parts) == 1
886
+ assert trimmed.parts[0] == ""
887
+
888
+ # An empty expression works
889
+ expr = WDL.Expr.String(
890
+ WDL.Error.SourcePosition("nowhere", "nowhere", 0, 0, 0, 0), []
891
+ )
892
+ trimmed = remove_common_leading_whitespace(expr)
893
+ assert len(trimmed.parts) == 0
894
+
895
+ # An expression of only placeholders works
896
+ expr = self.make_string_expr("~{AAA}")
897
+ trimmed = remove_common_leading_whitespace(expr)
898
+ assert len(trimmed.parts) == 3
899
+ assert trimmed.parts[0] == ""
900
+ assert trimmed.parts[2] == ""
901
+
902
+ # The command flag is preserved
903
+ expr = self.make_string_expr(" a ~{b} c")
904
+ trimmed = remove_common_leading_whitespace(expr)
905
+ assert trimmed.command == False
906
+ expr.command = True
907
+ trimmed = remove_common_leading_whitespace(expr)
908
+ assert trimmed.command == True
909
+
910
+ def test_choose_human_readable_directory(self):
911
+ """
912
+ Test to make sure that we pick sensible but non-colliding directories to put files in.
913
+ """
914
+
915
+ from toil.wdl.wdltoil import (
916
+ DirectoryNamingStateDict,
917
+ choose_human_readable_directory,
918
+ )
919
+
920
+ state: DirectoryNamingStateDict = {}
921
+
922
+ # The first time we should get apath with the task name and without the ID
923
+ first_chosen = choose_human_readable_directory(
924
+ "root", "taskname", "111-222-333", state
925
+ )
926
+ assert first_chosen.startswith("root")
927
+ assert "taskname" in first_chosen
928
+ assert "111-222-333" not in first_chosen
929
+
930
+ # If we use the same ID we should get the same result
931
+ same_id = choose_human_readable_directory(
932
+ "root", "taskname", "111-222-333", state
933
+ )
934
+ self.assertEqual(same_id, first_chosen)
935
+
936
+ # If we use a different ID we shoudl get a different result still obeying the constraints
937
+ diff_id = choose_human_readable_directory(
938
+ "root", "taskname", "222-333-444", state
939
+ )
940
+ self.assertNotEqual(diff_id, first_chosen)
941
+ assert diff_id.startswith("root")
942
+ assert "taskname" in diff_id
943
+ assert "222-333-444" not in diff_id
944
+
945
+ def test_uri_packing(self):
946
+ """
947
+ Test to make sure Toil URI packing brings through the required information.
948
+ """
949
+
950
+ from toil.wdl.wdltoil import pack_toil_uri, unpack_toil_uri
951
+
952
+ # Set up a file
953
+ file_id = FileID("fileXYZ", 123, True)
954
+ task_path = "the_wf.the_task"
955
+ dir_id = uuid4()
956
+ file_basename = "thefile.txt"
957
+
958
+ # Pack and unpack it
959
+ uri = pack_toil_uri(file_id, task_path, dir_id, file_basename)
960
+ unpacked = unpack_toil_uri(uri)
961
+
962
+ # Make sure we got what we put in
963
+ self.assertEqual(unpacked[0], file_id)
964
+ self.assertEqual(unpacked[0].size, file_id.size)
965
+ self.assertEqual(unpacked[0].executable, file_id.executable)
966
+
967
+ self.assertEqual(unpacked[1], task_path)
968
+
969
+ # TODO: We don't make the UUIDs back into UUID objects
970
+ self.assertEqual(unpacked[2], str(dir_id))
971
+
972
+ self.assertEqual(unpacked[3], file_basename)
973
+
974
+ def test_disk_parse(self):
975
+ """
976
+ Test to make sure the disk parsing is correct
977
+ """
978
+ # Test cromwell compatibility
979
+ spec = "local-disk 5 SSD"
980
+ specified_mount_point, part_size, part_suffix = parse_disks(spec, spec)
981
+ self.assertEqual(specified_mount_point, None)
982
+ self.assertEqual(part_size, 5)
983
+ self.assertEqual(part_suffix, "GB")
984
+
985
+ # Test spec conformance
986
+ # https://github.com/openwdl/wdl/blob/e43e042104b728df1f1ad6e6145945d2b32331a6/SPEC.md?plain=1#L5072-L5082
987
+ spec = "10"
988
+ specified_mount_point, part_size, part_suffix = parse_disks(spec, spec)
989
+ self.assertEqual(specified_mount_point, None)
990
+ self.assertEqual(part_size, 10)
991
+ self.assertEqual(part_suffix, "GiB") # WDL spec default
992
+
993
+ spec = "1 MB"
994
+ specified_mount_point, part_size, part_suffix = parse_disks(spec, spec)
995
+ self.assertEqual(specified_mount_point, None)
996
+ self.assertEqual(part_size, 1)
997
+ self.assertEqual(part_suffix, "MB")
998
+
999
+ spec = "MOUNT_POINT 3"
1000
+ specified_mount_point, part_size, part_suffix = parse_disks(spec, spec)
1001
+ self.assertEqual(specified_mount_point, "MOUNT_POINT")
1002
+ self.assertEqual(part_size, 3)
1003
+ self.assertEqual(part_suffix, "GiB")
1004
+
1005
+ spec = "MOUNT_POINT 2 MB"
1006
+ specified_mount_point, part_size, part_suffix = parse_disks(spec, spec)
1007
+ self.assertEqual(specified_mount_point, "MOUNT_POINT")
1008
+ self.assertEqual(part_size, 2)
1009
+ self.assertEqual(part_suffix, "MB")
341
1010
 
342
1011
  if __name__ == "__main__":
343
1012
  unittest.main() # run all tests