toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. toil/__init__.py +124 -86
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +137 -77
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
  5. toil/batchSystems/awsBatch.py +237 -128
  6. toil/batchSystems/cleanup_support.py +22 -16
  7. toil/batchSystems/contained_executor.py +30 -26
  8. toil/batchSystems/gridengine.py +85 -49
  9. toil/batchSystems/htcondor.py +164 -87
  10. toil/batchSystems/kubernetes.py +622 -386
  11. toil/batchSystems/local_support.py +17 -12
  12. toil/batchSystems/lsf.py +132 -79
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +288 -149
  16. toil/batchSystems/mesos/executor.py +77 -49
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +39 -29
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +293 -123
  21. toil/batchSystems/slurm.py +651 -155
  22. toil/batchSystems/torque.py +46 -32
  23. toil/bus.py +141 -73
  24. toil/common.py +784 -397
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1137 -534
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +62 -41
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +88 -57
  32. toil/fileStores/cachingFileStore.py +711 -247
  33. toil/fileStores/nonCachingFileStore.py +113 -75
  34. toil/job.py +1031 -349
  35. toil/jobStores/abstractJobStore.py +387 -243
  36. toil/jobStores/aws/jobStore.py +772 -412
  37. toil/jobStores/aws/utils.py +161 -109
  38. toil/jobStores/conftest.py +1 -0
  39. toil/jobStores/fileJobStore.py +289 -151
  40. toil/jobStores/googleJobStore.py +137 -70
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +614 -269
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +55 -28
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +204 -58
  49. toil/lib/aws/utils.py +290 -213
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +83 -49
  53. toil/lib/docker.py +131 -103
  54. toil/lib/dockstore.py +379 -0
  55. toil/lib/ec2.py +322 -209
  56. toil/lib/ec2nodes.py +174 -105
  57. toil/lib/encryption/_dummy.py +5 -3
  58. toil/lib/encryption/_nacl.py +10 -6
  59. toil/lib/encryption/conftest.py +1 -0
  60. toil/lib/exceptions.py +26 -7
  61. toil/lib/expando.py +4 -2
  62. toil/lib/ftp_utils.py +217 -0
  63. toil/lib/generatedEC2Lists.py +127 -19
  64. toil/lib/history.py +1271 -0
  65. toil/lib/history_submission.py +681 -0
  66. toil/lib/humanize.py +6 -2
  67. toil/lib/io.py +121 -12
  68. toil/lib/iterables.py +4 -2
  69. toil/lib/memoize.py +12 -8
  70. toil/lib/misc.py +83 -18
  71. toil/lib/objects.py +2 -2
  72. toil/lib/resources.py +19 -7
  73. toil/lib/retry.py +125 -87
  74. toil/lib/threading.py +282 -80
  75. toil/lib/throttle.py +15 -14
  76. toil/lib/trs.py +390 -0
  77. toil/lib/web.py +38 -0
  78. toil/options/common.py +850 -402
  79. toil/options/cwl.py +185 -90
  80. toil/options/runner.py +50 -0
  81. toil/options/wdl.py +70 -19
  82. toil/provisioners/__init__.py +111 -46
  83. toil/provisioners/abstractProvisioner.py +322 -157
  84. toil/provisioners/aws/__init__.py +62 -30
  85. toil/provisioners/aws/awsProvisioner.py +980 -627
  86. toil/provisioners/clusterScaler.py +541 -279
  87. toil/provisioners/gceProvisioner.py +283 -180
  88. toil/provisioners/node.py +147 -79
  89. toil/realtimeLogger.py +34 -22
  90. toil/resource.py +137 -75
  91. toil/server/app.py +127 -61
  92. toil/server/celery_app.py +3 -1
  93. toil/server/cli/wes_cwl_runner.py +84 -55
  94. toil/server/utils.py +56 -31
  95. toil/server/wes/abstract_backend.py +64 -26
  96. toil/server/wes/amazon_wes_utils.py +21 -15
  97. toil/server/wes/tasks.py +121 -63
  98. toil/server/wes/toil_backend.py +142 -107
  99. toil/server/wsgi_app.py +4 -3
  100. toil/serviceManager.py +58 -22
  101. toil/statsAndLogging.py +183 -65
  102. toil/test/__init__.py +263 -179
  103. toil/test/batchSystems/batchSystemTest.py +438 -195
  104. toil/test/batchSystems/batch_system_plugin_test.py +18 -7
  105. toil/test/batchSystems/test_gridengine.py +173 -0
  106. toil/test/batchSystems/test_lsf_helper.py +67 -58
  107. toil/test/batchSystems/test_slurm.py +265 -49
  108. toil/test/cactus/test_cactus_integration.py +20 -22
  109. toil/test/cwl/conftest.py +39 -0
  110. toil/test/cwl/cwlTest.py +375 -72
  111. toil/test/cwl/measure_default_memory.cwl +12 -0
  112. toil/test/cwl/not_run_required_input.cwl +29 -0
  113. toil/test/cwl/optional-file.cwl +18 -0
  114. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  115. toil/test/docs/scriptsTest.py +60 -34
  116. toil/test/jobStores/jobStoreTest.py +412 -235
  117. toil/test/lib/aws/test_iam.py +116 -48
  118. toil/test/lib/aws/test_s3.py +16 -9
  119. toil/test/lib/aws/test_utils.py +5 -6
  120. toil/test/lib/dockerTest.py +118 -141
  121. toil/test/lib/test_conversions.py +113 -115
  122. toil/test/lib/test_ec2.py +57 -49
  123. toil/test/lib/test_history.py +212 -0
  124. toil/test/lib/test_misc.py +12 -5
  125. toil/test/lib/test_trs.py +161 -0
  126. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  127. toil/test/mesos/helloWorld.py +7 -6
  128. toil/test/mesos/stress.py +25 -20
  129. toil/test/options/options.py +7 -2
  130. toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
  131. toil/test/provisioners/clusterScalerTest.py +440 -250
  132. toil/test/provisioners/clusterTest.py +81 -42
  133. toil/test/provisioners/gceProvisionerTest.py +174 -100
  134. toil/test/provisioners/provisionerTest.py +25 -13
  135. toil/test/provisioners/restartScript.py +5 -4
  136. toil/test/server/serverTest.py +188 -141
  137. toil/test/sort/restart_sort.py +137 -68
  138. toil/test/sort/sort.py +134 -66
  139. toil/test/sort/sortTest.py +91 -49
  140. toil/test/src/autoDeploymentTest.py +140 -100
  141. toil/test/src/busTest.py +20 -18
  142. toil/test/src/checkpointTest.py +8 -2
  143. toil/test/src/deferredFunctionTest.py +49 -35
  144. toil/test/src/dockerCheckTest.py +33 -26
  145. toil/test/src/environmentTest.py +20 -10
  146. toil/test/src/fileStoreTest.py +538 -271
  147. toil/test/src/helloWorldTest.py +7 -4
  148. toil/test/src/importExportFileTest.py +61 -31
  149. toil/test/src/jobDescriptionTest.py +32 -17
  150. toil/test/src/jobEncapsulationTest.py +2 -0
  151. toil/test/src/jobFileStoreTest.py +74 -50
  152. toil/test/src/jobServiceTest.py +187 -73
  153. toil/test/src/jobTest.py +120 -70
  154. toil/test/src/miscTests.py +19 -18
  155. toil/test/src/promisedRequirementTest.py +82 -36
  156. toil/test/src/promisesTest.py +7 -6
  157. toil/test/src/realtimeLoggerTest.py +6 -6
  158. toil/test/src/regularLogTest.py +71 -37
  159. toil/test/src/resourceTest.py +80 -49
  160. toil/test/src/restartDAGTest.py +36 -22
  161. toil/test/src/resumabilityTest.py +9 -2
  162. toil/test/src/retainTempDirTest.py +45 -14
  163. toil/test/src/systemTest.py +12 -8
  164. toil/test/src/threadingTest.py +44 -25
  165. toil/test/src/toilContextManagerTest.py +10 -7
  166. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  167. toil/test/src/workerTest.py +33 -16
  168. toil/test/utils/toilDebugTest.py +70 -58
  169. toil/test/utils/toilKillTest.py +4 -5
  170. toil/test/utils/utilsTest.py +239 -102
  171. toil/test/wdl/wdltoil_test.py +789 -148
  172. toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
  173. toil/toilState.py +52 -26
  174. toil/utils/toilConfig.py +13 -4
  175. toil/utils/toilDebugFile.py +44 -27
  176. toil/utils/toilDebugJob.py +85 -25
  177. toil/utils/toilDestroyCluster.py +11 -6
  178. toil/utils/toilKill.py +8 -3
  179. toil/utils/toilLaunchCluster.py +251 -145
  180. toil/utils/toilMain.py +37 -16
  181. toil/utils/toilRsyncCluster.py +27 -14
  182. toil/utils/toilSshCluster.py +45 -22
  183. toil/utils/toilStats.py +75 -36
  184. toil/utils/toilStatus.py +226 -119
  185. toil/utils/toilUpdateEC2Instances.py +3 -1
  186. toil/version.py +6 -6
  187. toil/wdl/utils.py +5 -5
  188. toil/wdl/wdltoil.py +3528 -1053
  189. toil/worker.py +370 -149
  190. toil-8.1.0b1.dist-info/METADATA +178 -0
  191. toil-8.1.0b1.dist-info/RECORD +259 -0
  192. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
  193. toil-7.0.0.dist-info/METADATA +0 -158
  194. toil-7.0.0.dist-info/RECORD +0 -244
  195. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
  196. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
  197. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
@@ -1,25 +1,39 @@
1
1
  import json
2
+ import logging
2
3
  import os
4
+ import pytest
5
+ import re
3
6
  import shutil
7
+ import string
4
8
  import subprocess
5
9
  import unittest
10
+ from typing import Any, Optional, Union
11
+ from unittest.mock import patch
6
12
  from uuid import uuid4
7
- from typing import Optional
8
13
 
9
- from unittest.mock import patch
10
- from typing import Any, Dict, List, Set
14
+ import WDL.Error
15
+ import WDL.Expr
16
+
17
+ from toil.fileStores import FileID
18
+ from toil.test import (
19
+ ToilTest,
20
+ needs_docker,
21
+ needs_docker_cuda,
22
+ needs_google_storage,
23
+ needs_singularity_or_docker,
24
+ needs_wdl,
25
+ slow,
26
+ )
27
+ from toil.version import exactPython
28
+ from toil.wdl.wdltoil import (
29
+ WDLSectionJob,
30
+ WDLWorkflowGraph,
31
+ parse_disks,
32
+ remove_common_leading_whitespace,
33
+ )
11
34
 
12
- import pytest
35
+ logger = logging.getLogger(__name__)
13
36
 
14
- from toil.provisioners import cluster_factory
15
- from toil.test import (ToilTest,
16
- needs_docker_cuda,
17
- needs_google_storage,
18
- needs_singularity_or_docker,
19
- needs_wdl,
20
- slow, integrative)
21
- from toil.version import exactPython
22
- from toil.wdl.wdltoil import WDLSectionJob, WDLWorkflowGraph
23
37
 
24
38
  @needs_wdl
25
39
  class BaseWDLTest(ToilTest):
@@ -27,7 +41,7 @@ class BaseWDLTest(ToilTest):
27
41
 
28
42
  def setUp(self) -> None:
29
43
  """Runs anew before each test to create farm fresh temp dirs."""
30
- self.output_dir = os.path.join('/tmp/', 'toil-wdl-test-' + str(uuid4()))
44
+ self.output_dir = os.path.join("/tmp/", "toil-wdl-test-" + str(uuid4()))
31
45
  os.makedirs(self.output_dir)
32
46
 
33
47
  def tearDown(self) -> None:
@@ -35,63 +49,138 @@ class BaseWDLTest(ToilTest):
35
49
  shutil.rmtree(self.output_dir)
36
50
 
37
51
 
52
+ WDL_CONFORMANCE_TEST_REPO = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
53
+ WDL_CONFORMANCE_TEST_COMMIT = "baf44bcc7e6f6927540adf77d91b26a5558ae4b7"
54
+ # These tests are known to require things not implemented by
55
+ # Toil and will not be run in CI.
56
+ WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL = [
57
+ 16, # Basic object test (deprecated and removed in 1.1); MiniWDL and toil-wdl-runner do not support Objects, so this will fail if ran by them
58
+ 21, # Parser: expression placeholders in strings in conditional expressions in 1.0, Cromwell style; Fails with MiniWDL and toil-wdl-runner
59
+ 64, # Legacy test for as_map_as_input; It looks like MiniWDL does not have the function as_map()
60
+ 77, # Test that array cannot coerce to a string. WDL 1.1 does not allow compound types to coerce into a string. This should return a TypeError.
61
+ ]
62
+ WDL_UNIT_TESTS_UNSUPPORTED_BY_TOIL = [
63
+ 14, # test_object, Objects are not supported
64
+ 19, # map_to_struct, miniwdl cannot coerce map to struct, https://github.com/chanzuckerberg/miniwdl/issues/712
65
+ 52, # relative_and_absolute, needs root to run
66
+ 58, # test_gpu, needs gpu to run, else warning
67
+ 59, # will be fixed in #5001
68
+ 66, # This needs way too many resources (and actually doesn't work?), see https://github.com/DataBiosphere/wdl-conformance-tests/blob/2d617b703a33791f75f30a9db43c3740a499cd89/README_UNIT.md?plain=1#L8
69
+ 67, # same as above
70
+ 68, # Bug, see #https://github.com/DataBiosphere/toil/issues/4993
71
+ 69, # Same as 68
72
+ 87, # MiniWDL does not handle metacharacters properly when running regex, https://github.com/chanzuckerberg/miniwdl/issues/709
73
+ 97, # miniwdl bug, see https://github.com/chanzuckerberg/miniwdl/issues/701
74
+ 105, # miniwdl (and toil) bug, unserializable json is serialized, see https://github.com/chanzuckerberg/miniwdl/issues/702
75
+ 107, # object not supported
76
+ 108, # object not supported
77
+ 109, # object not supported
78
+ 110, # object not supported
79
+ 120, # miniwdl bug, see https://github.com/chanzuckerberg/miniwdl/issues/699
80
+ 131, # miniwdl bug, evalerror, see https://github.com/chanzuckerberg/miniwdl/issues/700
81
+ 134, # same as 131
82
+ 144 # miniwdl and toil bug
83
+ ]
84
+
85
+
86
+
38
87
  class WDLConformanceTests(BaseWDLTest):
39
88
  """
40
89
  WDL conformance tests for Toil.
41
90
  """
91
+
42
92
  wdl_dir = "wdl-conformance-tests"
43
93
 
44
94
  @classmethod
45
95
  def setUpClass(cls) -> None:
46
96
 
47
- url = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
48
- commit = "c87b62b4f460e009fd42edec13669c4db14cf90c"
49
-
50
97
  p = subprocess.Popen(
51
- f"git clone {url} {cls.wdl_dir} && cd {cls.wdl_dir} && git checkout {commit}",
98
+ f"git clone {WDL_CONFORMANCE_TEST_REPO} {cls.wdl_dir} && cd {cls.wdl_dir} && git checkout {WDL_CONFORMANCE_TEST_COMMIT}",
52
99
  shell=True,
53
100
  )
54
101
 
55
102
  p.communicate()
56
103
 
57
104
  if p.returncode > 0:
58
- raise RuntimeError
105
+ raise RuntimeError("Could not clone WDL conformance tests")
59
106
 
60
107
  os.chdir(cls.wdl_dir)
61
108
 
62
109
  cls.base_command = [exactPython, "run.py", "--runner", "toil-wdl-runner"]
63
110
 
64
- # estimated running time: 2 minutes
65
- @slow
66
- def test_conformance_tests_v10(self):
67
- tests_to_run = "0-15,17-20,22-71,73-77"
68
- p = subprocess.run(self.base_command + ["-v", "1.0", "-n", tests_to_run], capture_output=True)
111
+ def check(self, p: subprocess.CompletedProcess) -> None:
112
+ """
113
+ Make sure a call completed or explain why it failed.
114
+ """
69
115
 
70
116
  if p.returncode != 0:
71
- print(p.stdout.decode('utf-8', errors='replace'))
117
+ logger.error(
118
+ "Failed process standard output: %s",
119
+ p.stdout.decode("utf-8", errors="replace"),
120
+ )
121
+ logger.error(
122
+ "Failed process standard error: %s",
123
+ p.stderr.decode("utf-8", errors="replace"),
124
+ )
72
125
 
73
126
  p.check_returncode()
74
127
 
75
- # estimated running time: 2 minutes
76
128
  @slow
77
- def test_conformance_tests_v11(self):
78
- tests_to_run = "1-63,65-71,73-75,77"
79
- p = subprocess.run(self.base_command + ["-v", "1.1", "-n", tests_to_run], capture_output=True)
129
+ def test_unit_tests_v11(self):
130
+ # There are still some bugs with the WDL spec, so use a fixed version until they are resolved
131
+ # See comments of https://github.com/openwdl/wdl/pull/669
132
+ repo_url = "https://github.com/stxue1/wdl.git"
133
+ repo_branch = "wdl-1.1.3-fixes"
134
+ command = f"{exactPython} setup_unit_tests.py -v 1.1 --extra-patch-data unit_tests_patch_data.yaml --repo {repo_url} --branch {repo_branch} --force-pull"
135
+ p = subprocess.run(command.split(" "), capture_output=True)
136
+ self.check(p)
137
+ command = f"{exactPython} run_unit.py -r toil-wdl-runner -v 1.1 --progress --exclude-numbers {','.join([str(t) for t in WDL_UNIT_TESTS_UNSUPPORTED_BY_TOIL])}"
138
+ p = subprocess.run(command.split(" "), capture_output=True)
139
+ self.check(p)
140
+
141
+ # estimated running time: 10 minutes
142
+ @slow
143
+ def test_conformance_tests_v10(self):
144
+ command = self.base_command + ["-v", "1.0"]
145
+ if WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL:
146
+ command.append("--exclude-numbers")
147
+ command.append(
148
+ ",".join([str(t) for t in WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL])
149
+ )
150
+ p = subprocess.run(command, capture_output=True)
80
151
 
81
- if p.returncode != 0:
82
- print(p.stdout.decode('utf-8', errors='replace'))
152
+ self.check(p)
83
153
 
84
- p.check_returncode()
154
+ # estimated running time: 10 minutes
155
+ @slow
156
+ def test_conformance_tests_v11(self):
157
+ command = self.base_command + ["-v", "1.1"]
158
+ if WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL:
159
+ command.append("--exclude-numbers")
160
+ command.append(
161
+ ",".join([str(t) for t in WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL])
162
+ )
163
+ p = subprocess.run(command, capture_output=True)
164
+
165
+ self.check(p)
85
166
 
86
167
  @slow
87
168
  def test_conformance_tests_integration(self):
88
169
  ids_to_run = "encode,tut01,tut02,tut03,tut04"
89
- p = subprocess.run(self.base_command + ["-v", "1.0", "--id", ids_to_run], capture_output=True)
90
-
91
- if p.returncode != 0:
92
- print(p.stdout.decode('utf-8', errors='replace'))
170
+ p = subprocess.run(
171
+ self.base_command
172
+ + [
173
+ "-v",
174
+ "1.0",
175
+ "--conformance-file",
176
+ "integration.yaml",
177
+ "--id",
178
+ ids_to_run,
179
+ ],
180
+ capture_output=True,
181
+ )
93
182
 
94
- p.check_returncode()
183
+ self.check(p)
95
184
 
96
185
  @classmethod
97
186
  def tearDownClass(cls) -> None:
@@ -106,7 +195,7 @@ class WDLTests(BaseWDLTest):
106
195
  @classmethod
107
196
  def setUpClass(cls) -> None:
108
197
  """Runs once for all tests."""
109
- cls.base_command = [exactPython, '-m', 'toil.wdl.wdltoil']
198
+ cls.base_command = [exactPython, "-m", "toil.wdl.wdltoil"]
110
199
 
111
200
  # We inherit a testMD5sum but it is going to need Singularity or Docker
112
201
  # now. And also needs to have a WDL 1.0+ WDL file. So we replace it.
@@ -114,57 +203,338 @@ class WDLTests(BaseWDLTest):
114
203
  def test_MD5sum(self):
115
204
  """Test if Toil produces the same outputs as known good outputs for WDL's
116
205
  GATK tutorial #1."""
117
- wdl = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.1.0.wdl')
118
- json_file = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.json')
206
+ wdl = os.path.abspath("src/toil/test/wdl/md5sum/md5sum.1.0.wdl")
207
+ json_file = os.path.abspath("src/toil/test/wdl/md5sum/md5sum.json")
208
+
209
+ result_json = subprocess.check_output(
210
+ self.base_command
211
+ + [wdl, json_file, "-o", self.output_dir, "--logDebug", "--retryCount=0"]
212
+ )
213
+ result = json.loads(result_json)
214
+
215
+ assert "ga4ghMd5.value" in result
216
+ assert isinstance(result["ga4ghMd5.value"], str)
217
+ assert os.path.exists(result["ga4ghMd5.value"])
218
+ assert os.path.basename(result["ga4ghMd5.value"]) == "md5sum.txt"
219
+
220
+ def test_url_to_file(self):
221
+ """
222
+ Test if web URL strings can be coerced to usable Files.
223
+ """
224
+ wdl = os.path.abspath("src/toil/test/wdl/testfiles/url_to_file.wdl")
119
225
 
120
226
  result_json = subprocess.check_output(
121
- self.base_command + [wdl, json_file, '-o', self.output_dir, '--logDebug', '--retryCount=0'])
227
+ self.base_command
228
+ + [wdl, "-o", self.output_dir, "--logInfo", "--retryCount=0"]
229
+ )
122
230
  result = json.loads(result_json)
123
231
 
124
- assert 'ga4ghMd5.value' in result
125
- assert isinstance(result['ga4ghMd5.value'], str)
126
- assert os.path.exists(result['ga4ghMd5.value'])
127
- assert os.path.basename(result['ga4ghMd5.value']) == 'md5sum.txt'
232
+ assert "url_to_file.first_line" in result
233
+ assert isinstance(result["url_to_file.first_line"], str)
234
+ self.assertEqual(result["url_to_file.first_line"], "chr1\t248387328")
235
+
236
+ @needs_docker
237
+ def test_wait(self):
238
+ """
239
+ Test if Bash "wait" works in WDL scripts.
240
+ """
241
+ wdl = os.path.abspath("src/toil/test/wdl/testfiles/wait.wdl")
242
+
243
+ result_json = subprocess.check_output(
244
+ self.base_command
245
+ + [
246
+ wdl,
247
+ "-o",
248
+ self.output_dir,
249
+ "--logInfo",
250
+ "--retryCount=0",
251
+ "--wdlContainer=docker",
252
+ ]
253
+ )
254
+ result = json.loads(result_json)
255
+
256
+ assert "wait.result" in result
257
+ assert isinstance(result["wait.result"], str)
258
+ self.assertEqual(result["wait.result"], "waited")
259
+
260
+ @needs_singularity_or_docker
261
+ def test_all_call_outputs(self):
262
+ """
263
+ Test if Toil can collect all call outputs from a workflow that doesn't expose them.
264
+ """
265
+ wdl = os.path.abspath("src/toil/test/wdl/testfiles/not_enough_outputs.wdl")
266
+
267
+ # With no flag we don't include the call outputs
268
+ result_json = subprocess.check_output(
269
+ self.base_command
270
+ + [wdl, "-o", self.output_dir, "--logInfo", "--retryCount=0"]
271
+ )
272
+ result = json.loads(result_json)
273
+
274
+ assert "wf.only_result" in result
275
+ assert "wf.do_math.square" not in result
276
+ assert "wf.do_math.cube" not in result
277
+ assert "wf.should_never_output" not in result
278
+
279
+ # With flag off we don't include the call outputs
280
+ result_json = subprocess.check_output(
281
+ self.base_command
282
+ + [
283
+ wdl,
284
+ "-o",
285
+ self.output_dir,
286
+ "--logInfo",
287
+ "--retryCount=0",
288
+ "--allCallOutputs=false",
289
+ ]
290
+ )
291
+ result = json.loads(result_json)
292
+
293
+ assert "wf.only_result" in result
294
+ assert "wf.do_math.square" not in result
295
+ assert "wf.do_math.cube" not in result
296
+ assert "wf.should_never_output" not in result
297
+
298
+ # With flag on we do include the call outputs
299
+ result_json = subprocess.check_output(
300
+ self.base_command
301
+ + [
302
+ wdl,
303
+ "-o",
304
+ self.output_dir,
305
+ "--logInfo",
306
+ "--retryCount=0",
307
+ "--allCallOutputs=on",
308
+ ]
309
+ )
310
+ result = json.loads(result_json)
311
+
312
+ assert "wf.only_result" in result
313
+ assert "wf.do_math.square" in result
314
+ assert "wf.do_math.cube" in result
315
+ assert "wf.should_never_output" not in result
316
+
317
+ @needs_singularity_or_docker
318
+ def test_croo_detection(self):
319
+ """
320
+ Test if Toil can detect and do something sensible with Cromwell Output Organizer workflows.
321
+ """
322
+ wdl = os.path.abspath("src/toil/test/wdl/testfiles/croo.wdl")
323
+
324
+ # With no flag we should include all task outputs
325
+ result_json = subprocess.check_output(
326
+ self.base_command
327
+ + [wdl, "-o", self.output_dir, "--logInfo", "--retryCount=0"]
328
+ )
329
+ result = json.loads(result_json)
330
+
331
+ assert "wf.only_result" in result
332
+ assert "wf.do_math.square" in result
333
+ assert "wf.do_math.cube" in result
334
+ assert "wf.should_never_output" not in result
335
+
336
+ # With flag off we obey the WDL spec even if we're suspicious
337
+ result_json = subprocess.check_output(
338
+ self.base_command
339
+ + [
340
+ wdl,
341
+ "-o",
342
+ self.output_dir,
343
+ "--logInfo",
344
+ "--retryCount=0",
345
+ "--allCallOutputs=off",
346
+ ]
347
+ )
348
+ result = json.loads(result_json)
349
+
350
+ assert "wf.only_result" in result
351
+ assert "wf.do_math.square" not in result
352
+ assert "wf.do_math.cube" not in result
353
+ assert "wf.should_never_output" not in result
354
+
355
+ @needs_singularity_or_docker
356
+ def test_caching(self):
357
+ """
358
+ Test if Toil can cache task runs.
359
+ """
360
+ wdl = os.path.abspath('src/toil/test/wdl/testfiles/random.wdl')
361
+
362
+ caching_env = dict(os.environ)
363
+ caching_env["MINIWDL__CALL_CACHE__GET"] = "true"
364
+ caching_env["MINIWDL__CALL_CACHE__PUT"] = "true"
365
+ caching_env["MINIWDL__CALL_CACHE__DIR"] = self._createTempDir("cache")
366
+
367
+ result_json = subprocess.check_output(
368
+ self.base_command + [wdl, '-o', self.output_dir, '--logInfo', '--retryCount=0', '--inputs={"random.task_1_input": 1, "random.task_2_input": 1}'],
369
+ env=caching_env)
370
+ result_initial = json.loads(result_json)
371
+
372
+ assert 'random.value_seen' in result_initial
373
+ assert 'random.value_written' in result_initial
374
+
375
+ result_json = subprocess.check_output(
376
+ self.base_command + [wdl, '-o', self.output_dir, '--logInfo', '--retryCount=0', '--inputs={"random.task_1_input": 1, "random.task_2_input": 1}'],
377
+ env=caching_env)
378
+ result_cached = json.loads(result_json)
379
+
380
+ assert 'random.value_seen' in result_cached
381
+ assert 'random.value_written' in result_cached
382
+
383
+ assert result_cached['random.value_seen'] == result_initial['random.value_seen']
384
+ assert result_cached['random.value_written'] == result_initial['random.value_written']
385
+
386
+ result_json = subprocess.check_output(
387
+ self.base_command + [wdl, '-o', self.output_dir, '--logInfo', '--retryCount=0', '--inputs={"random.task_1_input": 2, "random.task_2_input": 1}'],
388
+ env=caching_env)
389
+ result_not_cached = json.loads(result_json)
390
+
391
+ assert 'random.value_seen' in result_not_cached
392
+ assert 'random.value_written' in result_not_cached
393
+
394
+ assert result_not_cached['random.value_seen'] != result_initial['random.value_seen']
395
+ assert result_not_cached['random.value_written'] != result_initial['random.value_written']
396
+
397
+ result_json = subprocess.check_output(
398
+ self.base_command + [wdl, '-o', self.output_dir, '--logInfo', '--retryCount=0', '--inputs={"random.task_1_input": 1, "random.task_2_input": 2}'],
399
+ env=caching_env)
400
+ result_part_cached = json.loads(result_json)
401
+
402
+ assert 'random.value_seen' in result_part_cached
403
+ assert 'random.value_written' in result_part_cached
404
+
405
+ assert result_part_cached['random.value_seen'] == result_initial['random.value_seen']
406
+ assert result_part_cached['random.value_written'] != result_initial['random.value_written']
407
+ assert result_part_cached['random.value_written'] != result_not_cached['random.value_written']
408
+
409
+
410
+
411
+ def test_url_to_optional_file(self):
412
+ """
413
+ Test if missing and error-producing URLs are handled correctly for optional File? values.
414
+ """
415
+ wdl = os.path.abspath("src/toil/test/wdl/testfiles/url_to_optional_file.wdl")
416
+
417
+ def run_for_code(code: int) -> dict:
418
+ """
419
+ Run a workflow coercing URL to File? where the URL returns the given status code.
420
+
421
+ Return the parsed output.
422
+ """
423
+ logger.info("Test optional file with HTTP code %s", code)
424
+ json_value = '{"url_to_optional_file.http_code": %d}' % code
425
+ result_json = subprocess.check_output(
426
+ self.base_command
427
+ + [
428
+ wdl,
429
+ json_value,
430
+ "-o",
431
+ self.output_dir,
432
+ "--logInfo",
433
+ "--retryCount=0",
434
+ ]
435
+ )
436
+ result = json.loads(result_json)
437
+ return result
438
+
439
+ # Check files that exist
440
+ result = run_for_code(200)
441
+ assert "url_to_optional_file.out_file" in result
442
+ self.assertNotEqual(result["url_to_optional_file.out_file"], None)
443
+
444
+ for code in (404, 410):
445
+ # Check files that definitely don't
446
+ result = run_for_code(code)
447
+ assert "url_to_optional_file.out_file" in result
448
+ self.assertEqual(result["url_to_optional_file.out_file"], None)
449
+
450
+ for code in (402, 418, 500, 502):
451
+ # Check that cases where the server refuses to say if the file
452
+ # exists stop the workflow.
453
+ with self.assertRaises(subprocess.CalledProcessError):
454
+ run_for_code(code)
128
455
 
129
456
  def test_missing_output_directory(self):
130
457
  """
131
458
  Test if Toil can run a WDL workflow into a new directory.
132
459
  """
133
- wdl = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.1.0.wdl')
134
- json_file = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.json')
135
- subprocess.check_call(self.base_command + [wdl, json_file, '-o', os.path.join(self.output_dir, "does", "not", "exist"), '--logDebug', '--retryCount=0'])
460
+ wdl = os.path.abspath("src/toil/test/wdl/md5sum/md5sum.1.0.wdl")
461
+ json_file = os.path.abspath("src/toil/test/wdl/md5sum/md5sum.json")
462
+ subprocess.check_call(
463
+ self.base_command
464
+ + [
465
+ wdl,
466
+ json_file,
467
+ "-o",
468
+ os.path.join(self.output_dir, "does", "not", "exist"),
469
+ "--logDebug",
470
+ "--retryCount=0",
471
+ ]
472
+ )
136
473
 
137
474
  @needs_singularity_or_docker
138
- def test_miniwdl_self_test(self, extra_args: Optional[List[str]] = None) -> None:
475
+ def test_miniwdl_self_test(self, extra_args: Optional[list[str]] = None) -> None:
139
476
  """Test if the MiniWDL self test runs and produces the expected output."""
140
- wdl_file = os.path.abspath('src/toil/test/wdl/miniwdl_self_test/self_test.wdl')
141
- json_file = os.path.abspath('src/toil/test/wdl/miniwdl_self_test/inputs.json')
477
+ wdl_file = os.path.abspath("src/toil/test/wdl/miniwdl_self_test/self_test.wdl")
478
+ json_file = os.path.abspath("src/toil/test/wdl/miniwdl_self_test/inputs.json")
142
479
 
143
480
  result_json = subprocess.check_output(
144
- self.base_command + [wdl_file, json_file, '--logDebug', '-o', self.output_dir, '--outputDialect',
145
- 'miniwdl'] + (extra_args or []))
481
+ self.base_command
482
+ + [
483
+ wdl_file,
484
+ json_file,
485
+ "--logDebug",
486
+ "-o",
487
+ self.output_dir,
488
+ "--outputDialect",
489
+ "miniwdl",
490
+ ]
491
+ + (extra_args or [])
492
+ )
146
493
  result = json.loads(result_json)
147
494
 
148
495
  # Expect MiniWDL-style output with a designated "dir"
149
496
 
150
- assert 'dir' in result
151
- assert isinstance(result['dir'], str)
152
- out_dir = result['dir']
497
+ assert "dir" in result
498
+ assert isinstance(result["dir"], str)
499
+ out_dir = result["dir"]
153
500
 
154
- assert 'outputs' in result
155
- assert isinstance(result['outputs'], dict)
156
- outputs = result['outputs']
501
+ assert "outputs" in result
502
+ assert isinstance(result["outputs"], dict)
503
+ outputs = result["outputs"]
157
504
 
158
- assert 'hello_caller.message_files' in outputs
159
- assert isinstance(outputs['hello_caller.message_files'], list)
160
- assert len(outputs['hello_caller.message_files']) == 2
161
- for item in outputs['hello_caller.message_files']:
505
+ assert "hello_caller.message_files" in outputs
506
+ assert isinstance(outputs["hello_caller.message_files"], list)
507
+ assert len(outputs["hello_caller.message_files"]) == 2
508
+ for item in outputs["hello_caller.message_files"]:
162
509
  # All the files should be strings in the "out" directory
163
- assert isinstance(item, str)
164
- assert item.startswith(out_dir)
165
-
166
- assert 'hello_caller.messages' in outputs
167
- assert outputs['hello_caller.messages'] == ["Hello, Alyssa P. Hacker!", "Hello, Ben Bitdiddle!"]
510
+ assert isinstance(item, str), "File output must be a string"
511
+ assert item.startswith(
512
+ out_dir
513
+ ), "File output must be in the output directory"
514
+
515
+ # Look at the filename within that directory
516
+ name_in_out_dir = item[len(out_dir) :]
517
+
518
+ # It should contain the job name of "hello", so they are human-readable.
519
+ assert (
520
+ "hello" in name_in_out_dir
521
+ ), f"File output {name_in_out_dir} should have the originating task name in it"
522
+
523
+ # And it should not contain non-human-readable content.
524
+ #
525
+ # We use a threshold number of digits as a proxy for this, but
526
+ # don't try and get around this by just rolling other random
527
+ # strings; we want these outputs to be human-readable!!!
528
+ digit_count = len([c for c in name_in_out_dir if c in string.digits])
529
+ assert (
530
+ digit_count < 3
531
+ ), f"File output {name_in_out_dir} has {digit_count} digits, which is too many to be plausibly human-readable"
532
+
533
+ assert "hello_caller.messages" in outputs
534
+ assert outputs["hello_caller.messages"] == [
535
+ "Hello, Alyssa P. Hacker!",
536
+ "Hello, Ben Bitdiddle!",
537
+ ]
168
538
 
169
539
  @needs_singularity_or_docker
170
540
  def test_miniwdl_self_test_by_reference(self) -> None:
@@ -173,6 +543,23 @@ class WDLTests(BaseWDLTest):
173
543
  """
174
544
  self.test_miniwdl_self_test(extra_args=["--referenceInputs=True"])
175
545
 
546
+ @pytest.mark.integrative
547
+ @needs_singularity_or_docker
548
+ def test_dockstore_trs(self, extra_args: Optional[list[str]] = None) -> None:
549
+ wdl_file = "#workflow/github.com/dockstore/bcc2020-training/HelloWorld:master"
550
+ # Needs an input but doesn't provide a good one.
551
+ json_input = json.dumps({"hello_world.hello.myName": "https://raw.githubusercontent.com/dockstore/bcc2020-training/refs/heads/master/wdl-training/exercise1/name.txt"})
552
+
553
+ result_json = subprocess.check_output(
554
+ self.base_command + [wdl_file, json_input, '--logDebug', '-o', self.output_dir, '--outputDialect',
555
+ 'miniwdl'] + (extra_args or []))
556
+ result = json.loads(result_json)
557
+
558
+ with open(result.get("outputs", {}).get("hello_world.helloFile")) as f:
559
+ result_text = f.read().strip()
560
+
561
+ self.assertEqual(result_text, "Hello World!\nMy name is potato.")
562
+
176
563
  @slow
177
564
  @needs_docker_cuda
178
565
  def test_giraffe_deepvariant(self):
@@ -180,87 +567,111 @@ class WDLTests(BaseWDLTest):
180
567
  # TODO: enable test if nvidia-container-runtime and Singularity are installed but Docker isn't.
181
568
 
182
569
  json_dir = self._createTempDir()
183
- base_uri = 'https://raw.githubusercontent.com/vgteam/vg_wdl/65dd739aae765f5c4dedd14f2e42d5a263f9267a'
570
+ base_uri = "https://raw.githubusercontent.com/vgteam/vg_wdl/65dd739aae765f5c4dedd14f2e42d5a263f9267a"
184
571
 
185
572
  wdl_file = f"{base_uri}/workflows/giraffe_and_deepvariant.wdl"
186
- json_file = os.path.abspath(os.path.join(json_dir, 'inputs.json'))
187
- with open(json_file, 'w') as fp:
573
+ json_file = os.path.abspath(os.path.join(json_dir, "inputs.json"))
574
+ with open(json_file, "w") as fp:
188
575
  # Write some inputs. We need to override the example inputs to use a GPU container, but that means we need absolute input URLs.
189
- json.dump(fp, {
190
- "GiraffeDeepVariant.INPUT_READ_FILE_1": f"{base_uri}/tests/small_sim_graph/reads_1.fastq.gz",
191
- "GiraffeDeepVariant.INPUT_READ_FILE_2": f"{base_uri}/tests/small_sim_graph/reads_2.fastq.gz",
192
- "GiraffeDeepVariant.XG_FILE": f"{base_uri}/tests/small_sim_graph/graph.xg",
193
- "GiraffeDeepVariant.SAMPLE_NAME": "s0",
194
- "GiraffeDeepVariant.GBWT_FILE": f"{base_uri}/tests/small_sim_graph/graph.gbwt",
195
- "GiraffeDeepVariant.GGBWT_FILE": f"{base_uri}/tests/small_sim_graph/graph.gg",
196
- "GiraffeDeepVariant.MIN_FILE": f"{base_uri}/tests/small_sim_graph/graph.min",
197
- "GiraffeDeepVariant.DIST_FILE": f"{base_uri}/tests/small_sim_graph/graph.dist",
198
- "GiraffeDeepVariant.OUTPUT_GAF": True,
199
- "GiraffeDeepVariant.runDeepVariantCallVariants.in_dv_gpu_container": "google/deepvariant:1.3.0-gpu"
200
- })
576
+ json.dump(
577
+ fp,
578
+ {
579
+ "GiraffeDeepVariant.INPUT_READ_FILE_1": f"{base_uri}/tests/small_sim_graph/reads_1.fastq.gz",
580
+ "GiraffeDeepVariant.INPUT_READ_FILE_2": f"{base_uri}/tests/small_sim_graph/reads_2.fastq.gz",
581
+ "GiraffeDeepVariant.XG_FILE": f"{base_uri}/tests/small_sim_graph/graph.xg",
582
+ "GiraffeDeepVariant.SAMPLE_NAME": "s0",
583
+ "GiraffeDeepVariant.GBWT_FILE": f"{base_uri}/tests/small_sim_graph/graph.gbwt",
584
+ "GiraffeDeepVariant.GGBWT_FILE": f"{base_uri}/tests/small_sim_graph/graph.gg",
585
+ "GiraffeDeepVariant.MIN_FILE": f"{base_uri}/tests/small_sim_graph/graph.min",
586
+ "GiraffeDeepVariant.DIST_FILE": f"{base_uri}/tests/small_sim_graph/graph.dist",
587
+ "GiraffeDeepVariant.OUTPUT_GAF": True,
588
+ "GiraffeDeepVariant.runDeepVariantCallVariants.in_dv_gpu_container": "google/deepvariant:1.3.0-gpu",
589
+ },
590
+ )
201
591
 
202
592
  result_json = subprocess.check_output(
203
- self.base_command + [wdl_file, json_file, '-o', self.output_dir, '--outputDialect', 'miniwdl'])
593
+ self.base_command
594
+ + [wdl_file, json_file, "-o", self.output_dir, "--outputDialect", "miniwdl"]
595
+ )
204
596
  result = json.loads(result_json)
205
597
 
206
598
  # Expect MiniWDL-style output with a designated "dir"
207
- assert 'dir' in result
208
- assert isinstance(result['dir'], str)
209
- out_dir = result['dir']
599
+ assert "dir" in result
600
+ assert isinstance(result["dir"], str)
601
+ out_dir = result["dir"]
210
602
 
211
- assert 'outputs' in result
212
- assert isinstance(result['outputs'], dict)
213
- outputs = result['outputs']
603
+ assert "outputs" in result
604
+ assert isinstance(result["outputs"], dict)
605
+ outputs = result["outputs"]
214
606
 
215
607
  # Expect a VCF file to have been written
216
- assert 'GiraffeDeepVariant.output_vcf' in outputs
217
- assert isinstance(outputs['GiraffeDeepVariant.output_vcf'], str)
218
- assert os.path.exists(outputs['GiraffeDeepVariant.output_vcf'])
608
+ assert "GiraffeDeepVariant.output_vcf" in outputs
609
+ assert isinstance(outputs["GiraffeDeepVariant.output_vcf"], str)
610
+ assert os.path.exists(outputs["GiraffeDeepVariant.output_vcf"])
219
611
 
220
612
  @slow
221
613
  @needs_singularity_or_docker
222
614
  def test_giraffe(self):
223
615
  """Test if Giraffe runs. This could take 12 minutes. Also we scale it down but it still demands lots of memory."""
224
616
  # TODO: enable test if nvidia-container-runtime and Singularity are installed but Docker isn't.
617
+ # TODO: Reduce memory requests with custom/smaller inputs.
618
+ # TODO: Skip if node lacks enough memory.
225
619
 
226
620
  json_dir = self._createTempDir()
227
- base_uri = 'https://raw.githubusercontent.com/vgteam/vg_wdl/65dd739aae765f5c4dedd14f2e42d5a263f9267a'
621
+ base_uri = "https://raw.githubusercontent.com/vgteam/vg_wdl/65dd739aae765f5c4dedd14f2e42d5a263f9267a"
228
622
  wdl_file = f"{base_uri}/workflows/giraffe.wdl"
229
623
  json_file = f"{base_uri}/params/giraffe.json"
230
624
 
231
625
  result_json = subprocess.check_output(
232
- self.base_command + [wdl_file, json_file, '-o', self.output_dir, '--outputDialect', 'miniwdl', '--scale',
233
- '0.1'])
626
+ self.base_command
627
+ + [
628
+ wdl_file,
629
+ json_file,
630
+ "-o",
631
+ self.output_dir,
632
+ "--outputDialect",
633
+ "miniwdl",
634
+ "--scale",
635
+ "0.1",
636
+ "--logDebug",
637
+ ]
638
+ )
234
639
  result = json.loads(result_json)
235
640
 
236
641
  # Expect MiniWDL-style output with a designated "dir"
237
- assert 'dir' in result
238
- assert isinstance(result['dir'], str)
239
- out_dir = result['dir']
642
+ assert "dir" in result
643
+ assert isinstance(result["dir"], str)
644
+ out_dir = result["dir"]
240
645
 
241
- assert 'outputs' in result
242
- assert isinstance(result['outputs'], dict)
243
- outputs = result['outputs']
646
+ assert "outputs" in result
647
+ assert isinstance(result["outputs"], dict)
648
+ outputs = result["outputs"]
244
649
 
245
650
  # Expect a BAM file to have been written
246
- assert 'Giraffe.output_bam' in outputs
247
- assert isinstance(outputs['Giraffe.output_bam'], str)
248
- assert os.path.exists(outputs['Giraffe.output_bam'])
651
+ assert "Giraffe.output_bam" in outputs
652
+ assert isinstance(outputs["Giraffe.output_bam"], str)
653
+ assert os.path.exists(outputs["Giraffe.output_bam"])
249
654
 
250
655
  @needs_singularity_or_docker
251
656
  @needs_google_storage
252
657
  def test_gs_uri(self):
253
658
  """Test if Toil can access Google Storage URIs."""
254
- wdl = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.1.0.wdl')
255
- json_file = os.path.abspath('src/toil/test/wdl/md5sum/md5sum-gs.json')
659
+ wdl = os.path.abspath("src/toil/test/wdl/md5sum/md5sum.1.0.wdl")
660
+ json_file = os.path.abspath("src/toil/test/wdl/md5sum/md5sum-gs.json")
256
661
 
257
- result_json = subprocess.check_output(self.base_command + [wdl, json_file, '-o', self.output_dir, '--logDebug'])
662
+ result_json = subprocess.check_output(
663
+ self.base_command + [wdl, json_file, "-o", self.output_dir, "--logDebug"]
664
+ )
258
665
  result = json.loads(result_json)
259
666
 
260
- assert 'ga4ghMd5.value' in result
261
- assert isinstance(result['ga4ghMd5.value'], str)
262
- assert os.path.exists(result['ga4ghMd5.value'])
263
- assert os.path.basename(result['ga4ghMd5.value']) == 'md5sum.txt'
667
+ assert "ga4ghMd5.value" in result
668
+ assert isinstance(result["ga4ghMd5.value"], str)
669
+ assert os.path.exists(result["ga4ghMd5.value"])
670
+ assert os.path.basename(result["ga4ghMd5.value"]) == "md5sum.txt"
671
+
672
+
673
+ class WDLToilBenchTests(ToilTest):
674
+ """Tests for Toil's MiniWDL-based implementation that don't run workflows."""
264
675
 
265
676
  def test_coalesce(self):
266
677
  """
@@ -271,9 +682,9 @@ class WDLTests(BaseWDLTest):
271
682
 
272
683
  # Set up data structures for our fake workflow graph to pull from.
273
684
  # This has all decl-type nodes
274
- all_decls: Set[str] = set()
685
+ all_decls: set[str] = set()
275
686
  # And this has all transitive dependencies for all nodes.
276
- all_deps: Dict[str, Set[str]] = {}
687
+ all_deps: dict[str, set[str]] = {}
277
688
 
278
689
  def mock_is_decl(self: Any, node_id: str) -> bool:
279
690
  """
@@ -281,7 +692,7 @@ class WDLTests(BaseWDLTest):
281
692
  """
282
693
  return node_id in all_decls
283
694
 
284
- def mock_get_transitive_dependencies(self: Any, node_id: str) -> Set[str]:
695
+ def mock_get_transitive_dependencies(self: Any, node_id: str) -> set[str]:
285
696
  """
286
697
  Replacement function to get all the transitive dependencies of a node.
287
698
  """
@@ -293,17 +704,20 @@ class WDLTests(BaseWDLTest):
293
704
  #
294
705
  # If that changes, the test will need to change! Maybe then it will be
295
706
  # worth extracting a base type for this interface.
296
- with patch.object(WDLWorkflowGraph, 'is_decl', mock_is_decl):
297
- with patch.object(WDLWorkflowGraph, 'get_transitive_dependencies', mock_get_transitive_dependencies):
707
+ with patch.object(WDLWorkflowGraph, "is_decl", mock_is_decl):
708
+ with patch.object(
709
+ WDLWorkflowGraph,
710
+ "get_transitive_dependencies",
711
+ mock_get_transitive_dependencies,
712
+ ):
298
713
  with self.subTest(msg="Two unrelated decls can coalesce"):
299
714
  # Set up two unrelated decls
300
715
  all_decls = {"decl1", "decl2"}
301
- all_deps = {
302
- "decl1": set(),
303
- "decl2": set()
304
- }
716
+ all_deps = {"decl1": set(), "decl2": set()}
305
717
 
306
- result = WDLSectionJob.coalesce_nodes(["decl1", "decl2"], WDLWorkflowGraph([]))
718
+ result = WDLSectionJob.coalesce_nodes(
719
+ ["decl1", "decl2"], WDLWorkflowGraph([])
720
+ )
307
721
 
308
722
  # Make sure they coalesced
309
723
  assert len(result) == 1
@@ -312,60 +726,287 @@ class WDLTests(BaseWDLTest):
312
726
 
313
727
  with self.subTest(msg="A decl will not coalesce with a non-decl"):
314
728
  all_decls = {"decl"}
315
- all_deps = {
316
- "decl": set(),
317
- "nondecl": set()
318
- }
729
+ all_deps = {"decl": set(), "nondecl": set()}
319
730
 
320
- result = WDLSectionJob.coalesce_nodes(["decl", "nondecl"], WDLWorkflowGraph([]))
731
+ result = WDLSectionJob.coalesce_nodes(
732
+ ["decl", "nondecl"], WDLWorkflowGraph([])
733
+ )
321
734
 
322
735
  assert len(result) == 2
323
736
  assert len(result[0]) == 1
324
737
  assert len(result[1]) == 1
325
738
 
326
- with self.subTest(msg="Two adjacent decls with a common dependency can coalesce"):
739
+ with self.subTest(
740
+ msg="Two adjacent decls with a common dependency can coalesce"
741
+ ):
327
742
  all_decls = {"decl1", "decl2"}
328
- all_deps = {
329
- "decl1": {"base"},
330
- "decl2": {"base"},
331
- "base": set()
332
- }
743
+ all_deps = {"decl1": {"base"}, "decl2": {"base"}, "base": set()}
333
744
 
334
- result = WDLSectionJob.coalesce_nodes(["base", "decl1", "decl2"], WDLWorkflowGraph([]))
745
+ result = WDLSectionJob.coalesce_nodes(
746
+ ["base", "decl1", "decl2"], WDLWorkflowGraph([])
747
+ )
335
748
 
336
749
  assert len(result) == 2
337
750
  assert "base" in result[0]
338
751
  assert "decl1" in result[1]
339
752
  assert "decl2" in result[1]
340
753
 
341
- with self.subTest(msg="Two adjacent decls with different dependencies will not coalesce"):
754
+ with self.subTest(
755
+ msg="Two adjacent decls with different dependencies will not coalesce"
756
+ ):
342
757
  all_decls = {"decl1", "decl2"}
343
- all_deps = {
344
- "decl1": {"base"},
345
- "decl2": set(),
346
- "base": set()
347
- }
758
+ all_deps = {"decl1": {"base"}, "decl2": set(), "base": set()}
348
759
 
349
- result = WDLSectionJob.coalesce_nodes(["base", "decl1", "decl2"], WDLWorkflowGraph([]))
760
+ result = WDLSectionJob.coalesce_nodes(
761
+ ["base", "decl1", "decl2"], WDLWorkflowGraph([])
762
+ )
350
763
 
351
764
  assert len(result) == 3
352
765
  assert "base" in result[0]
353
766
 
354
- with self.subTest(msg="Two adjacent decls with different successors will coalesce"):
767
+ with self.subTest(
768
+ msg="Two adjacent decls with different successors will coalesce"
769
+ ):
355
770
  all_decls = {"decl1", "decl2"}
356
- all_deps = {
357
- "decl1": set(),
358
- "decl2": set(),
359
- "successor": {"decl2"}
360
- }
771
+ all_deps = {"decl1": set(), "decl2": set(), "successor": {"decl2"}}
361
772
 
362
- result = WDLSectionJob.coalesce_nodes(["decl1", "decl2", "successor"], WDLWorkflowGraph([]))
773
+ result = WDLSectionJob.coalesce_nodes(
774
+ ["decl1", "decl2", "successor"], WDLWorkflowGraph([])
775
+ )
363
776
 
364
777
  assert len(result) == 2
365
778
  assert "decl1" in result[0]
366
779
  assert "decl2" in result[0]
367
780
  assert "successor" in result[1]
368
781
 
782
+ def make_string_expr(self, to_parse: str) -> WDL.Expr.String:
783
+ """
784
+ Parse pseudo-WDL for testing whitespace removal.
785
+ """
786
+
787
+ pos = WDL.Error.SourcePosition("nowhere", "nowhere", 0, 0, 0, 0)
788
+
789
+ parts: list[Union[str, WDL.Expr.Placeholder]] = re.split("(~{[^}]*})", to_parse)
790
+ for i in range(1, len(parts), 2):
791
+ parts[i] = WDL.Expr.Placeholder(pos, {}, WDL.Expr.Null(pos))
792
+
793
+ return WDL.Expr.String(pos, parts)
794
+
795
+ def test_remove_common_leading_whitespace(self):
796
+ """
797
+ Make sure leading whitespace removal works properly.
798
+ """
799
+
800
+ # For a single line, we remove its leading whitespace
801
+ expr = self.make_string_expr(" a ~{b} c")
802
+ trimmed = remove_common_leading_whitespace(expr)
803
+ assert len(trimmed.parts) == 3
804
+ assert trimmed.parts[0] == "a "
805
+ assert trimmed.parts[2] == " c"
806
+
807
+ # Whitespace removed isn't affected by totally blank lines
808
+ expr = self.make_string_expr(" \n\n a\n ~{stuff}\n b\n\n")
809
+ trimmed = remove_common_leading_whitespace(expr)
810
+ assert len(trimmed.parts) == 3
811
+ assert trimmed.parts[0] == "\n\na\n"
812
+ assert trimmed.parts[2] == "\nb\n\n"
813
+
814
+ # Unless blank toleration is off
815
+ expr = self.make_string_expr(" \n\n a\n ~{stuff}\n b\n\n")
816
+ trimmed = remove_common_leading_whitespace(expr, tolerate_blanks=False)
817
+ assert len(trimmed.parts) == 3
818
+ assert trimmed.parts[0] == " \n\n a\n "
819
+ assert trimmed.parts[2] == "\n b\n\n"
820
+
821
+ # Whitespace is still removed if the first line doesn't have it before the newline
822
+ expr = self.make_string_expr("\n a\n ~{stuff}\n b\n")
823
+ trimmed = remove_common_leading_whitespace(expr)
824
+ assert len(trimmed.parts) == 3
825
+ assert trimmed.parts[0] == "\na\n"
826
+ assert trimmed.parts[2] == "\nb\n"
827
+
828
+ # Whitespace is not removed if actual content is dedented
829
+ expr = self.make_string_expr(" \n\n a\n ~{stuff}\nuhoh\n b\n\n")
830
+ trimmed = remove_common_leading_whitespace(expr)
831
+ assert len(trimmed.parts) == 3
832
+ assert trimmed.parts[0] == " \n\n a\n "
833
+ assert trimmed.parts[2] == "\nuhoh\n b\n\n"
834
+
835
+ # Unless dedents are tolerated
836
+ expr = self.make_string_expr(" \n\n a\n ~{stuff}\nuhoh\n b\n\n")
837
+ trimmed = remove_common_leading_whitespace(expr, tolerate_dedents=True)
838
+ assert len(trimmed.parts) == 3
839
+ assert trimmed.parts[0] == "\n\na\n"
840
+ assert trimmed.parts[2] == "\nuhoh\nb\n\n"
841
+
842
+ # Whitespace is still removed if all-whitespace lines have less of it
843
+ expr = self.make_string_expr("\n a\n ~{stuff}\n \n b\n")
844
+ trimmed = remove_common_leading_whitespace(expr)
845
+ assert len(trimmed.parts) == 3
846
+ assert trimmed.parts[0] == "\na\n"
847
+ assert trimmed.parts[2] == "\n\nb\n"
848
+
849
+ # Unless all-whitespace lines are not tolerated
850
+ expr = self.make_string_expr("\n a\n ~{stuff}\n \n b\n")
851
+ trimmed = remove_common_leading_whitespace(expr, tolerate_all_whitespace=False)
852
+ assert len(trimmed.parts) == 3
853
+ assert trimmed.parts[0] == "\n a\n "
854
+ assert trimmed.parts[2] == "\n\n b\n"
855
+
856
+ # When mixed tabs and spaces are detected, nothing is changed.
857
+ expr = self.make_string_expr("\n a\n\t~{stuff}\n b\n")
858
+ trimmed = remove_common_leading_whitespace(expr)
859
+ assert len(trimmed.parts) == 3
860
+ assert trimmed.parts[0] == "\n a\n\t"
861
+ assert trimmed.parts[2] == "\n b\n"
862
+
863
+ # When mixed tabs and spaces are not in the prefix, whitespace is removed.
864
+ expr = self.make_string_expr("\n\ta\n\t~{stuff} \n\tb\n")
865
+ trimmed = remove_common_leading_whitespace(expr)
866
+ assert len(trimmed.parts) == 3
867
+ assert trimmed.parts[0] == "\na\n"
868
+ assert trimmed.parts[2] == " \nb\n"
869
+
870
+ # An empty string works
871
+ expr = self.make_string_expr("")
872
+ trimmed = remove_common_leading_whitespace(expr)
873
+ assert len(trimmed.parts) == 1
874
+ assert trimmed.parts[0] == ""
875
+
876
+ # A string of only whitespace is preserved as an all-whitespace line
877
+ expr = self.make_string_expr("\t\t\t")
878
+ trimmed = remove_common_leading_whitespace(expr)
879
+ assert len(trimmed.parts) == 1
880
+ assert trimmed.parts[0] == "\t\t\t"
881
+
882
+ # A string of only whitespace is trimmed when all-whitespace lines are not tolerated
883
+ expr = self.make_string_expr("\t\t\t")
884
+ trimmed = remove_common_leading_whitespace(expr, tolerate_all_whitespace=False)
885
+ assert len(trimmed.parts) == 1
886
+ assert trimmed.parts[0] == ""
887
+
888
+ # An empty expression works
889
+ expr = WDL.Expr.String(
890
+ WDL.Error.SourcePosition("nowhere", "nowhere", 0, 0, 0, 0), []
891
+ )
892
+ trimmed = remove_common_leading_whitespace(expr)
893
+ assert len(trimmed.parts) == 0
894
+
895
+ # An expression of only placeholders works
896
+ expr = self.make_string_expr("~{AAA}")
897
+ trimmed = remove_common_leading_whitespace(expr)
898
+ assert len(trimmed.parts) == 3
899
+ assert trimmed.parts[0] == ""
900
+ assert trimmed.parts[2] == ""
901
+
902
+ # The command flag is preserved
903
+ expr = self.make_string_expr(" a ~{b} c")
904
+ trimmed = remove_common_leading_whitespace(expr)
905
+ assert trimmed.command == False
906
+ expr.command = True
907
+ trimmed = remove_common_leading_whitespace(expr)
908
+ assert trimmed.command == True
909
+
910
+ def test_choose_human_readable_directory(self):
911
+ """
912
+ Test to make sure that we pick sensible but non-colliding directories to put files in.
913
+ """
914
+
915
+ from toil.wdl.wdltoil import (
916
+ DirectoryNamingStateDict,
917
+ choose_human_readable_directory,
918
+ )
919
+
920
+ state: DirectoryNamingStateDict = {}
921
+
922
+ # The first time we should get a path with the task name and without the ID
923
+ first_chosen = choose_human_readable_directory(
924
+ "root", "taskname", "111-222-333", state
925
+ )
926
+ assert first_chosen.startswith("root")
927
+ assert "taskname" in first_chosen
928
+ assert "111-222-333" not in first_chosen
929
+
930
+ # If we use the same ID we should get the same result
931
+ same_id = choose_human_readable_directory(
932
+ "root", "taskname", "111-222-333", state
933
+ )
934
+ self.assertEqual(same_id, first_chosen)
935
+
936
+ # If we use a different ID we should get a different result still obeying the constraints
937
+ diff_id = choose_human_readable_directory(
938
+ "root", "taskname", "222-333-444", state
939
+ )
940
+ self.assertNotEqual(diff_id, first_chosen)
941
+ assert diff_id.startswith("root")
942
+ assert "taskname" in diff_id
943
+ assert "222-333-444" not in diff_id
944
+
945
+ def test_uri_packing(self):
946
+ """
947
+ Test to make sure Toil URI packing brings through the required information.
948
+ """
949
+
950
+ from toil.wdl.wdltoil import pack_toil_uri, unpack_toil_uri
951
+
952
+ # Set up a file
953
+ file_id = FileID("fileXYZ", 123, True)
954
+ task_path = "the_wf.the_task"
955
+ dir_id = uuid4()
956
+ file_basename = "thefile.txt"
957
+
958
+ # Pack and unpack it
959
+ uri = pack_toil_uri(file_id, task_path, dir_id, file_basename)
960
+ unpacked = unpack_toil_uri(uri)
961
+
962
+ # Make sure we got what we put in
963
+ self.assertEqual(unpacked[0], file_id)
964
+ self.assertEqual(unpacked[0].size, file_id.size)
965
+ self.assertEqual(unpacked[0].executable, file_id.executable)
966
+
967
+ self.assertEqual(unpacked[1], task_path)
968
+
969
+ # TODO: We don't make the UUIDs back into UUID objects
970
+ self.assertEqual(unpacked[2], str(dir_id))
971
+
972
+ self.assertEqual(unpacked[3], file_basename)
973
+
974
+ def test_disk_parse(self):
975
+ """
976
+ Test to make sure the disk parsing is correct
977
+ """
978
+ # Test cromwell compatibility
979
+ spec = "local-disk 5 SSD"
980
+ specified_mount_point, part_size, part_suffix = parse_disks(spec, spec)
981
+ self.assertEqual(specified_mount_point, None)
982
+ self.assertEqual(part_size, 5)
983
+ self.assertEqual(part_suffix, "GB")
984
+
985
+ # Test spec conformance
986
+ # https://github.com/openwdl/wdl/blob/e43e042104b728df1f1ad6e6145945d2b32331a6/SPEC.md?plain=1#L5072-L5082
987
+ spec = "10"
988
+ specified_mount_point, part_size, part_suffix = parse_disks(spec, spec)
989
+ self.assertEqual(specified_mount_point, None)
990
+ self.assertEqual(part_size, 10)
991
+ self.assertEqual(part_suffix, "GiB") # WDL spec default
992
+
993
+ spec = "1 MB"
994
+ specified_mount_point, part_size, part_suffix = parse_disks(spec, spec)
995
+ self.assertEqual(specified_mount_point, None)
996
+ self.assertEqual(part_size, 1)
997
+ self.assertEqual(part_suffix, "MB")
998
+
999
+ spec = "MOUNT_POINT 3"
1000
+ specified_mount_point, part_size, part_suffix = parse_disks(spec, spec)
1001
+ self.assertEqual(specified_mount_point, "MOUNT_POINT")
1002
+ self.assertEqual(part_size, 3)
1003
+ self.assertEqual(part_suffix, "GiB")
1004
+
1005
+ spec = "MOUNT_POINT 2 MB"
1006
+ specified_mount_point, part_size, part_suffix = parse_disks(spec, spec)
1007
+ self.assertEqual(specified_mount_point, "MOUNT_POINT")
1008
+ self.assertEqual(part_size, 2)
1009
+ self.assertEqual(part_suffix, "MB")
369
1010
 
370
1011
  if __name__ == "__main__":
371
1012
  unittest.main() # run all tests