toil 8.1.0b1__py3-none-any.whl → 8.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. toil/__init__.py +0 -35
  2. toil/batchSystems/abstractBatchSystem.py +1 -1
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +1 -1
  4. toil/batchSystems/awsBatch.py +1 -1
  5. toil/batchSystems/cleanup_support.py +1 -1
  6. toil/batchSystems/kubernetes.py +53 -7
  7. toil/batchSystems/local_support.py +1 -1
  8. toil/batchSystems/mesos/batchSystem.py +13 -8
  9. toil/batchSystems/mesos/test/__init__.py +3 -2
  10. toil/batchSystems/singleMachine.py +1 -1
  11. toil/batchSystems/slurm.py +27 -26
  12. toil/bus.py +5 -3
  13. toil/common.py +39 -11
  14. toil/cwl/cwltoil.py +1 -1
  15. toil/job.py +64 -49
  16. toil/jobStores/abstractJobStore.py +24 -3
  17. toil/jobStores/fileJobStore.py +25 -1
  18. toil/jobStores/googleJobStore.py +104 -30
  19. toil/leader.py +9 -0
  20. toil/lib/accelerators.py +3 -1
  21. toil/lib/aws/utils.py.orig +504 -0
  22. toil/lib/bioio.py +1 -1
  23. toil/lib/docker.py +252 -91
  24. toil/lib/dockstore.py +11 -3
  25. toil/lib/exceptions.py +5 -3
  26. toil/lib/history.py +87 -13
  27. toil/lib/history_submission.py +23 -9
  28. toil/lib/io.py +34 -22
  29. toil/lib/misc.py +7 -1
  30. toil/lib/resources.py +2 -1
  31. toil/lib/threading.py +11 -10
  32. toil/options/common.py +8 -0
  33. toil/options/wdl.py +11 -0
  34. toil/server/api_spec/LICENSE +201 -0
  35. toil/server/api_spec/README.rst +5 -0
  36. toil/server/cli/wes_cwl_runner.py +2 -1
  37. toil/test/__init__.py +275 -115
  38. toil/test/batchSystems/batchSystemTest.py +227 -205
  39. toil/test/batchSystems/test_slurm.py +27 -0
  40. toil/test/cactus/pestis.tar.gz +0 -0
  41. toil/test/conftest.py +7 -0
  42. toil/test/cwl/2.fasta +11 -0
  43. toil/test/cwl/2.fastq +12 -0
  44. toil/test/cwl/conftest.py +1 -1
  45. toil/test/cwl/cwlTest.py +999 -867
  46. toil/test/cwl/directory/directory/file.txt +15 -0
  47. toil/test/cwl/download_directory_file.json +4 -0
  48. toil/test/cwl/download_directory_s3.json +4 -0
  49. toil/test/cwl/download_file.json +6 -0
  50. toil/test/cwl/download_http.json +6 -0
  51. toil/test/cwl/download_https.json +6 -0
  52. toil/test/cwl/download_s3.json +6 -0
  53. toil/test/cwl/download_subdirectory_file.json +5 -0
  54. toil/test/cwl/download_subdirectory_s3.json +5 -0
  55. toil/test/cwl/empty.json +1 -0
  56. toil/test/cwl/mock_mpi/fake_mpi.yml +8 -0
  57. toil/test/cwl/mock_mpi/fake_mpi_run.py +42 -0
  58. toil/test/cwl/optional-file-exists.json +6 -0
  59. toil/test/cwl/optional-file-missing.json +6 -0
  60. toil/test/cwl/preemptible_expression.json +1 -0
  61. toil/test/cwl/revsort-job-missing.json +6 -0
  62. toil/test/cwl/revsort-job.json +6 -0
  63. toil/test/cwl/s3_secondary_file.json +16 -0
  64. toil/test/cwl/seqtk_seq_job.json +6 -0
  65. toil/test/cwl/stream.json +6 -0
  66. toil/test/cwl/test_filename_conflict_resolution.ms/table.dat +0 -0
  67. toil/test/cwl/test_filename_conflict_resolution.ms/table.f0 +0 -0
  68. toil/test/cwl/test_filename_conflict_resolution.ms/table.f1 +0 -0
  69. toil/test/cwl/test_filename_conflict_resolution.ms/table.f1i +0 -0
  70. toil/test/cwl/test_filename_conflict_resolution.ms/table.f2 +0 -0
  71. toil/test/cwl/test_filename_conflict_resolution.ms/table.f2_TSM0 +0 -0
  72. toil/test/cwl/test_filename_conflict_resolution.ms/table.f3 +0 -0
  73. toil/test/cwl/test_filename_conflict_resolution.ms/table.f3_TSM0 +0 -0
  74. toil/test/cwl/test_filename_conflict_resolution.ms/table.f4 +0 -0
  75. toil/test/cwl/test_filename_conflict_resolution.ms/table.f4_TSM0 +0 -0
  76. toil/test/cwl/test_filename_conflict_resolution.ms/table.f5 +0 -0
  77. toil/test/cwl/test_filename_conflict_resolution.ms/table.info +0 -0
  78. toil/test/cwl/test_filename_conflict_resolution.ms/table.lock +0 -0
  79. toil/test/cwl/whale.txt +16 -0
  80. toil/test/docs/scripts/example_alwaysfail.py +38 -0
  81. toil/test/docs/scripts/example_alwaysfail_with_files.wdl +33 -0
  82. toil/test/docs/scripts/example_cachingbenchmark.py +117 -0
  83. toil/test/docs/scripts/stagingExampleFiles/in.txt +1 -0
  84. toil/test/docs/scripts/stagingExampleFiles/out.txt +2 -0
  85. toil/test/docs/scripts/tutorial_arguments.py +23 -0
  86. toil/test/docs/scripts/tutorial_debugging.patch +12 -0
  87. toil/test/docs/scripts/tutorial_debugging_hangs.wdl +126 -0
  88. toil/test/docs/scripts/tutorial_debugging_works.wdl +129 -0
  89. toil/test/docs/scripts/tutorial_docker.py +20 -0
  90. toil/test/docs/scripts/tutorial_dynamic.py +24 -0
  91. toil/test/docs/scripts/tutorial_encapsulation.py +28 -0
  92. toil/test/docs/scripts/tutorial_encapsulation2.py +29 -0
  93. toil/test/docs/scripts/tutorial_helloworld.py +15 -0
  94. toil/test/docs/scripts/tutorial_invokeworkflow.py +27 -0
  95. toil/test/docs/scripts/tutorial_invokeworkflow2.py +30 -0
  96. toil/test/docs/scripts/tutorial_jobfunctions.py +22 -0
  97. toil/test/docs/scripts/tutorial_managing.py +29 -0
  98. toil/test/docs/scripts/tutorial_managing2.py +56 -0
  99. toil/test/docs/scripts/tutorial_multiplejobs.py +25 -0
  100. toil/test/docs/scripts/tutorial_multiplejobs2.py +21 -0
  101. toil/test/docs/scripts/tutorial_multiplejobs3.py +22 -0
  102. toil/test/docs/scripts/tutorial_promises.py +25 -0
  103. toil/test/docs/scripts/tutorial_promises2.py +30 -0
  104. toil/test/docs/scripts/tutorial_quickstart.py +22 -0
  105. toil/test/docs/scripts/tutorial_requirements.py +44 -0
  106. toil/test/docs/scripts/tutorial_services.py +45 -0
  107. toil/test/docs/scripts/tutorial_staging.py +45 -0
  108. toil/test/docs/scripts/tutorial_stats.py +64 -0
  109. toil/test/lib/aws/test_iam.py +3 -1
  110. toil/test/lib/dockerTest.py +205 -122
  111. toil/test/lib/test_history.py +101 -77
  112. toil/test/provisioners/aws/awsProvisionerTest.py +12 -9
  113. toil/test/provisioners/clusterTest.py +4 -4
  114. toil/test/provisioners/gceProvisionerTest.py +16 -14
  115. toil/test/sort/sort.py +4 -1
  116. toil/test/src/busTest.py +17 -17
  117. toil/test/src/deferredFunctionTest.py +145 -132
  118. toil/test/src/importExportFileTest.py +71 -63
  119. toil/test/src/jobEncapsulationTest.py +27 -28
  120. toil/test/src/jobServiceTest.py +149 -133
  121. toil/test/src/jobTest.py +219 -211
  122. toil/test/src/miscTests.py +66 -60
  123. toil/test/src/promisedRequirementTest.py +163 -169
  124. toil/test/src/regularLogTest.py +24 -24
  125. toil/test/src/resourceTest.py +82 -76
  126. toil/test/src/restartDAGTest.py +51 -47
  127. toil/test/src/resumabilityTest.py +24 -19
  128. toil/test/src/retainTempDirTest.py +60 -57
  129. toil/test/src/systemTest.py +17 -13
  130. toil/test/src/threadingTest.py +29 -32
  131. toil/test/utils/ABCWorkflowDebug/B_file.txt +1 -0
  132. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +204 -0
  133. toil/test/utils/ABCWorkflowDebug/mkFile.py +16 -0
  134. toil/test/utils/ABCWorkflowDebug/sleep.cwl +12 -0
  135. toil/test/utils/ABCWorkflowDebug/sleep.yaml +1 -0
  136. toil/test/utils/toilDebugTest.py +117 -102
  137. toil/test/utils/toilKillTest.py +54 -53
  138. toil/test/utils/utilsTest.py +303 -229
  139. toil/test/wdl/lint_error.wdl +9 -0
  140. toil/test/wdl/md5sum/empty_file.json +1 -0
  141. toil/test/wdl/md5sum/md5sum-gs.json +1 -0
  142. toil/test/wdl/md5sum/md5sum.1.0.wdl +32 -0
  143. toil/test/wdl/md5sum/md5sum.input +1 -0
  144. toil/test/wdl/md5sum/md5sum.json +1 -0
  145. toil/test/wdl/md5sum/md5sum.wdl +25 -0
  146. toil/test/wdl/miniwdl_self_test/inputs-namespaced.json +1 -0
  147. toil/test/wdl/miniwdl_self_test/inputs.json +1 -0
  148. toil/test/wdl/miniwdl_self_test/self_test.wdl +40 -0
  149. toil/test/wdl/standard_library/as_map.json +16 -0
  150. toil/test/wdl/standard_library/as_map_as_input.wdl +23 -0
  151. toil/test/wdl/standard_library/as_pairs.json +7 -0
  152. toil/test/wdl/standard_library/as_pairs_as_input.wdl +23 -0
  153. toil/test/wdl/standard_library/ceil.json +3 -0
  154. toil/test/wdl/standard_library/ceil_as_command.wdl +16 -0
  155. toil/test/wdl/standard_library/ceil_as_input.wdl +16 -0
  156. toil/test/wdl/standard_library/collect_by_key.json +1 -0
  157. toil/test/wdl/standard_library/collect_by_key_as_input.wdl +23 -0
  158. toil/test/wdl/standard_library/cross.json +11 -0
  159. toil/test/wdl/standard_library/cross_as_input.wdl +19 -0
  160. toil/test/wdl/standard_library/flatten.json +7 -0
  161. toil/test/wdl/standard_library/flatten_as_input.wdl +18 -0
  162. toil/test/wdl/standard_library/floor.json +3 -0
  163. toil/test/wdl/standard_library/floor_as_command.wdl +16 -0
  164. toil/test/wdl/standard_library/floor_as_input.wdl +16 -0
  165. toil/test/wdl/standard_library/keys.json +8 -0
  166. toil/test/wdl/standard_library/keys_as_input.wdl +24 -0
  167. toil/test/wdl/standard_library/length.json +7 -0
  168. toil/test/wdl/standard_library/length_as_input.wdl +16 -0
  169. toil/test/wdl/standard_library/length_as_input_with_map.json +7 -0
  170. toil/test/wdl/standard_library/length_as_input_with_map.wdl +17 -0
  171. toil/test/wdl/standard_library/length_invalid.json +3 -0
  172. toil/test/wdl/standard_library/range.json +3 -0
  173. toil/test/wdl/standard_library/range_0.json +3 -0
  174. toil/test/wdl/standard_library/range_as_input.wdl +17 -0
  175. toil/test/wdl/standard_library/range_invalid.json +3 -0
  176. toil/test/wdl/standard_library/read_boolean.json +3 -0
  177. toil/test/wdl/standard_library/read_boolean_as_command.wdl +17 -0
  178. toil/test/wdl/standard_library/read_float.json +3 -0
  179. toil/test/wdl/standard_library/read_float_as_command.wdl +17 -0
  180. toil/test/wdl/standard_library/read_int.json +3 -0
  181. toil/test/wdl/standard_library/read_int_as_command.wdl +17 -0
  182. toil/test/wdl/standard_library/read_json.json +3 -0
  183. toil/test/wdl/standard_library/read_json_as_output.wdl +31 -0
  184. toil/test/wdl/standard_library/read_lines.json +3 -0
  185. toil/test/wdl/standard_library/read_lines_as_output.wdl +31 -0
  186. toil/test/wdl/standard_library/read_map.json +3 -0
  187. toil/test/wdl/standard_library/read_map_as_output.wdl +31 -0
  188. toil/test/wdl/standard_library/read_string.json +3 -0
  189. toil/test/wdl/standard_library/read_string_as_command.wdl +17 -0
  190. toil/test/wdl/standard_library/read_tsv.json +3 -0
  191. toil/test/wdl/standard_library/read_tsv_as_output.wdl +31 -0
  192. toil/test/wdl/standard_library/round.json +3 -0
  193. toil/test/wdl/standard_library/round_as_command.wdl +16 -0
  194. toil/test/wdl/standard_library/round_as_input.wdl +16 -0
  195. toil/test/wdl/standard_library/size.json +3 -0
  196. toil/test/wdl/standard_library/size_as_command.wdl +17 -0
  197. toil/test/wdl/standard_library/size_as_output.wdl +36 -0
  198. toil/test/wdl/standard_library/stderr.json +3 -0
  199. toil/test/wdl/standard_library/stderr_as_output.wdl +30 -0
  200. toil/test/wdl/standard_library/stdout.json +3 -0
  201. toil/test/wdl/standard_library/stdout_as_output.wdl +30 -0
  202. toil/test/wdl/standard_library/sub.json +3 -0
  203. toil/test/wdl/standard_library/sub_as_input.wdl +17 -0
  204. toil/test/wdl/standard_library/sub_as_input_with_file.wdl +17 -0
  205. toil/test/wdl/standard_library/transpose.json +6 -0
  206. toil/test/wdl/standard_library/transpose_as_input.wdl +18 -0
  207. toil/test/wdl/standard_library/write_json.json +6 -0
  208. toil/test/wdl/standard_library/write_json_as_command.wdl +17 -0
  209. toil/test/wdl/standard_library/write_lines.json +7 -0
  210. toil/test/wdl/standard_library/write_lines_as_command.wdl +17 -0
  211. toil/test/wdl/standard_library/write_map.json +6 -0
  212. toil/test/wdl/standard_library/write_map_as_command.wdl +17 -0
  213. toil/test/wdl/standard_library/write_tsv.json +6 -0
  214. toil/test/wdl/standard_library/write_tsv_as_command.wdl +17 -0
  215. toil/test/wdl/standard_library/zip.json +12 -0
  216. toil/test/wdl/standard_library/zip_as_input.wdl +19 -0
  217. toil/test/wdl/test.csv +3 -0
  218. toil/test/wdl/test.tsv +3 -0
  219. toil/test/wdl/testfiles/croo.wdl +38 -0
  220. toil/test/wdl/testfiles/drop_files.wdl +62 -0
  221. toil/test/wdl/testfiles/drop_files_subworkflow.wdl +13 -0
  222. toil/test/wdl/testfiles/empty.txt +0 -0
  223. toil/test/wdl/testfiles/not_enough_outputs.wdl +33 -0
  224. toil/test/wdl/testfiles/random.wdl +66 -0
  225. toil/test/wdl/testfiles/string_file_coercion.json +1 -0
  226. toil/test/wdl/testfiles/string_file_coercion.wdl +35 -0
  227. toil/test/wdl/testfiles/test.json +4 -0
  228. toil/test/wdl/testfiles/test_boolean.txt +1 -0
  229. toil/test/wdl/testfiles/test_float.txt +1 -0
  230. toil/test/wdl/testfiles/test_int.txt +1 -0
  231. toil/test/wdl/testfiles/test_lines.txt +5 -0
  232. toil/test/wdl/testfiles/test_map.txt +2 -0
  233. toil/test/wdl/testfiles/test_string.txt +1 -0
  234. toil/test/wdl/testfiles/url_to_file.wdl +13 -0
  235. toil/test/wdl/testfiles/url_to_optional_file.wdl +13 -0
  236. toil/test/wdl/testfiles/vocab.json +1 -0
  237. toil/test/wdl/testfiles/vocab.wdl +66 -0
  238. toil/test/wdl/testfiles/wait.wdl +34 -0
  239. toil/test/wdl/wdl_specification/type_pair.json +23 -0
  240. toil/test/wdl/wdl_specification/type_pair_basic.wdl +36 -0
  241. toil/test/wdl/wdl_specification/type_pair_with_files.wdl +36 -0
  242. toil/test/wdl/wdl_specification/v1_spec.json +1 -0
  243. toil/test/wdl/wdl_specification/v1_spec_declaration.wdl +39 -0
  244. toil/test/wdl/wdltoil_test.py +680 -407
  245. toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
  246. toil/version.py +9 -9
  247. toil/wdl/wdltoil.py +336 -123
  248. {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/METADATA +5 -4
  249. toil-8.2.0.dist-info/RECORD +439 -0
  250. {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/WHEEL +1 -1
  251. toil-8.1.0b1.dist-info/RECORD +0 -259
  252. {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/entry_points.txt +0 -0
  253. {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info/licenses}/LICENSE +0 -0
  254. {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/top_level.txt +0 -0
toil/wdl/wdltoil.py CHANGED
@@ -62,13 +62,14 @@ else:
62
62
 
63
63
  from functools import partial
64
64
  from urllib.error import HTTPError
65
- from urllib.parse import quote, unquote, urljoin, urlsplit, urlparse
65
+ from urllib.parse import quote, unquote, urljoin, urlsplit
66
66
 
67
67
  import WDL.Error
68
68
  import WDL.runtime.config
69
69
  from configargparse import ArgParser, Namespace
70
70
  from WDL._util import byte_size_units, chmod_R_plus
71
- from WDL.CLI import print_error
71
+ from WDL.CLI import print_error, outline
72
+ import WDL.Lint
72
73
  from WDL.runtime.backend.docker_swarm import SwarmContainer
73
74
  from WDL.runtime.backend.singularity import SingularityContainer
74
75
  from WDL.runtime.error import DownloadFailed
@@ -917,8 +918,8 @@ def set_shared_fs_path(file: WDL.Value.File, path: str) -> WDL.Value.File:
917
918
 
918
919
 
919
920
  def view_shared_fs_paths(
920
- bindings: WDL.Env.Bindings[WDL.Value.Base],
921
- ) -> WDL.Env.Bindings[WDL.Value.Base]:
921
+ bindings: WDLBindings,
922
+ ) -> WDLBindings:
922
923
  """
923
924
  Given WDL bindings, return a copy where all files have their shared filesystem paths as their values.
924
925
  """
@@ -1137,11 +1138,11 @@ def choose_human_readable_directory(
1137
1138
 
1138
1139
  def evaluate_decls_to_bindings(
1139
1140
  decls: list[WDL.Tree.Decl],
1140
- all_bindings: WDL.Env.Bindings[WDL.Value.Base],
1141
+ all_bindings: WDLBindings,
1141
1142
  standard_library: ToilWDLStdLibBase,
1142
1143
  include_previous: bool = False,
1143
1144
  drop_missing_files: bool = False,
1144
- ) -> WDL.Env.Bindings[WDL.Value.Base]:
1145
+ ) -> WDLBindings:
1145
1146
  """
1146
1147
  Evaluate decls with a given bindings environment and standard library.
1147
1148
  Creates a new bindings object that only contains the bindings from the given decls.
@@ -1156,7 +1157,7 @@ def evaluate_decls_to_bindings(
1156
1157
  """
1157
1158
  # all_bindings contains current bindings + previous all_bindings
1158
1159
  # bindings only contains the decl bindings themselves so that bindings from other sections prior aren't included
1159
- bindings: WDL.Env.Bindings[WDL.Value.Base] = WDL.Env.Bindings()
1160
+ bindings: WDLBindings = WDL.Env.Bindings()
1160
1161
  drop_if_missing_with_workdir = partial(
1161
1162
  drop_if_missing, standard_library=standard_library
1162
1163
  )
@@ -1245,7 +1246,10 @@ class NonDownloadingSize(WDL.StdLib._Size):
1245
1246
  return WDL.Value.Float(total_size)
1246
1247
 
1247
1248
 
1248
- def extract_workflow_inputs(environment: WDLBindings) -> list[str]:
1249
+ def extract_file_values(environment: WDLBindings) -> list[str]:
1250
+ """
1251
+ Get a list of all File object values in the given bindings.
1252
+ """
1249
1253
  filenames = list()
1250
1254
 
1251
1255
  def add_filename(file: WDL.Value.File) -> WDL.Value.File:
@@ -1255,6 +1259,22 @@ def extract_workflow_inputs(environment: WDLBindings) -> list[str]:
1255
1259
  map_over_files_in_bindings(environment, add_filename)
1256
1260
  return filenames
1257
1261
 
1262
+ def extract_file_virtualized_values(environment: WDLBindings) -> list[str]:
1263
+ """
1264
+ Get a list of all File object virtualized values in the given bindings.
1265
+
1266
+ If a file hasn't been virtualized, it won't contribute to the list.
1267
+ """
1268
+ values = list()
1269
+
1270
+ def add_value(file: WDL.Value.File) -> WDL.Value.File:
1271
+ value = get_file_virtualized_value(file)
1272
+ if value is not None:
1273
+ values.append(value)
1274
+ return file
1275
+
1276
+ map_over_files_in_bindings(environment, add_value)
1277
+ return values
1258
1278
 
1259
1279
  def convert_files(
1260
1280
  environment: WDLBindings,
@@ -1263,19 +1283,21 @@ def convert_files(
1263
1283
  task_path: str,
1264
1284
  ) -> WDLBindings:
1265
1285
  """
1266
- Resolve relative-URI files in the given environment convert the file values to a new value made from a given mapping.
1267
-
1268
- Will return bindings with file values set to their corresponding relative-URI.
1286
+ Fill in the virtualized_value fields for File objects in a WDL environment.
1269
1287
 
1270
- :param environment: Bindings to evaluate on
1271
- :return: new bindings object
1288
+ :param environment: Bindings to evaluate on. Will not be modified.
1289
+ :param file_to_id: Maps from imported URI to Toil FileID with the data.
1290
+ :param file_to_data: Maps from WDL-level file value to metadata about the
1291
+ file, including URI that would have been imported.
1292
+ :return: new bindings object with the annotated File objects in it.
1272
1293
  """
1273
1294
  dir_ids = {t[1] for t in file_to_data.values()}
1274
1295
  dir_to_id = {k: uuid.uuid4() for k in dir_ids}
1275
1296
 
1276
1297
  def convert_file_to_uri(file: WDL.Value.File) -> WDL.Value.File:
1277
1298
  """
1278
- Calls import_filename to detect if a potential URI exists and imports it. Will modify the File object value to the new URI and tack on the virtualized file.
1299
+ Produce a WDL File with the virtualized_value set to the Toil URI for
1300
+ the already-imported data, but the same value.
1279
1301
  """
1280
1302
  candidate_uri = file_to_data[file.value][0]
1281
1303
  file_id = file_to_id[candidate_uri]
@@ -1638,32 +1660,35 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
1638
1660
  logger.debug("File has no virtualized value so not changing value")
1639
1661
  return file
1640
1662
 
1663
+ def _resolve_devirtualized_to_uri(self, devirtualized: str) -> str:
1664
+ """
1665
+ Get a URI pointing to whatever URI or devirtualized file path is provided.
1666
+
1667
+ Handles resolving symlinks using in-container paths if necessary.
1668
+ """
1669
+
1670
+ return Toil.normalize_uri(devirtualized, dir_path=self.execution_dir)
1671
+
1641
1672
  def _virtualize_file(
1642
1673
  self, file: WDL.Value.File, enforce_existence: bool = True
1643
1674
  ) -> WDL.Value.File:
1644
- logger.debug("Virtualizing %s", file)
1645
- # If enforce_existence is true, then if a file is detected as nonexistent, raise an error. Else, let it pass through
1646
1675
  if get_file_virtualized_value(file) is not None:
1647
- logger.debug("File is marked nonexistent so passing it through")
1676
+ # Already virtualized
1648
1677
  return file
1649
1678
 
1650
- if enforce_existence is False:
1651
- # We only want to error on a nonexistent file in the output section
1652
- # Since we need to virtualize on task boundaries, don't enforce existence if on a boundary
1653
- if is_standard_url(file.value):
1654
- file_uri = Toil.normalize_uri(file.value)
1655
- else:
1656
- abs_filepath = (
1657
- os.path.join(self.execution_dir, file.value)
1658
- if self.execution_dir is not None
1659
- else os.path.abspath(file.value)
1660
- )
1661
- file_uri = Toil.normalize_uri(abs_filepath)
1679
+ logger.debug("Virtualizing %s", file)
1662
1680
 
1663
- if not AbstractJobStore.url_exists(file_uri):
1681
+ try:
1682
+ # Let the actual virtualization implementation signal a missing file
1683
+ virtualized_filename = self._virtualize_filename(file.value)
1684
+ except FileNotFoundError:
1685
+ if enforce_existence:
1686
+ raise
1687
+ else:
1664
1688
  logger.debug("File appears nonexistent so marking it nonexistent")
1689
+ # Mark the file nonexistent.
1665
1690
  return set_file_nonexistent(file, True)
1666
- virtualized_filename = self._virtualize_filename(file.value)
1691
+
1667
1692
  logger.debug(
1668
1693
  "For file %s got virtualized filename %s", file, virtualized_filename
1669
1694
  )
@@ -1846,9 +1871,12 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
1846
1871
  @memoize
1847
1872
  def _virtualize_filename(self, filename: str) -> str:
1848
1873
  """
1849
- from a local path in write_dir, 'virtualize' into the filename as it should present in a File value
1874
+ from a local path or other URL, 'virtualize' into the filename as it should present in a File value.
1875
+
1876
+ New in Toil: the path or URL may not actually exist.
1850
1877
 
1851
1878
  :param filename: Can be a local file path, URL (http, https, s3, gs), or toilfile
1879
+ :raises FileNotFoundError: if the file doesn't actually exist (new addition in Toil over MiniWDL)
1852
1880
  """
1853
1881
 
1854
1882
  if is_toil_url(filename):
@@ -1868,7 +1896,9 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
1868
1896
  try:
1869
1897
  imported = self._file_store.import_file(filename)
1870
1898
  except FileNotFoundError:
1871
- logger.error(
1899
+ # This might happen because we're also along the code path for
1900
+ # optional file outputs.
1901
+ logger.info(
1872
1902
  "File at URL %s does not exist or is inaccessible." % filename
1873
1903
  )
1874
1904
  raise
@@ -1879,9 +1909,13 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
1879
1909
  filename,
1880
1910
  e.code,
1881
1911
  )
1912
+ # We don't need to handle translating error codes for not
1913
+ # found; import_file does it already.
1882
1914
  raise
1883
1915
  if imported is None:
1884
- # Satisfy mypy, this should never happen though as we don't pass a shared file name (which is the only way import_file returns None)
1916
+ # Satisfy mypy. This should never happen though as we don't
1917
+ # pass a shared file name (which is the only way import_file
1918
+ # returns None)
1885
1919
  raise RuntimeError("Failed to import URL %s into jobstore." % filename)
1886
1920
  file_basename = os.path.basename(urlsplit(filename).path)
1887
1921
  # Get the URL to the parent directory and use that.
@@ -1890,23 +1924,19 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
1890
1924
  dir_id = self._parent_dir_to_ids.setdefault(parent_dir, uuid.uuid4())
1891
1925
  result = pack_toil_uri(imported, self.task_path, dir_id, file_basename)
1892
1926
  logger.debug("Virtualized %s as WDL file %s", filename, result)
1893
- # We can't put the Toil URI in the virtualized_to_devirtualized cache because it would point to the URL instead of a
1894
- # local file on the machine, so only store the forward mapping
1927
+ # We can't put the Toil URI in the virtualized_to_devirtualized
1928
+ # cache because it would point to the URL instead of a local file
1929
+ # on the machine, so only store the forward mapping
1895
1930
  self._devirtualized_to_virtualized[filename] = result
1896
1931
  return result
1897
1932
  else:
1898
- # Otherwise this is a local file and we want to fake it as a Toil file store file
1899
- # Make it an absolute path
1900
- parsed = urlparse(filename)
1901
- if parsed.scheme == "file":
1902
- # conversion was already done by normalize_uri
1903
- abs_filename = unquote(parsed.path)
1904
- elif self.execution_dir is not None:
1905
- # To support relative paths from execution directory, join the execution dir and filename
1906
- # If filename is already an abs path, join() will not do anything
1907
- abs_filename = os.path.join(self.execution_dir, filename)
1908
- else:
1909
- abs_filename = os.path.abspath(filename)
1933
+ # Otherwise this is a local file name or URI and we want to fake it
1934
+ # as a Toil file store file
1935
+
1936
+ # Convert to a properly-absolutized file URI
1937
+ file_uri = Toil.normalize_uri(filename, dir_path=self.execution_dir)
1938
+ # Extract the absolute path name
1939
+ abs_filename = unquote(urlsplit(file_uri).path)
1910
1940
 
1911
1941
  if abs_filename in self._devirtualized_to_virtualized:
1912
1942
  # This is a previously devirtualized thing so we can just use the
@@ -1917,6 +1947,9 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
1917
1947
  )
1918
1948
  return result
1919
1949
 
1950
+ if not os.path.exists(abs_filename):
1951
+ raise FileNotFoundError(abs_filename)
1952
+
1920
1953
  file_id = self._file_store.writeGlobalFile(abs_filename)
1921
1954
 
1922
1955
  file_dir = os.path.dirname(abs_filename)
@@ -1946,6 +1979,51 @@ class ToilWDLStdLibWorkflow(ToilWDLStdLibBase):
1946
1979
 
1947
1980
  self._miniwdl_cache: Optional[WDL.runtime.cache.CallCache] = None
1948
1981
 
1982
+ def _virtualize_file(
1983
+ self, file: WDL.Value.File, enforce_existence: bool = True
1984
+ ) -> WDL.Value.File:
1985
+ # When a workflow coerces a string path or file: URI to a File at
1986
+ # workflow scope, we need to fill in the cache filesystem path.
1987
+ if (
1988
+ get_file_virtualized_value(file) is None
1989
+ and get_shared_fs_path(file) is None
1990
+ and (
1991
+ not is_any_url(file.value)
1992
+ or is_file_url(file.value)
1993
+ )
1994
+ ):
1995
+ # This is a never-virtualized file that is a file path or URI and
1996
+ # has no shared FS path associated with it. We just made it at
1997
+ # workflow scope. (If it came from a task, it would have a
1998
+ # virtualized value already.)
1999
+
2000
+ # If we are loading it at workflow scope, the file path can be used
2001
+ # as the cache path.
2002
+
2003
+ if not is_any_url(file.value):
2004
+ # Handle file path
2005
+ cache_path = file.value
2006
+ else:
2007
+ # Handle pulling path out of file URI
2008
+ cache_path = unquote(urlsplit(file.value).path)
2009
+
2010
+ # Apply the path
2011
+ file = set_shared_fs_path(file, cache_path)
2012
+
2013
+ logger.info(
2014
+ "Applied shared filesystem path %s to File %s that appears to "
2015
+ "have been coerced from String at workflow scope.",
2016
+ cache_path,
2017
+ file
2018
+ )
2019
+
2020
+ # Do the virtualization
2021
+ return super()._virtualize_file(file, enforce_existence)
2022
+
2023
+ # TODO: If the workflow coerces a File to a String and back again, we
2024
+ # should have some way to recover the toilfile: URL it had in the job
2025
+ # store to avoid re-importing it.
2026
+
1949
2027
  # This needs to be hash-compatible with MiniWDL.
1950
2028
  # MiniWDL hooks _virtualize_filename
1951
2029
  # <https://github.com/chanzuckerberg/miniwdl/blob/475dd3f3784d1390e6a0e880d43316a620114de3/WDL/runtime/workflow.py#L699-L729>,
@@ -1999,7 +2077,7 @@ class ToilWDLStdLibWorkflow(ToilWDLStdLibBase):
1999
2077
  )
2000
2078
  # Make an environment of "file_sha256" to that as a WDL string, and
2001
2079
  # digest that, and make a write_ cache key. No need to transform to
2002
- # shared FS paths sonce no paths are in it.
2080
+ # shared FS paths since no paths are in it.
2003
2081
  log_bindings(
2004
2082
  logger.debug, "Digesting file bindings:", [file_input_bindings]
2005
2083
  )
@@ -2346,6 +2424,8 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
2346
2424
  filenames.
2347
2425
  """
2348
2426
 
2427
+ logger.debug("WDL task outputs stdlib asked to virtualize %s", filename)
2428
+
2349
2429
  if not is_any_url(filename) and not filename.startswith("/"):
2350
2430
  # We are getting a bare relative path on the supposedly devirtualized side.
2351
2431
  # Find a real path to it relative to the current directory override.
@@ -2394,8 +2474,12 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
2394
2474
  logger.error(
2395
2475
  "Handling broken symlink %s ultimately to %s", filename, here
2396
2476
  )
2477
+ # This should produce a FileNotFoundError since we think of
2478
+ # broken symlinks as nonexistent.
2479
+ raise FileNotFoundError(filename)
2397
2480
  filename = here
2398
-
2481
+
2482
+ logger.debug("WDL task outputs stdlib thinks we really need to virtualize %s", filename)
2399
2483
  return super()._virtualize_filename(filename)
2400
2484
 
2401
2485
 
@@ -2535,7 +2619,7 @@ def devirtualize_files(
2535
2619
  that are actually available to command line commands.
2536
2620
  The same virtual file always maps to the same devirtualized filename even with duplicates
2537
2621
  """
2538
- logger.info("Devirtualizing files")
2622
+ logger.debug("Devirtualizing files")
2539
2623
  return map_over_files_in_bindings(environment, stdlib._devirtualize_file)
2540
2624
 
2541
2625
 
@@ -2546,12 +2630,35 @@ def virtualize_files(
2546
2630
  Make sure all the File values embedded in the given bindings point to files
2547
2631
  that are usable from other machines.
2548
2632
  """
2549
- logger.info("Virtualizing files")
2633
+ logger.debug("Virtualizing files")
2550
2634
  virtualize_func = partial(
2551
2635
  stdlib._virtualize_file, enforce_existence=enforce_existence
2552
2636
  )
2553
2637
  return map_over_files_in_bindings(environment, virtualize_func)
2554
2638
 
2639
+ def delete_dead_files(internal_bindings: WDLBindings, live_bindings_list: list[WDLBindings], file_store: AbstractFileStore) -> None:
2640
+ """
2641
+ Delete any files that are in the given bindings but not in the live list.
2642
+
2643
+ Operates on the virtualized values of File objects anywhere in the bindings.
2644
+ """
2645
+
2646
+ # Get all the files in the first bindings and not any of the others.
2647
+ unused_files = set(
2648
+ extract_file_virtualized_values(internal_bindings)
2649
+ ).difference(
2650
+ *(
2651
+ extract_file_virtualized_values(bindings)
2652
+ for bindings in live_bindings_list
2653
+ )
2654
+ )
2655
+
2656
+ for file_uri in unused_files:
2657
+ # Delete them
2658
+ if is_toil_url(file_uri):
2659
+ logger.debug("Delete file %s that is not needed", file_uri)
2660
+ file_id, _, _, _ = unpack_toil_uri(file_uri)
2661
+ file_store.deleteGlobalFile(file_id)
2555
2662
 
2556
2663
  def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:
2557
2664
  """
@@ -3025,6 +3132,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
3025
3132
  self,
3026
3133
  task: WDL.Tree.Task,
3027
3134
  prev_node_results: Sequence[Promised[WDLBindings]],
3135
+ enclosing_bindings: WDLBindings,
3028
3136
  task_id: list[str],
3029
3137
  wdl_options: WDLContext,
3030
3138
  **kwargs: Any,
@@ -3032,6 +3140,11 @@ class WDLTaskWrapperJob(WDLBaseJob):
3032
3140
  """
3033
3141
  Make a new job to determine resources and run a task.
3034
3142
 
3143
+ :param enclosing_bindings: Bindings in the enclosing section,
3144
+ containing files not to clean up. Files that are passed as inputs
3145
+ but not used as outputs or present in the enclosing section
3146
+ bindings will be deleted after the task call completes.
3147
+
3035
3148
  :param namespace: The namespace that the task's *contents* exist in.
3036
3149
  The caller has already added the task's own name.
3037
3150
  """
@@ -3052,6 +3165,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
3052
3165
 
3053
3166
  self._task = task
3054
3167
  self._prev_node_results = prev_node_results
3168
+ self._enclosing_bindings = enclosing_bindings
3055
3169
  self._task_id = task_id
3056
3170
 
3057
3171
  @report_wdl_errors("evaluate task code", exit=True)
@@ -3091,10 +3205,23 @@ class WDLTaskWrapperJob(WDLBaseJob):
3091
3205
  # TODO: What if the same file is passed through several tasks, and
3092
3206
  # we get cache hits on those tasks? Won't we upload it several
3093
3207
  # times?
3208
+
3209
+ # Load output bindings from the cache
3210
+ cached_bindings = virtualize_files(
3211
+ cached_result, standard_library, enforce_existence=False
3212
+ )
3213
+
3214
+ # Throw away anything input but not available outside the call or
3215
+ # output.
3216
+ delete_dead_files(
3217
+ bindings,
3218
+ [cached_bindings, self._enclosing_bindings],
3219
+ file_store
3220
+ )
3221
+
3222
+ # Postprocess and ship the output bindings.
3094
3223
  return self.postprocess(
3095
- virtualize_files(
3096
- cached_result, standard_library, enforce_existence=False
3097
- )
3224
+ cached_bindings
3098
3225
  )
3099
3226
 
3100
3227
  if self._task.inputs:
@@ -3231,6 +3358,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
3231
3358
  virtualize_files(
3232
3359
  runtime_bindings, standard_library, enforce_existence=False
3233
3360
  ),
3361
+ self._enclosing_bindings,
3234
3362
  self._task_id,
3235
3363
  cores=runtime_cores or self.cores,
3236
3364
  memory=runtime_memory or self.memory,
@@ -3266,6 +3394,7 @@ class WDLTaskJob(WDLBaseJob):
3266
3394
  task: WDL.Tree.Task,
3267
3395
  task_internal_bindings: Promised[WDLBindings],
3268
3396
  runtime_bindings: Promised[WDLBindings],
3397
+ enclosing_bindings: WDLBindings,
3269
3398
  task_id: list[str],
3270
3399
  mount_spec: dict[str | None, int],
3271
3400
  wdl_options: WDLContext,
@@ -3275,6 +3404,9 @@ class WDLTaskJob(WDLBaseJob):
3275
3404
  """
3276
3405
  Make a new job to run a task.
3277
3406
 
3407
+ :param enclosing_bindings: Bindings outside the workflow call, with
3408
+ files that should not be cleaned up at the end of the task.
3409
+
3278
3410
  :param namespace: The namespace that the task's *contents* exist in.
3279
3411
  The caller has already added the task's own name.
3280
3412
  """
@@ -3298,6 +3430,7 @@ class WDLTaskJob(WDLBaseJob):
3298
3430
  self._task = task
3299
3431
  self._task_internal_bindings = task_internal_bindings
3300
3432
  self._runtime_bindings = runtime_bindings
3433
+ self._enclosing_bindings = enclosing_bindings
3301
3434
  self._task_id = task_id
3302
3435
  self._cache_key = cache_key
3303
3436
  self._mount_spec = mount_spec
@@ -4056,6 +4189,18 @@ class WDLTaskJob(WDLBaseJob):
4056
4189
  miniwdl_config=miniwdl_config,
4057
4190
  )
4058
4191
 
4192
+ # Clean up anything from the task call input: block or the runtime
4193
+ # section that isn't getting output or available in the enclosing
4194
+ # section. Runtime sections aren't meant to have files, but nothing
4195
+ # actually stops them from being there.
4196
+ delete_dead_files(
4197
+ combine_bindings([bindings, runtime_bindings]),
4198
+ [output_bindings, self._enclosing_bindings],
4199
+ file_store
4200
+ )
4201
+ # If File objects somehow made it to the runtime block they shouldn't
4202
+ # have been virtualized so don't bother with them.
4203
+
4059
4204
  # Do postprocessing steps to e.g. apply namespaces.
4060
4205
  output_bindings = self.postprocess(output_bindings)
4061
4206
 
@@ -4108,7 +4253,8 @@ class WDLWorkflowNodeJob(WDLBaseJob):
4108
4253
  logger.info("Setting %s to %s", self._node.name, self._node.expr)
4109
4254
  value = evaluate_decl(self._node, incoming_bindings, standard_library)
4110
4255
  bindings = incoming_bindings.bind(self._node.name, value)
4111
- return self.postprocess(bindings)
4256
+ # TODO: Only virtualize the new binding
4257
+ return self.postprocess(virtualize_files(bindings, standard_library, enforce_existence=False))
4112
4258
  elif isinstance(self._node, WDL.Tree.Call):
4113
4259
  # This is a call of a task or workflow
4114
4260
 
@@ -4129,6 +4275,8 @@ class WDLWorkflowNodeJob(WDLBaseJob):
4129
4275
  standard_library,
4130
4276
  inputs_mapping,
4131
4277
  )
4278
+ # Prepare call inputs to move to another node
4279
+ input_bindings = virtualize_files(input_bindings, standard_library, enforce_existence=False)
4132
4280
 
4133
4281
  # Bindings may also be added in from the enclosing workflow inputs
4134
4282
  # TODO: this is letting us also inject them from the workflow body.
@@ -4146,6 +4294,7 @@ class WDLWorkflowNodeJob(WDLBaseJob):
4146
4294
  subjob: WDLBaseJob = WDLWorkflowJob(
4147
4295
  self._node.callee,
4148
4296
  [input_bindings, passed_down_bindings],
4297
+ incoming_bindings,
4149
4298
  self._node.callee_id,
4150
4299
  wdl_options=wdl_options,
4151
4300
  local=True,
@@ -4156,6 +4305,7 @@ class WDLWorkflowNodeJob(WDLBaseJob):
4156
4305
  subjob = WDLTaskWrapperJob(
4157
4306
  self._node.callee,
4158
4307
  [input_bindings, passed_down_bindings],
4308
+ incoming_bindings,
4159
4309
  self._node.callee_id,
4160
4310
  wdl_options=wdl_options,
4161
4311
  local=True,
@@ -4257,7 +4407,8 @@ class WDLWorkflowNodeListJob(WDLBaseJob):
4257
4407
  node, "Unimplemented WorkflowNode: " + str(type(node))
4258
4408
  )
4259
4409
 
4260
- return self.postprocess(current_bindings)
4410
+ # TODO: Only virtualize the new bindings created
4411
+ return self.postprocess(virtualize_files(current_bindings, standard_library, enforce_existence=False))
4261
4412
 
4262
4413
 
4263
4414
  class WDLCombineBindingsJob(WDLBaseJob):
@@ -5020,6 +5171,7 @@ class WDLWorkflowJob(WDLSectionJob):
5020
5171
  self,
5021
5172
  workflow: WDL.Tree.Workflow,
5022
5173
  prev_node_results: Sequence[Promised[WDLBindings]],
5174
+ enclosing_bindings: WDLBindings,
5023
5175
  workflow_id: list[str],
5024
5176
  wdl_options: WDLContext,
5025
5177
  **kwargs: Any,
@@ -5028,6 +5180,13 @@ class WDLWorkflowJob(WDLSectionJob):
5028
5180
  Create a subtree that will run a WDL workflow. The job returns the
5029
5181
  return value of the workflow.
5030
5182
 
5183
+ :param prev_node_results: Bindings fed into the workflow call as inputs.
5184
+
5185
+ :param enclosing_bindings: Bindings in the enclosing section,
5186
+ containing files not to clean up. Files that are passed as inputs
5187
+ but not used as outputs or present in the enclosing section
5188
+ bindings will be deleted after the workflow call completes.
5189
+
5031
5190
  :param namespace: the namespace that the workflow's *contents* will be
5032
5191
  in. Caller has already added the workflow's own name.
5033
5192
  """
@@ -5044,6 +5203,7 @@ class WDLWorkflowJob(WDLSectionJob):
5044
5203
 
5045
5204
  self._workflow = workflow
5046
5205
  self._prev_node_results = prev_node_results
5206
+ self._enclosing_bindings = enclosing_bindings
5047
5207
  self._workflow_id = workflow_id
5048
5208
 
5049
5209
  @report_wdl_errors("run workflow")
@@ -5095,11 +5255,13 @@ class WDLWorkflowJob(WDLSectionJob):
5095
5255
  # Make jobs to run all the parts of the workflow
5096
5256
  sink = self.create_subgraph(self._workflow.body, [], bindings)
5097
5257
 
5098
- # To support the all call outputs feature, run an outputs job even if
5099
- # we have a declared but empty outputs section.
5258
+ # To support the all call outputs feature and cleanup of files created
5259
+ # in input: blocks, run an outputs job even if we have a declared but
5260
+ # empty outputs section.
5100
5261
  outputs_job = WDLOutputsJob(
5101
5262
  self._workflow,
5102
5263
  sink.rv(),
5264
+ self._enclosing_bindings,
5103
5265
  wdl_options=self._wdl_options,
5104
5266
  cache_key=cache_key,
5105
5267
  local=True,
@@ -5121,6 +5283,7 @@ class WDLOutputsJob(WDLBaseJob):
5121
5283
  self,
5122
5284
  workflow: WDL.Tree.Workflow,
5123
5285
  bindings: Promised[WDLBindings],
5286
+ enclosing_bindings: WDLBindings,
5124
5287
  wdl_options: WDLContext,
5125
5288
  cache_key: str | None = None,
5126
5289
  **kwargs: Any,
@@ -5128,6 +5291,11 @@ class WDLOutputsJob(WDLBaseJob):
5128
5291
  """
5129
5292
  Make a new WDLWorkflowOutputsJob for the given workflow, with the given set of bindings after its body runs.
5130
5293
 
5294
+ :param bindings: Bindings after execution of the workflow body.
5295
+
5296
+ :param enclosing_bindings: Bindings outside the workflow call, with
5297
+ files that should not be cleaned up at the end of the workflow.
5298
+
5131
5299
  :param cache_key: If set and storing into the call cache is on, will
5132
5300
  cache the workflow execution result under the given key in a
5133
5301
  MiniWDL-compatible way.
@@ -5135,6 +5303,7 @@ class WDLOutputsJob(WDLBaseJob):
5135
5303
  super().__init__(wdl_options=wdl_options, **kwargs)
5136
5304
 
5137
5305
  self._bindings = bindings
5306
+ self._enclosing_bindings = enclosing_bindings
5138
5307
  self._workflow = workflow
5139
5308
  self._cache_key = cache_key
5140
5309
 
@@ -5227,8 +5396,15 @@ class WDLOutputsJob(WDLBaseJob):
5227
5396
  self._cache_key, output_bindings, file_store, self._wdl_options
5228
5397
  )
5229
5398
 
5230
- return self.postprocess(output_bindings)
5399
+ # Let Files that are not output or available outside the call go out of
5400
+ # scope.
5401
+ delete_dead_files(
5402
+ unwrap(self._bindings),
5403
+ [output_bindings, self._enclosing_bindings],
5404
+ file_store
5405
+ )
5231
5406
 
5407
+ return self.postprocess(output_bindings)
5232
5408
 
5233
5409
  class WDLStartJob(WDLSectionJob):
5234
5410
  """
@@ -5263,18 +5439,24 @@ class WDLStartJob(WDLSectionJob):
5263
5439
  if isinstance(self._target, WDL.Tree.Workflow):
5264
5440
  # Create a workflow job. We rely on this to handle entering the input
5265
5441
  # namespace if needed, or handling free-floating inputs.
5442
+ # Pass top-level inputs as enclosing section inputs to avoid
5443
+ # bothering to separately delete them.
5266
5444
  job: WDLBaseJob = WDLWorkflowJob(
5267
5445
  self._target,
5268
5446
  [inputs],
5447
+ inputs,
5269
5448
  [self._target.name],
5270
5449
  wdl_options=self._wdl_options,
5271
5450
  local=True,
5272
5451
  )
5273
5452
  else:
5274
5453
  # There is no workflow. Create a task job.
5454
+ # Pass top-level inputs as enclosing section inputs to avoid
5455
+ # bothering to separately delete them.
5275
5456
  job = WDLTaskWrapperJob(
5276
5457
  self._target,
5277
5458
  [inputs],
5459
+ inputs,
5278
5460
  [self._target.name],
5279
5461
  wdl_options=self._wdl_options,
5280
5462
  local=True,
@@ -5348,7 +5530,7 @@ class WDLImportWrapper(WDLSectionJob):
5348
5530
  self._import_workers_disk = import_workers_disk
5349
5531
 
5350
5532
  def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
5351
- filenames = extract_workflow_inputs(self._inputs)
5533
+ filenames = extract_file_values(self._inputs)
5352
5534
  file_to_data = get_file_sizes(
5353
5535
  filenames,
5354
5536
  file_store.jobStore,
@@ -5445,61 +5627,105 @@ def main() -> None:
5445
5627
  wdl_uri, trs_spec = resolve_workflow(options.wdl_uri, supported_languages={"WDL"})
5446
5628
 
5447
5629
  with Toil(options, workflow_name=trs_spec or wdl_uri, trs_spec=trs_spec) as toil:
5448
- if options.restart:
5449
- output_bindings = toil.restart()
5630
+ # TODO: Move all the input parsing outside the Toil context
5631
+ # manager to avoid leaving a job store behind if the workflow
5632
+ # can't start.
5633
+
5634
+ # Both start and restart need us to have the workflow and the
5635
+ # wdl_options WDLContext.
5636
+
5637
+ # MiniWDL load code internally uses asyncio.get_event_loop()
5638
+ # which might not get an event loop if somebody has ever called
5639
+ # set_event_loop. So we need to make sure an event loop is
5640
+ # available.
5641
+ asyncio.set_event_loop(asyncio.new_event_loop())
5642
+
5643
+ # Load the WDL document.
5644
+ document: WDL.Tree.Document = WDL.load(
5645
+ wdl_uri,
5646
+ read_source=toil_read_source,
5647
+ )
5648
+
5649
+ # See if we're going to run a workflow or a task
5650
+ target: WDL.Tree.Workflow | WDL.Tree.Task
5651
+ if document.workflow:
5652
+ target = document.workflow
5653
+ elif len(document.tasks) == 1:
5654
+ target = document.tasks[0]
5655
+ elif len(document.tasks) > 1:
5656
+ raise WDL.Error.InputError(
5657
+ "Multiple tasks found with no workflow! Either add a workflow or keep one task."
5658
+ )
5450
5659
  else:
5451
- # TODO: Move all the input parsing outside the Toil context
5452
- # manager to avoid leaving a job store behind if the workflow
5453
- # can't start.
5454
-
5455
- # MiniWDL load code internally uses asyncio.get_event_loop()
5456
- # which might not get an event loop if somebody has ever called
5457
- # set_event_loop. So we need to make sure an event loop is
5458
- # available.
5459
- asyncio.set_event_loop(asyncio.new_event_loop())
5460
-
5461
- # Load the WDL document.
5462
- document: WDL.Tree.Document = WDL.load(
5463
- wdl_uri,
5464
- read_source=toil_read_source,
5660
+ raise WDL.Error.InputError("WDL document is empty!")
5661
+
5662
+ if "croo_out_def" in target.meta:
5663
+ # This workflow or task wants to have its outputs
5664
+ # "organized" by the Cromwell Output Organizer:
5665
+ # <https://github.com/ENCODE-DCC/croo>.
5666
+ #
5667
+ # TODO: We don't support generating anything that CROO can read.
5668
+ logger.warning(
5669
+ "This WDL expects to be used with the Cromwell Output Organizer (croo) <https://github.com/ENCODE-DCC/croo>. Toil cannot yet produce the outputs that croo requires. You will not be able to use croo on the output of this Toil run!"
5465
5670
  )
5466
5671
 
5467
- # See if we're going to run a workflow or a task
5468
- target: WDL.Tree.Workflow | WDL.Tree.Task
5469
- if document.workflow:
5470
- target = document.workflow
5471
- elif len(document.tasks) == 1:
5472
- target = document.tasks[0]
5473
- elif len(document.tasks) > 1:
5474
- raise WDL.Error.InputError(
5475
- "Multiple tasks found with no workflow! Either add a workflow or keep one task."
5476
- )
5477
- else:
5478
- raise WDL.Error.InputError("WDL document is empty!")
5479
-
5480
- if "croo_out_def" in target.meta:
5481
- # This workflow or task wants to have its outputs
5482
- # "organized" by the Cromwell Output Organizer:
5483
- # <https://github.com/ENCODE-DCC/croo>.
5484
- #
5485
- # TODO: We don't support generating anything that CROO can read.
5672
+ # But we can assume that we need to preserve individual
5673
+ # taks outputs since the point of CROO is fetching those
5674
+ # from Cromwell's output directories.
5675
+ #
5676
+ # This isn't quite WDL spec compliant but it will rescue
5677
+ # runs of the popular
5678
+ # <https://github.com/ENCODE-DCC/atac-seq-pipeline>
5679
+ if options.all_call_outputs is None:
5486
5680
  logger.warning(
5487
- "This WDL expects to be used with the Cromwell Output Organizer (croo) <https://github.com/ENCODE-DCC/croo>. Toil cannot yet produce the outputs that croo requires. You will not be able to use croo on the output of this Toil run!"
5681
+ "Inferring --allCallOutputs=True to preserve probable actual outputs of a croo WDL file."
5488
5682
  )
5683
+ options.all_call_outputs = True
5684
+
5685
+ # This mutates document to add linting information, but doesn't print any lint errors itself
5686
+ # or stop the workflow
5687
+ WDL.Lint.lint(document)
5688
+
5689
+ # We use a mutable variable and a generic file pointer to capture information about lint warnings
5690
+ # Both will be populated inside outline()
5691
+ lint_warnings_counter = [0]
5692
+ lint_warnings_io = io.StringIO()
5693
+ outline(
5694
+ document,
5695
+ 0,
5696
+ file=lint_warnings_io,
5697
+ show_called=(document.workflow is not None),
5698
+ shown=lint_warnings_counter,
5699
+ ) # type: ignore[no-untyped-call]
5700
+
5701
+ if getattr(WDL.Lint, "_shellcheck_available", None) is False:
5702
+ logger.info("Suggestion: install shellcheck (www.shellcheck.net) to check task commands")
5703
+
5704
+ if lint_warnings_counter[0]:
5705
+ logger.warning('Workflow lint warnings:\n%s', lint_warnings_io.getvalue().rstrip())
5706
+ if options.strict:
5707
+ logger.critical(f'Workflow did not pass linting in strict mode')
5708
+ # MiniWDL uses exit code 2 to indicate linting errors, so replicate that behavior
5709
+ sys.exit(2)
5710
+
5711
+ # Get the execution directory
5712
+ execution_dir = os.getcwd()
5713
+
5714
+ # Configure workflow interpreter options.
5715
+ # TODO: Would be nice to somehow be able to change some of these on
5716
+ # restart. For now we assume we are computing the same values.
5717
+ wdl_options: WDLContext = {
5718
+ "execution_dir": execution_dir,
5719
+ "container": options.container,
5720
+ "task_path": target.name,
5721
+ "namespace": target.name,
5722
+ "all_call_outputs": options.all_call_outputs,
5723
+ }
5724
+ assert wdl_options.get("container") is not None
5489
5725
 
5490
- # But we can assume that we need to preserve individual
5491
- # task outputs since the point of CROO is fetching those
5492
- # from Cromwell's output directories.
5493
- #
5494
- # This isn't quite WDL spec compliant but it will rescue
5495
- # runs of the popular
5496
- # <https://github.com/ENCODE-DCC/atac-seq-pipeline>
5497
- if options.all_call_outputs is None:
5498
- logger.warning(
5499
- "Inferring --allCallOutputs=True to preserve probable actual outputs of a croo WDL file."
5500
- )
5501
- options.all_call_outputs = True
5502
-
5726
+ if options.restart:
5727
+ output_bindings = toil.restart()
5728
+ else:
5503
5729
  # If our input really comes from a URI or path, remember it.
5504
5730
  input_source_uri = None
5505
5731
  # Also remember where we need to report JSON parse errors as
@@ -5592,19 +5818,6 @@ def main() -> None:
5592
5818
 
5593
5819
  # TODO: Automatically set a good MINIWDL__SINGULARITY__IMAGE_CACHE ?
5594
5820
 
5595
- # Get the execution directory
5596
- execution_dir = os.getcwd()
5597
-
5598
- # Configure workflow interpreter options
5599
- wdl_options: WDLContext = {
5600
- "execution_dir": execution_dir,
5601
- "container": options.container,
5602
- "task_path": target.name,
5603
- "namespace": target.name,
5604
- "all_call_outputs": options.all_call_outputs,
5605
- }
5606
- assert wdl_options.get("container") is not None
5607
-
5608
5821
  # Run the workflow and get its outputs namespaced with the workflow name.
5609
5822
  root_job = make_root_job(
5610
5823
  target,