toil 8.1.0b1__py3-none-any.whl → 8.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +0 -35
- toil/batchSystems/abstractBatchSystem.py +1 -1
- toil/batchSystems/abstractGridEngineBatchSystem.py +1 -1
- toil/batchSystems/awsBatch.py +1 -1
- toil/batchSystems/cleanup_support.py +1 -1
- toil/batchSystems/kubernetes.py +53 -7
- toil/batchSystems/local_support.py +1 -1
- toil/batchSystems/mesos/batchSystem.py +13 -8
- toil/batchSystems/mesos/test/__init__.py +3 -2
- toil/batchSystems/singleMachine.py +1 -1
- toil/batchSystems/slurm.py +27 -26
- toil/bus.py +5 -3
- toil/common.py +39 -11
- toil/cwl/cwltoil.py +1 -1
- toil/job.py +64 -49
- toil/jobStores/abstractJobStore.py +24 -3
- toil/jobStores/fileJobStore.py +25 -1
- toil/jobStores/googleJobStore.py +104 -30
- toil/leader.py +9 -0
- toil/lib/accelerators.py +3 -1
- toil/lib/aws/utils.py.orig +504 -0
- toil/lib/bioio.py +1 -1
- toil/lib/docker.py +252 -91
- toil/lib/dockstore.py +11 -3
- toil/lib/exceptions.py +5 -3
- toil/lib/history.py +87 -13
- toil/lib/history_submission.py +23 -9
- toil/lib/io.py +34 -22
- toil/lib/misc.py +7 -1
- toil/lib/resources.py +2 -1
- toil/lib/threading.py +11 -10
- toil/options/common.py +8 -0
- toil/options/wdl.py +11 -0
- toil/server/api_spec/LICENSE +201 -0
- toil/server/api_spec/README.rst +5 -0
- toil/server/cli/wes_cwl_runner.py +2 -1
- toil/test/__init__.py +275 -115
- toil/test/batchSystems/batchSystemTest.py +227 -205
- toil/test/batchSystems/test_slurm.py +27 -0
- toil/test/cactus/pestis.tar.gz +0 -0
- toil/test/conftest.py +7 -0
- toil/test/cwl/2.fasta +11 -0
- toil/test/cwl/2.fastq +12 -0
- toil/test/cwl/conftest.py +1 -1
- toil/test/cwl/cwlTest.py +999 -867
- toil/test/cwl/directory/directory/file.txt +15 -0
- toil/test/cwl/download_directory_file.json +4 -0
- toil/test/cwl/download_directory_s3.json +4 -0
- toil/test/cwl/download_file.json +6 -0
- toil/test/cwl/download_http.json +6 -0
- toil/test/cwl/download_https.json +6 -0
- toil/test/cwl/download_s3.json +6 -0
- toil/test/cwl/download_subdirectory_file.json +5 -0
- toil/test/cwl/download_subdirectory_s3.json +5 -0
- toil/test/cwl/empty.json +1 -0
- toil/test/cwl/mock_mpi/fake_mpi.yml +8 -0
- toil/test/cwl/mock_mpi/fake_mpi_run.py +42 -0
- toil/test/cwl/optional-file-exists.json +6 -0
- toil/test/cwl/optional-file-missing.json +6 -0
- toil/test/cwl/preemptible_expression.json +1 -0
- toil/test/cwl/revsort-job-missing.json +6 -0
- toil/test/cwl/revsort-job.json +6 -0
- toil/test/cwl/s3_secondary_file.json +16 -0
- toil/test/cwl/seqtk_seq_job.json +6 -0
- toil/test/cwl/stream.json +6 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.dat +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f1 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f1i +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f2 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f2_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f3 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f3_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f4 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f4_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f5 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.info +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.lock +0 -0
- toil/test/cwl/whale.txt +16 -0
- toil/test/docs/scripts/example_alwaysfail.py +38 -0
- toil/test/docs/scripts/example_alwaysfail_with_files.wdl +33 -0
- toil/test/docs/scripts/example_cachingbenchmark.py +117 -0
- toil/test/docs/scripts/stagingExampleFiles/in.txt +1 -0
- toil/test/docs/scripts/stagingExampleFiles/out.txt +2 -0
- toil/test/docs/scripts/tutorial_arguments.py +23 -0
- toil/test/docs/scripts/tutorial_debugging.patch +12 -0
- toil/test/docs/scripts/tutorial_debugging_hangs.wdl +126 -0
- toil/test/docs/scripts/tutorial_debugging_works.wdl +129 -0
- toil/test/docs/scripts/tutorial_docker.py +20 -0
- toil/test/docs/scripts/tutorial_dynamic.py +24 -0
- toil/test/docs/scripts/tutorial_encapsulation.py +28 -0
- toil/test/docs/scripts/tutorial_encapsulation2.py +29 -0
- toil/test/docs/scripts/tutorial_helloworld.py +15 -0
- toil/test/docs/scripts/tutorial_invokeworkflow.py +27 -0
- toil/test/docs/scripts/tutorial_invokeworkflow2.py +30 -0
- toil/test/docs/scripts/tutorial_jobfunctions.py +22 -0
- toil/test/docs/scripts/tutorial_managing.py +29 -0
- toil/test/docs/scripts/tutorial_managing2.py +56 -0
- toil/test/docs/scripts/tutorial_multiplejobs.py +25 -0
- toil/test/docs/scripts/tutorial_multiplejobs2.py +21 -0
- toil/test/docs/scripts/tutorial_multiplejobs3.py +22 -0
- toil/test/docs/scripts/tutorial_promises.py +25 -0
- toil/test/docs/scripts/tutorial_promises2.py +30 -0
- toil/test/docs/scripts/tutorial_quickstart.py +22 -0
- toil/test/docs/scripts/tutorial_requirements.py +44 -0
- toil/test/docs/scripts/tutorial_services.py +45 -0
- toil/test/docs/scripts/tutorial_staging.py +45 -0
- toil/test/docs/scripts/tutorial_stats.py +64 -0
- toil/test/lib/aws/test_iam.py +3 -1
- toil/test/lib/dockerTest.py +205 -122
- toil/test/lib/test_history.py +101 -77
- toil/test/provisioners/aws/awsProvisionerTest.py +12 -9
- toil/test/provisioners/clusterTest.py +4 -4
- toil/test/provisioners/gceProvisionerTest.py +16 -14
- toil/test/sort/sort.py +4 -1
- toil/test/src/busTest.py +17 -17
- toil/test/src/deferredFunctionTest.py +145 -132
- toil/test/src/importExportFileTest.py +71 -63
- toil/test/src/jobEncapsulationTest.py +27 -28
- toil/test/src/jobServiceTest.py +149 -133
- toil/test/src/jobTest.py +219 -211
- toil/test/src/miscTests.py +66 -60
- toil/test/src/promisedRequirementTest.py +163 -169
- toil/test/src/regularLogTest.py +24 -24
- toil/test/src/resourceTest.py +82 -76
- toil/test/src/restartDAGTest.py +51 -47
- toil/test/src/resumabilityTest.py +24 -19
- toil/test/src/retainTempDirTest.py +60 -57
- toil/test/src/systemTest.py +17 -13
- toil/test/src/threadingTest.py +29 -32
- toil/test/utils/ABCWorkflowDebug/B_file.txt +1 -0
- toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +204 -0
- toil/test/utils/ABCWorkflowDebug/mkFile.py +16 -0
- toil/test/utils/ABCWorkflowDebug/sleep.cwl +12 -0
- toil/test/utils/ABCWorkflowDebug/sleep.yaml +1 -0
- toil/test/utils/toilDebugTest.py +117 -102
- toil/test/utils/toilKillTest.py +54 -53
- toil/test/utils/utilsTest.py +303 -229
- toil/test/wdl/lint_error.wdl +9 -0
- toil/test/wdl/md5sum/empty_file.json +1 -0
- toil/test/wdl/md5sum/md5sum-gs.json +1 -0
- toil/test/wdl/md5sum/md5sum.1.0.wdl +32 -0
- toil/test/wdl/md5sum/md5sum.input +1 -0
- toil/test/wdl/md5sum/md5sum.json +1 -0
- toil/test/wdl/md5sum/md5sum.wdl +25 -0
- toil/test/wdl/miniwdl_self_test/inputs-namespaced.json +1 -0
- toil/test/wdl/miniwdl_self_test/inputs.json +1 -0
- toil/test/wdl/miniwdl_self_test/self_test.wdl +40 -0
- toil/test/wdl/standard_library/as_map.json +16 -0
- toil/test/wdl/standard_library/as_map_as_input.wdl +23 -0
- toil/test/wdl/standard_library/as_pairs.json +7 -0
- toil/test/wdl/standard_library/as_pairs_as_input.wdl +23 -0
- toil/test/wdl/standard_library/ceil.json +3 -0
- toil/test/wdl/standard_library/ceil_as_command.wdl +16 -0
- toil/test/wdl/standard_library/ceil_as_input.wdl +16 -0
- toil/test/wdl/standard_library/collect_by_key.json +1 -0
- toil/test/wdl/standard_library/collect_by_key_as_input.wdl +23 -0
- toil/test/wdl/standard_library/cross.json +11 -0
- toil/test/wdl/standard_library/cross_as_input.wdl +19 -0
- toil/test/wdl/standard_library/flatten.json +7 -0
- toil/test/wdl/standard_library/flatten_as_input.wdl +18 -0
- toil/test/wdl/standard_library/floor.json +3 -0
- toil/test/wdl/standard_library/floor_as_command.wdl +16 -0
- toil/test/wdl/standard_library/floor_as_input.wdl +16 -0
- toil/test/wdl/standard_library/keys.json +8 -0
- toil/test/wdl/standard_library/keys_as_input.wdl +24 -0
- toil/test/wdl/standard_library/length.json +7 -0
- toil/test/wdl/standard_library/length_as_input.wdl +16 -0
- toil/test/wdl/standard_library/length_as_input_with_map.json +7 -0
- toil/test/wdl/standard_library/length_as_input_with_map.wdl +17 -0
- toil/test/wdl/standard_library/length_invalid.json +3 -0
- toil/test/wdl/standard_library/range.json +3 -0
- toil/test/wdl/standard_library/range_0.json +3 -0
- toil/test/wdl/standard_library/range_as_input.wdl +17 -0
- toil/test/wdl/standard_library/range_invalid.json +3 -0
- toil/test/wdl/standard_library/read_boolean.json +3 -0
- toil/test/wdl/standard_library/read_boolean_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_float.json +3 -0
- toil/test/wdl/standard_library/read_float_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_int.json +3 -0
- toil/test/wdl/standard_library/read_int_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_json.json +3 -0
- toil/test/wdl/standard_library/read_json_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_lines.json +3 -0
- toil/test/wdl/standard_library/read_lines_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_map.json +3 -0
- toil/test/wdl/standard_library/read_map_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_string.json +3 -0
- toil/test/wdl/standard_library/read_string_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_tsv.json +3 -0
- toil/test/wdl/standard_library/read_tsv_as_output.wdl +31 -0
- toil/test/wdl/standard_library/round.json +3 -0
- toil/test/wdl/standard_library/round_as_command.wdl +16 -0
- toil/test/wdl/standard_library/round_as_input.wdl +16 -0
- toil/test/wdl/standard_library/size.json +3 -0
- toil/test/wdl/standard_library/size_as_command.wdl +17 -0
- toil/test/wdl/standard_library/size_as_output.wdl +36 -0
- toil/test/wdl/standard_library/stderr.json +3 -0
- toil/test/wdl/standard_library/stderr_as_output.wdl +30 -0
- toil/test/wdl/standard_library/stdout.json +3 -0
- toil/test/wdl/standard_library/stdout_as_output.wdl +30 -0
- toil/test/wdl/standard_library/sub.json +3 -0
- toil/test/wdl/standard_library/sub_as_input.wdl +17 -0
- toil/test/wdl/standard_library/sub_as_input_with_file.wdl +17 -0
- toil/test/wdl/standard_library/transpose.json +6 -0
- toil/test/wdl/standard_library/transpose_as_input.wdl +18 -0
- toil/test/wdl/standard_library/write_json.json +6 -0
- toil/test/wdl/standard_library/write_json_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_lines.json +7 -0
- toil/test/wdl/standard_library/write_lines_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_map.json +6 -0
- toil/test/wdl/standard_library/write_map_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_tsv.json +6 -0
- toil/test/wdl/standard_library/write_tsv_as_command.wdl +17 -0
- toil/test/wdl/standard_library/zip.json +12 -0
- toil/test/wdl/standard_library/zip_as_input.wdl +19 -0
- toil/test/wdl/test.csv +3 -0
- toil/test/wdl/test.tsv +3 -0
- toil/test/wdl/testfiles/croo.wdl +38 -0
- toil/test/wdl/testfiles/drop_files.wdl +62 -0
- toil/test/wdl/testfiles/drop_files_subworkflow.wdl +13 -0
- toil/test/wdl/testfiles/empty.txt +0 -0
- toil/test/wdl/testfiles/not_enough_outputs.wdl +33 -0
- toil/test/wdl/testfiles/random.wdl +66 -0
- toil/test/wdl/testfiles/string_file_coercion.json +1 -0
- toil/test/wdl/testfiles/string_file_coercion.wdl +35 -0
- toil/test/wdl/testfiles/test.json +4 -0
- toil/test/wdl/testfiles/test_boolean.txt +1 -0
- toil/test/wdl/testfiles/test_float.txt +1 -0
- toil/test/wdl/testfiles/test_int.txt +1 -0
- toil/test/wdl/testfiles/test_lines.txt +5 -0
- toil/test/wdl/testfiles/test_map.txt +2 -0
- toil/test/wdl/testfiles/test_string.txt +1 -0
- toil/test/wdl/testfiles/url_to_file.wdl +13 -0
- toil/test/wdl/testfiles/url_to_optional_file.wdl +13 -0
- toil/test/wdl/testfiles/vocab.json +1 -0
- toil/test/wdl/testfiles/vocab.wdl +66 -0
- toil/test/wdl/testfiles/wait.wdl +34 -0
- toil/test/wdl/wdl_specification/type_pair.json +23 -0
- toil/test/wdl/wdl_specification/type_pair_basic.wdl +36 -0
- toil/test/wdl/wdl_specification/type_pair_with_files.wdl +36 -0
- toil/test/wdl/wdl_specification/v1_spec.json +1 -0
- toil/test/wdl/wdl_specification/v1_spec_declaration.wdl +39 -0
- toil/test/wdl/wdltoil_test.py +680 -407
- toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
- toil/version.py +9 -9
- toil/wdl/wdltoil.py +336 -123
- {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/METADATA +5 -4
- toil-8.2.0.dist-info/RECORD +439 -0
- {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/WHEEL +1 -1
- toil-8.1.0b1.dist-info/RECORD +0 -259
- {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/entry_points.txt +0 -0
- {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info/licenses}/LICENSE +0 -0
- {toil-8.1.0b1.dist-info → toil-8.2.0.dist-info}/top_level.txt +0 -0
toil/wdl/wdltoil.py
CHANGED
|
@@ -62,13 +62,14 @@ else:
|
|
|
62
62
|
|
|
63
63
|
from functools import partial
|
|
64
64
|
from urllib.error import HTTPError
|
|
65
|
-
from urllib.parse import quote, unquote, urljoin, urlsplit
|
|
65
|
+
from urllib.parse import quote, unquote, urljoin, urlsplit
|
|
66
66
|
|
|
67
67
|
import WDL.Error
|
|
68
68
|
import WDL.runtime.config
|
|
69
69
|
from configargparse import ArgParser, Namespace
|
|
70
70
|
from WDL._util import byte_size_units, chmod_R_plus
|
|
71
|
-
from WDL.CLI import print_error
|
|
71
|
+
from WDL.CLI import print_error, outline
|
|
72
|
+
import WDL.Lint
|
|
72
73
|
from WDL.runtime.backend.docker_swarm import SwarmContainer
|
|
73
74
|
from WDL.runtime.backend.singularity import SingularityContainer
|
|
74
75
|
from WDL.runtime.error import DownloadFailed
|
|
@@ -917,8 +918,8 @@ def set_shared_fs_path(file: WDL.Value.File, path: str) -> WDL.Value.File:
|
|
|
917
918
|
|
|
918
919
|
|
|
919
920
|
def view_shared_fs_paths(
|
|
920
|
-
bindings:
|
|
921
|
-
) ->
|
|
921
|
+
bindings: WDLBindings,
|
|
922
|
+
) -> WDLBindings:
|
|
922
923
|
"""
|
|
923
924
|
Given WDL bindings, return a copy where all files have their shared filesystem paths as their values.
|
|
924
925
|
"""
|
|
@@ -1137,11 +1138,11 @@ def choose_human_readable_directory(
|
|
|
1137
1138
|
|
|
1138
1139
|
def evaluate_decls_to_bindings(
|
|
1139
1140
|
decls: list[WDL.Tree.Decl],
|
|
1140
|
-
all_bindings:
|
|
1141
|
+
all_bindings: WDLBindings,
|
|
1141
1142
|
standard_library: ToilWDLStdLibBase,
|
|
1142
1143
|
include_previous: bool = False,
|
|
1143
1144
|
drop_missing_files: bool = False,
|
|
1144
|
-
) ->
|
|
1145
|
+
) -> WDLBindings:
|
|
1145
1146
|
"""
|
|
1146
1147
|
Evaluate decls with a given bindings environment and standard library.
|
|
1147
1148
|
Creates a new bindings object that only contains the bindings from the given decls.
|
|
@@ -1156,7 +1157,7 @@ def evaluate_decls_to_bindings(
|
|
|
1156
1157
|
"""
|
|
1157
1158
|
# all_bindings contains current bindings + previous all_bindings
|
|
1158
1159
|
# bindings only contains the decl bindings themselves so that bindings from other sections prior aren't included
|
|
1159
|
-
bindings:
|
|
1160
|
+
bindings: WDLBindings = WDL.Env.Bindings()
|
|
1160
1161
|
drop_if_missing_with_workdir = partial(
|
|
1161
1162
|
drop_if_missing, standard_library=standard_library
|
|
1162
1163
|
)
|
|
@@ -1245,7 +1246,10 @@ class NonDownloadingSize(WDL.StdLib._Size):
|
|
|
1245
1246
|
return WDL.Value.Float(total_size)
|
|
1246
1247
|
|
|
1247
1248
|
|
|
1248
|
-
def
|
|
1249
|
+
def extract_file_values(environment: WDLBindings) -> list[str]:
|
|
1250
|
+
"""
|
|
1251
|
+
Get a list of all File object values in the given bindings.
|
|
1252
|
+
"""
|
|
1249
1253
|
filenames = list()
|
|
1250
1254
|
|
|
1251
1255
|
def add_filename(file: WDL.Value.File) -> WDL.Value.File:
|
|
@@ -1255,6 +1259,22 @@ def extract_workflow_inputs(environment: WDLBindings) -> list[str]:
|
|
|
1255
1259
|
map_over_files_in_bindings(environment, add_filename)
|
|
1256
1260
|
return filenames
|
|
1257
1261
|
|
|
1262
|
+
def extract_file_virtualized_values(environment: WDLBindings) -> list[str]:
|
|
1263
|
+
"""
|
|
1264
|
+
Get a list of all File object virtualized values in the given bindings.
|
|
1265
|
+
|
|
1266
|
+
If a file hasn't been virtualized, it won't contribute to the list.
|
|
1267
|
+
"""
|
|
1268
|
+
values = list()
|
|
1269
|
+
|
|
1270
|
+
def add_value(file: WDL.Value.File) -> WDL.Value.File:
|
|
1271
|
+
value = get_file_virtualized_value(file)
|
|
1272
|
+
if value is not None:
|
|
1273
|
+
values.append(value)
|
|
1274
|
+
return file
|
|
1275
|
+
|
|
1276
|
+
map_over_files_in_bindings(environment, add_value)
|
|
1277
|
+
return values
|
|
1258
1278
|
|
|
1259
1279
|
def convert_files(
|
|
1260
1280
|
environment: WDLBindings,
|
|
@@ -1263,19 +1283,21 @@ def convert_files(
|
|
|
1263
1283
|
task_path: str,
|
|
1264
1284
|
) -> WDLBindings:
|
|
1265
1285
|
"""
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
Will return bindings with file values set to their corresponding relative-URI.
|
|
1286
|
+
Fill in the virtualized_value fields for File objects in a WDL environment.
|
|
1269
1287
|
|
|
1270
|
-
:param environment: Bindings to evaluate on
|
|
1271
|
-
:
|
|
1288
|
+
:param environment: Bindings to evaluate on. Will not be modified.
|
|
1289
|
+
:param file_to_id: Maps from imported URI to Toil FileID with the data.
|
|
1290
|
+
:param file_to_data: Maps from WDL-level file value to metadata about the
|
|
1291
|
+
file, including URI that would have been imported.
|
|
1292
|
+
:return: new bindings object with the annotated File objects in it.
|
|
1272
1293
|
"""
|
|
1273
1294
|
dir_ids = {t[1] for t in file_to_data.values()}
|
|
1274
1295
|
dir_to_id = {k: uuid.uuid4() for k in dir_ids}
|
|
1275
1296
|
|
|
1276
1297
|
def convert_file_to_uri(file: WDL.Value.File) -> WDL.Value.File:
|
|
1277
1298
|
"""
|
|
1278
|
-
|
|
1299
|
+
Produce a WDL File with the virtualized_value set to the Toil URI for
|
|
1300
|
+
the already-imported data, but the same value.
|
|
1279
1301
|
"""
|
|
1280
1302
|
candidate_uri = file_to_data[file.value][0]
|
|
1281
1303
|
file_id = file_to_id[candidate_uri]
|
|
@@ -1638,32 +1660,35 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1638
1660
|
logger.debug("File has no virtualized value so not changing value")
|
|
1639
1661
|
return file
|
|
1640
1662
|
|
|
1663
|
+
def _resolve_devirtualized_to_uri(self, devirtualized: str) -> str:
|
|
1664
|
+
"""
|
|
1665
|
+
Get a URI pointing to whatever URI or devirtualized file path is provided.
|
|
1666
|
+
|
|
1667
|
+
Handles resolving symlinks using in-container paths if necessary.
|
|
1668
|
+
"""
|
|
1669
|
+
|
|
1670
|
+
return Toil.normalize_uri(devirtualized, dir_path=self.execution_dir)
|
|
1671
|
+
|
|
1641
1672
|
def _virtualize_file(
|
|
1642
1673
|
self, file: WDL.Value.File, enforce_existence: bool = True
|
|
1643
1674
|
) -> WDL.Value.File:
|
|
1644
|
-
logger.debug("Virtualizing %s", file)
|
|
1645
|
-
# If enforce_existence is true, then if a file is detected as nonexistent, raise an error. Else, let it pass through
|
|
1646
1675
|
if get_file_virtualized_value(file) is not None:
|
|
1647
|
-
|
|
1676
|
+
# Already virtualized
|
|
1648
1677
|
return file
|
|
1649
1678
|
|
|
1650
|
-
|
|
1651
|
-
# We only want to error on a nonexistent file in the output section
|
|
1652
|
-
# Since we need to virtualize on task boundaries, don't enforce existence if on a boundary
|
|
1653
|
-
if is_standard_url(file.value):
|
|
1654
|
-
file_uri = Toil.normalize_uri(file.value)
|
|
1655
|
-
else:
|
|
1656
|
-
abs_filepath = (
|
|
1657
|
-
os.path.join(self.execution_dir, file.value)
|
|
1658
|
-
if self.execution_dir is not None
|
|
1659
|
-
else os.path.abspath(file.value)
|
|
1660
|
-
)
|
|
1661
|
-
file_uri = Toil.normalize_uri(abs_filepath)
|
|
1679
|
+
logger.debug("Virtualizing %s", file)
|
|
1662
1680
|
|
|
1663
|
-
|
|
1681
|
+
try:
|
|
1682
|
+
# Let the actual virtualization implementation signal a missing file
|
|
1683
|
+
virtualized_filename = self._virtualize_filename(file.value)
|
|
1684
|
+
except FileNotFoundError:
|
|
1685
|
+
if enforce_existence:
|
|
1686
|
+
raise
|
|
1687
|
+
else:
|
|
1664
1688
|
logger.debug("File appears nonexistent so marking it nonexistent")
|
|
1689
|
+
# Mark the file nonexistent.
|
|
1665
1690
|
return set_file_nonexistent(file, True)
|
|
1666
|
-
|
|
1691
|
+
|
|
1667
1692
|
logger.debug(
|
|
1668
1693
|
"For file %s got virtualized filename %s", file, virtualized_filename
|
|
1669
1694
|
)
|
|
@@ -1846,9 +1871,12 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1846
1871
|
@memoize
|
|
1847
1872
|
def _virtualize_filename(self, filename: str) -> str:
|
|
1848
1873
|
"""
|
|
1849
|
-
from a local path
|
|
1874
|
+
from a local path or other URL, 'virtualize' into the filename as it should present in a File value.
|
|
1875
|
+
|
|
1876
|
+
New in Toil: the path or URL may not actually exist.
|
|
1850
1877
|
|
|
1851
1878
|
:param filename: Can be a local file path, URL (http, https, s3, gs), or toilfile
|
|
1879
|
+
:raises FileNotFoundError: if the file doesn't actually exist (new addition in Toil over MiniWDL)
|
|
1852
1880
|
"""
|
|
1853
1881
|
|
|
1854
1882
|
if is_toil_url(filename):
|
|
@@ -1868,7 +1896,9 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1868
1896
|
try:
|
|
1869
1897
|
imported = self._file_store.import_file(filename)
|
|
1870
1898
|
except FileNotFoundError:
|
|
1871
|
-
|
|
1899
|
+
# This might happen because we're also along the code path for
|
|
1900
|
+
# optional file outputs.
|
|
1901
|
+
logger.info(
|
|
1872
1902
|
"File at URL %s does not exist or is inaccessible." % filename
|
|
1873
1903
|
)
|
|
1874
1904
|
raise
|
|
@@ -1879,9 +1909,13 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1879
1909
|
filename,
|
|
1880
1910
|
e.code,
|
|
1881
1911
|
)
|
|
1912
|
+
# We don't need to handle translating error codes for not
|
|
1913
|
+
# found; import_file does it already.
|
|
1882
1914
|
raise
|
|
1883
1915
|
if imported is None:
|
|
1884
|
-
# Satisfy mypy
|
|
1916
|
+
# Satisfy mypy. This should never happen though as we don't
|
|
1917
|
+
# pass a shared file name (which is the only way import_file
|
|
1918
|
+
# returns None)
|
|
1885
1919
|
raise RuntimeError("Failed to import URL %s into jobstore." % filename)
|
|
1886
1920
|
file_basename = os.path.basename(urlsplit(filename).path)
|
|
1887
1921
|
# Get the URL to the parent directory and use that.
|
|
@@ -1890,23 +1924,19 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1890
1924
|
dir_id = self._parent_dir_to_ids.setdefault(parent_dir, uuid.uuid4())
|
|
1891
1925
|
result = pack_toil_uri(imported, self.task_path, dir_id, file_basename)
|
|
1892
1926
|
logger.debug("Virtualized %s as WDL file %s", filename, result)
|
|
1893
|
-
# We can't put the Toil URI in the virtualized_to_devirtualized
|
|
1894
|
-
#
|
|
1927
|
+
# We can't put the Toil URI in the virtualized_to_devirtualized
|
|
1928
|
+
# cache because it would point to the URL instead of a local file
|
|
1929
|
+
# on the machine, so only store the forward mapping
|
|
1895
1930
|
self._devirtualized_to_virtualized[filename] = result
|
|
1896
1931
|
return result
|
|
1897
1932
|
else:
|
|
1898
|
-
# Otherwise this is a local file and we want to fake it
|
|
1899
|
-
#
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
# To support relative paths from execution directory, join the execution dir and filename
|
|
1906
|
-
# If filename is already an abs path, join() will not do anything
|
|
1907
|
-
abs_filename = os.path.join(self.execution_dir, filename)
|
|
1908
|
-
else:
|
|
1909
|
-
abs_filename = os.path.abspath(filename)
|
|
1933
|
+
# Otherwise this is a local file name or URI and we want to fake it
|
|
1934
|
+
# as a Toil file store file
|
|
1935
|
+
|
|
1936
|
+
# Convert to a properly-absolutized file URI
|
|
1937
|
+
file_uri = Toil.normalize_uri(filename, dir_path=self.execution_dir)
|
|
1938
|
+
# Extract the absolute path name
|
|
1939
|
+
abs_filename = unquote(urlsplit(file_uri).path)
|
|
1910
1940
|
|
|
1911
1941
|
if abs_filename in self._devirtualized_to_virtualized:
|
|
1912
1942
|
# This is a previously devirtualized thing so we can just use the
|
|
@@ -1917,6 +1947,9 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1917
1947
|
)
|
|
1918
1948
|
return result
|
|
1919
1949
|
|
|
1950
|
+
if not os.path.exists(abs_filename):
|
|
1951
|
+
raise FileNotFoundError(abs_filename)
|
|
1952
|
+
|
|
1920
1953
|
file_id = self._file_store.writeGlobalFile(abs_filename)
|
|
1921
1954
|
|
|
1922
1955
|
file_dir = os.path.dirname(abs_filename)
|
|
@@ -1946,6 +1979,51 @@ class ToilWDLStdLibWorkflow(ToilWDLStdLibBase):
|
|
|
1946
1979
|
|
|
1947
1980
|
self._miniwdl_cache: Optional[WDL.runtime.cache.CallCache] = None
|
|
1948
1981
|
|
|
1982
|
+
def _virtualize_file(
|
|
1983
|
+
self, file: WDL.Value.File, enforce_existence: bool = True
|
|
1984
|
+
) -> WDL.Value.File:
|
|
1985
|
+
# When a workflow coerces a string path or file: URI to a File at
|
|
1986
|
+
# workflow scope, we need to fill in the cache filesystem path.
|
|
1987
|
+
if (
|
|
1988
|
+
get_file_virtualized_value(file) is None
|
|
1989
|
+
and get_shared_fs_path(file) is None
|
|
1990
|
+
and (
|
|
1991
|
+
not is_any_url(file.value)
|
|
1992
|
+
or is_file_url(file.value)
|
|
1993
|
+
)
|
|
1994
|
+
):
|
|
1995
|
+
# This is a never-virtualized file that is a file path or URI and
|
|
1996
|
+
# has no shared FS path associated with it. We just made it at
|
|
1997
|
+
# workflow scope. (If it came from a task, it would have a
|
|
1998
|
+
# virtualized value already.)
|
|
1999
|
+
|
|
2000
|
+
# If we are loading it at workflow scope, the file path can be used
|
|
2001
|
+
# as the cache path.
|
|
2002
|
+
|
|
2003
|
+
if not is_any_url(file.value):
|
|
2004
|
+
# Handle file path
|
|
2005
|
+
cache_path = file.value
|
|
2006
|
+
else:
|
|
2007
|
+
# Handle pulling path out of file URI
|
|
2008
|
+
cache_path = unquote(urlsplit(file.value).path)
|
|
2009
|
+
|
|
2010
|
+
# Apply the path
|
|
2011
|
+
file = set_shared_fs_path(file, cache_path)
|
|
2012
|
+
|
|
2013
|
+
logger.info(
|
|
2014
|
+
"Applied shared filesystem path %s to File %s that appears to "
|
|
2015
|
+
"have been coerced from String at workflow scope.",
|
|
2016
|
+
cache_path,
|
|
2017
|
+
file
|
|
2018
|
+
)
|
|
2019
|
+
|
|
2020
|
+
# Do the virtualization
|
|
2021
|
+
return super()._virtualize_file(file, enforce_existence)
|
|
2022
|
+
|
|
2023
|
+
# TODO: If the workflow coerces a File to a String and back again, we
|
|
2024
|
+
# should have some way to recover the toilfile: URL it had in the job
|
|
2025
|
+
# store to avoid re-importing it.
|
|
2026
|
+
|
|
1949
2027
|
# This needs to be hash-compatible with MiniWDL.
|
|
1950
2028
|
# MiniWDL hooks _virtualize_filename
|
|
1951
2029
|
# <https://github.com/chanzuckerberg/miniwdl/blob/475dd3f3784d1390e6a0e880d43316a620114de3/WDL/runtime/workflow.py#L699-L729>,
|
|
@@ -1999,7 +2077,7 @@ class ToilWDLStdLibWorkflow(ToilWDLStdLibBase):
|
|
|
1999
2077
|
)
|
|
2000
2078
|
# Make an environment of "file_sha256" to that as a WDL string, and
|
|
2001
2079
|
# digest that, and make a write_ cache key. No need to transform to
|
|
2002
|
-
# shared FS paths
|
|
2080
|
+
# shared FS paths since no paths are in it.
|
|
2003
2081
|
log_bindings(
|
|
2004
2082
|
logger.debug, "Digesting file bindings:", [file_input_bindings]
|
|
2005
2083
|
)
|
|
@@ -2346,6 +2424,8 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
2346
2424
|
filenames.
|
|
2347
2425
|
"""
|
|
2348
2426
|
|
|
2427
|
+
logger.debug("WDL task outputs stdlib asked to virtualize %s", filename)
|
|
2428
|
+
|
|
2349
2429
|
if not is_any_url(filename) and not filename.startswith("/"):
|
|
2350
2430
|
# We are getting a bare relative path on the supposedly devirtualized side.
|
|
2351
2431
|
# Find a real path to it relative to the current directory override.
|
|
@@ -2394,8 +2474,12 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
2394
2474
|
logger.error(
|
|
2395
2475
|
"Handling broken symlink %s ultimately to %s", filename, here
|
|
2396
2476
|
)
|
|
2477
|
+
# This should produce a FileNotFoundError since we think of
|
|
2478
|
+
# broken symlinks as nonexistent.
|
|
2479
|
+
raise FileNotFoundError(filename)
|
|
2397
2480
|
filename = here
|
|
2398
|
-
|
|
2481
|
+
|
|
2482
|
+
logger.debug("WDL task outputs stdlib thinks we really need to virtualize %s", filename)
|
|
2399
2483
|
return super()._virtualize_filename(filename)
|
|
2400
2484
|
|
|
2401
2485
|
|
|
@@ -2535,7 +2619,7 @@ def devirtualize_files(
|
|
|
2535
2619
|
that are actually available to command line commands.
|
|
2536
2620
|
The same virtual file always maps to the same devirtualized filename even with duplicates
|
|
2537
2621
|
"""
|
|
2538
|
-
logger.
|
|
2622
|
+
logger.debug("Devirtualizing files")
|
|
2539
2623
|
return map_over_files_in_bindings(environment, stdlib._devirtualize_file)
|
|
2540
2624
|
|
|
2541
2625
|
|
|
@@ -2546,12 +2630,35 @@ def virtualize_files(
|
|
|
2546
2630
|
Make sure all the File values embedded in the given bindings point to files
|
|
2547
2631
|
that are usable from other machines.
|
|
2548
2632
|
"""
|
|
2549
|
-
logger.
|
|
2633
|
+
logger.debug("Virtualizing files")
|
|
2550
2634
|
virtualize_func = partial(
|
|
2551
2635
|
stdlib._virtualize_file, enforce_existence=enforce_existence
|
|
2552
2636
|
)
|
|
2553
2637
|
return map_over_files_in_bindings(environment, virtualize_func)
|
|
2554
2638
|
|
|
2639
|
+
def delete_dead_files(internal_bindings: WDLBindings, live_bindings_list: list[WDLBindings], file_store: AbstractFileStore) -> None:
|
|
2640
|
+
"""
|
|
2641
|
+
Delete any files that are in the given bindings but not in the live list.
|
|
2642
|
+
|
|
2643
|
+
Operates on the virtualized values of File objects anywhere in the bindings.
|
|
2644
|
+
"""
|
|
2645
|
+
|
|
2646
|
+
# Get all the files in the first bindings and not any of the others.
|
|
2647
|
+
unused_files = set(
|
|
2648
|
+
extract_file_virtualized_values(internal_bindings)
|
|
2649
|
+
).difference(
|
|
2650
|
+
*(
|
|
2651
|
+
extract_file_virtualized_values(bindings)
|
|
2652
|
+
for bindings in live_bindings_list
|
|
2653
|
+
)
|
|
2654
|
+
)
|
|
2655
|
+
|
|
2656
|
+
for file_uri in unused_files:
|
|
2657
|
+
# Delete them
|
|
2658
|
+
if is_toil_url(file_uri):
|
|
2659
|
+
logger.debug("Delete file %s that is not needed", file_uri)
|
|
2660
|
+
file_id, _, _, _ = unpack_toil_uri(file_uri)
|
|
2661
|
+
file_store.deleteGlobalFile(file_id)
|
|
2555
2662
|
|
|
2556
2663
|
def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:
|
|
2557
2664
|
"""
|
|
@@ -3025,6 +3132,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3025
3132
|
self,
|
|
3026
3133
|
task: WDL.Tree.Task,
|
|
3027
3134
|
prev_node_results: Sequence[Promised[WDLBindings]],
|
|
3135
|
+
enclosing_bindings: WDLBindings,
|
|
3028
3136
|
task_id: list[str],
|
|
3029
3137
|
wdl_options: WDLContext,
|
|
3030
3138
|
**kwargs: Any,
|
|
@@ -3032,6 +3140,11 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3032
3140
|
"""
|
|
3033
3141
|
Make a new job to determine resources and run a task.
|
|
3034
3142
|
|
|
3143
|
+
:param enclosing_bindings: Bindings in the enclosing section,
|
|
3144
|
+
containing files not to clean up. Files that are passed as inputs
|
|
3145
|
+
but not used as outputs or present in the enclosing section
|
|
3146
|
+
bindings will be deleted after the task call completes.
|
|
3147
|
+
|
|
3035
3148
|
:param namespace: The namespace that the task's *contents* exist in.
|
|
3036
3149
|
The caller has already added the task's own name.
|
|
3037
3150
|
"""
|
|
@@ -3052,6 +3165,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3052
3165
|
|
|
3053
3166
|
self._task = task
|
|
3054
3167
|
self._prev_node_results = prev_node_results
|
|
3168
|
+
self._enclosing_bindings = enclosing_bindings
|
|
3055
3169
|
self._task_id = task_id
|
|
3056
3170
|
|
|
3057
3171
|
@report_wdl_errors("evaluate task code", exit=True)
|
|
@@ -3091,10 +3205,23 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3091
3205
|
# TODO: What if the same file is passed through several tasks, and
|
|
3092
3206
|
# we get cache hits on those tasks? Won't we upload it several
|
|
3093
3207
|
# times?
|
|
3208
|
+
|
|
3209
|
+
# Load output bindings from the cache
|
|
3210
|
+
cached_bindings = virtualize_files(
|
|
3211
|
+
cached_result, standard_library, enforce_existence=False
|
|
3212
|
+
)
|
|
3213
|
+
|
|
3214
|
+
# Throw away anything input but not available outside the call or
|
|
3215
|
+
# output.
|
|
3216
|
+
delete_dead_files(
|
|
3217
|
+
bindings,
|
|
3218
|
+
[cached_bindings, self._enclosing_bindings],
|
|
3219
|
+
file_store
|
|
3220
|
+
)
|
|
3221
|
+
|
|
3222
|
+
# Postprocess and ship the output bindings.
|
|
3094
3223
|
return self.postprocess(
|
|
3095
|
-
|
|
3096
|
-
cached_result, standard_library, enforce_existence=False
|
|
3097
|
-
)
|
|
3224
|
+
cached_bindings
|
|
3098
3225
|
)
|
|
3099
3226
|
|
|
3100
3227
|
if self._task.inputs:
|
|
@@ -3231,6 +3358,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3231
3358
|
virtualize_files(
|
|
3232
3359
|
runtime_bindings, standard_library, enforce_existence=False
|
|
3233
3360
|
),
|
|
3361
|
+
self._enclosing_bindings,
|
|
3234
3362
|
self._task_id,
|
|
3235
3363
|
cores=runtime_cores or self.cores,
|
|
3236
3364
|
memory=runtime_memory or self.memory,
|
|
@@ -3266,6 +3394,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
3266
3394
|
task: WDL.Tree.Task,
|
|
3267
3395
|
task_internal_bindings: Promised[WDLBindings],
|
|
3268
3396
|
runtime_bindings: Promised[WDLBindings],
|
|
3397
|
+
enclosing_bindings: WDLBindings,
|
|
3269
3398
|
task_id: list[str],
|
|
3270
3399
|
mount_spec: dict[str | None, int],
|
|
3271
3400
|
wdl_options: WDLContext,
|
|
@@ -3275,6 +3404,9 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
3275
3404
|
"""
|
|
3276
3405
|
Make a new job to run a task.
|
|
3277
3406
|
|
|
3407
|
+
:param enclosing_bindings: Bindings outside the workflow call, with
|
|
3408
|
+
files that should not be cleaned up at the end of the task.
|
|
3409
|
+
|
|
3278
3410
|
:param namespace: The namespace that the task's *contents* exist in.
|
|
3279
3411
|
The caller has already added the task's own name.
|
|
3280
3412
|
"""
|
|
@@ -3298,6 +3430,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
3298
3430
|
self._task = task
|
|
3299
3431
|
self._task_internal_bindings = task_internal_bindings
|
|
3300
3432
|
self._runtime_bindings = runtime_bindings
|
|
3433
|
+
self._enclosing_bindings = enclosing_bindings
|
|
3301
3434
|
self._task_id = task_id
|
|
3302
3435
|
self._cache_key = cache_key
|
|
3303
3436
|
self._mount_spec = mount_spec
|
|
@@ -4056,6 +4189,18 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
4056
4189
|
miniwdl_config=miniwdl_config,
|
|
4057
4190
|
)
|
|
4058
4191
|
|
|
4192
|
+
# Clean up anything from the task call input: block or the runtime
|
|
4193
|
+
# section that isn't getting output or available in the enclosing
|
|
4194
|
+
# section. Runtime sections aren't meant to have files, but nothing
|
|
4195
|
+
# actually stops them from being there.
|
|
4196
|
+
delete_dead_files(
|
|
4197
|
+
combine_bindings([bindings, runtime_bindings]),
|
|
4198
|
+
[output_bindings, self._enclosing_bindings],
|
|
4199
|
+
file_store
|
|
4200
|
+
)
|
|
4201
|
+
# If File objects somehow made it to the runtime block they shouldn't
|
|
4202
|
+
# have been virtualized so don't bother with them.
|
|
4203
|
+
|
|
4059
4204
|
# Do postprocessing steps to e.g. apply namespaces.
|
|
4060
4205
|
output_bindings = self.postprocess(output_bindings)
|
|
4061
4206
|
|
|
@@ -4108,7 +4253,8 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
4108
4253
|
logger.info("Setting %s to %s", self._node.name, self._node.expr)
|
|
4109
4254
|
value = evaluate_decl(self._node, incoming_bindings, standard_library)
|
|
4110
4255
|
bindings = incoming_bindings.bind(self._node.name, value)
|
|
4111
|
-
|
|
4256
|
+
# TODO: Only virtualize the new binding
|
|
4257
|
+
return self.postprocess(virtualize_files(bindings, standard_library, enforce_existence=False))
|
|
4112
4258
|
elif isinstance(self._node, WDL.Tree.Call):
|
|
4113
4259
|
# This is a call of a task or workflow
|
|
4114
4260
|
|
|
@@ -4129,6 +4275,8 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
4129
4275
|
standard_library,
|
|
4130
4276
|
inputs_mapping,
|
|
4131
4277
|
)
|
|
4278
|
+
# Prepare call inputs to move to another node
|
|
4279
|
+
input_bindings = virtualize_files(input_bindings, standard_library, enforce_existence=False)
|
|
4132
4280
|
|
|
4133
4281
|
# Bindings may also be added in from the enclosing workflow inputs
|
|
4134
4282
|
# TODO: this is letting us also inject them from the workflow body.
|
|
@@ -4146,6 +4294,7 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
4146
4294
|
subjob: WDLBaseJob = WDLWorkflowJob(
|
|
4147
4295
|
self._node.callee,
|
|
4148
4296
|
[input_bindings, passed_down_bindings],
|
|
4297
|
+
incoming_bindings,
|
|
4149
4298
|
self._node.callee_id,
|
|
4150
4299
|
wdl_options=wdl_options,
|
|
4151
4300
|
local=True,
|
|
@@ -4156,6 +4305,7 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
4156
4305
|
subjob = WDLTaskWrapperJob(
|
|
4157
4306
|
self._node.callee,
|
|
4158
4307
|
[input_bindings, passed_down_bindings],
|
|
4308
|
+
incoming_bindings,
|
|
4159
4309
|
self._node.callee_id,
|
|
4160
4310
|
wdl_options=wdl_options,
|
|
4161
4311
|
local=True,
|
|
@@ -4257,7 +4407,8 @@ class WDLWorkflowNodeListJob(WDLBaseJob):
|
|
|
4257
4407
|
node, "Unimplemented WorkflowNode: " + str(type(node))
|
|
4258
4408
|
)
|
|
4259
4409
|
|
|
4260
|
-
|
|
4410
|
+
# TODO: Only virtualize the new bindings created
|
|
4411
|
+
return self.postprocess(virtualize_files(current_bindings, standard_library, enforce_existence=False))
|
|
4261
4412
|
|
|
4262
4413
|
|
|
4263
4414
|
class WDLCombineBindingsJob(WDLBaseJob):
|
|
@@ -5020,6 +5171,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5020
5171
|
self,
|
|
5021
5172
|
workflow: WDL.Tree.Workflow,
|
|
5022
5173
|
prev_node_results: Sequence[Promised[WDLBindings]],
|
|
5174
|
+
enclosing_bindings: WDLBindings,
|
|
5023
5175
|
workflow_id: list[str],
|
|
5024
5176
|
wdl_options: WDLContext,
|
|
5025
5177
|
**kwargs: Any,
|
|
@@ -5028,6 +5180,13 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5028
5180
|
Create a subtree that will run a WDL workflow. The job returns the
|
|
5029
5181
|
return value of the workflow.
|
|
5030
5182
|
|
|
5183
|
+
:param prev_node_results: Bindings fed into the workflow call as inputs.
|
|
5184
|
+
|
|
5185
|
+
:param enclosing_bindings: Bindings in the enclosing section,
|
|
5186
|
+
containing files not to clean up. Files that are passed as inputs
|
|
5187
|
+
but not used as outputs or present in the enclosing section
|
|
5188
|
+
bindings will be deleted after the workflow call completes.
|
|
5189
|
+
|
|
5031
5190
|
:param namespace: the namespace that the workflow's *contents* will be
|
|
5032
5191
|
in. Caller has already added the workflow's own name.
|
|
5033
5192
|
"""
|
|
@@ -5044,6 +5203,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5044
5203
|
|
|
5045
5204
|
self._workflow = workflow
|
|
5046
5205
|
self._prev_node_results = prev_node_results
|
|
5206
|
+
self._enclosing_bindings = enclosing_bindings
|
|
5047
5207
|
self._workflow_id = workflow_id
|
|
5048
5208
|
|
|
5049
5209
|
@report_wdl_errors("run workflow")
|
|
@@ -5095,11 +5255,13 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5095
5255
|
# Make jobs to run all the parts of the workflow
|
|
5096
5256
|
sink = self.create_subgraph(self._workflow.body, [], bindings)
|
|
5097
5257
|
|
|
5098
|
-
# To support the all call outputs feature
|
|
5099
|
-
# we have a declared but
|
|
5258
|
+
# To support the all call outputs feature and cleanup of files created
|
|
5259
|
+
# in input: blocks, run an outputs job even if we have a declared but
|
|
5260
|
+
# empty outputs section.
|
|
5100
5261
|
outputs_job = WDLOutputsJob(
|
|
5101
5262
|
self._workflow,
|
|
5102
5263
|
sink.rv(),
|
|
5264
|
+
self._enclosing_bindings,
|
|
5103
5265
|
wdl_options=self._wdl_options,
|
|
5104
5266
|
cache_key=cache_key,
|
|
5105
5267
|
local=True,
|
|
@@ -5121,6 +5283,7 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5121
5283
|
self,
|
|
5122
5284
|
workflow: WDL.Tree.Workflow,
|
|
5123
5285
|
bindings: Promised[WDLBindings],
|
|
5286
|
+
enclosing_bindings: WDLBindings,
|
|
5124
5287
|
wdl_options: WDLContext,
|
|
5125
5288
|
cache_key: str | None = None,
|
|
5126
5289
|
**kwargs: Any,
|
|
@@ -5128,6 +5291,11 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5128
5291
|
"""
|
|
5129
5292
|
Make a new WDLOutputsJob for the given workflow, with the given set of bindings after its body runs.
|
|
5130
5293
|
|
|
5294
|
+
:param bindings: Bindings after execution of the workflow body.
|
|
5295
|
+
|
|
5296
|
+
:param enclosing_bindings: Bindings outside the workflow call, with
|
|
5297
|
+
files that should not be cleaned up at the end of the workflow.
|
|
5298
|
+
|
|
5131
5299
|
:param cache_key: If set and storing into the call cache is on, will
|
|
5132
5300
|
cache the workflow execution result under the given key in a
|
|
5133
5301
|
MiniWDL-compatible way.
|
|
@@ -5135,6 +5303,7 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5135
5303
|
super().__init__(wdl_options=wdl_options, **kwargs)
|
|
5136
5304
|
|
|
5137
5305
|
self._bindings = bindings
|
|
5306
|
+
self._enclosing_bindings = enclosing_bindings
|
|
5138
5307
|
self._workflow = workflow
|
|
5139
5308
|
self._cache_key = cache_key
|
|
5140
5309
|
|
|
@@ -5227,8 +5396,15 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5227
5396
|
self._cache_key, output_bindings, file_store, self._wdl_options
|
|
5228
5397
|
)
|
|
5229
5398
|
|
|
5230
|
-
|
|
5399
|
+
# Let Files that are not output or available outside the call go out of
|
|
5400
|
+
# scope.
|
|
5401
|
+
delete_dead_files(
|
|
5402
|
+
unwrap(self._bindings),
|
|
5403
|
+
[output_bindings, self._enclosing_bindings],
|
|
5404
|
+
file_store
|
|
5405
|
+
)
|
|
5231
5406
|
|
|
5407
|
+
return self.postprocess(output_bindings)
|
|
5232
5408
|
|
|
5233
5409
|
class WDLStartJob(WDLSectionJob):
|
|
5234
5410
|
"""
|
|
@@ -5263,18 +5439,24 @@ class WDLStartJob(WDLSectionJob):
|
|
|
5263
5439
|
if isinstance(self._target, WDL.Tree.Workflow):
|
|
5264
5440
|
# Create a workflow job. We rely on this to handle entering the input
|
|
5265
5441
|
# namespace if needed, or handling free-floating inputs.
|
|
5442
|
+
# Pass top-level inputs as enclosing section inputs to avoid
|
|
5443
|
+
# bothering to separately delete them.
|
|
5266
5444
|
job: WDLBaseJob = WDLWorkflowJob(
|
|
5267
5445
|
self._target,
|
|
5268
5446
|
[inputs],
|
|
5447
|
+
inputs,
|
|
5269
5448
|
[self._target.name],
|
|
5270
5449
|
wdl_options=self._wdl_options,
|
|
5271
5450
|
local=True,
|
|
5272
5451
|
)
|
|
5273
5452
|
else:
|
|
5274
5453
|
# There is no workflow. Create a task job.
|
|
5454
|
+
# Pass top-level inputs as enclosing section inputs to avoid
|
|
5455
|
+
# bothering to separately delete them.
|
|
5275
5456
|
job = WDLTaskWrapperJob(
|
|
5276
5457
|
self._target,
|
|
5277
5458
|
[inputs],
|
|
5459
|
+
inputs,
|
|
5278
5460
|
[self._target.name],
|
|
5279
5461
|
wdl_options=self._wdl_options,
|
|
5280
5462
|
local=True,
|
|
@@ -5348,7 +5530,7 @@ class WDLImportWrapper(WDLSectionJob):
|
|
|
5348
5530
|
self._import_workers_disk = import_workers_disk
|
|
5349
5531
|
|
|
5350
5532
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
5351
|
-
filenames =
|
|
5533
|
+
filenames = extract_file_values(self._inputs)
|
|
5352
5534
|
file_to_data = get_file_sizes(
|
|
5353
5535
|
filenames,
|
|
5354
5536
|
file_store.jobStore,
|
|
@@ -5445,61 +5627,105 @@ def main() -> None:
|
|
|
5445
5627
|
wdl_uri, trs_spec = resolve_workflow(options.wdl_uri, supported_languages={"WDL"})
|
|
5446
5628
|
|
|
5447
5629
|
with Toil(options, workflow_name=trs_spec or wdl_uri, trs_spec=trs_spec) as toil:
|
|
5448
|
-
|
|
5449
|
-
|
|
5630
|
+
# TODO: Move all the input parsing outside the Toil context
|
|
5631
|
+
# manager to avoid leaving a job store behind if the workflow
|
|
5632
|
+
# can't start.
|
|
5633
|
+
|
|
5634
|
+
# Both start and restart need us to have the workflow and the
|
|
5635
|
+
# wdl_options WDLContext.
|
|
5636
|
+
|
|
5637
|
+
# MiniWDL load code internally uses asyncio.get_event_loop()
|
|
5638
|
+
# which might not get an event loop if somebody has ever called
|
|
5639
|
+
# set_event_loop. So we need to make sure an event loop is
|
|
5640
|
+
# available.
|
|
5641
|
+
asyncio.set_event_loop(asyncio.new_event_loop())
|
|
5642
|
+
|
|
5643
|
+
# Load the WDL document.
|
|
5644
|
+
document: WDL.Tree.Document = WDL.load(
|
|
5645
|
+
wdl_uri,
|
|
5646
|
+
read_source=toil_read_source,
|
|
5647
|
+
)
|
|
5648
|
+
|
|
5649
|
+
# See if we're going to run a workflow or a task
|
|
5650
|
+
target: WDL.Tree.Workflow | WDL.Tree.Task
|
|
5651
|
+
if document.workflow:
|
|
5652
|
+
target = document.workflow
|
|
5653
|
+
elif len(document.tasks) == 1:
|
|
5654
|
+
target = document.tasks[0]
|
|
5655
|
+
elif len(document.tasks) > 1:
|
|
5656
|
+
raise WDL.Error.InputError(
|
|
5657
|
+
"Multiple tasks found with no workflow! Either add a workflow or keep one task."
|
|
5658
|
+
)
|
|
5450
5659
|
else:
|
|
5451
|
-
|
|
5452
|
-
|
|
5453
|
-
|
|
5454
|
-
|
|
5455
|
-
#
|
|
5456
|
-
#
|
|
5457
|
-
#
|
|
5458
|
-
#
|
|
5459
|
-
|
|
5460
|
-
|
|
5461
|
-
# Load the WDL document.
|
|
5462
|
-
document: WDL.Tree.Document = WDL.load(
|
|
5463
|
-
wdl_uri,
|
|
5464
|
-
read_source=toil_read_source,
|
|
5660
|
+
raise WDL.Error.InputError("WDL document is empty!")
|
|
5661
|
+
|
|
5662
|
+
if "croo_out_def" in target.meta:
|
|
5663
|
+
# This workflow or task wants to have its outputs
|
|
5664
|
+
# "organized" by the Cromwell Output Organizer:
|
|
5665
|
+
# <https://github.com/ENCODE-DCC/croo>.
|
|
5666
|
+
#
|
|
5667
|
+
# TODO: We don't support generating anything that CROO can read.
|
|
5668
|
+
logger.warning(
|
|
5669
|
+
"This WDL expects to be used with the Cromwell Output Organizer (croo) <https://github.com/ENCODE-DCC/croo>. Toil cannot yet produce the outputs that croo requires. You will not be able to use croo on the output of this Toil run!"
|
|
5465
5670
|
)
|
|
5466
5671
|
|
|
5467
|
-
#
|
|
5468
|
-
|
|
5469
|
-
|
|
5470
|
-
|
|
5471
|
-
|
|
5472
|
-
|
|
5473
|
-
|
|
5474
|
-
|
|
5475
|
-
"Multiple tasks found with no workflow! Either add a workflow or keep one task."
|
|
5476
|
-
)
|
|
5477
|
-
else:
|
|
5478
|
-
raise WDL.Error.InputError("WDL document is empty!")
|
|
5479
|
-
|
|
5480
|
-
if "croo_out_def" in target.meta:
|
|
5481
|
-
# This workflow or task wants to have its outputs
|
|
5482
|
-
# "organized" by the Cromwell Output Organizer:
|
|
5483
|
-
# <https://github.com/ENCODE-DCC/croo>.
|
|
5484
|
-
#
|
|
5485
|
-
# TODO: We don't support generating anything that CROO can read.
|
|
5672
|
+
# But we can assume that we need to preserve individual
|
|
5673
|
+
# task outputs since the point of CROO is fetching those
|
|
5674
|
+
# from Cromwell's output directories.
|
|
5675
|
+
#
|
|
5676
|
+
# This isn't quite WDL spec compliant but it will rescue
|
|
5677
|
+
# runs of the popular
|
|
5678
|
+
# <https://github.com/ENCODE-DCC/atac-seq-pipeline>
|
|
5679
|
+
if options.all_call_outputs is None:
|
|
5486
5680
|
logger.warning(
|
|
5487
|
-
"
|
|
5681
|
+
"Inferring --allCallOutputs=True to preserve probable actual outputs of a croo WDL file."
|
|
5488
5682
|
)
|
|
5683
|
+
options.all_call_outputs = True
|
|
5684
|
+
|
|
5685
|
+
# This mutates document to add linting information, but doesn't print any lint errors itself
|
|
5686
|
+
# or stop the workflow
|
|
5687
|
+
WDL.Lint.lint(document)
|
|
5688
|
+
|
|
5689
|
+
# We use a mutable variable and a generic file pointer to capture information about lint warnings
|
|
5690
|
+
# Both will be populated inside outline()
|
|
5691
|
+
lint_warnings_counter = [0]
|
|
5692
|
+
lint_warnings_io = io.StringIO()
|
|
5693
|
+
outline(
|
|
5694
|
+
document,
|
|
5695
|
+
0,
|
|
5696
|
+
file=lint_warnings_io,
|
|
5697
|
+
show_called=(document.workflow is not None),
|
|
5698
|
+
shown=lint_warnings_counter,
|
|
5699
|
+
) # type: ignore[no-untyped-call]
|
|
5700
|
+
|
|
5701
|
+
if getattr(WDL.Lint, "_shellcheck_available", None) is False:
|
|
5702
|
+
logger.info("Suggestion: install shellcheck (www.shellcheck.net) to check task commands")
|
|
5703
|
+
|
|
5704
|
+
if lint_warnings_counter[0]:
|
|
5705
|
+
logger.warning('Workflow lint warnings:\n%s', lint_warnings_io.getvalue().rstrip())
|
|
5706
|
+
if options.strict:
|
|
5707
|
+
logger.critical(f'Workflow did not pass linting in strict mode')
|
|
5708
|
+
# MiniWDL uses exit code 2 to indicate linting errors, so replicate that behavior
|
|
5709
|
+
sys.exit(2)
|
|
5710
|
+
|
|
5711
|
+
# Get the execution directory
|
|
5712
|
+
execution_dir = os.getcwd()
|
|
5713
|
+
|
|
5714
|
+
# Configure workflow interpreter options.
|
|
5715
|
+
# TODO: Would be nice to somehow be able to change some of these on
|
|
5716
|
+
# restart. For now we assume we are computing the same values.
|
|
5717
|
+
wdl_options: WDLContext = {
|
|
5718
|
+
"execution_dir": execution_dir,
|
|
5719
|
+
"container": options.container,
|
|
5720
|
+
"task_path": target.name,
|
|
5721
|
+
"namespace": target.name,
|
|
5722
|
+
"all_call_outputs": options.all_call_outputs,
|
|
5723
|
+
}
|
|
5724
|
+
assert wdl_options.get("container") is not None
|
|
5489
5725
|
|
|
5490
|
-
|
|
5491
|
-
|
|
5492
|
-
|
|
5493
|
-
#
|
|
5494
|
-
# This isn't quite WDL spec compliant but it will rescue
|
|
5495
|
-
# runs of the popular
|
|
5496
|
-
# <https://github.com/ENCODE-DCC/atac-seq-pipeline>
|
|
5497
|
-
if options.all_call_outputs is None:
|
|
5498
|
-
logger.warning(
|
|
5499
|
-
"Inferring --allCallOutputs=True to preserve probable actual outputs of a croo WDL file."
|
|
5500
|
-
)
|
|
5501
|
-
options.all_call_outputs = True
|
|
5502
|
-
|
|
5726
|
+
if options.restart:
|
|
5727
|
+
output_bindings = toil.restart()
|
|
5728
|
+
else:
|
|
5503
5729
|
# If our input really comes from a URI or path, remember it.
|
|
5504
5730
|
input_source_uri = None
|
|
5505
5731
|
# Also remember where we need to report JSON parse errors as
|
|
@@ -5592,19 +5818,6 @@ def main() -> None:
|
|
|
5592
5818
|
|
|
5593
5819
|
# TODO: Automatically set a good MINIWDL__SINGULARITY__IMAGE_CACHE ?
|
|
5594
5820
|
|
|
5595
|
-
# Get the execution directory
|
|
5596
|
-
execution_dir = os.getcwd()
|
|
5597
|
-
|
|
5598
|
-
# Configure workflow interpreter options
|
|
5599
|
-
wdl_options: WDLContext = {
|
|
5600
|
-
"execution_dir": execution_dir,
|
|
5601
|
-
"container": options.container,
|
|
5602
|
-
"task_path": target.name,
|
|
5603
|
-
"namespace": target.name,
|
|
5604
|
-
"all_call_outputs": options.all_call_outputs,
|
|
5605
|
-
}
|
|
5606
|
-
assert wdl_options.get("container") is not None
|
|
5607
|
-
|
|
5608
5821
|
# Run the workflow and get its outputs namespaced with the workflow name.
|
|
5609
5822
|
root_job = make_root_job(
|
|
5610
5823
|
target,
|