toil 8.0.0__py3-none-any.whl → 8.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +4 -39
- toil/batchSystems/abstractBatchSystem.py +1 -1
- toil/batchSystems/abstractGridEngineBatchSystem.py +1 -1
- toil/batchSystems/awsBatch.py +1 -1
- toil/batchSystems/cleanup_support.py +1 -1
- toil/batchSystems/kubernetes.py +53 -7
- toil/batchSystems/local_support.py +1 -1
- toil/batchSystems/mesos/batchSystem.py +13 -8
- toil/batchSystems/mesos/test/__init__.py +3 -2
- toil/batchSystems/options.py +1 -0
- toil/batchSystems/singleMachine.py +1 -1
- toil/batchSystems/slurm.py +229 -84
- toil/bus.py +5 -3
- toil/common.py +198 -54
- toil/cwl/cwltoil.py +32 -11
- toil/job.py +110 -86
- toil/jobStores/abstractJobStore.py +24 -3
- toil/jobStores/aws/jobStore.py +46 -10
- toil/jobStores/fileJobStore.py +25 -1
- toil/jobStores/googleJobStore.py +104 -30
- toil/leader.py +9 -0
- toil/lib/accelerators.py +3 -1
- toil/lib/aws/session.py +14 -3
- toil/lib/aws/utils.py +92 -35
- toil/lib/aws/utils.py.orig +504 -0
- toil/lib/bioio.py +1 -1
- toil/lib/docker.py +252 -91
- toil/lib/dockstore.py +387 -0
- toil/lib/ec2nodes.py +3 -2
- toil/lib/exceptions.py +5 -3
- toil/lib/history.py +1345 -0
- toil/lib/history_submission.py +695 -0
- toil/lib/io.py +56 -23
- toil/lib/misc.py +25 -1
- toil/lib/resources.py +2 -1
- toil/lib/retry.py +10 -10
- toil/lib/threading.py +11 -10
- toil/lib/{integration.py → trs.py} +95 -46
- toil/lib/web.py +38 -0
- toil/options/common.py +25 -2
- toil/options/cwl.py +10 -0
- toil/options/wdl.py +11 -0
- toil/provisioners/gceProvisioner.py +4 -4
- toil/server/api_spec/LICENSE +201 -0
- toil/server/api_spec/README.rst +5 -0
- toil/server/cli/wes_cwl_runner.py +5 -4
- toil/server/utils.py +2 -3
- toil/statsAndLogging.py +35 -1
- toil/test/__init__.py +275 -115
- toil/test/batchSystems/batchSystemTest.py +227 -205
- toil/test/batchSystems/test_slurm.py +199 -2
- toil/test/cactus/pestis.tar.gz +0 -0
- toil/test/conftest.py +7 -0
- toil/test/cwl/2.fasta +11 -0
- toil/test/cwl/2.fastq +12 -0
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +1015 -780
- toil/test/cwl/directory/directory/file.txt +15 -0
- toil/test/cwl/download_directory_file.json +4 -0
- toil/test/cwl/download_directory_s3.json +4 -0
- toil/test/cwl/download_file.json +6 -0
- toil/test/cwl/download_http.json +6 -0
- toil/test/cwl/download_https.json +6 -0
- toil/test/cwl/download_s3.json +6 -0
- toil/test/cwl/download_subdirectory_file.json +5 -0
- toil/test/cwl/download_subdirectory_s3.json +5 -0
- toil/test/cwl/empty.json +1 -0
- toil/test/cwl/mock_mpi/fake_mpi.yml +8 -0
- toil/test/cwl/mock_mpi/fake_mpi_run.py +42 -0
- toil/test/cwl/optional-file-exists.json +6 -0
- toil/test/cwl/optional-file-missing.json +6 -0
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/cwl/preemptible_expression.json +1 -0
- toil/test/cwl/revsort-job-missing.json +6 -0
- toil/test/cwl/revsort-job.json +6 -0
- toil/test/cwl/s3_secondary_file.json +16 -0
- toil/test/cwl/seqtk_seq_job.json +6 -0
- toil/test/cwl/stream.json +6 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.dat +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f1 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f1i +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f2 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f2_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f3 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f3_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f4 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f4_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f5 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.info +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.lock +0 -0
- toil/test/cwl/whale.txt +16 -0
- toil/test/docs/scripts/example_alwaysfail.py +38 -0
- toil/test/docs/scripts/example_alwaysfail_with_files.wdl +33 -0
- toil/test/docs/scripts/example_cachingbenchmark.py +117 -0
- toil/test/docs/scripts/stagingExampleFiles/in.txt +1 -0
- toil/test/docs/scripts/stagingExampleFiles/out.txt +2 -0
- toil/test/docs/scripts/tutorial_arguments.py +23 -0
- toil/test/docs/scripts/tutorial_debugging.patch +12 -0
- toil/test/docs/scripts/tutorial_debugging_hangs.wdl +126 -0
- toil/test/docs/scripts/tutorial_debugging_works.wdl +129 -0
- toil/test/docs/scripts/tutorial_docker.py +20 -0
- toil/test/docs/scripts/tutorial_dynamic.py +24 -0
- toil/test/docs/scripts/tutorial_encapsulation.py +28 -0
- toil/test/docs/scripts/tutorial_encapsulation2.py +29 -0
- toil/test/docs/scripts/tutorial_helloworld.py +15 -0
- toil/test/docs/scripts/tutorial_invokeworkflow.py +27 -0
- toil/test/docs/scripts/tutorial_invokeworkflow2.py +30 -0
- toil/test/docs/scripts/tutorial_jobfunctions.py +22 -0
- toil/test/docs/scripts/tutorial_managing.py +29 -0
- toil/test/docs/scripts/tutorial_managing2.py +56 -0
- toil/test/docs/scripts/tutorial_multiplejobs.py +25 -0
- toil/test/docs/scripts/tutorial_multiplejobs2.py +21 -0
- toil/test/docs/scripts/tutorial_multiplejobs3.py +22 -0
- toil/test/docs/scripts/tutorial_promises.py +25 -0
- toil/test/docs/scripts/tutorial_promises2.py +30 -0
- toil/test/docs/scripts/tutorial_quickstart.py +22 -0
- toil/test/docs/scripts/tutorial_requirements.py +44 -0
- toil/test/docs/scripts/tutorial_services.py +45 -0
- toil/test/docs/scripts/tutorial_staging.py +45 -0
- toil/test/docs/scripts/tutorial_stats.py +64 -0
- toil/test/lib/aws/test_iam.py +3 -1
- toil/test/lib/dockerTest.py +205 -122
- toil/test/lib/test_history.py +236 -0
- toil/test/lib/test_trs.py +161 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +12 -9
- toil/test/provisioners/clusterTest.py +4 -4
- toil/test/provisioners/gceProvisionerTest.py +16 -14
- toil/test/sort/sort.py +4 -1
- toil/test/src/busTest.py +17 -17
- toil/test/src/deferredFunctionTest.py +145 -132
- toil/test/src/importExportFileTest.py +71 -63
- toil/test/src/jobEncapsulationTest.py +27 -28
- toil/test/src/jobServiceTest.py +149 -133
- toil/test/src/jobTest.py +219 -211
- toil/test/src/miscTests.py +66 -60
- toil/test/src/promisedRequirementTest.py +163 -169
- toil/test/src/regularLogTest.py +24 -24
- toil/test/src/resourceTest.py +82 -76
- toil/test/src/restartDAGTest.py +51 -47
- toil/test/src/resumabilityTest.py +24 -19
- toil/test/src/retainTempDirTest.py +60 -57
- toil/test/src/systemTest.py +17 -13
- toil/test/src/threadingTest.py +29 -32
- toil/test/utils/ABCWorkflowDebug/B_file.txt +1 -0
- toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +204 -0
- toil/test/utils/ABCWorkflowDebug/mkFile.py +16 -0
- toil/test/utils/ABCWorkflowDebug/sleep.cwl +12 -0
- toil/test/utils/ABCWorkflowDebug/sleep.yaml +1 -0
- toil/test/utils/toilDebugTest.py +117 -102
- toil/test/utils/toilKillTest.py +54 -53
- toil/test/utils/utilsTest.py +303 -229
- toil/test/wdl/lint_error.wdl +9 -0
- toil/test/wdl/md5sum/empty_file.json +1 -0
- toil/test/wdl/md5sum/md5sum-gs.json +1 -0
- toil/test/wdl/md5sum/md5sum.1.0.wdl +32 -0
- toil/test/wdl/md5sum/md5sum.input +1 -0
- toil/test/wdl/md5sum/md5sum.json +1 -0
- toil/test/wdl/md5sum/md5sum.wdl +25 -0
- toil/test/wdl/miniwdl_self_test/inputs-namespaced.json +1 -0
- toil/test/wdl/miniwdl_self_test/inputs.json +1 -0
- toil/test/wdl/miniwdl_self_test/self_test.wdl +40 -0
- toil/test/wdl/standard_library/as_map.json +16 -0
- toil/test/wdl/standard_library/as_map_as_input.wdl +23 -0
- toil/test/wdl/standard_library/as_pairs.json +7 -0
- toil/test/wdl/standard_library/as_pairs_as_input.wdl +23 -0
- toil/test/wdl/standard_library/ceil.json +3 -0
- toil/test/wdl/standard_library/ceil_as_command.wdl +16 -0
- toil/test/wdl/standard_library/ceil_as_input.wdl +16 -0
- toil/test/wdl/standard_library/collect_by_key.json +1 -0
- toil/test/wdl/standard_library/collect_by_key_as_input.wdl +23 -0
- toil/test/wdl/standard_library/cross.json +11 -0
- toil/test/wdl/standard_library/cross_as_input.wdl +19 -0
- toil/test/wdl/standard_library/flatten.json +7 -0
- toil/test/wdl/standard_library/flatten_as_input.wdl +18 -0
- toil/test/wdl/standard_library/floor.json +3 -0
- toil/test/wdl/standard_library/floor_as_command.wdl +16 -0
- toil/test/wdl/standard_library/floor_as_input.wdl +16 -0
- toil/test/wdl/standard_library/keys.json +8 -0
- toil/test/wdl/standard_library/keys_as_input.wdl +24 -0
- toil/test/wdl/standard_library/length.json +7 -0
- toil/test/wdl/standard_library/length_as_input.wdl +16 -0
- toil/test/wdl/standard_library/length_as_input_with_map.json +7 -0
- toil/test/wdl/standard_library/length_as_input_with_map.wdl +17 -0
- toil/test/wdl/standard_library/length_invalid.json +3 -0
- toil/test/wdl/standard_library/range.json +3 -0
- toil/test/wdl/standard_library/range_0.json +3 -0
- toil/test/wdl/standard_library/range_as_input.wdl +17 -0
- toil/test/wdl/standard_library/range_invalid.json +3 -0
- toil/test/wdl/standard_library/read_boolean.json +3 -0
- toil/test/wdl/standard_library/read_boolean_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_float.json +3 -0
- toil/test/wdl/standard_library/read_float_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_int.json +3 -0
- toil/test/wdl/standard_library/read_int_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_json.json +3 -0
- toil/test/wdl/standard_library/read_json_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_lines.json +3 -0
- toil/test/wdl/standard_library/read_lines_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_map.json +3 -0
- toil/test/wdl/standard_library/read_map_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_string.json +3 -0
- toil/test/wdl/standard_library/read_string_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_tsv.json +3 -0
- toil/test/wdl/standard_library/read_tsv_as_output.wdl +31 -0
- toil/test/wdl/standard_library/round.json +3 -0
- toil/test/wdl/standard_library/round_as_command.wdl +16 -0
- toil/test/wdl/standard_library/round_as_input.wdl +16 -0
- toil/test/wdl/standard_library/size.json +3 -0
- toil/test/wdl/standard_library/size_as_command.wdl +17 -0
- toil/test/wdl/standard_library/size_as_output.wdl +36 -0
- toil/test/wdl/standard_library/stderr.json +3 -0
- toil/test/wdl/standard_library/stderr_as_output.wdl +30 -0
- toil/test/wdl/standard_library/stdout.json +3 -0
- toil/test/wdl/standard_library/stdout_as_output.wdl +30 -0
- toil/test/wdl/standard_library/sub.json +3 -0
- toil/test/wdl/standard_library/sub_as_input.wdl +17 -0
- toil/test/wdl/standard_library/sub_as_input_with_file.wdl +17 -0
- toil/test/wdl/standard_library/transpose.json +6 -0
- toil/test/wdl/standard_library/transpose_as_input.wdl +18 -0
- toil/test/wdl/standard_library/write_json.json +6 -0
- toil/test/wdl/standard_library/write_json_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_lines.json +7 -0
- toil/test/wdl/standard_library/write_lines_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_map.json +6 -0
- toil/test/wdl/standard_library/write_map_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_tsv.json +6 -0
- toil/test/wdl/standard_library/write_tsv_as_command.wdl +17 -0
- toil/test/wdl/standard_library/zip.json +12 -0
- toil/test/wdl/standard_library/zip_as_input.wdl +19 -0
- toil/test/wdl/test.csv +3 -0
- toil/test/wdl/test.tsv +3 -0
- toil/test/wdl/testfiles/croo.wdl +38 -0
- toil/test/wdl/testfiles/drop_files.wdl +62 -0
- toil/test/wdl/testfiles/drop_files_subworkflow.wdl +13 -0
- toil/test/wdl/testfiles/empty.txt +0 -0
- toil/test/wdl/testfiles/not_enough_outputs.wdl +33 -0
- toil/test/wdl/testfiles/random.wdl +66 -0
- toil/test/wdl/testfiles/string_file_coercion.json +1 -0
- toil/test/wdl/testfiles/string_file_coercion.wdl +35 -0
- toil/test/wdl/testfiles/test.json +4 -0
- toil/test/wdl/testfiles/test_boolean.txt +1 -0
- toil/test/wdl/testfiles/test_float.txt +1 -0
- toil/test/wdl/testfiles/test_int.txt +1 -0
- toil/test/wdl/testfiles/test_lines.txt +5 -0
- toil/test/wdl/testfiles/test_map.txt +2 -0
- toil/test/wdl/testfiles/test_string.txt +1 -0
- toil/test/wdl/testfiles/url_to_file.wdl +13 -0
- toil/test/wdl/testfiles/url_to_optional_file.wdl +13 -0
- toil/test/wdl/testfiles/vocab.json +1 -0
- toil/test/wdl/testfiles/vocab.wdl +66 -0
- toil/test/wdl/testfiles/wait.wdl +34 -0
- toil/test/wdl/wdl_specification/type_pair.json +23 -0
- toil/test/wdl/wdl_specification/type_pair_basic.wdl +36 -0
- toil/test/wdl/wdl_specification/type_pair_with_files.wdl +36 -0
- toil/test/wdl/wdl_specification/v1_spec.json +1 -0
- toil/test/wdl/wdl_specification/v1_spec_declaration.wdl +39 -0
- toil/test/wdl/wdltoil_test.py +681 -408
- toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +350 -123
- toil/worker.py +113 -33
- {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/METADATA +13 -7
- toil-8.2.0.dist-info/RECORD +439 -0
- {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/WHEEL +1 -1
- toil/test/lib/test_integration.py +0 -104
- toil-8.0.0.dist-info/RECORD +0 -253
- {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/entry_points.txt +0 -0
- {toil-8.0.0.dist-info → toil-8.2.0.dist-info/licenses}/LICENSE +0 -0
- {toil-8.0.0.dist-info → toil-8.2.0.dist-info}/top_level.txt +0 -0
toil/wdl/wdltoil.py
CHANGED
|
@@ -62,13 +62,14 @@ else:
|
|
|
62
62
|
|
|
63
63
|
from functools import partial
|
|
64
64
|
from urllib.error import HTTPError
|
|
65
|
-
from urllib.parse import quote, unquote, urljoin, urlsplit
|
|
65
|
+
from urllib.parse import quote, unquote, urljoin, urlsplit
|
|
66
66
|
|
|
67
67
|
import WDL.Error
|
|
68
68
|
import WDL.runtime.config
|
|
69
69
|
from configargparse import ArgParser, Namespace
|
|
70
70
|
from WDL._util import byte_size_units, chmod_R_plus
|
|
71
|
-
from WDL.CLI import print_error
|
|
71
|
+
from WDL.CLI import print_error, outline
|
|
72
|
+
import WDL.Lint
|
|
72
73
|
from WDL.runtime.backend.docker_swarm import SwarmContainer
|
|
73
74
|
from WDL.runtime.backend.singularity import SingularityContainer
|
|
74
75
|
from WDL.runtime.error import DownloadFailed
|
|
@@ -103,8 +104,8 @@ from toil.jobStores.abstractJobStore import (
|
|
|
103
104
|
from toil.lib.exceptions import UnimplementedURLException
|
|
104
105
|
from toil.lib.accelerators import get_individual_local_accelerators
|
|
105
106
|
from toil.lib.conversions import VALID_PREFIXES, convert_units, human2bytes
|
|
107
|
+
from toil.lib.trs import resolve_workflow
|
|
106
108
|
from toil.lib.io import mkdtemp, is_any_url, is_file_url, TOIL_URI_SCHEME, is_standard_url, is_toil_url, is_remote_url
|
|
107
|
-
from toil.lib.integration import resolve_workflow
|
|
108
109
|
from toil.lib.memoize import memoize
|
|
109
110
|
from toil.lib.misc import get_user_name
|
|
110
111
|
from toil.lib.resources import ResourceMonitor
|
|
@@ -515,10 +516,14 @@ async def toil_read_source(
|
|
|
515
516
|
# TODO: this is probably sync work that would be better as async work here
|
|
516
517
|
AbstractJobStore.read_from_url(candidate_uri, destination_buffer)
|
|
517
518
|
except Exception as e:
|
|
518
|
-
|
|
519
|
-
|
|
519
|
+
if isinstance(e, SyntaxError) or isinstance(e, NameError):
|
|
520
|
+
# These are probably actual problems with the code and not
|
|
521
|
+
# failures in reading the URL.
|
|
522
|
+
raise
|
|
523
|
+
# TODO: we need to assume in general that an error is just a
|
|
524
|
+
# not-found, because the exceptions thrown by read_from_url()
|
|
520
525
|
# implementations are not specified.
|
|
521
|
-
logger.debug("Tried to fetch %s from %s but got %s", uri, candidate_uri, e)
|
|
526
|
+
logger.debug("Tried to fetch %s from %s but got %s: %s", uri, candidate_uri, type(e), e)
|
|
522
527
|
continue
|
|
523
528
|
# If we get here, we got it probably.
|
|
524
529
|
try:
|
|
@@ -913,8 +918,8 @@ def set_shared_fs_path(file: WDL.Value.File, path: str) -> WDL.Value.File:
|
|
|
913
918
|
|
|
914
919
|
|
|
915
920
|
def view_shared_fs_paths(
|
|
916
|
-
bindings:
|
|
917
|
-
) ->
|
|
921
|
+
bindings: WDLBindings,
|
|
922
|
+
) -> WDLBindings:
|
|
918
923
|
"""
|
|
919
924
|
Given WDL bindings, return a copy where all files have their shared filesystem paths as their values.
|
|
920
925
|
"""
|
|
@@ -1133,11 +1138,11 @@ def choose_human_readable_directory(
|
|
|
1133
1138
|
|
|
1134
1139
|
def evaluate_decls_to_bindings(
|
|
1135
1140
|
decls: list[WDL.Tree.Decl],
|
|
1136
|
-
all_bindings:
|
|
1141
|
+
all_bindings: WDLBindings,
|
|
1137
1142
|
standard_library: ToilWDLStdLibBase,
|
|
1138
1143
|
include_previous: bool = False,
|
|
1139
1144
|
drop_missing_files: bool = False,
|
|
1140
|
-
) ->
|
|
1145
|
+
) -> WDLBindings:
|
|
1141
1146
|
"""
|
|
1142
1147
|
Evaluate decls with a given bindings environment and standard library.
|
|
1143
1148
|
Creates a new bindings object that only contains the bindings from the given decls.
|
|
@@ -1152,7 +1157,7 @@ def evaluate_decls_to_bindings(
|
|
|
1152
1157
|
"""
|
|
1153
1158
|
# all_bindings contains current bindings + previous all_bindings
|
|
1154
1159
|
# bindings only contains the decl bindings themselves so that bindings from other sections prior aren't included
|
|
1155
|
-
bindings:
|
|
1160
|
+
bindings: WDLBindings = WDL.Env.Bindings()
|
|
1156
1161
|
drop_if_missing_with_workdir = partial(
|
|
1157
1162
|
drop_if_missing, standard_library=standard_library
|
|
1158
1163
|
)
|
|
@@ -1241,7 +1246,10 @@ class NonDownloadingSize(WDL.StdLib._Size):
|
|
|
1241
1246
|
return WDL.Value.Float(total_size)
|
|
1242
1247
|
|
|
1243
1248
|
|
|
1244
|
-
def
|
|
1249
|
+
def extract_file_values(environment: WDLBindings) -> list[str]:
|
|
1250
|
+
"""
|
|
1251
|
+
Get a list of all File object values in the given bindings.
|
|
1252
|
+
"""
|
|
1245
1253
|
filenames = list()
|
|
1246
1254
|
|
|
1247
1255
|
def add_filename(file: WDL.Value.File) -> WDL.Value.File:
|
|
@@ -1251,6 +1259,22 @@ def extract_workflow_inputs(environment: WDLBindings) -> list[str]:
|
|
|
1251
1259
|
map_over_files_in_bindings(environment, add_filename)
|
|
1252
1260
|
return filenames
|
|
1253
1261
|
|
|
1262
|
+
def extract_file_virtualized_values(environment: WDLBindings) -> list[str]:
|
|
1263
|
+
"""
|
|
1264
|
+
Get a list of all File object virtualized values in the given bindings.
|
|
1265
|
+
|
|
1266
|
+
If a file hasn't been virtualized, it won't contribute to the list.
|
|
1267
|
+
"""
|
|
1268
|
+
values = list()
|
|
1269
|
+
|
|
1270
|
+
def add_value(file: WDL.Value.File) -> WDL.Value.File:
|
|
1271
|
+
value = get_file_virtualized_value(file)
|
|
1272
|
+
if value is not None:
|
|
1273
|
+
values.append(value)
|
|
1274
|
+
return file
|
|
1275
|
+
|
|
1276
|
+
map_over_files_in_bindings(environment, add_value)
|
|
1277
|
+
return values
|
|
1254
1278
|
|
|
1255
1279
|
def convert_files(
|
|
1256
1280
|
environment: WDLBindings,
|
|
@@ -1259,19 +1283,21 @@ def convert_files(
|
|
|
1259
1283
|
task_path: str,
|
|
1260
1284
|
) -> WDLBindings:
|
|
1261
1285
|
"""
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
Will return bindings with file values set to their corresponding relative-URI.
|
|
1286
|
+
Fill in the virtualized_value fields for File objects in a WDL environment.
|
|
1265
1287
|
|
|
1266
|
-
:param environment: Bindings to evaluate on
|
|
1267
|
-
:
|
|
1288
|
+
:param environment: Bindings to evaluate on. Will not be modified.
|
|
1289
|
+
:param file_to_id: Maps from imported URI to Toil FileID with the data.
|
|
1290
|
+
:param file_to_data: Maps from WDL-level file value to metadata about the
|
|
1291
|
+
file, including URI that would have been imported.
|
|
1292
|
+
:return: new bindings object with the annotated File objects in it.
|
|
1268
1293
|
"""
|
|
1269
1294
|
dir_ids = {t[1] for t in file_to_data.values()}
|
|
1270
1295
|
dir_to_id = {k: uuid.uuid4() for k in dir_ids}
|
|
1271
1296
|
|
|
1272
1297
|
def convert_file_to_uri(file: WDL.Value.File) -> WDL.Value.File:
|
|
1273
1298
|
"""
|
|
1274
|
-
|
|
1299
|
+
Produce a WDL File with the virtualized_value set to the Toil URI for
|
|
1300
|
+
the already-imported data, but the same value.
|
|
1275
1301
|
"""
|
|
1276
1302
|
candidate_uri = file_to_data[file.value][0]
|
|
1277
1303
|
file_id = file_to_id[candidate_uri]
|
|
@@ -1634,32 +1660,35 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1634
1660
|
logger.debug("File has no virtualized value so not changing value")
|
|
1635
1661
|
return file
|
|
1636
1662
|
|
|
1663
|
+
def _resolve_devirtualized_to_uri(self, devirtualized: str) -> str:
|
|
1664
|
+
"""
|
|
1665
|
+
Get a URI pointing to whatever URI or devirtualized file path is provided.
|
|
1666
|
+
|
|
1667
|
+
Handles resolving symlinks using in-container paths if necessary.
|
|
1668
|
+
"""
|
|
1669
|
+
|
|
1670
|
+
return Toil.normalize_uri(devirtualized, dir_path=self.execution_dir)
|
|
1671
|
+
|
|
1637
1672
|
def _virtualize_file(
|
|
1638
1673
|
self, file: WDL.Value.File, enforce_existence: bool = True
|
|
1639
1674
|
) -> WDL.Value.File:
|
|
1640
|
-
logger.debug("Virtualizing %s", file)
|
|
1641
|
-
# If enforce_existence is true, then if a file is detected as nonexistent, raise an error. Else, let it pass through
|
|
1642
1675
|
if get_file_virtualized_value(file) is not None:
|
|
1643
|
-
|
|
1676
|
+
# Already virtualized
|
|
1644
1677
|
return file
|
|
1645
1678
|
|
|
1646
|
-
|
|
1647
|
-
# We only want to error on a nonexistent file in the output section
|
|
1648
|
-
# Since we need to virtualize on task boundaries, don't enforce existence if on a boundary
|
|
1649
|
-
if is_standard_url(file.value):
|
|
1650
|
-
file_uri = Toil.normalize_uri(file.value)
|
|
1651
|
-
else:
|
|
1652
|
-
abs_filepath = (
|
|
1653
|
-
os.path.join(self.execution_dir, file.value)
|
|
1654
|
-
if self.execution_dir is not None
|
|
1655
|
-
else os.path.abspath(file.value)
|
|
1656
|
-
)
|
|
1657
|
-
file_uri = Toil.normalize_uri(abs_filepath)
|
|
1679
|
+
logger.debug("Virtualizing %s", file)
|
|
1658
1680
|
|
|
1659
|
-
|
|
1681
|
+
try:
|
|
1682
|
+
# Let the actual virtualization implementation signal a missing file
|
|
1683
|
+
virtualized_filename = self._virtualize_filename(file.value)
|
|
1684
|
+
except FileNotFoundError:
|
|
1685
|
+
if enforce_existence:
|
|
1686
|
+
raise
|
|
1687
|
+
else:
|
|
1660
1688
|
logger.debug("File appears nonexistent so marking it nonexistent")
|
|
1689
|
+
# Mark the file nonexistent.
|
|
1661
1690
|
return set_file_nonexistent(file, True)
|
|
1662
|
-
|
|
1691
|
+
|
|
1663
1692
|
logger.debug(
|
|
1664
1693
|
"For file %s got virtualized filename %s", file, virtualized_filename
|
|
1665
1694
|
)
|
|
@@ -1842,9 +1871,12 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1842
1871
|
@memoize
|
|
1843
1872
|
def _virtualize_filename(self, filename: str) -> str:
|
|
1844
1873
|
"""
|
|
1845
|
-
from a local path
|
|
1874
|
+
from a local path or other URL, 'virtualize' into the filename as it should present in a File value.
|
|
1875
|
+
|
|
1876
|
+
New in Toil: the path or URL may not actually exist.
|
|
1846
1877
|
|
|
1847
1878
|
:param filename: Can be a local file path, URL (http, https, s3, gs), or toilfile
|
|
1879
|
+
:raises FileNotFoundError: if the file doesn't actually exist (new addition in Toil over MiniWDL)
|
|
1848
1880
|
"""
|
|
1849
1881
|
|
|
1850
1882
|
if is_toil_url(filename):
|
|
@@ -1864,7 +1896,9 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1864
1896
|
try:
|
|
1865
1897
|
imported = self._file_store.import_file(filename)
|
|
1866
1898
|
except FileNotFoundError:
|
|
1867
|
-
|
|
1899
|
+
# This might happen because we're also along the code path for
|
|
1900
|
+
# optional file outputs.
|
|
1901
|
+
logger.info(
|
|
1868
1902
|
"File at URL %s does not exist or is inaccessible." % filename
|
|
1869
1903
|
)
|
|
1870
1904
|
raise
|
|
@@ -1875,9 +1909,13 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1875
1909
|
filename,
|
|
1876
1910
|
e.code,
|
|
1877
1911
|
)
|
|
1912
|
+
# We don't need to handle translating error codes for not
|
|
1913
|
+
# found; import_file does it already.
|
|
1878
1914
|
raise
|
|
1879
1915
|
if imported is None:
|
|
1880
|
-
# Satisfy mypy
|
|
1916
|
+
# Satisfy mypy. This should never happen though as we don't
|
|
1917
|
+
# pass a shared file name (which is the only way import_file
|
|
1918
|
+
# returns None)
|
|
1881
1919
|
raise RuntimeError("Failed to import URL %s into jobstore." % filename)
|
|
1882
1920
|
file_basename = os.path.basename(urlsplit(filename).path)
|
|
1883
1921
|
# Get the URL to the parent directory and use that.
|
|
@@ -1886,23 +1924,19 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1886
1924
|
dir_id = self._parent_dir_to_ids.setdefault(parent_dir, uuid.uuid4())
|
|
1887
1925
|
result = pack_toil_uri(imported, self.task_path, dir_id, file_basename)
|
|
1888
1926
|
logger.debug("Virtualized %s as WDL file %s", filename, result)
|
|
1889
|
-
# We can't put the Toil URI in the virtualized_to_devirtualized
|
|
1890
|
-
#
|
|
1927
|
+
# We can't put the Toil URI in the virtualized_to_devirtualized
|
|
1928
|
+
# cache because it would point to the URL instead of a local file
|
|
1929
|
+
# on the machine, so only store the forward mapping
|
|
1891
1930
|
self._devirtualized_to_virtualized[filename] = result
|
|
1892
1931
|
return result
|
|
1893
1932
|
else:
|
|
1894
|
-
# Otherwise this is a local file and we want to fake it
|
|
1895
|
-
#
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
# To support relative paths from execution directory, join the execution dir and filename
|
|
1902
|
-
# If filename is already an abs path, join() will not do anything
|
|
1903
|
-
abs_filename = os.path.join(self.execution_dir, filename)
|
|
1904
|
-
else:
|
|
1905
|
-
abs_filename = os.path.abspath(filename)
|
|
1933
|
+
# Otherwise this is a local file name or URI and we want to fake it
|
|
1934
|
+
# as a Toil file store file
|
|
1935
|
+
|
|
1936
|
+
# Convert to a properly-absolutized file URI
|
|
1937
|
+
file_uri = Toil.normalize_uri(filename, dir_path=self.execution_dir)
|
|
1938
|
+
# Extract the absolute path name
|
|
1939
|
+
abs_filename = unquote(urlsplit(file_uri).path)
|
|
1906
1940
|
|
|
1907
1941
|
if abs_filename in self._devirtualized_to_virtualized:
|
|
1908
1942
|
# This is a previously devirtualized thing so we can just use the
|
|
@@ -1913,6 +1947,9 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1913
1947
|
)
|
|
1914
1948
|
return result
|
|
1915
1949
|
|
|
1950
|
+
if not os.path.exists(abs_filename):
|
|
1951
|
+
raise FileNotFoundError(abs_filename)
|
|
1952
|
+
|
|
1916
1953
|
file_id = self._file_store.writeGlobalFile(abs_filename)
|
|
1917
1954
|
|
|
1918
1955
|
file_dir = os.path.dirname(abs_filename)
|
|
@@ -1942,6 +1979,51 @@ class ToilWDLStdLibWorkflow(ToilWDLStdLibBase):
|
|
|
1942
1979
|
|
|
1943
1980
|
self._miniwdl_cache: Optional[WDL.runtime.cache.CallCache] = None
|
|
1944
1981
|
|
|
1982
|
+
def _virtualize_file(
|
|
1983
|
+
self, file: WDL.Value.File, enforce_existence: bool = True
|
|
1984
|
+
) -> WDL.Value.File:
|
|
1985
|
+
# When a workflow coerces a string path or file: URI to a File at
|
|
1986
|
+
# workflow scope, we need to fill in the cache filesystem path.
|
|
1987
|
+
if (
|
|
1988
|
+
get_file_virtualized_value(file) is None
|
|
1989
|
+
and get_shared_fs_path(file) is None
|
|
1990
|
+
and (
|
|
1991
|
+
not is_any_url(file.value)
|
|
1992
|
+
or is_file_url(file.value)
|
|
1993
|
+
)
|
|
1994
|
+
):
|
|
1995
|
+
# This is a never-virtualized file that is a file path or URI and
|
|
1996
|
+
# has no shared FS path associated with it. We just made it at
|
|
1997
|
+
# workflow scope. (If it came from a task, it would have a
|
|
1998
|
+
# virtualized value already.)
|
|
1999
|
+
|
|
2000
|
+
# If we are loading it at workflow scope, the file path can be used
|
|
2001
|
+
# as the cache path.
|
|
2002
|
+
|
|
2003
|
+
if not is_any_url(file.value):
|
|
2004
|
+
# Handle file path
|
|
2005
|
+
cache_path = file.value
|
|
2006
|
+
else:
|
|
2007
|
+
# Handle pulling path out of file URI
|
|
2008
|
+
cache_path = unquote(urlsplit(file.value).path)
|
|
2009
|
+
|
|
2010
|
+
# Apply the path
|
|
2011
|
+
file = set_shared_fs_path(file, cache_path)
|
|
2012
|
+
|
|
2013
|
+
logger.info(
|
|
2014
|
+
"Applied shared filesystem path %s to File %s that appears to "
|
|
2015
|
+
"have been coerced from String at workflow scope.",
|
|
2016
|
+
cache_path,
|
|
2017
|
+
file
|
|
2018
|
+
)
|
|
2019
|
+
|
|
2020
|
+
# Do the virtualization
|
|
2021
|
+
return super()._virtualize_file(file, enforce_existence)
|
|
2022
|
+
|
|
2023
|
+
# TODO: If the workflow coerces a File to a String and back again, we
|
|
2024
|
+
# should have some way to recover the toilfile: URL it had in the job
|
|
2025
|
+
# store to avoid re-importing it.
|
|
2026
|
+
|
|
1945
2027
|
# This needs to be hash-compatible with MiniWDL.
|
|
1946
2028
|
# MiniWDL hooks _virtualize_filename
|
|
1947
2029
|
# <https://github.com/chanzuckerberg/miniwdl/blob/475dd3f3784d1390e6a0e880d43316a620114de3/WDL/runtime/workflow.py#L699-L729>,
|
|
@@ -1995,7 +2077,7 @@ class ToilWDLStdLibWorkflow(ToilWDLStdLibBase):
|
|
|
1995
2077
|
)
|
|
1996
2078
|
# Make an environment of "file_sha256" to that as a WDL string, and
|
|
1997
2079
|
# digest that, and make a write_ cache key. No need to transform to
|
|
1998
|
-
# shared FS paths
|
|
2080
|
+
# shared FS paths since no paths are in it.
|
|
1999
2081
|
log_bindings(
|
|
2000
2082
|
logger.debug, "Digesting file bindings:", [file_input_bindings]
|
|
2001
2083
|
)
|
|
@@ -2342,6 +2424,8 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
2342
2424
|
filenames.
|
|
2343
2425
|
"""
|
|
2344
2426
|
|
|
2427
|
+
logger.debug("WDL task outputs stdlib asked to virtualize %s", filename)
|
|
2428
|
+
|
|
2345
2429
|
if not is_any_url(filename) and not filename.startswith("/"):
|
|
2346
2430
|
# We are getting a bare relative path on the supposedly devirtualized side.
|
|
2347
2431
|
# Find a real path to it relative to the current directory override.
|
|
@@ -2390,8 +2474,12 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
2390
2474
|
logger.error(
|
|
2391
2475
|
"Handling broken symlink %s ultimately to %s", filename, here
|
|
2392
2476
|
)
|
|
2477
|
+
# This should produce a FileNotFoundError since we think of
|
|
2478
|
+
# broken symlinks as nonexistent.
|
|
2479
|
+
raise FileNotFoundError(filename)
|
|
2393
2480
|
filename = here
|
|
2394
|
-
|
|
2481
|
+
|
|
2482
|
+
logger.debug("WDL task outputs stdlib thinks we really need to virtualize %s", filename)
|
|
2395
2483
|
return super()._virtualize_filename(filename)
|
|
2396
2484
|
|
|
2397
2485
|
|
|
@@ -2531,7 +2619,7 @@ def devirtualize_files(
|
|
|
2531
2619
|
that are actually available to command line commands.
|
|
2532
2620
|
The same virtual file always maps to the same devirtualized filename even with duplicates
|
|
2533
2621
|
"""
|
|
2534
|
-
logger.
|
|
2622
|
+
logger.debug("Devirtualizing files")
|
|
2535
2623
|
return map_over_files_in_bindings(environment, stdlib._devirtualize_file)
|
|
2536
2624
|
|
|
2537
2625
|
|
|
@@ -2542,12 +2630,35 @@ def virtualize_files(
|
|
|
2542
2630
|
Make sure all the File values embedded in the given bindings point to files
|
|
2543
2631
|
that are usable from other machines.
|
|
2544
2632
|
"""
|
|
2545
|
-
logger.
|
|
2633
|
+
logger.debug("Virtualizing files")
|
|
2546
2634
|
virtualize_func = partial(
|
|
2547
2635
|
stdlib._virtualize_file, enforce_existence=enforce_existence
|
|
2548
2636
|
)
|
|
2549
2637
|
return map_over_files_in_bindings(environment, virtualize_func)
|
|
2550
2638
|
|
|
2639
|
+
def delete_dead_files(internal_bindings: WDLBindings, live_bindings_list: list[WDLBindings], file_store: AbstractFileStore) -> None:
|
|
2640
|
+
"""
|
|
2641
|
+
Delete any files that are in the given bindings but not in the live list.
|
|
2642
|
+
|
|
2643
|
+
Operates on the virtualized values of File objects anywhere in the bindings.
|
|
2644
|
+
"""
|
|
2645
|
+
|
|
2646
|
+
# Get all the files in the first bindings and not any of the others.
|
|
2647
|
+
unused_files = set(
|
|
2648
|
+
extract_file_virtualized_values(internal_bindings)
|
|
2649
|
+
).difference(
|
|
2650
|
+
*(
|
|
2651
|
+
extract_file_virtualized_values(bindings)
|
|
2652
|
+
for bindings in live_bindings_list
|
|
2653
|
+
)
|
|
2654
|
+
)
|
|
2655
|
+
|
|
2656
|
+
for file_uri in unused_files:
|
|
2657
|
+
# Delete them
|
|
2658
|
+
if is_toil_url(file_uri):
|
|
2659
|
+
logger.debug("Delete file %s that is not needed", file_uri)
|
|
2660
|
+
file_id, _, _, _ = unpack_toil_uri(file_uri)
|
|
2661
|
+
file_store.deleteGlobalFile(file_id)
|
|
2551
2662
|
|
|
2552
2663
|
def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:
|
|
2553
2664
|
"""
|
|
@@ -3021,6 +3132,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3021
3132
|
self,
|
|
3022
3133
|
task: WDL.Tree.Task,
|
|
3023
3134
|
prev_node_results: Sequence[Promised[WDLBindings]],
|
|
3135
|
+
enclosing_bindings: WDLBindings,
|
|
3024
3136
|
task_id: list[str],
|
|
3025
3137
|
wdl_options: WDLContext,
|
|
3026
3138
|
**kwargs: Any,
|
|
@@ -3028,6 +3140,11 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3028
3140
|
"""
|
|
3029
3141
|
Make a new job to determine resources and run a task.
|
|
3030
3142
|
|
|
3143
|
+
:param enclosing_bindings: Bindings in the enclosing section,
|
|
3144
|
+
containing files not to clean up. Files that are passed as inputs
|
|
3145
|
+
but not used as outputs or present in the enclosing section
|
|
3146
|
+
bindings will be deleted after the task call completes.
|
|
3147
|
+
|
|
3031
3148
|
:param namespace: The namespace that the task's *contents* exist in.
|
|
3032
3149
|
The caller has already added the task's own name.
|
|
3033
3150
|
"""
|
|
@@ -3048,6 +3165,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3048
3165
|
|
|
3049
3166
|
self._task = task
|
|
3050
3167
|
self._prev_node_results = prev_node_results
|
|
3168
|
+
self._enclosing_bindings = enclosing_bindings
|
|
3051
3169
|
self._task_id = task_id
|
|
3052
3170
|
|
|
3053
3171
|
@report_wdl_errors("evaluate task code", exit=True)
|
|
@@ -3087,10 +3205,23 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3087
3205
|
# TODO: What if the same file is passed through several tasks, and
|
|
3088
3206
|
# we get cache hits on those tasks? Won't we upload it several
|
|
3089
3207
|
# times?
|
|
3208
|
+
|
|
3209
|
+
# Load output bindings from the cache
|
|
3210
|
+
cached_bindings = virtualize_files(
|
|
3211
|
+
cached_result, standard_library, enforce_existence=False
|
|
3212
|
+
)
|
|
3213
|
+
|
|
3214
|
+
# Throw away anything input but not available outside the call or
|
|
3215
|
+
# output.
|
|
3216
|
+
delete_dead_files(
|
|
3217
|
+
bindings,
|
|
3218
|
+
[cached_bindings, self._enclosing_bindings],
|
|
3219
|
+
file_store
|
|
3220
|
+
)
|
|
3221
|
+
|
|
3222
|
+
# Postprocess and ship the output bindings.
|
|
3090
3223
|
return self.postprocess(
|
|
3091
|
-
|
|
3092
|
-
cached_result, standard_library, enforce_existence=False
|
|
3093
|
-
)
|
|
3224
|
+
cached_bindings
|
|
3094
3225
|
)
|
|
3095
3226
|
|
|
3096
3227
|
if self._task.inputs:
|
|
@@ -3227,6 +3358,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3227
3358
|
virtualize_files(
|
|
3228
3359
|
runtime_bindings, standard_library, enforce_existence=False
|
|
3229
3360
|
),
|
|
3361
|
+
self._enclosing_bindings,
|
|
3230
3362
|
self._task_id,
|
|
3231
3363
|
cores=runtime_cores or self.cores,
|
|
3232
3364
|
memory=runtime_memory or self.memory,
|
|
@@ -3262,6 +3394,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
3262
3394
|
task: WDL.Tree.Task,
|
|
3263
3395
|
task_internal_bindings: Promised[WDLBindings],
|
|
3264
3396
|
runtime_bindings: Promised[WDLBindings],
|
|
3397
|
+
enclosing_bindings: WDLBindings,
|
|
3265
3398
|
task_id: list[str],
|
|
3266
3399
|
mount_spec: dict[str | None, int],
|
|
3267
3400
|
wdl_options: WDLContext,
|
|
@@ -3271,6 +3404,9 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
3271
3404
|
"""
|
|
3272
3405
|
Make a new job to run a task.
|
|
3273
3406
|
|
|
3407
|
+
:param enclosing_bindings: Bindings outside the workflow call, with
|
|
3408
|
+
files that should not be cleaned up at the end of the task.
|
|
3409
|
+
|
|
3274
3410
|
:param namespace: The namespace that the task's *contents* exist in.
|
|
3275
3411
|
The caller has already added the task's own name.
|
|
3276
3412
|
"""
|
|
@@ -3294,6 +3430,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
3294
3430
|
self._task = task
|
|
3295
3431
|
self._task_internal_bindings = task_internal_bindings
|
|
3296
3432
|
self._runtime_bindings = runtime_bindings
|
|
3433
|
+
self._enclosing_bindings = enclosing_bindings
|
|
3297
3434
|
self._task_id = task_id
|
|
3298
3435
|
self._cache_key = cache_key
|
|
3299
3436
|
self._mount_spec = mount_spec
|
|
@@ -4052,6 +4189,18 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
4052
4189
|
miniwdl_config=miniwdl_config,
|
|
4053
4190
|
)
|
|
4054
4191
|
|
|
4192
|
+
# Clean up anything from the task call input: block or the runtime
|
|
4193
|
+
# section that isn't getting output or available in the enclosing
|
|
4194
|
+
# section. Runtime sections aren't meant to have files, but nothing
|
|
4195
|
+
# actually stops them from being there.
|
|
4196
|
+
delete_dead_files(
|
|
4197
|
+
combine_bindings([bindings, runtime_bindings]),
|
|
4198
|
+
[output_bindings, self._enclosing_bindings],
|
|
4199
|
+
file_store
|
|
4200
|
+
)
|
|
4201
|
+
# If File objects somehow made it to the runtime block they shouldn't
|
|
4202
|
+
# have been virtualized so don't bother with them.
|
|
4203
|
+
|
|
4055
4204
|
# Do postprocessing steps to e.g. apply namespaces.
|
|
4056
4205
|
output_bindings = self.postprocess(output_bindings)
|
|
4057
4206
|
|
|
@@ -4104,7 +4253,8 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
4104
4253
|
logger.info("Setting %s to %s", self._node.name, self._node.expr)
|
|
4105
4254
|
value = evaluate_decl(self._node, incoming_bindings, standard_library)
|
|
4106
4255
|
bindings = incoming_bindings.bind(self._node.name, value)
|
|
4107
|
-
|
|
4256
|
+
# TODO: Only virtualize the new binding
|
|
4257
|
+
return self.postprocess(virtualize_files(bindings, standard_library, enforce_existence=False))
|
|
4108
4258
|
elif isinstance(self._node, WDL.Tree.Call):
|
|
4109
4259
|
# This is a call of a task or workflow
|
|
4110
4260
|
|
|
@@ -4125,6 +4275,8 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
4125
4275
|
standard_library,
|
|
4126
4276
|
inputs_mapping,
|
|
4127
4277
|
)
|
|
4278
|
+
# Prepare call inputs to move to another node
|
|
4279
|
+
input_bindings = virtualize_files(input_bindings, standard_library, enforce_existence=False)
|
|
4128
4280
|
|
|
4129
4281
|
# Bindings may also be added in from the enclosing workflow inputs
|
|
4130
4282
|
# TODO: this is letting us also inject them from the workflow body.
|
|
@@ -4142,6 +4294,7 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
4142
4294
|
subjob: WDLBaseJob = WDLWorkflowJob(
|
|
4143
4295
|
self._node.callee,
|
|
4144
4296
|
[input_bindings, passed_down_bindings],
|
|
4297
|
+
incoming_bindings,
|
|
4145
4298
|
self._node.callee_id,
|
|
4146
4299
|
wdl_options=wdl_options,
|
|
4147
4300
|
local=True,
|
|
@@ -4152,6 +4305,7 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
4152
4305
|
subjob = WDLTaskWrapperJob(
|
|
4153
4306
|
self._node.callee,
|
|
4154
4307
|
[input_bindings, passed_down_bindings],
|
|
4308
|
+
incoming_bindings,
|
|
4155
4309
|
self._node.callee_id,
|
|
4156
4310
|
wdl_options=wdl_options,
|
|
4157
4311
|
local=True,
|
|
@@ -4253,7 +4407,8 @@ class WDLWorkflowNodeListJob(WDLBaseJob):
|
|
|
4253
4407
|
node, "Unimplemented WorkflowNode: " + str(type(node))
|
|
4254
4408
|
)
|
|
4255
4409
|
|
|
4256
|
-
|
|
4410
|
+
# TODO: Only virtualize the new bindings created
|
|
4411
|
+
return self.postprocess(virtualize_files(current_bindings, standard_library, enforce_existence=False))
|
|
4257
4412
|
|
|
4258
4413
|
|
|
4259
4414
|
class WDLCombineBindingsJob(WDLBaseJob):
|
|
@@ -5016,6 +5171,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5016
5171
|
self,
|
|
5017
5172
|
workflow: WDL.Tree.Workflow,
|
|
5018
5173
|
prev_node_results: Sequence[Promised[WDLBindings]],
|
|
5174
|
+
enclosing_bindings: WDLBindings,
|
|
5019
5175
|
workflow_id: list[str],
|
|
5020
5176
|
wdl_options: WDLContext,
|
|
5021
5177
|
**kwargs: Any,
|
|
@@ -5024,6 +5180,13 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5024
5180
|
Create a subtree that will run a WDL workflow. The job returns the
|
|
5025
5181
|
return value of the workflow.
|
|
5026
5182
|
|
|
5183
|
+
:param prev_node_results: Bindings fed into the workflow call as inputs.
|
|
5184
|
+
|
|
5185
|
+
:param enclosing_bindings: Bindings in the enclosing section,
|
|
5186
|
+
containing files not to clean up. Files that are passed as inputs
|
|
5187
|
+
but not used as outputs or present in the enclosing section
|
|
5188
|
+
bindings will be deleted after the workflow call completes.
|
|
5189
|
+
|
|
5027
5190
|
:param namespace: the namespace that the workflow's *contents* will be
|
|
5028
5191
|
in. Caller has already added the workflow's own name.
|
|
5029
5192
|
"""
|
|
@@ -5040,6 +5203,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5040
5203
|
|
|
5041
5204
|
self._workflow = workflow
|
|
5042
5205
|
self._prev_node_results = prev_node_results
|
|
5206
|
+
self._enclosing_bindings = enclosing_bindings
|
|
5043
5207
|
self._workflow_id = workflow_id
|
|
5044
5208
|
|
|
5045
5209
|
@report_wdl_errors("run workflow")
|
|
@@ -5091,11 +5255,13 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5091
5255
|
# Make jobs to run all the parts of the workflow
|
|
5092
5256
|
sink = self.create_subgraph(self._workflow.body, [], bindings)
|
|
5093
5257
|
|
|
5094
|
-
# To support the all call outputs feature
|
|
5095
|
-
# we have a declared but
|
|
5258
|
+
# To support the all call outputs feature and cleanup of files created
|
|
5259
|
+
# in input: blocks, run an outputs job even if we have a declared but
|
|
5260
|
+
# empty outputs section.
|
|
5096
5261
|
outputs_job = WDLOutputsJob(
|
|
5097
5262
|
self._workflow,
|
|
5098
5263
|
sink.rv(),
|
|
5264
|
+
self._enclosing_bindings,
|
|
5099
5265
|
wdl_options=self._wdl_options,
|
|
5100
5266
|
cache_key=cache_key,
|
|
5101
5267
|
local=True,
|
|
@@ -5117,6 +5283,7 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5117
5283
|
self,
|
|
5118
5284
|
workflow: WDL.Tree.Workflow,
|
|
5119
5285
|
bindings: Promised[WDLBindings],
|
|
5286
|
+
enclosing_bindings: WDLBindings,
|
|
5120
5287
|
wdl_options: WDLContext,
|
|
5121
5288
|
cache_key: str | None = None,
|
|
5122
5289
|
**kwargs: Any,
|
|
@@ -5124,6 +5291,11 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5124
5291
|
"""
|
|
5125
5292
|
Make a new WDLWorkflowOutputsJob for the given workflow, with the given set of bindings after its body runs.
|
|
5126
5293
|
|
|
5294
|
+
:param bindings: Bindings after execution of the workflow body.
|
|
5295
|
+
|
|
5296
|
+
:param enclosing_bindings: Bindings outside the workflow call, with
|
|
5297
|
+
files that should not be cleaned up at the end of the workflow.
|
|
5298
|
+
|
|
5127
5299
|
:param cache_key: If set and storing into the call cache is on, will
|
|
5128
5300
|
cache the workflow execution result under the given key in a
|
|
5129
5301
|
MiniWDL-compatible way.
|
|
@@ -5131,6 +5303,7 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5131
5303
|
super().__init__(wdl_options=wdl_options, **kwargs)
|
|
5132
5304
|
|
|
5133
5305
|
self._bindings = bindings
|
|
5306
|
+
self._enclosing_bindings = enclosing_bindings
|
|
5134
5307
|
self._workflow = workflow
|
|
5135
5308
|
self._cache_key = cache_key
|
|
5136
5309
|
|
|
@@ -5223,8 +5396,15 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5223
5396
|
self._cache_key, output_bindings, file_store, self._wdl_options
|
|
5224
5397
|
)
|
|
5225
5398
|
|
|
5226
|
-
|
|
5399
|
+
# Let Files that are not output or available outside the call go out of
|
|
5400
|
+
# scope.
|
|
5401
|
+
delete_dead_files(
|
|
5402
|
+
unwrap(self._bindings),
|
|
5403
|
+
[output_bindings, self._enclosing_bindings],
|
|
5404
|
+
file_store
|
|
5405
|
+
)
|
|
5227
5406
|
|
|
5407
|
+
return self.postprocess(output_bindings)
|
|
5228
5408
|
|
|
5229
5409
|
class WDLStartJob(WDLSectionJob):
|
|
5230
5410
|
"""
|
|
@@ -5259,18 +5439,24 @@ class WDLStartJob(WDLSectionJob):
|
|
|
5259
5439
|
if isinstance(self._target, WDL.Tree.Workflow):
|
|
5260
5440
|
# Create a workflow job. We rely on this to handle entering the input
|
|
5261
5441
|
# namespace if needed, or handling free-floating inputs.
|
|
5442
|
+
# Pass top-level inputs as enclosing section inputs to avoid
|
|
5443
|
+
# bothering to separately delete them.
|
|
5262
5444
|
job: WDLBaseJob = WDLWorkflowJob(
|
|
5263
5445
|
self._target,
|
|
5264
5446
|
[inputs],
|
|
5447
|
+
inputs,
|
|
5265
5448
|
[self._target.name],
|
|
5266
5449
|
wdl_options=self._wdl_options,
|
|
5267
5450
|
local=True,
|
|
5268
5451
|
)
|
|
5269
5452
|
else:
|
|
5270
5453
|
# There is no workflow. Create a task job.
|
|
5454
|
+
# Pass top-level inputs as enclosing section inputs to avoid
|
|
5455
|
+
# bothering to separately delete them.
|
|
5271
5456
|
job = WDLTaskWrapperJob(
|
|
5272
5457
|
self._target,
|
|
5273
5458
|
[inputs],
|
|
5459
|
+
inputs,
|
|
5274
5460
|
[self._target.name],
|
|
5275
5461
|
wdl_options=self._wdl_options,
|
|
5276
5462
|
local=True,
|
|
@@ -5344,7 +5530,7 @@ class WDLImportWrapper(WDLSectionJob):
|
|
|
5344
5530
|
self._import_workers_disk = import_workers_disk
|
|
5345
5531
|
|
|
5346
5532
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
5347
|
-
filenames =
|
|
5533
|
+
filenames = extract_file_values(self._inputs)
|
|
5348
5534
|
file_to_data = get_file_sizes(
|
|
5349
5535
|
filenames,
|
|
5350
5536
|
file_store.jobStore,
|
|
@@ -5438,56 +5624,108 @@ def main() -> None:
|
|
|
5438
5624
|
)
|
|
5439
5625
|
|
|
5440
5626
|
try:
|
|
5441
|
-
|
|
5442
|
-
|
|
5443
|
-
|
|
5627
|
+
wdl_uri, trs_spec = resolve_workflow(options.wdl_uri, supported_languages={"WDL"})
|
|
5628
|
+
|
|
5629
|
+
with Toil(options, workflow_name=trs_spec or wdl_uri, trs_spec=trs_spec) as toil:
|
|
5630
|
+
# TODO: Move all the input parsing outside the Toil context
|
|
5631
|
+
# manager to avoid leaving a job store behind if the workflow
|
|
5632
|
+
# can't start.
|
|
5633
|
+
|
|
5634
|
+
# Both start and restart need us to have the workflow and the
|
|
5635
|
+
# wdl_options WDLContext.
|
|
5636
|
+
|
|
5637
|
+
# MiniWDL load code internally uses asyncio.get_event_loop()
|
|
5638
|
+
# which might not get an event loop if somebody has ever called
|
|
5639
|
+
# set_event_loop. So we need to make sure an event loop is
|
|
5640
|
+
# available.
|
|
5641
|
+
asyncio.set_event_loop(asyncio.new_event_loop())
|
|
5642
|
+
|
|
5643
|
+
# Load the WDL document.
|
|
5644
|
+
document: WDL.Tree.Document = WDL.load(
|
|
5645
|
+
wdl_uri,
|
|
5646
|
+
read_source=toil_read_source,
|
|
5647
|
+
)
|
|
5648
|
+
|
|
5649
|
+
# See if we're going to run a workflow or a task
|
|
5650
|
+
target: WDL.Tree.Workflow | WDL.Tree.Task
|
|
5651
|
+
if document.workflow:
|
|
5652
|
+
target = document.workflow
|
|
5653
|
+
elif len(document.tasks) == 1:
|
|
5654
|
+
target = document.tasks[0]
|
|
5655
|
+
elif len(document.tasks) > 1:
|
|
5656
|
+
raise WDL.Error.InputError(
|
|
5657
|
+
"Multiple tasks found with no workflow! Either add a workflow or keep one task."
|
|
5658
|
+
)
|
|
5444
5659
|
else:
|
|
5445
|
-
|
|
5446
|
-
|
|
5447
|
-
|
|
5448
|
-
|
|
5449
|
-
#
|
|
5450
|
-
|
|
5451
|
-
|
|
5452
|
-
|
|
5660
|
+
raise WDL.Error.InputError("WDL document is empty!")
|
|
5661
|
+
|
|
5662
|
+
if "croo_out_def" in target.meta:
|
|
5663
|
+
# This workflow or task wants to have its outputs
|
|
5664
|
+
# "organized" by the Cromwell Output Organizer:
|
|
5665
|
+
# <https://github.com/ENCODE-DCC/croo>.
|
|
5666
|
+
#
|
|
5667
|
+
# TODO: We don't support generating anything that CROO can read.
|
|
5668
|
+
logger.warning(
|
|
5669
|
+
"This WDL expects to be used with the Cromwell Output Organizer (croo) <https://github.com/ENCODE-DCC/croo>. Toil cannot yet produce the outputs that croo requires. You will not be able to use croo on the output of this Toil run!"
|
|
5453
5670
|
)
|
|
5454
5671
|
|
|
5455
|
-
#
|
|
5456
|
-
|
|
5457
|
-
|
|
5458
|
-
|
|
5459
|
-
|
|
5460
|
-
|
|
5461
|
-
|
|
5462
|
-
|
|
5463
|
-
"Multiple tasks found with no workflow! Either add a workflow or keep one task."
|
|
5464
|
-
)
|
|
5465
|
-
else:
|
|
5466
|
-
raise WDL.Error.InputError("WDL document is empty!")
|
|
5467
|
-
|
|
5468
|
-
if "croo_out_def" in target.meta:
|
|
5469
|
-
# This workflow or task wants to have its outputs
|
|
5470
|
-
# "organized" by the Cromwell Output Organizer:
|
|
5471
|
-
# <https://github.com/ENCODE-DCC/croo>.
|
|
5472
|
-
#
|
|
5473
|
-
# TODO: We don't support generating anything that CROO can read.
|
|
5672
|
+
# But we can assume that we need to preserve individual
|
|
5673
|
+
# task outputs since the point of CROO is fetching those
|
|
5674
|
+
# from Cromwell's output directories.
|
|
5675
|
+
#
|
|
5676
|
+
# This isn't quite WDL spec compliant but it will rescue
|
|
5677
|
+
# runs of the popular
|
|
5678
|
+
# <https://github.com/ENCODE-DCC/atac-seq-pipeline>
|
|
5679
|
+
if options.all_call_outputs is None:
|
|
5474
5680
|
logger.warning(
|
|
5475
|
-
"
|
|
5681
|
+
"Inferring --allCallOutputs=True to preserve probable actual outputs of a croo WDL file."
|
|
5476
5682
|
)
|
|
5683
|
+
options.all_call_outputs = True
|
|
5684
|
+
|
|
5685
|
+
# This mutates document to add linting information, but doesn't print any lint errors itself
|
|
5686
|
+
# or stop the workflow
|
|
5687
|
+
WDL.Lint.lint(document)
|
|
5688
|
+
|
|
5689
|
+
# We use a mutable variable and a generic file pointer to capture information about lint warnings
|
|
5690
|
+
# Both will be populated inside outline()
|
|
5691
|
+
lint_warnings_counter = [0]
|
|
5692
|
+
lint_warnings_io = io.StringIO()
|
|
5693
|
+
outline(
|
|
5694
|
+
document,
|
|
5695
|
+
0,
|
|
5696
|
+
file=lint_warnings_io,
|
|
5697
|
+
show_called=(document.workflow is not None),
|
|
5698
|
+
shown=lint_warnings_counter,
|
|
5699
|
+
) # type: ignore[no-untyped-call]
|
|
5700
|
+
|
|
5701
|
+
if getattr(WDL.Lint, "_shellcheck_available", None) is False:
|
|
5702
|
+
logger.info("Suggestion: install shellcheck (www.shellcheck.net) to check task commands")
|
|
5703
|
+
|
|
5704
|
+
if lint_warnings_counter[0]:
|
|
5705
|
+
logger.warning('Workflow lint warnings:\n%s', lint_warnings_io.getvalue().rstrip())
|
|
5706
|
+
if options.strict:
|
|
5707
|
+
logger.critical(f'Workflow did not pass linting in strict mode')
|
|
5708
|
+
# MiniWDL uses exit code 2 to indicate linting errors, so replicate that behavior
|
|
5709
|
+
sys.exit(2)
|
|
5710
|
+
|
|
5711
|
+
# Get the execution directory
|
|
5712
|
+
execution_dir = os.getcwd()
|
|
5713
|
+
|
|
5714
|
+
# Configure workflow interpreter options.
|
|
5715
|
+
# TODO: Would be nice to somehow be able to change some of these on
|
|
5716
|
+
# restart. For now we assume we are computing the same values.
|
|
5717
|
+
wdl_options: WDLContext = {
|
|
5718
|
+
"execution_dir": execution_dir,
|
|
5719
|
+
"container": options.container,
|
|
5720
|
+
"task_path": target.name,
|
|
5721
|
+
"namespace": target.name,
|
|
5722
|
+
"all_call_outputs": options.all_call_outputs,
|
|
5723
|
+
}
|
|
5724
|
+
assert wdl_options.get("container") is not None
|
|
5477
5725
|
|
|
5478
|
-
|
|
5479
|
-
|
|
5480
|
-
|
|
5481
|
-
#
|
|
5482
|
-
# This isn't quite WDL spec compliant but it will rescue
|
|
5483
|
-
# runs of the popular
|
|
5484
|
-
# <https://github.com/ENCODE-DCC/atac-seq-pipeline>
|
|
5485
|
-
if options.all_call_outputs is None:
|
|
5486
|
-
logger.warning(
|
|
5487
|
-
"Inferring --allCallOutputs=True to preserve probable actual outputs of a croo WDL file."
|
|
5488
|
-
)
|
|
5489
|
-
options.all_call_outputs = True
|
|
5490
|
-
|
|
5726
|
+
if options.restart:
|
|
5727
|
+
output_bindings = toil.restart()
|
|
5728
|
+
else:
|
|
5491
5729
|
# If our input really comes from a URI or path, remember it.
|
|
5492
5730
|
input_source_uri = None
|
|
5493
5731
|
# Also remember where we need to report JSON parse errors as
|
|
@@ -5564,12 +5802,14 @@ def main() -> None:
|
|
|
5564
5802
|
inputs_search_path.append(input_source_uri)
|
|
5565
5803
|
|
|
5566
5804
|
match = re.match(
|
|
5567
|
-
r"https://raw\.githubusercontent\.com/[^/]*/[^/]*/[^/]*/",
|
|
5805
|
+
r"https://raw\.githubusercontent\.com/[^/]*/[^/]*/(refs/heads/)?[^/]*/",
|
|
5568
5806
|
input_source_uri,
|
|
5569
5807
|
)
|
|
5570
5808
|
if match:
|
|
5571
5809
|
# Special magic for Github repos to make e.g.
|
|
5572
5810
|
# https://raw.githubusercontent.com/vgteam/vg_wdl/44a03d9664db3f6d041a2f4a69bbc4f65c79533f/params/giraffe.json
|
|
5811
|
+
# or
|
|
5812
|
+
# https://raw.githubusercontent.com/vgteam/vg_wdl/refs/heads/giraffedv/params/giraffe.json
|
|
5573
5813
|
# work when it references things relative to repo root.
|
|
5574
5814
|
logger.info(
|
|
5575
5815
|
"Inputs appear to come from a Github repository; adding repository root to file search path"
|
|
@@ -5578,19 +5818,6 @@ def main() -> None:
|
|
|
5578
5818
|
|
|
5579
5819
|
# TODO: Automatically set a good MINIWDL__SINGULARITY__IMAGE_CACHE ?
|
|
5580
5820
|
|
|
5581
|
-
# Get the execution directory
|
|
5582
|
-
execution_dir = os.getcwd()
|
|
5583
|
-
|
|
5584
|
-
# Configure workflow interpreter options
|
|
5585
|
-
wdl_options: WDLContext = {
|
|
5586
|
-
"execution_dir": execution_dir,
|
|
5587
|
-
"container": options.container,
|
|
5588
|
-
"task_path": target.name,
|
|
5589
|
-
"namespace": target.name,
|
|
5590
|
-
"all_call_outputs": options.all_call_outputs,
|
|
5591
|
-
}
|
|
5592
|
-
assert wdl_options.get("container") is not None
|
|
5593
|
-
|
|
5594
5821
|
# Run the workflow and get its outputs namespaced with the workflow name.
|
|
5595
5822
|
root_job = make_root_job(
|
|
5596
5823
|
target,
|