toil 8.1.0b1__py3-none-any.whl → 9.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +0 -35
- toil/batchSystems/abstractBatchSystem.py +1 -1
- toil/batchSystems/abstractGridEngineBatchSystem.py +1 -1
- toil/batchSystems/awsBatch.py +1 -1
- toil/batchSystems/cleanup_support.py +1 -1
- toil/batchSystems/kubernetes.py +53 -7
- toil/batchSystems/local_support.py +1 -1
- toil/batchSystems/mesos/batchSystem.py +13 -8
- toil/batchSystems/mesos/test/__init__.py +3 -2
- toil/batchSystems/registry.py +15 -118
- toil/batchSystems/singleMachine.py +1 -1
- toil/batchSystems/slurm.py +27 -26
- toil/bus.py +5 -3
- toil/common.py +59 -12
- toil/cwl/cwltoil.py +81 -38
- toil/cwl/utils.py +103 -3
- toil/job.py +64 -49
- toil/jobStores/abstractJobStore.py +35 -239
- toil/jobStores/aws/jobStore.py +2 -1
- toil/jobStores/fileJobStore.py +27 -2
- toil/jobStores/googleJobStore.py +110 -33
- toil/leader.py +9 -0
- toil/lib/accelerators.py +4 -2
- toil/lib/aws/utils.py.orig +504 -0
- toil/lib/bioio.py +1 -1
- toil/lib/docker.py +252 -91
- toil/lib/dockstore.py +11 -3
- toil/lib/exceptions.py +5 -3
- toil/lib/generatedEC2Lists.py +81 -19
- toil/lib/history.py +87 -13
- toil/lib/history_submission.py +23 -9
- toil/lib/io.py +34 -22
- toil/lib/misc.py +8 -2
- toil/lib/plugins.py +106 -0
- toil/lib/resources.py +2 -1
- toil/lib/threading.py +11 -10
- toil/lib/url.py +320 -0
- toil/options/common.py +8 -0
- toil/options/cwl.py +13 -1
- toil/options/runner.py +17 -10
- toil/options/wdl.py +22 -0
- toil/provisioners/aws/awsProvisioner.py +25 -2
- toil/server/api_spec/LICENSE +201 -0
- toil/server/api_spec/README.rst +5 -0
- toil/server/app.py +12 -6
- toil/server/cli/wes_cwl_runner.py +3 -2
- toil/server/wes/abstract_backend.py +21 -43
- toil/server/wes/toil_backend.py +2 -2
- toil/test/__init__.py +275 -115
- toil/test/batchSystems/batchSystemTest.py +228 -213
- toil/test/batchSystems/batch_system_plugin_test.py +7 -0
- toil/test/batchSystems/test_slurm.py +27 -0
- toil/test/cactus/pestis.tar.gz +0 -0
- toil/test/conftest.py +7 -0
- toil/test/cwl/2.fasta +11 -0
- toil/test/cwl/2.fastq +12 -0
- toil/test/cwl/conftest.py +1 -1
- toil/test/cwl/cwlTest.py +1175 -870
- toil/test/cwl/directory/directory/file.txt +15 -0
- toil/test/cwl/download_directory_file.json +4 -0
- toil/test/cwl/download_directory_s3.json +4 -0
- toil/test/cwl/download_file.json +6 -0
- toil/test/cwl/download_http.json +6 -0
- toil/test/cwl/download_https.json +6 -0
- toil/test/cwl/download_s3.json +6 -0
- toil/test/cwl/download_subdirectory_file.json +5 -0
- toil/test/cwl/download_subdirectory_s3.json +5 -0
- toil/test/cwl/empty.json +1 -0
- toil/test/cwl/mock_mpi/fake_mpi.yml +8 -0
- toil/test/cwl/mock_mpi/fake_mpi_run.py +42 -0
- toil/test/cwl/optional-file-exists.json +6 -0
- toil/test/cwl/optional-file-missing.json +6 -0
- toil/test/cwl/preemptible_expression.json +1 -0
- toil/test/cwl/revsort-job-missing.json +6 -0
- toil/test/cwl/revsort-job.json +6 -0
- toil/test/cwl/s3_secondary_file.json +16 -0
- toil/test/cwl/seqtk_seq_job.json +6 -0
- toil/test/cwl/stream.json +6 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.dat +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f1 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f1i +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f2 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f2_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f3 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f3_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f4 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f4_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f5 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.info +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.lock +0 -0
- toil/test/cwl/whale.txt +16 -0
- toil/test/docs/scripts/example_alwaysfail.py +38 -0
- toil/test/docs/scripts/example_alwaysfail_with_files.wdl +33 -0
- toil/test/docs/scripts/example_cachingbenchmark.py +117 -0
- toil/test/docs/scripts/stagingExampleFiles/in.txt +1 -0
- toil/test/docs/scripts/stagingExampleFiles/out.txt +2 -0
- toil/test/docs/scripts/tutorial_arguments.py +23 -0
- toil/test/docs/scripts/tutorial_debugging.patch +12 -0
- toil/test/docs/scripts/tutorial_debugging_hangs.wdl +126 -0
- toil/test/docs/scripts/tutorial_debugging_works.wdl +129 -0
- toil/test/docs/scripts/tutorial_docker.py +20 -0
- toil/test/docs/scripts/tutorial_dynamic.py +24 -0
- toil/test/docs/scripts/tutorial_encapsulation.py +28 -0
- toil/test/docs/scripts/tutorial_encapsulation2.py +29 -0
- toil/test/docs/scripts/tutorial_helloworld.py +15 -0
- toil/test/docs/scripts/tutorial_invokeworkflow.py +27 -0
- toil/test/docs/scripts/tutorial_invokeworkflow2.py +30 -0
- toil/test/docs/scripts/tutorial_jobfunctions.py +22 -0
- toil/test/docs/scripts/tutorial_managing.py +29 -0
- toil/test/docs/scripts/tutorial_managing2.py +56 -0
- toil/test/docs/scripts/tutorial_multiplejobs.py +25 -0
- toil/test/docs/scripts/tutorial_multiplejobs2.py +21 -0
- toil/test/docs/scripts/tutorial_multiplejobs3.py +22 -0
- toil/test/docs/scripts/tutorial_promises.py +25 -0
- toil/test/docs/scripts/tutorial_promises2.py +30 -0
- toil/test/docs/scripts/tutorial_quickstart.py +22 -0
- toil/test/docs/scripts/tutorial_requirements.py +44 -0
- toil/test/docs/scripts/tutorial_services.py +45 -0
- toil/test/docs/scripts/tutorial_staging.py +45 -0
- toil/test/docs/scripts/tutorial_stats.py +64 -0
- toil/test/docs/scriptsTest.py +2 -1
- toil/test/lib/aws/test_iam.py +3 -1
- toil/test/lib/dockerTest.py +205 -122
- toil/test/lib/test_history.py +101 -77
- toil/test/lib/test_url.py +69 -0
- toil/test/lib/url_plugin_test.py +105 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +13 -10
- toil/test/provisioners/clusterTest.py +17 -4
- toil/test/provisioners/gceProvisionerTest.py +17 -15
- toil/test/server/serverTest.py +78 -36
- toil/test/sort/sort.py +4 -1
- toil/test/src/busTest.py +17 -17
- toil/test/src/deferredFunctionTest.py +145 -132
- toil/test/src/importExportFileTest.py +71 -63
- toil/test/src/jobEncapsulationTest.py +27 -28
- toil/test/src/jobServiceTest.py +149 -133
- toil/test/src/jobTest.py +219 -211
- toil/test/src/miscTests.py +66 -60
- toil/test/src/promisedRequirementTest.py +163 -169
- toil/test/src/regularLogTest.py +24 -24
- toil/test/src/resourceTest.py +82 -76
- toil/test/src/restartDAGTest.py +51 -47
- toil/test/src/resumabilityTest.py +24 -19
- toil/test/src/retainTempDirTest.py +60 -57
- toil/test/src/systemTest.py +17 -13
- toil/test/src/threadingTest.py +29 -32
- toil/test/utils/ABCWorkflowDebug/B_file.txt +1 -0
- toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +204 -0
- toil/test/utils/ABCWorkflowDebug/mkFile.py +16 -0
- toil/test/utils/ABCWorkflowDebug/sleep.cwl +12 -0
- toil/test/utils/ABCWorkflowDebug/sleep.yaml +1 -0
- toil/test/utils/toilDebugTest.py +117 -102
- toil/test/utils/toilKillTest.py +54 -53
- toil/test/utils/utilsTest.py +303 -229
- toil/test/wdl/lint_error.wdl +9 -0
- toil/test/wdl/md5sum/empty_file.json +1 -0
- toil/test/wdl/md5sum/md5sum-gs.json +1 -0
- toil/test/wdl/md5sum/md5sum.1.0.wdl +32 -0
- toil/test/wdl/md5sum/md5sum.input +1 -0
- toil/test/wdl/md5sum/md5sum.json +1 -0
- toil/test/wdl/md5sum/md5sum.wdl +25 -0
- toil/test/wdl/miniwdl_self_test/inputs-namespaced.json +1 -0
- toil/test/wdl/miniwdl_self_test/inputs.json +1 -0
- toil/test/wdl/miniwdl_self_test/self_test.wdl +40 -0
- toil/test/wdl/standard_library/as_map.json +16 -0
- toil/test/wdl/standard_library/as_map_as_input.wdl +23 -0
- toil/test/wdl/standard_library/as_pairs.json +7 -0
- toil/test/wdl/standard_library/as_pairs_as_input.wdl +23 -0
- toil/test/wdl/standard_library/ceil.json +3 -0
- toil/test/wdl/standard_library/ceil_as_command.wdl +16 -0
- toil/test/wdl/standard_library/ceil_as_input.wdl +16 -0
- toil/test/wdl/standard_library/collect_by_key.json +1 -0
- toil/test/wdl/standard_library/collect_by_key_as_input.wdl +23 -0
- toil/test/wdl/standard_library/cross.json +11 -0
- toil/test/wdl/standard_library/cross_as_input.wdl +19 -0
- toil/test/wdl/standard_library/flatten.json +7 -0
- toil/test/wdl/standard_library/flatten_as_input.wdl +18 -0
- toil/test/wdl/standard_library/floor.json +3 -0
- toil/test/wdl/standard_library/floor_as_command.wdl +16 -0
- toil/test/wdl/standard_library/floor_as_input.wdl +16 -0
- toil/test/wdl/standard_library/keys.json +8 -0
- toil/test/wdl/standard_library/keys_as_input.wdl +24 -0
- toil/test/wdl/standard_library/length.json +7 -0
- toil/test/wdl/standard_library/length_as_input.wdl +16 -0
- toil/test/wdl/standard_library/length_as_input_with_map.json +7 -0
- toil/test/wdl/standard_library/length_as_input_with_map.wdl +17 -0
- toil/test/wdl/standard_library/length_invalid.json +3 -0
- toil/test/wdl/standard_library/range.json +3 -0
- toil/test/wdl/standard_library/range_0.json +3 -0
- toil/test/wdl/standard_library/range_as_input.wdl +17 -0
- toil/test/wdl/standard_library/range_invalid.json +3 -0
- toil/test/wdl/standard_library/read_boolean.json +3 -0
- toil/test/wdl/standard_library/read_boolean_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_float.json +3 -0
- toil/test/wdl/standard_library/read_float_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_int.json +3 -0
- toil/test/wdl/standard_library/read_int_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_json.json +3 -0
- toil/test/wdl/standard_library/read_json_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_lines.json +3 -0
- toil/test/wdl/standard_library/read_lines_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_map.json +3 -0
- toil/test/wdl/standard_library/read_map_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_string.json +3 -0
- toil/test/wdl/standard_library/read_string_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_tsv.json +3 -0
- toil/test/wdl/standard_library/read_tsv_as_output.wdl +31 -0
- toil/test/wdl/standard_library/round.json +3 -0
- toil/test/wdl/standard_library/round_as_command.wdl +16 -0
- toil/test/wdl/standard_library/round_as_input.wdl +16 -0
- toil/test/wdl/standard_library/size.json +3 -0
- toil/test/wdl/standard_library/size_as_command.wdl +17 -0
- toil/test/wdl/standard_library/size_as_output.wdl +36 -0
- toil/test/wdl/standard_library/stderr.json +3 -0
- toil/test/wdl/standard_library/stderr_as_output.wdl +30 -0
- toil/test/wdl/standard_library/stdout.json +3 -0
- toil/test/wdl/standard_library/stdout_as_output.wdl +30 -0
- toil/test/wdl/standard_library/sub.json +3 -0
- toil/test/wdl/standard_library/sub_as_input.wdl +17 -0
- toil/test/wdl/standard_library/sub_as_input_with_file.wdl +17 -0
- toil/test/wdl/standard_library/transpose.json +6 -0
- toil/test/wdl/standard_library/transpose_as_input.wdl +18 -0
- toil/test/wdl/standard_library/write_json.json +6 -0
- toil/test/wdl/standard_library/write_json_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_lines.json +7 -0
- toil/test/wdl/standard_library/write_lines_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_map.json +6 -0
- toil/test/wdl/standard_library/write_map_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_tsv.json +6 -0
- toil/test/wdl/standard_library/write_tsv_as_command.wdl +17 -0
- toil/test/wdl/standard_library/zip.json +12 -0
- toil/test/wdl/standard_library/zip_as_input.wdl +19 -0
- toil/test/wdl/test.csv +3 -0
- toil/test/wdl/test.tsv +3 -0
- toil/test/wdl/testfiles/croo.wdl +38 -0
- toil/test/wdl/testfiles/drop_files.wdl +62 -0
- toil/test/wdl/testfiles/drop_files_subworkflow.wdl +13 -0
- toil/test/wdl/testfiles/empty.txt +0 -0
- toil/test/wdl/testfiles/not_enough_outputs.wdl +33 -0
- toil/test/wdl/testfiles/random.wdl +66 -0
- toil/test/wdl/testfiles/read_file.wdl +18 -0
- toil/test/wdl/testfiles/string_file_coercion.json +1 -0
- toil/test/wdl/testfiles/string_file_coercion.wdl +35 -0
- toil/test/wdl/testfiles/test.json +4 -0
- toil/test/wdl/testfiles/test_boolean.txt +1 -0
- toil/test/wdl/testfiles/test_float.txt +1 -0
- toil/test/wdl/testfiles/test_int.txt +1 -0
- toil/test/wdl/testfiles/test_lines.txt +5 -0
- toil/test/wdl/testfiles/test_map.txt +2 -0
- toil/test/wdl/testfiles/test_string.txt +1 -0
- toil/test/wdl/testfiles/url_to_file.wdl +13 -0
- toil/test/wdl/testfiles/url_to_optional_file.wdl +14 -0
- toil/test/wdl/testfiles/vocab.json +1 -0
- toil/test/wdl/testfiles/vocab.wdl +66 -0
- toil/test/wdl/testfiles/wait.wdl +34 -0
- toil/test/wdl/wdl_specification/type_pair.json +23 -0
- toil/test/wdl/wdl_specification/type_pair_basic.wdl +36 -0
- toil/test/wdl/wdl_specification/type_pair_with_files.wdl +36 -0
- toil/test/wdl/wdl_specification/v1_spec.json +1 -0
- toil/test/wdl/wdl_specification/v1_spec_declaration.wdl +39 -0
- toil/test/wdl/wdltoil_test.py +751 -529
- toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
- toil/utils/toilSshCluster.py +23 -0
- toil/utils/toilUpdateEC2Instances.py +1 -0
- toil/version.py +5 -5
- toil/wdl/wdltoil.py +518 -437
- toil/worker.py +11 -6
- {toil-8.1.0b1.dist-info → toil-9.0.0.dist-info}/METADATA +25 -24
- toil-9.0.0.dist-info/RECORD +444 -0
- {toil-8.1.0b1.dist-info → toil-9.0.0.dist-info}/WHEEL +1 -1
- toil-8.1.0b1.dist-info/RECORD +0 -259
- {toil-8.1.0b1.dist-info → toil-9.0.0.dist-info}/entry_points.txt +0 -0
- {toil-8.1.0b1.dist-info → toil-9.0.0.dist-info/licenses}/LICENSE +0 -0
- {toil-8.1.0b1.dist-info → toil-9.0.0.dist-info}/top_level.txt +0 -0
toil/wdl/wdltoil.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
from __future__ import annotations
|
|
16
16
|
|
|
17
17
|
import asyncio
|
|
18
|
+
import copy
|
|
18
19
|
import errno
|
|
19
20
|
import hashlib
|
|
20
21
|
import io
|
|
@@ -62,13 +63,14 @@ else:
|
|
|
62
63
|
|
|
63
64
|
from functools import partial
|
|
64
65
|
from urllib.error import HTTPError
|
|
65
|
-
from urllib.parse import quote, unquote, urljoin, urlsplit
|
|
66
|
+
from urllib.parse import quote, unquote, urljoin, urlsplit
|
|
66
67
|
|
|
67
68
|
import WDL.Error
|
|
68
69
|
import WDL.runtime.config
|
|
69
70
|
from configargparse import ArgParser, Namespace
|
|
70
71
|
from WDL._util import byte_size_units, chmod_R_plus
|
|
71
|
-
from WDL.CLI import print_error
|
|
72
|
+
from WDL.CLI import print_error, outline
|
|
73
|
+
import WDL.Lint
|
|
72
74
|
from WDL.runtime.backend.docker_swarm import SwarmContainer
|
|
73
75
|
from WDL.runtime.backend.singularity import SingularityContainer
|
|
74
76
|
from WDL.runtime.error import DownloadFailed
|
|
@@ -110,6 +112,7 @@ from toil.lib.misc import get_user_name
|
|
|
110
112
|
from toil.lib.resources import ResourceMonitor
|
|
111
113
|
from toil.lib.threading import global_mutex
|
|
112
114
|
from toil.provisioners.clusterScaler import JobTooBigError
|
|
115
|
+
from toil.lib.url import URLAccess
|
|
113
116
|
|
|
114
117
|
logger = logging.getLogger(__name__)
|
|
115
118
|
|
|
@@ -292,207 +295,6 @@ def report_wdl_errors(
|
|
|
292
295
|
return decorator
|
|
293
296
|
|
|
294
297
|
|
|
295
|
-
def remove_common_leading_whitespace(
|
|
296
|
-
expression: WDL.Expr.String,
|
|
297
|
-
tolerate_blanks: bool = True,
|
|
298
|
-
tolerate_dedents: bool = False,
|
|
299
|
-
tolerate_all_whitespace: bool = True,
|
|
300
|
-
debug: bool = False,
|
|
301
|
-
) -> WDL.Expr.String:
|
|
302
|
-
"""
|
|
303
|
-
Remove "common leading whitespace" as defined in the WDL 1.1 spec.
|
|
304
|
-
|
|
305
|
-
See <https://github.com/openwdl/wdl/blob/main/versions/1.1/SPEC.md#stripping-leading-whitespace>.
|
|
306
|
-
|
|
307
|
-
Operates on a WDL.Expr.String expression that has already been parsed.
|
|
308
|
-
|
|
309
|
-
:param tolerate_blanks: If True, don't allow totally blank lines to zero
|
|
310
|
-
the common whitespace.
|
|
311
|
-
|
|
312
|
-
:param tolerate_dedents: If True, remove as much of the whitespace on the
|
|
313
|
-
first indented line as is found on subesquent lines, regardless of
|
|
314
|
-
whether later lines are out-dented relative to it.
|
|
315
|
-
|
|
316
|
-
:param tolerate_all_whitespace: If True, don't allow all-whitespace lines
|
|
317
|
-
to reduce the common whitespace prefix.
|
|
318
|
-
|
|
319
|
-
:param debug: If True, the function will show its work by logging at debug
|
|
320
|
-
level.
|
|
321
|
-
"""
|
|
322
|
-
|
|
323
|
-
# The expression has a "parts" list consisting of interleaved string
|
|
324
|
-
# literals and placeholder expressions.
|
|
325
|
-
#
|
|
326
|
-
# TODO: We assume that there are no newlines in the placeholders.
|
|
327
|
-
#
|
|
328
|
-
# TODO: Look at the placeholders and their line and end_line values and try
|
|
329
|
-
# and guess if they should reduce the amount of common whitespace.
|
|
330
|
-
|
|
331
|
-
if debug:
|
|
332
|
-
logger.debug("Parts: %s", expression.parts)
|
|
333
|
-
|
|
334
|
-
# We split the parts list into lines, which are also interleaved string
|
|
335
|
-
# literals and placeholder expressions.
|
|
336
|
-
lines: list[list[str | WDL.Expr.Placeholder]] = [[]]
|
|
337
|
-
for part in expression.parts:
|
|
338
|
-
if isinstance(part, str):
|
|
339
|
-
# It's a string. Split it into lines.
|
|
340
|
-
part_lines = part.split("\n")
|
|
341
|
-
# Part before any newline goes at the end of the current line
|
|
342
|
-
lines[-1].append(part_lines[0])
|
|
343
|
-
for part_line in part_lines[1:]:
|
|
344
|
-
# Any part after a newline starts a new line
|
|
345
|
-
lines.append([part_line])
|
|
346
|
-
else:
|
|
347
|
-
# It's a placeholder. Put it at the end of the current line.
|
|
348
|
-
lines[-1].append(part)
|
|
349
|
-
|
|
350
|
-
if debug:
|
|
351
|
-
logger.debug("Lines: %s", lines)
|
|
352
|
-
|
|
353
|
-
# Then we compute the common amount of leading whitespace on all the lines,
|
|
354
|
-
# looking at the first string literal.
|
|
355
|
-
# This will be the longest common whitespace prefix, or None if not yet detected.
|
|
356
|
-
common_whitespace_prefix: str | None = None
|
|
357
|
-
for line in lines:
|
|
358
|
-
if len(line) == 0:
|
|
359
|
-
# TODO: how should totally empty lines be handled? Not in the spec!
|
|
360
|
-
if not tolerate_blanks:
|
|
361
|
-
# There's no leading whitespace here!
|
|
362
|
-
common_whitespace_prefix = ""
|
|
363
|
-
continue
|
|
364
|
-
elif isinstance(line[0], WDL.Expr.Placeholder):
|
|
365
|
-
# TODO: How can we convert MiniWDL's column numbers into space/tab counts or sequences?
|
|
366
|
-
#
|
|
367
|
-
# For now just skip these too.
|
|
368
|
-
continue
|
|
369
|
-
else:
|
|
370
|
-
# The line starts with a string
|
|
371
|
-
assert isinstance(line[0], str)
|
|
372
|
-
if len(line[0]) == 0:
|
|
373
|
-
# Still totally empty though!
|
|
374
|
-
if not tolerate_blanks:
|
|
375
|
-
# There's no leading whitespace here!
|
|
376
|
-
common_whitespace_prefix = ""
|
|
377
|
-
continue
|
|
378
|
-
if (
|
|
379
|
-
len(line) == 1
|
|
380
|
-
and tolerate_all_whitespace
|
|
381
|
-
and all(x in (" ", "\t") for x in line[0])
|
|
382
|
-
):
|
|
383
|
-
# All-whitespace lines shouldn't count
|
|
384
|
-
continue
|
|
385
|
-
# TODO: There are good algorithms for common prefixes. This is a bad one.
|
|
386
|
-
# Find the number of leading whitespace characters
|
|
387
|
-
line_whitespace_end = 0
|
|
388
|
-
while line_whitespace_end < len(line[0]) and line[0][
|
|
389
|
-
line_whitespace_end
|
|
390
|
-
] in (" ", "\t"):
|
|
391
|
-
line_whitespace_end += 1
|
|
392
|
-
# Find the string of leading whitespace characters
|
|
393
|
-
line_whitespace_prefix = line[0][:line_whitespace_end]
|
|
394
|
-
|
|
395
|
-
if " " in line_whitespace_prefix and "\t" in line_whitespace_prefix:
|
|
396
|
-
# Warn and don't change anything if spaces and tabs are mixed, per the spec.
|
|
397
|
-
logger.warning(
|
|
398
|
-
"Line in command at %s mixes leading spaces and tabs! Not removing leading whitespace!",
|
|
399
|
-
expression.pos,
|
|
400
|
-
)
|
|
401
|
-
return expression
|
|
402
|
-
|
|
403
|
-
if common_whitespace_prefix is None:
|
|
404
|
-
# This is the first line we found, so it automatically has the common prefic
|
|
405
|
-
common_whitespace_prefix = line_whitespace_prefix
|
|
406
|
-
elif not tolerate_dedents:
|
|
407
|
-
# Trim the common prefix down to what we have for this line
|
|
408
|
-
if not line_whitespace_prefix.startswith(common_whitespace_prefix):
|
|
409
|
-
# Shorten to the real shared prefix.
|
|
410
|
-
# Hackily make os.path do it for us,
|
|
411
|
-
# character-by-character. See
|
|
412
|
-
# <https://stackoverflow.com/a/6718435>
|
|
413
|
-
common_whitespace_prefix = os.path.commonprefix(
|
|
414
|
-
[common_whitespace_prefix, line_whitespace_prefix]
|
|
415
|
-
)
|
|
416
|
-
|
|
417
|
-
if common_whitespace_prefix is None:
|
|
418
|
-
common_whitespace_prefix = ""
|
|
419
|
-
|
|
420
|
-
if debug:
|
|
421
|
-
logger.debug("Common Prefix: '%s'", common_whitespace_prefix)
|
|
422
|
-
|
|
423
|
-
# Then we trim that much whitespace off all the leading strings.
|
|
424
|
-
# We tolerate the common prefix not *actually* being common and remove as
|
|
425
|
-
# much of it as is there, to support tolerate_dedents.
|
|
426
|
-
|
|
427
|
-
def first_mismatch(prefix: str, value: str) -> int:
|
|
428
|
-
"""
|
|
429
|
-
Get the index of the first character in value that does not match the corresponding character in prefix, or the length of the shorter string.
|
|
430
|
-
"""
|
|
431
|
-
for n, (c1, c2) in enumerate(zip(prefix, value)):
|
|
432
|
-
if c1 != c2:
|
|
433
|
-
return n
|
|
434
|
-
return min(len(prefix), len(value))
|
|
435
|
-
|
|
436
|
-
# Trim up to the first mismatch vs. the common prefix if the line starts with a string literal.
|
|
437
|
-
stripped_lines = [
|
|
438
|
-
(
|
|
439
|
-
(
|
|
440
|
-
cast(
|
|
441
|
-
list[Union[str, WDL.Expr.Placeholder]],
|
|
442
|
-
[line[0][first_mismatch(common_whitespace_prefix, line[0]) :]],
|
|
443
|
-
)
|
|
444
|
-
+ line[1:]
|
|
445
|
-
)
|
|
446
|
-
if len(line) > 0 and isinstance(line[0], str)
|
|
447
|
-
else line
|
|
448
|
-
)
|
|
449
|
-
for line in lines
|
|
450
|
-
]
|
|
451
|
-
if debug:
|
|
452
|
-
logger.debug("Stripped Lines: %s", stripped_lines)
|
|
453
|
-
|
|
454
|
-
# Then we reassemble the parts and make a new expression.
|
|
455
|
-
# Build lists and turn the lists into strings later
|
|
456
|
-
new_parts: list[list[str] | WDL.Expr.Placeholder] = []
|
|
457
|
-
for i, line in enumerate(stripped_lines):
|
|
458
|
-
if i > 0:
|
|
459
|
-
# This is a second line, so we need to tack on a newline.
|
|
460
|
-
if len(new_parts) > 0 and isinstance(new_parts[-1], list):
|
|
461
|
-
# Tack on to existing string collection
|
|
462
|
-
new_parts[-1].append("\n")
|
|
463
|
-
else:
|
|
464
|
-
# Make a new string collection
|
|
465
|
-
new_parts.append(["\n"])
|
|
466
|
-
if len(line) > 0 and isinstance(line[0], str) and i > 0:
|
|
467
|
-
# Line starts with a string we need to merge with the last string.
|
|
468
|
-
# We know the previous line now ends with a string collection, so tack it on.
|
|
469
|
-
assert isinstance(new_parts[-1], list)
|
|
470
|
-
new_parts[-1].append(line[0])
|
|
471
|
-
# Make all the strings into string collections in the rest of the line
|
|
472
|
-
new_parts += [([x] if isinstance(x, str) else x) for x in line[1:]]
|
|
473
|
-
else:
|
|
474
|
-
# No string merge necessary
|
|
475
|
-
# Make all the strings into string collections in the whole line
|
|
476
|
-
new_parts += [([x] if isinstance(x, str) else x) for x in line]
|
|
477
|
-
|
|
478
|
-
if debug:
|
|
479
|
-
logger.debug("New Parts: %s", new_parts)
|
|
480
|
-
|
|
481
|
-
# Now go back to the alternating strings and placeholders that MiniWDL wants
|
|
482
|
-
new_parts_merged: list[str | WDL.Expr.Placeholder] = [
|
|
483
|
-
("".join(x) if isinstance(x, list) else x) for x in new_parts
|
|
484
|
-
]
|
|
485
|
-
|
|
486
|
-
if debug:
|
|
487
|
-
logger.debug("New Parts Merged: %s", new_parts_merged)
|
|
488
|
-
|
|
489
|
-
modified = WDL.Expr.String(expression.pos, new_parts_merged, expression.command)
|
|
490
|
-
# Fake the type checking of the modified expression.
|
|
491
|
-
# TODO: Make MiniWDL expose a real way to do this?
|
|
492
|
-
modified._type = expression._type
|
|
493
|
-
return modified
|
|
494
|
-
|
|
495
|
-
|
|
496
298
|
async def toil_read_source(
|
|
497
299
|
uri: str, path: list[str], importer: WDL.Tree.Document | None
|
|
498
300
|
) -> ReadSourceResult:
|
|
@@ -513,7 +315,7 @@ async def toil_read_source(
|
|
|
513
315
|
tried.append(candidate_uri)
|
|
514
316
|
try:
|
|
515
317
|
# TODO: this is probably sync work that would be better as async work here
|
|
516
|
-
|
|
318
|
+
URLAccess.read_from_url(candidate_uri, destination_buffer)
|
|
517
319
|
except Exception as e:
|
|
518
320
|
if isinstance(e, SyntaxError) or isinstance(e, NameError):
|
|
519
321
|
# These are probably actual problems with the code and not
|
|
@@ -917,8 +719,8 @@ def set_shared_fs_path(file: WDL.Value.File, path: str) -> WDL.Value.File:
|
|
|
917
719
|
|
|
918
720
|
|
|
919
721
|
def view_shared_fs_paths(
|
|
920
|
-
bindings:
|
|
921
|
-
) ->
|
|
722
|
+
bindings: WDLBindings,
|
|
723
|
+
) -> WDLBindings:
|
|
922
724
|
"""
|
|
923
725
|
Given WDL bindings, return a copy where all files have their shared filesystem paths as their values.
|
|
924
726
|
"""
|
|
@@ -1137,33 +939,50 @@ def choose_human_readable_directory(
|
|
|
1137
939
|
|
|
1138
940
|
def evaluate_decls_to_bindings(
|
|
1139
941
|
decls: list[WDL.Tree.Decl],
|
|
1140
|
-
all_bindings:
|
|
942
|
+
all_bindings: WDLBindings,
|
|
1141
943
|
standard_library: ToilWDLStdLibBase,
|
|
1142
944
|
include_previous: bool = False,
|
|
1143
945
|
drop_missing_files: bool = False,
|
|
1144
|
-
|
|
946
|
+
expressions_are_defaults: bool = False,
|
|
947
|
+
) -> WDLBindings:
|
|
1145
948
|
"""
|
|
1146
949
|
Evaluate decls with a given bindings environment and standard library.
|
|
950
|
+
|
|
1147
951
|
Creates a new bindings object that only contains the bindings from the given decls.
|
|
1148
952
|
Guarantees that each decl in `decls` can access the variables defined by the previous ones.
|
|
953
|
+
|
|
1149
954
|
:param all_bindings: Environment to use when evaluating decls
|
|
1150
955
|
:param decls: Decls to evaluate
|
|
1151
956
|
:param standard_library: Standard library
|
|
1152
|
-
:param include_previous: Whether to include the existing environment in the
|
|
1153
|
-
|
|
1154
|
-
|
|
957
|
+
:param include_previous: Whether to include the existing environment in the
|
|
958
|
+
new returned environment. This will be false for outputs where only
|
|
959
|
+
defined decls should be included
|
|
960
|
+
:param drop_missing_files: Whether to coerce nonexistent files to null. The
|
|
961
|
+
coerced elements will be checked that the transformation is valid.
|
|
962
|
+
Currently should only be enabled in output sections, see
|
|
963
|
+
https://github.com/openwdl/wdl/issues/673#issuecomment-2248828116.
|
|
964
|
+
:param expressions_are_defaults: If True, value expressions in decls are
|
|
965
|
+
treated as default values, and there may be existing values in the
|
|
966
|
+
incoming environment that take precedence. If False, each decl is taken
|
|
967
|
+
to be a fresh definition, and expressions are always evaluated and
|
|
968
|
+
used.
|
|
1155
969
|
:return: New bindings object
|
|
1156
970
|
"""
|
|
1157
971
|
# all_bindings contains current bindings + previous all_bindings
|
|
1158
972
|
# bindings only contains the decl bindings themselves so that bindings from other sections prior aren't included
|
|
1159
|
-
bindings:
|
|
973
|
+
bindings: WDLBindings = WDL.Env.Bindings()
|
|
1160
974
|
drop_if_missing_with_workdir = partial(
|
|
1161
975
|
drop_if_missing, standard_library=standard_library
|
|
1162
976
|
)
|
|
1163
977
|
for each_decl in decls:
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
978
|
+
if expressions_are_defaults:
|
|
979
|
+
output_value = evaluate_defaultable_decl(
|
|
980
|
+
each_decl, all_bindings, standard_library
|
|
981
|
+
)
|
|
982
|
+
else:
|
|
983
|
+
output_value = evaluate_decl(
|
|
984
|
+
each_decl, all_bindings, standard_library
|
|
985
|
+
)
|
|
1167
986
|
if drop_missing_files:
|
|
1168
987
|
dropped_output_value = map_over_typed_files_in_value(
|
|
1169
988
|
output_value, drop_if_missing_with_workdir
|
|
@@ -1222,7 +1041,7 @@ class NonDownloadingSize(WDL.StdLib._Size):
|
|
|
1222
1041
|
else:
|
|
1223
1042
|
# This is some other kind of remote file.
|
|
1224
1043
|
# We need to get its size from the URI.
|
|
1225
|
-
item_size =
|
|
1044
|
+
item_size = URLAccess.get_size(uri)
|
|
1226
1045
|
if item_size is None:
|
|
1227
1046
|
# User asked for the size and we can't figure it out efficiently, so bail out.
|
|
1228
1047
|
raise RuntimeError(f"Attempt to check the size of {uri} failed")
|
|
@@ -1245,7 +1064,10 @@ class NonDownloadingSize(WDL.StdLib._Size):
|
|
|
1245
1064
|
return WDL.Value.Float(total_size)
|
|
1246
1065
|
|
|
1247
1066
|
|
|
1248
|
-
def
|
|
1067
|
+
def extract_file_values(environment: WDLBindings) -> list[str]:
|
|
1068
|
+
"""
|
|
1069
|
+
Get a list of all File object values in the given bindings.
|
|
1070
|
+
"""
|
|
1249
1071
|
filenames = list()
|
|
1250
1072
|
|
|
1251
1073
|
def add_filename(file: WDL.Value.File) -> WDL.Value.File:
|
|
@@ -1255,6 +1077,22 @@ def extract_workflow_inputs(environment: WDLBindings) -> list[str]:
|
|
|
1255
1077
|
map_over_files_in_bindings(environment, add_filename)
|
|
1256
1078
|
return filenames
|
|
1257
1079
|
|
|
1080
|
+
def extract_file_virtualized_values(environment: WDLBindings) -> list[str]:
|
|
1081
|
+
"""
|
|
1082
|
+
Get a list of all File object virtualized values in the given bindings.
|
|
1083
|
+
|
|
1084
|
+
If a file hasn't been virtualized, it won't contribute to the list.
|
|
1085
|
+
"""
|
|
1086
|
+
values = list()
|
|
1087
|
+
|
|
1088
|
+
def add_value(file: WDL.Value.File) -> WDL.Value.File:
|
|
1089
|
+
value = get_file_virtualized_value(file)
|
|
1090
|
+
if value is not None:
|
|
1091
|
+
values.append(value)
|
|
1092
|
+
return file
|
|
1093
|
+
|
|
1094
|
+
map_over_files_in_bindings(environment, add_value)
|
|
1095
|
+
return values
|
|
1258
1096
|
|
|
1259
1097
|
def convert_files(
|
|
1260
1098
|
environment: WDLBindings,
|
|
@@ -1263,19 +1101,21 @@ def convert_files(
|
|
|
1263
1101
|
task_path: str,
|
|
1264
1102
|
) -> WDLBindings:
|
|
1265
1103
|
"""
|
|
1266
|
-
|
|
1104
|
+
Fill in the virtualized_value fields for File objects in a WDL environment.
|
|
1267
1105
|
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
:param
|
|
1271
|
-
|
|
1106
|
+
:param environment: Bindings to evaluate on. Will not be modified.
|
|
1107
|
+
:param file_to_id: Maps from imported URI to Toil FileID with the data.
|
|
1108
|
+
:param file_to_data: Maps from WDL-level file calue to metadata about the
|
|
1109
|
+
file, including URI that would have been imported.
|
|
1110
|
+
:return: new bindings object with the annotated File objects in it.
|
|
1272
1111
|
"""
|
|
1273
1112
|
dir_ids = {t[1] for t in file_to_data.values()}
|
|
1274
1113
|
dir_to_id = {k: uuid.uuid4() for k in dir_ids}
|
|
1275
1114
|
|
|
1276
1115
|
def convert_file_to_uri(file: WDL.Value.File) -> WDL.Value.File:
|
|
1277
1116
|
"""
|
|
1278
|
-
|
|
1117
|
+
Produce a WDL File with the virtualized_value set to the Toil URI for
|
|
1118
|
+
the already-imported data, but the same value.
|
|
1279
1119
|
"""
|
|
1280
1120
|
candidate_uri = file_to_data[file.value][0]
|
|
1281
1121
|
file_id = file_to_id[candidate_uri]
|
|
@@ -1352,7 +1192,7 @@ def convert_remote_files(
|
|
|
1352
1192
|
tried.append(candidate_uri)
|
|
1353
1193
|
try:
|
|
1354
1194
|
# Try polling existence first.
|
|
1355
|
-
polled_existence =
|
|
1195
|
+
polled_existence = URLAccess.url_exists(candidate_uri)
|
|
1356
1196
|
if polled_existence is False:
|
|
1357
1197
|
# Known not to exist
|
|
1358
1198
|
logger.debug("URL does not exist: %s", candidate_uri)
|
|
@@ -1638,32 +1478,35 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1638
1478
|
logger.debug("File has no virtualized value so not changing value")
|
|
1639
1479
|
return file
|
|
1640
1480
|
|
|
1481
|
+
def _resolve_devirtualized_to_uri(self, devirtualized: str) -> str:
    """
    Get a URI pointing to whatever URI or devirtualized file path is provided.

    This implementation hands the value to Toil.normalize_uri, using this
    object's execution directory as the directory path.

    NOTE(review): the previous documentation said this handles resolving
    symlinks using in-container paths if necessary. Nothing in this
    implementation does that; presumably a subclass overrides it -- confirm.

    :param devirtualized: A URI or a devirtualized file path.
    :return: The normalized URI.
    """
    base_dir = self.execution_dir
    normalized = Toil.normalize_uri(devirtualized, dir_path=base_dir)
    return normalized
|
|
1489
|
+
|
|
1641
1490
|
def _virtualize_file(
|
|
1642
1491
|
self, file: WDL.Value.File, enforce_existence: bool = True
|
|
1643
1492
|
) -> WDL.Value.File:
|
|
1644
|
-
logger.debug("Virtualizing %s", file)
|
|
1645
|
-
# If enforce_existence is true, then if a file is detected as nonexistent, raise an error. Else, let it pass through
|
|
1646
1493
|
if get_file_virtualized_value(file) is not None:
|
|
1647
|
-
|
|
1494
|
+
# Already virtualized
|
|
1648
1495
|
return file
|
|
1649
1496
|
|
|
1650
|
-
|
|
1651
|
-
# We only want to error on a nonexistent file in the output section
|
|
1652
|
-
# Since we need to virtualize on task boundaries, don't enforce existence if on a boundary
|
|
1653
|
-
if is_standard_url(file.value):
|
|
1654
|
-
file_uri = Toil.normalize_uri(file.value)
|
|
1655
|
-
else:
|
|
1656
|
-
abs_filepath = (
|
|
1657
|
-
os.path.join(self.execution_dir, file.value)
|
|
1658
|
-
if self.execution_dir is not None
|
|
1659
|
-
else os.path.abspath(file.value)
|
|
1660
|
-
)
|
|
1661
|
-
file_uri = Toil.normalize_uri(abs_filepath)
|
|
1497
|
+
logger.debug("Virtualizing %s", file)
|
|
1662
1498
|
|
|
1663
|
-
|
|
1499
|
+
try:
|
|
1500
|
+
# Let the actual virtualization implementation signal a missing file
|
|
1501
|
+
virtualized_filename = self._virtualize_filename(file.value)
|
|
1502
|
+
except FileNotFoundError:
|
|
1503
|
+
if enforce_existence:
|
|
1504
|
+
raise
|
|
1505
|
+
else:
|
|
1664
1506
|
logger.debug("File appears nonexistent so marking it nonexistent")
|
|
1507
|
+
# Mark the file nonexistent.
|
|
1665
1508
|
return set_file_nonexistent(file, True)
|
|
1666
|
-
|
|
1509
|
+
|
|
1667
1510
|
logger.debug(
|
|
1668
1511
|
"For file %s got virtualized filename %s", file, virtualized_filename
|
|
1669
1512
|
)
|
|
@@ -1747,7 +1590,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1747
1590
|
# Open it exclusively
|
|
1748
1591
|
with open(dest_path, "xb") as dest_file:
|
|
1749
1592
|
# And save to it
|
|
1750
|
-
size, executable =
|
|
1593
|
+
size, executable = URLAccess.read_from_url(filename, dest_file)
|
|
1751
1594
|
if executable:
|
|
1752
1595
|
# Set the execute bit in the file's permissions
|
|
1753
1596
|
os.chmod(dest_path, os.stat(dest_path).st_mode | stat.S_IXUSR)
|
|
@@ -1846,9 +1689,12 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1846
1689
|
@memoize
|
|
1847
1690
|
def _virtualize_filename(self, filename: str) -> str:
|
|
1848
1691
|
"""
|
|
1849
|
-
from a local path
|
|
1692
|
+
from a local path or other URL, 'virtualize' into the filename as it should present in a File value.
|
|
1693
|
+
|
|
1694
|
+
New in Toil: the path or URL may not actually exist.
|
|
1850
1695
|
|
|
1851
1696
|
:param filename: Can be a local file path, URL (http, https, s3, gs), or toilfile
|
|
1697
|
+
:raises FileNotFoundError: if the file doesn't actually exist (new addition in Toil over MiniWDL)
|
|
1852
1698
|
"""
|
|
1853
1699
|
|
|
1854
1700
|
if is_toil_url(filename):
|
|
@@ -1868,7 +1714,9 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1868
1714
|
try:
|
|
1869
1715
|
imported = self._file_store.import_file(filename)
|
|
1870
1716
|
except FileNotFoundError:
|
|
1871
|
-
|
|
1717
|
+
# This might happen because we're also along the code path for
|
|
1718
|
+
# optional file outputs.
|
|
1719
|
+
logger.info(
|
|
1872
1720
|
"File at URL %s does not exist or is inaccessible." % filename
|
|
1873
1721
|
)
|
|
1874
1722
|
raise
|
|
@@ -1879,9 +1727,13 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1879
1727
|
filename,
|
|
1880
1728
|
e.code,
|
|
1881
1729
|
)
|
|
1730
|
+
# We don't need to handle translating error codes for not
|
|
1731
|
+
# found; import_file does it already.
|
|
1882
1732
|
raise
|
|
1883
1733
|
if imported is None:
|
|
1884
|
-
# Satisfy mypy
|
|
1734
|
+
# Satisfy mypy. This should never happen though as we don't
|
|
1735
|
+
# pass a shared file name (which is the only way import_file
|
|
1736
|
+
# returns None)
|
|
1885
1737
|
raise RuntimeError("Failed to import URL %s into jobstore." % filename)
|
|
1886
1738
|
file_basename = os.path.basename(urlsplit(filename).path)
|
|
1887
1739
|
# Get the URL to the parent directory and use that.
|
|
@@ -1890,23 +1742,19 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1890
1742
|
dir_id = self._parent_dir_to_ids.setdefault(parent_dir, uuid.uuid4())
|
|
1891
1743
|
result = pack_toil_uri(imported, self.task_path, dir_id, file_basename)
|
|
1892
1744
|
logger.debug("Virtualized %s as WDL file %s", filename, result)
|
|
1893
|
-
# We can't put the Toil URI in the virtualized_to_devirtualized
|
|
1894
|
-
#
|
|
1745
|
+
# We can't put the Toil URI in the virtualized_to_devirtualized
|
|
1746
|
+
# cache because it would point to the URL instead of a local file
|
|
1747
|
+
# on the machine, so only store the forward mapping
|
|
1895
1748
|
self._devirtualized_to_virtualized[filename] = result
|
|
1896
1749
|
return result
|
|
1897
1750
|
else:
|
|
1898
|
-
# Otherwise this is a local file and we want to fake it
|
|
1899
|
-
#
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
# To support relative paths from execution directory, join the execution dir and filename
|
|
1906
|
-
# If filename is already an abs path, join() will not do anything
|
|
1907
|
-
abs_filename = os.path.join(self.execution_dir, filename)
|
|
1908
|
-
else:
|
|
1909
|
-
abs_filename = os.path.abspath(filename)
|
|
1751
|
+
# Otherwise this is a local file name or URI and we want to fake it
|
|
1752
|
+
# as a Toil file store file
|
|
1753
|
+
|
|
1754
|
+
# Convert to a properly-absolutized file URI
|
|
1755
|
+
file_uri = Toil.normalize_uri(filename, dir_path=self.execution_dir)
|
|
1756
|
+
# Extract the absolute path name
|
|
1757
|
+
abs_filename = unquote(urlsplit(file_uri).path)
|
|
1910
1758
|
|
|
1911
1759
|
if abs_filename in self._devirtualized_to_virtualized:
|
|
1912
1760
|
# This is a previously devirtualized thing so we can just use the
|
|
@@ -1917,6 +1765,9 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1917
1765
|
)
|
|
1918
1766
|
return result
|
|
1919
1767
|
|
|
1768
|
+
if not os.path.exists(abs_filename):
|
|
1769
|
+
raise FileNotFoundError(abs_filename)
|
|
1770
|
+
|
|
1920
1771
|
file_id = self._file_store.writeGlobalFile(abs_filename)
|
|
1921
1772
|
|
|
1922
1773
|
file_dir = os.path.dirname(abs_filename)
|
|
@@ -1946,6 +1797,51 @@ class ToilWDLStdLibWorkflow(ToilWDLStdLibBase):
|
|
|
1946
1797
|
|
|
1947
1798
|
self._miniwdl_cache: Optional[WDL.runtime.cache.CallCache] = None
|
|
1948
1799
|
|
|
1800
|
+
def _virtualize_file(
    self, file: WDL.Value.File, enforce_existence: bool = True
) -> WDL.Value.File:
    """
    Virtualize a File, first attaching a shared filesystem path if needed.

    When a workflow coerces a string path or file: URI to a File at
    workflow scope, the cache filesystem path has to be filled in before
    the normal virtualization runs.

    :param file: The File to virtualize.
    :param enforce_existence: Passed through unchanged to the parent
        implementation.
    :return: Whatever the parent implementation returns for the (possibly
        annotated) File.
    """
    raw_value = file.value

    # Only touch Files that were never virtualized and carry no shared FS
    # path yet. (If the File came from a task, it would have a virtualized
    # value already.)
    if (
        get_file_virtualized_value(file) is None
        and get_shared_fs_path(file) is None
    ):
        value_is_url = is_any_url(raw_value)
        if not value_is_url or is_file_url(raw_value):
            # The value is a plain path or a file: URI that we just made
            # at workflow scope, so the path itself can serve as the
            # cache path.
            if value_is_url:
                # Pull the path out of the file URI
                shared_path = unquote(urlsplit(raw_value).path)
            else:
                # Already a file path
                shared_path = raw_value

            file = set_shared_fs_path(file, shared_path)

            logger.info(
                "Applied shared filesystem path %s to File %s that appears to "
                "have been coerced from String at workflow scope.",
                shared_path,
                file,
            )

    # Hand off to the regular virtualization logic
    return super()._virtualize_file(file, enforce_existence)
|
|
1840
|
+
|
|
1841
|
+
# TODO: If the workflow coerces a File to a String and back again, we
|
|
1842
|
+
# should have some way to recover the toilfile: URL it had in the job
|
|
1843
|
+
# store to avoid re-importing it.
|
|
1844
|
+
|
|
1949
1845
|
# This needs to be hash-compatible with MiniWDL.
|
|
1950
1846
|
# MiniWDL hooks _virtualize_filename
|
|
1951
1847
|
# <https://github.com/chanzuckerberg/miniwdl/blob/475dd3f3784d1390e6a0e880d43316a620114de3/WDL/runtime/workflow.py#L699-L729>,
|
|
@@ -1999,7 +1895,7 @@ class ToilWDLStdLibWorkflow(ToilWDLStdLibBase):
|
|
|
1999
1895
|
)
|
|
2000
1896
|
# Make an environment of "file_sha256" to that as a WDL string, and
|
|
2001
1897
|
# digest that, and make a write_ cache key. No need to transform to
|
|
2002
|
-
# shared FS paths
|
|
1898
|
+
# shared FS paths since no paths are in it.
|
|
2003
1899
|
log_bindings(
|
|
2004
1900
|
logger.debug, "Digesting file bindings:", [file_input_bindings]
|
|
2005
1901
|
)
|
|
@@ -2346,6 +2242,8 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
2346
2242
|
filenames.
|
|
2347
2243
|
"""
|
|
2348
2244
|
|
|
2245
|
+
logger.debug("WDL task outputs stdlib asked to virtualize %s", filename)
|
|
2246
|
+
|
|
2349
2247
|
if not is_any_url(filename) and not filename.startswith("/"):
|
|
2350
2248
|
# We are getting a bare relative path on the supposedly devirtualized side.
|
|
2351
2249
|
# Find a real path to it relative to the current directory override.
|
|
@@ -2394,8 +2292,12 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
2394
2292
|
logger.error(
|
|
2395
2293
|
"Handling broken symlink %s ultimately to %s", filename, here
|
|
2396
2294
|
)
|
|
2295
|
+
# This should produce a FileNotFoundError since we think of
|
|
2296
|
+
# broken symlinks as nonexistent.
|
|
2297
|
+
raise FileNotFoundError(filename)
|
|
2397
2298
|
filename = here
|
|
2398
|
-
|
|
2299
|
+
|
|
2300
|
+
logger.debug("WDL task outputs stdlib thinks we really need to virtualize %s", filename)
|
|
2399
2301
|
return super()._virtualize_filename(filename)
|
|
2400
2302
|
|
|
2401
2303
|
|
|
@@ -2450,11 +2352,15 @@ def evaluate_decl(
|
|
|
2450
2352
|
"""
|
|
2451
2353
|
Evaluate the expression of a declaration node, or raise an error.
|
|
2452
2354
|
"""
|
|
2453
|
-
|
|
2454
|
-
|
|
2455
|
-
|
|
2456
|
-
|
|
2457
|
-
|
|
2355
|
+
try:
|
|
2356
|
+
return evaluate_named_expression(
|
|
2357
|
+
node, node.name, node.type, node.expr, environment, stdlib
|
|
2358
|
+
)
|
|
2359
|
+
except Exception:
|
|
2360
|
+
# If something goes wrong, dump.
|
|
2361
|
+
logger.exception("Evaluation failed for %s", node)
|
|
2362
|
+
log_bindings(logger.error, "Statement was evaluated in:", [environment])
|
|
2363
|
+
raise
|
|
2458
2364
|
|
|
2459
2365
|
def evaluate_call_inputs(
|
|
2460
2366
|
context: WDL.Error.SourceNode | WDL.Error.SourcePosition,
|
|
@@ -2497,33 +2403,28 @@ def evaluate_defaultable_decl(
|
|
|
2497
2403
|
If the name of the declaration is already defined in the environment, return its value. Otherwise, return the evaluated expression.
|
|
2498
2404
|
"""
|
|
2499
2405
|
|
|
2500
|
-
|
|
2501
|
-
|
|
2502
|
-
|
|
2503
|
-
|
|
2504
|
-
)
|
|
2505
|
-
|
|
2506
|
-
|
|
2507
|
-
)
|
|
2508
|
-
|
|
2509
|
-
|
|
2510
|
-
return environment[node.name].coerce(node.type)
|
|
2511
|
-
else:
|
|
2512
|
-
return environment[node.name]
|
|
2406
|
+
if (
|
|
2407
|
+
node.name in environment
|
|
2408
|
+
and not isinstance(environment[node.name], WDL.Value.Null)
|
|
2409
|
+
) or (
|
|
2410
|
+
isinstance(environment.get(node.name), WDL.Value.Null)
|
|
2411
|
+
and node.type.optional
|
|
2412
|
+
):
|
|
2413
|
+
logger.debug("Name %s is already defined, not using default", node.name)
|
|
2414
|
+
if not isinstance(environment[node.name].type, type(node.type)):
|
|
2415
|
+
return environment[node.name].coerce(node.type)
|
|
2513
2416
|
else:
|
|
2514
|
-
|
|
2515
|
-
|
|
2516
|
-
|
|
2517
|
-
|
|
2518
|
-
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
|
|
2522
|
-
|
|
2523
|
-
|
|
2524
|
-
|
|
2525
|
-
log_bindings(logger.error, "Statement was evaluated in:", [environment])
|
|
2526
|
-
raise
|
|
2417
|
+
return environment[node.name]
|
|
2418
|
+
else:
|
|
2419
|
+
if node.type is not None and not node.type.optional and node.expr is None:
|
|
2420
|
+
# We need a value for this but there isn't one.
|
|
2421
|
+
raise WDL.Error.EvalError(
|
|
2422
|
+
node,
|
|
2423
|
+
f"Value for {node.name} was not provided and no default value is available",
|
|
2424
|
+
)
|
|
2425
|
+
logger.info("Defaulting %s to %s", node.name, node.expr)
|
|
2426
|
+
return evaluate_decl(node, environment, stdlib)
|
|
2427
|
+
|
|
2527
2428
|
|
|
2528
2429
|
|
|
2529
2430
|
# TODO: make these stdlib methods???
|
|
@@ -2535,7 +2436,7 @@ def devirtualize_files(
|
|
|
2535
2436
|
that are actually available to command line commands.
|
|
2536
2437
|
The same virtual file always maps to the same devirtualized filename even with duplicates
|
|
2537
2438
|
"""
|
|
2538
|
-
logger.
|
|
2439
|
+
logger.debug("Devirtualizing files")
|
|
2539
2440
|
return map_over_files_in_bindings(environment, stdlib._devirtualize_file)
|
|
2540
2441
|
|
|
2541
2442
|
|
|
@@ -2546,12 +2447,35 @@ def virtualize_files(
|
|
|
2546
2447
|
Make sure all the File values embedded in the given bindings point to files
|
|
2547
2448
|
that are usable from other machines.
|
|
2548
2449
|
"""
|
|
2549
|
-
logger.
|
|
2450
|
+
logger.debug("Virtualizing files")
|
|
2550
2451
|
virtualize_func = partial(
|
|
2551
2452
|
stdlib._virtualize_file, enforce_existence=enforce_existence
|
|
2552
2453
|
)
|
|
2553
2454
|
return map_over_files_in_bindings(environment, virtualize_func)
|
|
2554
2455
|
|
|
2456
|
+
def delete_dead_files(
    internal_bindings: WDLBindings,
    live_bindings_list: list[WDLBindings],
    file_store: AbstractFileStore,
) -> None:
    """
    Delete any files that are in the given bindings but not in the live list.

    Operates on the virtualized values of File objects anywhere in the
    bindings. Only virtualized values that are Toil URIs are deleted; any
    other value is left alone.

    :param internal_bindings: Bindings whose files are candidates for
        deletion.
    :param live_bindings_list: Bindings whose files must be kept.
    :param file_store: File store used to delete the dead files.
    """
    candidate_values = extract_file_virtualized_values(internal_bindings)
    live_values = [
        extract_file_virtualized_values(live_bindings)
        for live_bindings in live_bindings_list
    ]

    # Everything referenced internally that no live bindings still mention.
    dead_values = set(candidate_values).difference(*live_values)

    for file_uri in dead_values:
        if not is_toil_url(file_uri):
            # Not stored by us, so there is nothing to delete.
            continue
        logger.debug("Delete file %s that is not needed", file_uri)
        file_id, _task_path, _dir_id, _basename = unpack_toil_uri(file_uri)
        file_store.deleteGlobalFile(file_id)
|
|
2555
2479
|
|
|
2556
2480
|
def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:
|
|
2557
2481
|
"""
|
|
@@ -2612,7 +2536,7 @@ def drop_if_missing(
|
|
|
2612
2536
|
|
|
2613
2537
|
if filename is not None and is_any_url(filename):
|
|
2614
2538
|
try:
|
|
2615
|
-
if filename.startswith(TOIL_URI_SCHEME) or
|
|
2539
|
+
if filename.startswith(TOIL_URI_SCHEME) or URLAccess.url_exists(
|
|
2616
2540
|
filename
|
|
2617
2541
|
):
|
|
2618
2542
|
# We assume anything in the filestore actually exists.
|
|
@@ -2728,64 +2652,52 @@ def map_over_files_in_binding(
|
|
|
2728
2652
|
binding.info,
|
|
2729
2653
|
)
|
|
2730
2654
|
|
|
2655
|
+
def remove_expr_from_value(value: WDL.Value.Base) -> WDL.Value.Base:
    """
    Detach a WDL value, and every value nested in it, from its expression.

    :param value: Original WDL value. It is not modified.
    :return: New WDL value in which each truthy expr has been replaced by a
        Null expression at the same source position. Falsy exprs are kept.
    """

    # TODO: This is an extra copy that we could get rid of by dropping the immutability idea
    def strip_expr(node: WDL.Value.Base) -> WDL.Value.Base:
        # Work on a shallow copy so the caller's value stays untouched.
        stripped = copy.copy(node)
        original_expr = node.expr
        # A Null expr is used rather than None because, when evaluating an
        # expression, MiniWDL applies that expression to the result value
        # *and* all values it contains that have None expressions. The Null
        # expr keeps nested values that the current expression didn't really
        # create from being attributed to it, while still cutting the
        # reference to the parsed WDL document.
        stripped._expr = (
            WDL.Expr.Null(original_expr.pos) if original_expr else original_expr
        )
        return stripped

    return map_over_typed_value(value, strip_expr)
|
|
2731
2676
|
|
|
2732
|
-
# TODO: We want to type this to say, for anything descended from a WDL type, we
|
|
2733
|
-
# return something descended from the same WDL type or a null. But I can't
|
|
2734
|
-
# quite do that with generics, since you could pass in some extended WDL value
|
|
2735
|
-
# type we've never heard of and expect to get one of those out.
|
|
2736
|
-
#
|
|
2737
|
-
# For now we assume that any types extending the WDL value types will implement
|
|
2738
|
-
# compatible constructors.
|
|
2739
|
-
def map_over_typed_files_in_value(
|
|
2740
|
-
value: WDL.Value.Base, transform: Callable[[WDL.Value.File], WDL.Value.File | None]
|
|
2741
|
-
) -> WDL.Value.Base:
|
|
2742
|
-
"""
|
|
2743
|
-
Run all File values embedded in the given value through the given
|
|
2744
|
-
transformation function.
|
|
2745
|
-
|
|
2746
|
-
The transformation function must not mutate the original File.
|
|
2747
|
-
|
|
2748
|
-
If the transform returns None, the file value is changed to Null.
|
|
2749
|
-
|
|
2750
|
-
The transform has access to the type information for the value, so it knows
|
|
2751
|
-
if it may return None, depending on if the value is optional or not.
|
|
2752
2677
|
|
|
2753
|
-
|
|
2754
|
-
actually be used, to allow for scans. So error checking needs to be part of
|
|
2755
|
-
the transform itself.
|
|
2678
|
+
def map_over_typed_value(value: WDL.Value.Base, transform: Callable[[WDL.Value.Base], WDL.Value.Base]) -> WDL.Value.Base:
|
|
2756
2679
|
"""
|
|
2757
|
-
|
|
2758
|
-
|
|
2759
|
-
|
|
2760
|
-
|
|
2761
|
-
|
|
2762
|
-
|
|
2763
|
-
), "Transformation mutated the original File"
|
|
2764
|
-
if new_file is None:
|
|
2765
|
-
# Assume the transform checked types if we actually care about the
|
|
2766
|
-
# result.
|
|
2767
|
-
logger.warning("File %s became Null", value)
|
|
2768
|
-
return WDL.Value.Null()
|
|
2769
|
-
else:
|
|
2770
|
-
# Make whatever the value is around the new path.
|
|
2771
|
-
# TODO: why does this need casting?
|
|
2772
|
-
return new_file
|
|
2773
|
-
elif isinstance(value, WDL.Value.Array):
|
|
2680
|
+
Apply a transform to a WDL value and all contained WDL values.
|
|
2681
|
+
:param value: WDL value to transform
|
|
2682
|
+
:param transform: Function that takes a WDL value and returns a new WDL value
|
|
2683
|
+
:return: New transformed WDL value
|
|
2684
|
+
"""
|
|
2685
|
+
if isinstance(value, WDL.Value.Array):
|
|
2774
2686
|
# This is an array, so recurse on the items
|
|
2775
|
-
|
|
2687
|
+
value = WDL.Value.Array(
|
|
2776
2688
|
value.type.item_type,
|
|
2777
|
-
[
|
|
2689
|
+
[map_over_typed_value(v, transform) for v in value.value],
|
|
2778
2690
|
value.expr,
|
|
2779
2691
|
)
|
|
2780
2692
|
elif isinstance(value, WDL.Value.Map):
|
|
2781
2693
|
# This is a map, so recurse on the members of the items, which are tuples (but not wrapped as WDL Pair objects)
|
|
2782
2694
|
# TODO: Can we avoid a cast in a comprehension if we get MyPy to know that each pair is always a 2-element tuple?
|
|
2783
|
-
|
|
2695
|
+
value = WDL.Value.Map(
|
|
2784
2696
|
value.type.item_type,
|
|
2785
2697
|
[
|
|
2786
2698
|
cast(
|
|
2787
2699
|
tuple[WDL.Value.Base, WDL.Value.Base],
|
|
2788
|
-
tuple(
|
|
2700
|
+
tuple(map_over_typed_value(v, transform) for v in pair),
|
|
2789
2701
|
)
|
|
2790
2702
|
for pair in value.value
|
|
2791
2703
|
],
|
|
@@ -2793,29 +2705,74 @@ def map_over_typed_files_in_value(
|
|
|
2793
2705
|
)
|
|
2794
2706
|
elif isinstance(value, WDL.Value.Pair):
|
|
2795
2707
|
# This is a pair, so recurse on the left and right items
|
|
2796
|
-
|
|
2708
|
+
value = WDL.Value.Pair(
|
|
2797
2709
|
value.type.left_type,
|
|
2798
2710
|
value.type.right_type,
|
|
2799
2711
|
cast(
|
|
2800
2712
|
tuple[WDL.Value.Base, WDL.Value.Base],
|
|
2801
|
-
tuple(
|
|
2713
|
+
tuple(map_over_typed_value(v, transform) for v in value.value),
|
|
2802
2714
|
),
|
|
2803
2715
|
value.expr,
|
|
2804
2716
|
)
|
|
2805
2717
|
elif isinstance(value, WDL.Value.Struct):
|
|
2806
2718
|
# This is a struct, so recurse on the values in the backing dict
|
|
2807
|
-
|
|
2719
|
+
value = WDL.Value.Struct(
|
|
2808
2720
|
cast(Union[WDL.Type.StructInstance, WDL.Type.Object], value.type),
|
|
2809
2721
|
{
|
|
2810
|
-
k:
|
|
2722
|
+
k: map_over_typed_value(v, transform)
|
|
2811
2723
|
for k, v in value.value.items()
|
|
2812
2724
|
},
|
|
2813
2725
|
value.expr,
|
|
2814
2726
|
)
|
|
2815
|
-
|
|
2816
|
-
|
|
2727
|
+
# Run the predicate on the final value
|
|
2728
|
+
return transform(value)
|
|
2729
|
+
|
|
2730
|
+
|
|
2731
|
+
# TODO: We want to type this to say, for anything descended from a WDL type, we
|
|
2732
|
+
# return something descended from the same WDL type or a null. But I can't
|
|
2733
|
+
# quite do that with generics, since you could pass in some extended WDL value
|
|
2734
|
+
# type we've never heard of and expect to get one of those out.
|
|
2735
|
+
#
|
|
2736
|
+
# For now we assume that any types extending the WDL value types will implement
|
|
2737
|
+
# compatible constructors.
|
|
2738
|
+
def map_over_typed_files_in_value(
    value: WDL.Value.Base, transform: Callable[[WDL.Value.File], WDL.Value.File | None]
) -> WDL.Value.Base:
    """
    Apply a transformation to every File value embedded in the given value.

    The transformation must not mutate the File it is given; this is
    checked with an assertion on the File's value.

    A File for which the transform returns None is replaced by a Null value.

    The transform has access to the type information for the value, so it
    knows whether it may return None, depending on whether the value is
    optional.

    The transform is *allowed* to return None only if the mapping result
    won't actually be used, to allow for scans. So error checking needs to
    be part of the transform itself.

    :param value: WDL value to search for File values.
    :param transform: Function called on each File; returns the replacement
        File or None.
    :return: New WDL value with the transformed File values.
    """

    def handle_value(node: WDL.Value.Base) -> WDL.Value.Base:
        if not isinstance(node, WDL.Value.File):
            # Only File values are of interest; everything else passes through.
            return node

        path_before = node.value
        replacement = transform(node)
        assert (
            node.value == path_before
        ), "Transformation mutated the original File"

        if replacement is not None:
            # Use whatever File the transform produced.
            return replacement

        # Assume the transform checked types if we actually care about the
        # result.
        logger.warning("File %s became Null", node)
        return WDL.Value.Null()

    return map_over_typed_value(value, handle_value)
|
|
2775
|
+
|
|
2819
2776
|
|
|
2820
2777
|
def ensure_null_files_are_nullable(
|
|
2821
2778
|
value: WDL.Value.Base, original_value: WDL.Value.Base, expected_type: WDL.Type.Base
|
|
@@ -2958,6 +2915,11 @@ class WDLBaseJob(Job):
|
|
|
2958
2915
|
logger.debug("Overlay %s after %s", overlay, self)
|
|
2959
2916
|
self._postprocessing_steps.append(("overlay", overlay))
|
|
2960
2917
|
|
|
2918
|
+
def remove_expr_from_bindings(self, bindings: WDLBindings) -> WDLBindings:
    """
    Produce bindings whose values no longer carry their expressions.

    The expressions have to be thrown out because they drag the entire WDL
    document into the WDL outputs, which causes duplicate pickling and
    linear growth in scatter memory usage.

    :param bindings: Bindings to process.
    :return: Result of mapping each binding to one with the same name and
        info but an expression-free value.
    """

    def without_expr(binding: WDL.Env.Binding) -> WDL.Env.Binding:
        cleaned_value = remove_expr_from_value(binding.value)
        return WDL.Env.Binding(binding.name, cleaned_value, binding.info)

    return bindings.map(without_expr)
|
|
2922
|
+
|
|
2961
2923
|
def postprocess(self, bindings: WDLBindings) -> WDLBindings:
|
|
2962
2924
|
"""
|
|
2963
2925
|
Apply queued changes to bindings.
|
|
@@ -2994,7 +2956,7 @@ class WDLBaseJob(Job):
|
|
|
2994
2956
|
bindings = combine_bindings([bindings.subtract(argument), argument])
|
|
2995
2957
|
else:
|
|
2996
2958
|
raise RuntimeError(f"Unknown postprocessing action {action}")
|
|
2997
|
-
|
|
2959
|
+
bindings = self.remove_expr_from_bindings(bindings)
|
|
2998
2960
|
return bindings
|
|
2999
2961
|
|
|
3000
2962
|
def defer_postprocessing(self, other: WDLBaseJob) -> None:
|
|
@@ -3025,6 +2987,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3025
2987
|
self,
|
|
3026
2988
|
task: WDL.Tree.Task,
|
|
3027
2989
|
prev_node_results: Sequence[Promised[WDLBindings]],
|
|
2990
|
+
enclosing_bindings: WDLBindings,
|
|
3028
2991
|
task_id: list[str],
|
|
3029
2992
|
wdl_options: WDLContext,
|
|
3030
2993
|
**kwargs: Any,
|
|
@@ -3032,6 +2995,11 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3032
2995
|
"""
|
|
3033
2996
|
Make a new job to determine resources and run a task.
|
|
3034
2997
|
|
|
2998
|
+
:param enclosing_bindings: Bindings in the enclosing section,
|
|
2999
|
+
containing files not to clean up. Files that are passed as inputs
|
|
3000
|
+
but not used as outputs or present in the enclosing section
|
|
3001
|
+
bindings will be deleted after the task call completes.
|
|
3002
|
+
|
|
3035
3003
|
:param namespace: The namespace that the task's *contents* exist in.
|
|
3036
3004
|
The caller has already added the task's own name.
|
|
3037
3005
|
"""
|
|
@@ -3052,6 +3020,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3052
3020
|
|
|
3053
3021
|
self._task = task
|
|
3054
3022
|
self._prev_node_results = prev_node_results
|
|
3023
|
+
self._enclosing_bindings = enclosing_bindings
|
|
3055
3024
|
self._task_id = task_id
|
|
3056
3025
|
|
|
3057
3026
|
@report_wdl_errors("evaluate task code", exit=True)
|
|
@@ -3091,17 +3060,34 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3091
3060
|
# TODO: What if the same file is passed through several tasks, and
|
|
3092
3061
|
# we get cache hits on those tasks? Won't we upload it several
|
|
3093
3062
|
# times?
|
|
3063
|
+
|
|
3064
|
+
# Load output bindings from the cache
|
|
3065
|
+
cached_bindings = virtualize_files(
|
|
3066
|
+
cached_result, standard_library, enforce_existence=False
|
|
3067
|
+
)
|
|
3068
|
+
|
|
3069
|
+
# Throw away anything input but not available outside the call or
|
|
3070
|
+
# output.
|
|
3071
|
+
delete_dead_files(
|
|
3072
|
+
bindings,
|
|
3073
|
+
[cached_bindings, self._enclosing_bindings],
|
|
3074
|
+
file_store
|
|
3075
|
+
)
|
|
3076
|
+
|
|
3077
|
+
# Postprocess and ship the output bindings.
|
|
3094
3078
|
return self.postprocess(
|
|
3095
|
-
|
|
3096
|
-
cached_result, standard_library, enforce_existence=False
|
|
3097
|
-
)
|
|
3079
|
+
cached_bindings
|
|
3098
3080
|
)
|
|
3099
3081
|
|
|
3100
3082
|
if self._task.inputs:
|
|
3101
3083
|
logger.debug("Evaluating task code")
|
|
3102
3084
|
# Evaluate all the inputs that aren't pre-set
|
|
3103
3085
|
bindings = evaluate_decls_to_bindings(
|
|
3104
|
-
self._task.inputs,
|
|
3086
|
+
self._task.inputs,
|
|
3087
|
+
bindings,
|
|
3088
|
+
standard_library,
|
|
3089
|
+
include_previous=True,
|
|
3090
|
+
expressions_are_defaults=True
|
|
3105
3091
|
)
|
|
3106
3092
|
if self._task.postinputs:
|
|
3107
3093
|
# Evaluate all the postinput decls.
|
|
@@ -3231,6 +3217,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3231
3217
|
virtualize_files(
|
|
3232
3218
|
runtime_bindings, standard_library, enforce_existence=False
|
|
3233
3219
|
),
|
|
3220
|
+
self._enclosing_bindings,
|
|
3234
3221
|
self._task_id,
|
|
3235
3222
|
cores=runtime_cores or self.cores,
|
|
3236
3223
|
memory=runtime_memory or self.memory,
|
|
@@ -3266,6 +3253,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
3266
3253
|
task: WDL.Tree.Task,
|
|
3267
3254
|
task_internal_bindings: Promised[WDLBindings],
|
|
3268
3255
|
runtime_bindings: Promised[WDLBindings],
|
|
3256
|
+
enclosing_bindings: WDLBindings,
|
|
3269
3257
|
task_id: list[str],
|
|
3270
3258
|
mount_spec: dict[str | None, int],
|
|
3271
3259
|
wdl_options: WDLContext,
|
|
@@ -3275,6 +3263,9 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
3275
3263
|
"""
|
|
3276
3264
|
Make a new job to run a task.
|
|
3277
3265
|
|
|
3266
|
+
:param enclosing_bindings: Bindings outside the workflow call, with
|
|
3267
|
+
files that should not be cleaned up at the end of the task.
|
|
3268
|
+
|
|
3278
3269
|
:param namespace: The namespace that the task's *contents* exist in.
|
|
3279
3270
|
The caller has already added the task's own name.
|
|
3280
3271
|
"""
|
|
@@ -3298,6 +3289,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
3298
3289
|
self._task = task
|
|
3299
3290
|
self._task_internal_bindings = task_internal_bindings
|
|
3300
3291
|
self._runtime_bindings = runtime_bindings
|
|
3292
|
+
self._enclosing_bindings = enclosing_bindings
|
|
3301
3293
|
self._task_id = task_id
|
|
3302
3294
|
self._cache_key = cache_key
|
|
3303
3295
|
self._mount_spec = mount_spec
|
|
@@ -3646,6 +3638,8 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
3646
3638
|
"is not yet implemented in the MiniWDL Docker "
|
|
3647
3639
|
"containerization implementation."
|
|
3648
3640
|
)
|
|
3641
|
+
if runtime_bindings.has_binding("memory") and human2bytes(runtime_bindings.resolve("memory").value) < human2bytes("4MiB"):
|
|
3642
|
+
runtime_bindings.resolve("memory").value = "4MiB"
|
|
3649
3643
|
else:
|
|
3650
3644
|
raise RuntimeError(
|
|
3651
3645
|
f"Could not find a working container engine to use; told to use {self._wdl_options.get('container')}"
|
|
@@ -3878,7 +3872,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
3878
3872
|
self._task,
|
|
3879
3873
|
"command",
|
|
3880
3874
|
WDL.Type.String(),
|
|
3881
|
-
|
|
3875
|
+
self._task.command,
|
|
3882
3876
|
contained_bindings,
|
|
3883
3877
|
command_library,
|
|
3884
3878
|
)
|
|
@@ -4056,6 +4050,18 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
4056
4050
|
miniwdl_config=miniwdl_config,
|
|
4057
4051
|
)
|
|
4058
4052
|
|
|
4053
|
+
# Clean up anything from the task call input: block or the runtime
|
|
4054
|
+
# section that isn't getting output or available in the enclosing
|
|
4055
|
+
# section. Runtime sections aren't meant to have files, but nothing
|
|
4056
|
+
# actually stops them from being there.
|
|
4057
|
+
delete_dead_files(
|
|
4058
|
+
combine_bindings([bindings, runtime_bindings]),
|
|
4059
|
+
[output_bindings, self._enclosing_bindings],
|
|
4060
|
+
file_store
|
|
4061
|
+
)
|
|
4062
|
+
# If File objects somehow made it to the runtime block they shouldn't
|
|
4063
|
+
# have been virtualized so don't bother with them.
|
|
4064
|
+
|
|
4059
4065
|
# Do postprocessing steps to e.g. apply namespaces.
|
|
4060
4066
|
output_bindings = self.postprocess(output_bindings)
|
|
4061
4067
|
|
|
@@ -4108,7 +4114,8 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
4108
4114
|
logger.info("Setting %s to %s", self._node.name, self._node.expr)
|
|
4109
4115
|
value = evaluate_decl(self._node, incoming_bindings, standard_library)
|
|
4110
4116
|
bindings = incoming_bindings.bind(self._node.name, value)
|
|
4111
|
-
|
|
4117
|
+
# TODO: Only virtualize the new binding
|
|
4118
|
+
return self.postprocess(virtualize_files(bindings, standard_library, enforce_existence=False))
|
|
4112
4119
|
elif isinstance(self._node, WDL.Tree.Call):
|
|
4113
4120
|
# This is a call of a task or workflow
|
|
4114
4121
|
|
|
@@ -4129,6 +4136,8 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
4129
4136
|
standard_library,
|
|
4130
4137
|
inputs_mapping,
|
|
4131
4138
|
)
|
|
4139
|
+
# Prepare call inputs to move to another node
|
|
4140
|
+
input_bindings = virtualize_files(input_bindings, standard_library, enforce_existence=False)
|
|
4132
4141
|
|
|
4133
4142
|
# Bindings may also be added in from the enclosing workflow inputs
|
|
4134
4143
|
# TODO: this is letting us also inject them from the workflow body.
|
|
@@ -4146,6 +4155,7 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
4146
4155
|
subjob: WDLBaseJob = WDLWorkflowJob(
|
|
4147
4156
|
self._node.callee,
|
|
4148
4157
|
[input_bindings, passed_down_bindings],
|
|
4158
|
+
incoming_bindings,
|
|
4149
4159
|
self._node.callee_id,
|
|
4150
4160
|
wdl_options=wdl_options,
|
|
4151
4161
|
local=True,
|
|
@@ -4156,6 +4166,7 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
4156
4166
|
subjob = WDLTaskWrapperJob(
|
|
4157
4167
|
self._node.callee,
|
|
4158
4168
|
[input_bindings, passed_down_bindings],
|
|
4169
|
+
incoming_bindings,
|
|
4159
4170
|
self._node.callee_id,
|
|
4160
4171
|
wdl_options=wdl_options,
|
|
4161
4172
|
local=True,
|
|
@@ -4257,7 +4268,8 @@ class WDLWorkflowNodeListJob(WDLBaseJob):
|
|
|
4257
4268
|
node, "Unimplemented WorkflowNode: " + str(type(node))
|
|
4258
4269
|
)
|
|
4259
4270
|
|
|
4260
|
-
|
|
4271
|
+
# TODO: Only virtualize the new bindings created
|
|
4272
|
+
return self.postprocess(virtualize_files(current_bindings, standard_library, enforce_existence=False))
|
|
4261
4273
|
|
|
4262
4274
|
|
|
4263
4275
|
class WDLCombineBindingsJob(WDLBaseJob):
|
|
@@ -4792,6 +4804,12 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
4792
4804
|
[(p, p) for p in standard_library.get_local_paths()]
|
|
4793
4805
|
)
|
|
4794
4806
|
|
|
4807
|
+
# Set the exprs of the WDL values to WDL.Expr.Null to reduce the memory footprint. This got set from evaluate_named_expression
|
|
4808
|
+
# because any evaluation on an expression will mutate child values of the result values of the expression, and we had not
|
|
4809
|
+
# processed it yet by this point as the bindings from input environment and WDLWorkflowJob do not get processing and postprocessing
|
|
4810
|
+
# run, respectively
|
|
4811
|
+
bindings = self.remove_expr_from_bindings(bindings)
|
|
4812
|
+
|
|
4795
4813
|
if not isinstance(scatter_value, WDL.Value.Array):
|
|
4796
4814
|
raise RuntimeError(
|
|
4797
4815
|
"The returned value from a scatter is not an Array type."
|
|
@@ -4804,6 +4822,8 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
4804
4822
|
# duration of the body.
|
|
4805
4823
|
local_bindings: WDLBindings = WDL.Env.Bindings()
|
|
4806
4824
|
local_bindings = local_bindings.bind(self._scatter.variable, item)
|
|
4825
|
+
# Remove expr from new scatter binding
|
|
4826
|
+
local_bindings = self.remove_expr_from_bindings(local_bindings)
|
|
4807
4827
|
# TODO: We need to turn values() into a list because MyPy seems to
|
|
4808
4828
|
# think a dict_values isn't a Sequence. This is a waste of time to
|
|
4809
4829
|
# appease MyPy but probably better than a cast?
|
|
@@ -5020,6 +5040,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5020
5040
|
self,
|
|
5021
5041
|
workflow: WDL.Tree.Workflow,
|
|
5022
5042
|
prev_node_results: Sequence[Promised[WDLBindings]],
|
|
5043
|
+
enclosing_bindings: WDLBindings,
|
|
5023
5044
|
workflow_id: list[str],
|
|
5024
5045
|
wdl_options: WDLContext,
|
|
5025
5046
|
**kwargs: Any,
|
|
@@ -5028,6 +5049,13 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5028
5049
|
Create a subtree that will run a WDL workflow. The job returns the
|
|
5029
5050
|
return value of the workflow.
|
|
5030
5051
|
|
|
5052
|
+
:param prev_node_results: Bindings fed into the workflow call as inputs.
|
|
5053
|
+
|
|
5054
|
+
:param enclosing_bindings: Bindings in the enclosing section,
|
|
5055
|
+
containing files not to clean up. Files that are passed as inputs
|
|
5056
|
+
but not used as outputs or present in the enclosing section
|
|
5057
|
+
bindings will be deleted after the workflow call completes.
|
|
5058
|
+
|
|
5031
5059
|
:param namespace: the namespace that the workflow's *contents* will be
|
|
5032
5060
|
in. Caller has already added the workflow's own name.
|
|
5033
5061
|
"""
|
|
@@ -5044,6 +5072,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5044
5072
|
|
|
5045
5073
|
self._workflow = workflow
|
|
5046
5074
|
self._prev_node_results = prev_node_results
|
|
5075
|
+
self._enclosing_bindings = enclosing_bindings
|
|
5047
5076
|
self._workflow_id = workflow_id
|
|
5048
5077
|
|
|
5049
5078
|
@report_wdl_errors("run workflow")
|
|
@@ -5084,6 +5113,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5084
5113
|
bindings,
|
|
5085
5114
|
standard_library,
|
|
5086
5115
|
include_previous=True,
|
|
5116
|
+
expressions_are_defaults=True,
|
|
5087
5117
|
)
|
|
5088
5118
|
finally:
|
|
5089
5119
|
# Report all files are downloaded now that all expressions are evaluated.
|
|
@@ -5095,11 +5125,13 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5095
5125
|
# Make jobs to run all the parts of the workflow
|
|
5096
5126
|
sink = self.create_subgraph(self._workflow.body, [], bindings)
|
|
5097
5127
|
|
|
5098
|
-
# To support the all call outputs feature
|
|
5099
|
-
# we have a declared but
|
|
5128
|
+
# To support the all call outputs feature and cleanup of files created
|
|
5129
|
+
# in input: blocks, run an outputs job even if we have a declared but
|
|
5130
|
+
# empty outputs section.
|
|
5100
5131
|
outputs_job = WDLOutputsJob(
|
|
5101
5132
|
self._workflow,
|
|
5102
5133
|
sink.rv(),
|
|
5134
|
+
self._enclosing_bindings,
|
|
5103
5135
|
wdl_options=self._wdl_options,
|
|
5104
5136
|
cache_key=cache_key,
|
|
5105
5137
|
local=True,
|
|
@@ -5121,6 +5153,7 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5121
5153
|
self,
|
|
5122
5154
|
workflow: WDL.Tree.Workflow,
|
|
5123
5155
|
bindings: Promised[WDLBindings],
|
|
5156
|
+
enclosing_bindings: WDLBindings,
|
|
5124
5157
|
wdl_options: WDLContext,
|
|
5125
5158
|
cache_key: str | None = None,
|
|
5126
5159
|
**kwargs: Any,
|
|
@@ -5128,6 +5161,11 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5128
5161
|
"""
|
|
5129
5162
|
Make a new WDLWorkflowOutputsJob for the given workflow, with the given set of bindings after its body runs.
|
|
5130
5163
|
|
|
5164
|
+
:param bindings: Bindings after execution of the workflow body.
|
|
5165
|
+
|
|
5166
|
+
:param enclosing_bindings: Bindings outside the workflow call, with
|
|
5167
|
+
files that should not be cleaned up at the end of the workflow.
|
|
5168
|
+
|
|
5131
5169
|
:param cache_key: If set and storing into the call cache is on, will
|
|
5132
5170
|
cache the workflow execution result under the given key in a
|
|
5133
5171
|
MiniWDL-compatible way.
|
|
@@ -5135,6 +5173,7 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5135
5173
|
super().__init__(wdl_options=wdl_options, **kwargs)
|
|
5136
5174
|
|
|
5137
5175
|
self._bindings = bindings
|
|
5176
|
+
self._enclosing_bindings = enclosing_bindings
|
|
5138
5177
|
self._workflow = workflow
|
|
5139
5178
|
self._cache_key = cache_key
|
|
5140
5179
|
|
|
@@ -5150,9 +5189,8 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5150
5189
|
|
|
5151
5190
|
try:
|
|
5152
5191
|
if self._workflow.outputs is not None:
|
|
5153
|
-
# Output section is declared and is nonempty, so evaluate normally
|
|
5154
|
-
|
|
5155
|
-
# Combine the bindings from the previous job
|
|
5192
|
+
# Output section is declared and is nonempty, so evaluate normally.
|
|
5193
|
+
# Don't drop nonexistent files here; we do that later.
|
|
5156
5194
|
output_bindings = evaluate_decls_to_bindings(
|
|
5157
5195
|
self._workflow.outputs, unwrap(self._bindings), standard_library
|
|
5158
5196
|
)
|
|
@@ -5163,7 +5201,8 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5163
5201
|
if self._workflow.outputs is None or self._wdl_options.get(
|
|
5164
5202
|
"all_call_outputs", False
|
|
5165
5203
|
):
|
|
5166
|
-
# The output section is not declared, or we want to keep task
|
|
5204
|
+
# The output section is not declared, or we want to keep task
|
|
5205
|
+
# outputs anyway on top of an already-evaluated output section.
|
|
5167
5206
|
|
|
5168
5207
|
# Get all task outputs and return that
|
|
5169
5208
|
# First get all task output names
|
|
@@ -5194,16 +5233,6 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5194
5233
|
output_bindings = output_bindings.bind(
|
|
5195
5234
|
binding.name, binding.value
|
|
5196
5235
|
)
|
|
5197
|
-
else:
|
|
5198
|
-
# Output section is declared and is nonempty, so evaluate normally
|
|
5199
|
-
|
|
5200
|
-
# Combine the bindings from the previous job
|
|
5201
|
-
output_bindings = evaluate_decls_to_bindings(
|
|
5202
|
-
self._workflow.outputs,
|
|
5203
|
-
unwrap(self._bindings),
|
|
5204
|
-
standard_library,
|
|
5205
|
-
drop_missing_files=True,
|
|
5206
|
-
)
|
|
5207
5236
|
finally:
|
|
5208
5237
|
# We don't actually know when all our files are downloaded since
|
|
5209
5238
|
# anything we evaluate might devirtualize inside any expression.
|
|
@@ -5222,13 +5251,27 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5222
5251
|
output_bindings, standard_library=standard_library
|
|
5223
5252
|
)
|
|
5224
5253
|
|
|
5254
|
+
# TODO: Unify the rest of this with task output management somehow
|
|
5255
|
+
|
|
5256
|
+
# Upload any files in the outputs if not uploaded already.
|
|
5257
|
+
# We need this because it's possible to create new files in a workflow
|
|
5258
|
+
# outputs section.
|
|
5259
|
+
output_bindings = virtualize_files(output_bindings, standard_library)
|
|
5260
|
+
|
|
5225
5261
|
if self._cache_key is not None:
|
|
5226
5262
|
output_bindings = fill_execution_cache(
|
|
5227
5263
|
self._cache_key, output_bindings, file_store, self._wdl_options
|
|
5228
5264
|
)
|
|
5229
5265
|
|
|
5230
|
-
|
|
5266
|
+
# Let Files that are not output or available outside the call go out of
|
|
5267
|
+
# scope.
|
|
5268
|
+
delete_dead_files(
|
|
5269
|
+
unwrap(self._bindings),
|
|
5270
|
+
[output_bindings, self._enclosing_bindings],
|
|
5271
|
+
file_store
|
|
5272
|
+
)
|
|
5231
5273
|
|
|
5274
|
+
return self.postprocess(output_bindings)
|
|
5232
5275
|
|
|
5233
5276
|
class WDLStartJob(WDLSectionJob):
|
|
5234
5277
|
"""
|
|
@@ -5263,18 +5306,24 @@ class WDLStartJob(WDLSectionJob):
|
|
|
5263
5306
|
if isinstance(self._target, WDL.Tree.Workflow):
|
|
5264
5307
|
# Create a workflow job. We rely on this to handle entering the input
|
|
5265
5308
|
# namespace if needed, or handling free-floating inputs.
|
|
5309
|
+
# Pass top-level inputs as enclosing section inputs to avoid
|
|
5310
|
+
# bothering to separately delete them.
|
|
5266
5311
|
job: WDLBaseJob = WDLWorkflowJob(
|
|
5267
5312
|
self._target,
|
|
5268
5313
|
[inputs],
|
|
5314
|
+
inputs,
|
|
5269
5315
|
[self._target.name],
|
|
5270
5316
|
wdl_options=self._wdl_options,
|
|
5271
5317
|
local=True,
|
|
5272
5318
|
)
|
|
5273
5319
|
else:
|
|
5274
5320
|
# There is no workflow. Create a task job.
|
|
5321
|
+
# Pass top-level inputs as enclosing section inputs to avoid
|
|
5322
|
+
# bothering to separately delete them.
|
|
5275
5323
|
job = WDLTaskWrapperJob(
|
|
5276
5324
|
self._target,
|
|
5277
5325
|
[inputs],
|
|
5326
|
+
inputs,
|
|
5278
5327
|
[self._target.name],
|
|
5279
5328
|
wdl_options=self._wdl_options,
|
|
5280
5329
|
local=True,
|
|
@@ -5330,7 +5379,7 @@ class WDLImportWrapper(WDLSectionJob):
|
|
|
5330
5379
|
wdl_options: WDLContext,
|
|
5331
5380
|
inputs_search_path: list[str],
|
|
5332
5381
|
import_remote_files: bool,
|
|
5333
|
-
|
|
5382
|
+
import_workers_batchsize: ParseableIndivisibleResource,
|
|
5334
5383
|
import_workers_disk: ParseableIndivisibleResource,
|
|
5335
5384
|
**kwargs: Any,
|
|
5336
5385
|
):
|
|
@@ -5344,11 +5393,11 @@ class WDLImportWrapper(WDLSectionJob):
|
|
|
5344
5393
|
self._target = target
|
|
5345
5394
|
self._inputs_search_path = inputs_search_path
|
|
5346
5395
|
self._import_remote_files = import_remote_files
|
|
5347
|
-
self.
|
|
5396
|
+
self._import_workers_batchsize = import_workers_batchsize
|
|
5348
5397
|
self._import_workers_disk = import_workers_disk
|
|
5349
5398
|
|
|
5350
5399
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
5351
|
-
filenames =
|
|
5400
|
+
filenames = extract_file_values(self._inputs)
|
|
5352
5401
|
file_to_data = get_file_sizes(
|
|
5353
5402
|
filenames,
|
|
5354
5403
|
file_store.jobStore,
|
|
@@ -5356,7 +5405,7 @@ class WDLImportWrapper(WDLSectionJob):
|
|
|
5356
5405
|
include_remote_files=self._import_remote_files,
|
|
5357
5406
|
execution_dir=self._wdl_options.get("execution_dir")
|
|
5358
5407
|
)
|
|
5359
|
-
imports_job = ImportsJob(file_to_data, self.
|
|
5408
|
+
imports_job = ImportsJob(file_to_data, self._import_workers_batchsize, self._import_workers_disk)
|
|
5360
5409
|
self.addChild(imports_job)
|
|
5361
5410
|
install_imports_job = WDLInstallImportsJob(
|
|
5362
5411
|
self._target.name, self._inputs, imports_job.rv()
|
|
@@ -5388,7 +5437,7 @@ def make_root_job(
|
|
|
5388
5437
|
wdl_options=wdl_options,
|
|
5389
5438
|
inputs_search_path=inputs_search_path,
|
|
5390
5439
|
import_remote_files=options.reference_inputs,
|
|
5391
|
-
|
|
5440
|
+
import_workers_batchsize=options.import_workers_batchsize,
|
|
5392
5441
|
import_workers_disk=options.import_workers_disk
|
|
5393
5442
|
)
|
|
5394
5443
|
else:
|
|
@@ -5445,61 +5494,106 @@ def main() -> None:
|
|
|
5445
5494
|
wdl_uri, trs_spec = resolve_workflow(options.wdl_uri, supported_languages={"WDL"})
|
|
5446
5495
|
|
|
5447
5496
|
with Toil(options, workflow_name=trs_spec or wdl_uri, trs_spec=trs_spec) as toil:
|
|
5448
|
-
|
|
5449
|
-
|
|
5497
|
+
# TODO: Move all the input parsing outside the Toil context
|
|
5498
|
+
# manager to avoid leaving a job store behind if the workflow
|
|
5499
|
+
# can't start.
|
|
5500
|
+
|
|
5501
|
+
# Both start and restart need us to have the workflow and the
|
|
5502
|
+
# wdl_options WDLContext.
|
|
5503
|
+
|
|
5504
|
+
# MiniWDL load code internally uses asyncio.get_event_loop()
|
|
5505
|
+
# which might not get an event loop if somebody has ever called
|
|
5506
|
+
# set_event_loop. So we need to make sure an event loop is
|
|
5507
|
+
# available.
|
|
5508
|
+
asyncio.set_event_loop(asyncio.new_event_loop())
|
|
5509
|
+
|
|
5510
|
+
# Load the WDL document.
|
|
5511
|
+
document: WDL.Tree.Document = WDL.load(
|
|
5512
|
+
wdl_uri,
|
|
5513
|
+
read_source=toil_read_source,
|
|
5514
|
+
check_quant=options.quant_check
|
|
5515
|
+
)
|
|
5516
|
+
|
|
5517
|
+
# See if we're going to run a workflow or a task
|
|
5518
|
+
target: WDL.Tree.Workflow | WDL.Tree.Task
|
|
5519
|
+
if document.workflow:
|
|
5520
|
+
target = document.workflow
|
|
5521
|
+
elif len(document.tasks) == 1:
|
|
5522
|
+
target = document.tasks[0]
|
|
5523
|
+
elif len(document.tasks) > 1:
|
|
5524
|
+
raise WDL.Error.InputError(
|
|
5525
|
+
"Multiple tasks found with no workflow! Either add a workflow or keep one task."
|
|
5526
|
+
)
|
|
5450
5527
|
else:
|
|
5451
|
-
|
|
5452
|
-
|
|
5453
|
-
|
|
5454
|
-
|
|
5455
|
-
#
|
|
5456
|
-
#
|
|
5457
|
-
#
|
|
5458
|
-
#
|
|
5459
|
-
|
|
5460
|
-
|
|
5461
|
-
# Load the WDL document.
|
|
5462
|
-
document: WDL.Tree.Document = WDL.load(
|
|
5463
|
-
wdl_uri,
|
|
5464
|
-
read_source=toil_read_source,
|
|
5528
|
+
raise WDL.Error.InputError("WDL document is empty!")
|
|
5529
|
+
|
|
5530
|
+
if "croo_out_def" in target.meta:
|
|
5531
|
+
# This workflow or task wants to have its outputs
|
|
5532
|
+
# "organized" by the Cromwell Output Organizer:
|
|
5533
|
+
# <https://github.com/ENCODE-DCC/croo>.
|
|
5534
|
+
#
|
|
5535
|
+
# TODO: We don't support generating anything that CROO can read.
|
|
5536
|
+
logger.warning(
|
|
5537
|
+
"This WDL expects to be used with the Cromwell Output Organizer (croo) <https://github.com/ENCODE-DCC/croo>. Toil cannot yet produce the outputs that croo requires. You will not be able to use croo on the output of this Toil run!"
|
|
5465
5538
|
)
|
|
5466
5539
|
|
|
5467
|
-
#
|
|
5468
|
-
|
|
5469
|
-
|
|
5470
|
-
|
|
5471
|
-
|
|
5472
|
-
|
|
5473
|
-
|
|
5474
|
-
|
|
5475
|
-
"Multiple tasks found with no workflow! Either add a workflow or keep one task."
|
|
5476
|
-
)
|
|
5477
|
-
else:
|
|
5478
|
-
raise WDL.Error.InputError("WDL document is empty!")
|
|
5479
|
-
|
|
5480
|
-
if "croo_out_def" in target.meta:
|
|
5481
|
-
# This workflow or task wants to have its outputs
|
|
5482
|
-
# "organized" by the Cromwell Output Organizer:
|
|
5483
|
-
# <https://github.com/ENCODE-DCC/croo>.
|
|
5484
|
-
#
|
|
5485
|
-
# TODO: We don't support generating anything that CROO can read.
|
|
5540
|
+
# But we can assume that we need to preserve individual
|
|
5541
|
+
# task outputs since the point of CROO is fetching those
|
|
5542
|
+
# from Cromwell's output directories.
|
|
5543
|
+
#
|
|
5544
|
+
# This isn't quite WDL spec compliant but it will rescue
|
|
5545
|
+
# runs of the popular
|
|
5546
|
+
# <https://github.com/ENCODE-DCC/atac-seq-pipeline>
|
|
5547
|
+
if options.all_call_outputs is None:
|
|
5486
5548
|
logger.warning(
|
|
5487
|
-
"
|
|
5549
|
+
"Inferring --allCallOutputs=True to preserve probable actual outputs of a croo WDL file."
|
|
5488
5550
|
)
|
|
5551
|
+
options.all_call_outputs = True
|
|
5552
|
+
|
|
5553
|
+
# This mutates document to add linting information, but doesn't print any lint errors itself
|
|
5554
|
+
# or stop the workflow
|
|
5555
|
+
WDL.Lint.lint(document)
|
|
5556
|
+
|
|
5557
|
+
# We use a mutable variable and a generic file pointer to capture information about lint warnings
|
|
5558
|
+
# Both will be populated inside outline()
|
|
5559
|
+
lint_warnings_counter = [0]
|
|
5560
|
+
lint_warnings_io = io.StringIO()
|
|
5561
|
+
outline(
|
|
5562
|
+
document,
|
|
5563
|
+
0,
|
|
5564
|
+
file=lint_warnings_io,
|
|
5565
|
+
show_called=(document.workflow is not None),
|
|
5566
|
+
shown=lint_warnings_counter,
|
|
5567
|
+
) # type: ignore[no-untyped-call]
|
|
5568
|
+
|
|
5569
|
+
if getattr(WDL.Lint, "_shellcheck_available", None) is False:
|
|
5570
|
+
logger.info("Suggestion: install shellcheck (www.shellcheck.net) to check task commands")
|
|
5571
|
+
|
|
5572
|
+
if lint_warnings_counter[0]:
|
|
5573
|
+
logger.warning('Workflow lint warnings:\n%s', lint_warnings_io.getvalue().rstrip())
|
|
5574
|
+
if options.strict:
|
|
5575
|
+
logger.critical(f'Workflow did not pass linting in strict mode')
|
|
5576
|
+
# MiniWDL uses exit code 2 to indicate linting errors, so replicate that behavior
|
|
5577
|
+
sys.exit(2)
|
|
5578
|
+
|
|
5579
|
+
# Get the execution directory
|
|
5580
|
+
execution_dir = os.getcwd()
|
|
5581
|
+
|
|
5582
|
+
# Configure workflow interpreter options.
|
|
5583
|
+
# TODO: Would be nice to somehow be able to change some of these on
|
|
5584
|
+
# restart. For now we assume we are computing the same values.
|
|
5585
|
+
wdl_options: WDLContext = {
|
|
5586
|
+
"execution_dir": execution_dir,
|
|
5587
|
+
"container": options.container,
|
|
5588
|
+
"task_path": target.name,
|
|
5589
|
+
"namespace": target.name,
|
|
5590
|
+
"all_call_outputs": options.all_call_outputs,
|
|
5591
|
+
}
|
|
5592
|
+
assert wdl_options.get("container") is not None
|
|
5489
5593
|
|
|
5490
|
-
|
|
5491
|
-
|
|
5492
|
-
|
|
5493
|
-
#
|
|
5494
|
-
# This isn't quite WDL spec compliant but it will rescue
|
|
5495
|
-
# runs of the popular
|
|
5496
|
-
# <https://github.com/ENCODE-DCC/atac-seq-pipeline>
|
|
5497
|
-
if options.all_call_outputs is None:
|
|
5498
|
-
logger.warning(
|
|
5499
|
-
"Inferring --allCallOutputs=True to preserve probable actual outputs of a croo WDL file."
|
|
5500
|
-
)
|
|
5501
|
-
options.all_call_outputs = True
|
|
5502
|
-
|
|
5594
|
+
if options.restart:
|
|
5595
|
+
output_bindings = toil.restart()
|
|
5596
|
+
else:
|
|
5503
5597
|
# If our input really comes from a URI or path, remember it.
|
|
5504
5598
|
input_source_uri = None
|
|
5505
5599
|
# Also remember where we need to report JSON parse errors as
|
|
@@ -5592,19 +5686,6 @@ def main() -> None:
|
|
|
5592
5686
|
|
|
5593
5687
|
# TODO: Automatically set a good MINIWDL__SINGULARITY__IMAGE_CACHE ?
|
|
5594
5688
|
|
|
5595
|
-
# Get the execution directory
|
|
5596
|
-
execution_dir = os.getcwd()
|
|
5597
|
-
|
|
5598
|
-
# Configure workflow interpreter options
|
|
5599
|
-
wdl_options: WDLContext = {
|
|
5600
|
-
"execution_dir": execution_dir,
|
|
5601
|
-
"container": options.container,
|
|
5602
|
-
"task_path": target.name,
|
|
5603
|
-
"namespace": target.name,
|
|
5604
|
-
"all_call_outputs": options.all_call_outputs,
|
|
5605
|
-
}
|
|
5606
|
-
assert wdl_options.get("container") is not None
|
|
5607
|
-
|
|
5608
5689
|
# Run the workflow and get its outputs namespaced with the workflow name.
|
|
5609
5690
|
root_job = make_root_job(
|
|
5610
5691
|
target,
|