toil 9.1.1__py3-none-any.whl → 9.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +5 -9
- toil/batchSystems/abstractBatchSystem.py +23 -22
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -12
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/cleanup_support.py +4 -4
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/gridengine.py +3 -4
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +65 -63
- toil/batchSystems/local_support.py +2 -3
- toil/batchSystems/lsf.py +6 -7
- toil/batchSystems/mesos/batchSystem.py +11 -7
- toil/batchSystems/mesos/test/__init__.py +1 -2
- toil/batchSystems/options.py +9 -10
- toil/batchSystems/registry.py +3 -7
- toil/batchSystems/singleMachine.py +8 -11
- toil/batchSystems/slurm.py +49 -38
- toil/batchSystems/torque.py +3 -4
- toil/bus.py +36 -34
- toil/common.py +129 -89
- toil/cwl/cwltoil.py +857 -729
- toil/cwl/utils.py +44 -35
- toil/fileStores/__init__.py +3 -1
- toil/fileStores/abstractFileStore.py +28 -30
- toil/fileStores/cachingFileStore.py +8 -8
- toil/fileStores/nonCachingFileStore.py +10 -21
- toil/job.py +159 -158
- toil/jobStores/abstractJobStore.py +68 -69
- toil/jobStores/aws/jobStore.py +249 -213
- toil/jobStores/aws/utils.py +13 -24
- toil/jobStores/fileJobStore.py +28 -22
- toil/jobStores/googleJobStore.py +21 -17
- toil/jobStores/utils.py +3 -7
- toil/leader.py +17 -22
- toil/lib/accelerators.py +6 -4
- toil/lib/aws/__init__.py +9 -10
- toil/lib/aws/ami.py +33 -19
- toil/lib/aws/iam.py +6 -6
- toil/lib/aws/s3.py +259 -157
- toil/lib/aws/session.py +76 -76
- toil/lib/aws/utils.py +51 -43
- toil/lib/checksum.py +19 -15
- toil/lib/compatibility.py +3 -2
- toil/lib/conversions.py +45 -18
- toil/lib/directory.py +29 -26
- toil/lib/docker.py +93 -99
- toil/lib/dockstore.py +77 -50
- toil/lib/ec2.py +39 -38
- toil/lib/ec2nodes.py +11 -4
- toil/lib/exceptions.py +8 -5
- toil/lib/ftp_utils.py +9 -14
- toil/lib/generatedEC2Lists.py +161 -20
- toil/lib/history.py +141 -97
- toil/lib/history_submission.py +163 -72
- toil/lib/io.py +27 -17
- toil/lib/memoize.py +2 -1
- toil/lib/misc.py +15 -11
- toil/lib/pipes.py +40 -25
- toil/lib/plugins.py +12 -8
- toil/lib/resources.py +1 -0
- toil/lib/retry.py +32 -38
- toil/lib/threading.py +12 -12
- toil/lib/throttle.py +1 -2
- toil/lib/trs.py +113 -51
- toil/lib/url.py +14 -23
- toil/lib/web.py +7 -2
- toil/options/common.py +18 -15
- toil/options/cwl.py +2 -2
- toil/options/runner.py +9 -5
- toil/options/wdl.py +1 -3
- toil/provisioners/__init__.py +9 -9
- toil/provisioners/abstractProvisioner.py +22 -20
- toil/provisioners/aws/__init__.py +20 -14
- toil/provisioners/aws/awsProvisioner.py +10 -8
- toil/provisioners/clusterScaler.py +19 -18
- toil/provisioners/gceProvisioner.py +2 -3
- toil/provisioners/node.py +11 -13
- toil/realtimeLogger.py +4 -4
- toil/resource.py +5 -5
- toil/server/app.py +2 -2
- toil/server/cli/wes_cwl_runner.py +11 -11
- toil/server/utils.py +18 -21
- toil/server/wes/abstract_backend.py +9 -8
- toil/server/wes/amazon_wes_utils.py +3 -3
- toil/server/wes/tasks.py +3 -5
- toil/server/wes/toil_backend.py +17 -21
- toil/server/wsgi_app.py +3 -3
- toil/serviceManager.py +3 -4
- toil/statsAndLogging.py +12 -13
- toil/test/__init__.py +33 -24
- toil/test/batchSystems/batchSystemTest.py +12 -11
- toil/test/batchSystems/batch_system_plugin_test.py +3 -5
- toil/test/batchSystems/test_slurm.py +38 -24
- toil/test/cwl/conftest.py +5 -6
- toil/test/cwl/cwlTest.py +194 -78
- toil/test/cwl/download_file_uri.json +6 -0
- toil/test/cwl/download_file_uri_no_hostname.json +6 -0
- toil/test/docs/scripts/tutorial_staging.py +1 -0
- toil/test/jobStores/jobStoreTest.py +9 -7
- toil/test/lib/aws/test_iam.py +1 -3
- toil/test/lib/aws/test_s3.py +1 -1
- toil/test/lib/dockerTest.py +9 -9
- toil/test/lib/test_ec2.py +12 -11
- toil/test/lib/test_history.py +4 -4
- toil/test/lib/test_trs.py +16 -14
- toil/test/lib/test_url.py +7 -6
- toil/test/lib/url_plugin_test.py +12 -18
- toil/test/provisioners/aws/awsProvisionerTest.py +10 -8
- toil/test/provisioners/clusterScalerTest.py +2 -5
- toil/test/provisioners/clusterTest.py +1 -3
- toil/test/server/serverTest.py +13 -4
- toil/test/sort/restart_sort.py +2 -6
- toil/test/sort/sort.py +3 -8
- toil/test/src/deferredFunctionTest.py +7 -7
- toil/test/src/environmentTest.py +1 -2
- toil/test/src/fileStoreTest.py +5 -5
- toil/test/src/importExportFileTest.py +5 -6
- toil/test/src/jobServiceTest.py +22 -14
- toil/test/src/jobTest.py +121 -25
- toil/test/src/miscTests.py +5 -7
- toil/test/src/promisedRequirementTest.py +8 -7
- toil/test/src/regularLogTest.py +2 -3
- toil/test/src/resourceTest.py +5 -8
- toil/test/src/restartDAGTest.py +5 -6
- toil/test/src/resumabilityTest.py +2 -2
- toil/test/src/retainTempDirTest.py +3 -3
- toil/test/src/systemTest.py +3 -3
- toil/test/src/threadingTest.py +1 -1
- toil/test/src/workerTest.py +1 -2
- toil/test/utils/toilDebugTest.py +6 -4
- toil/test/utils/toilKillTest.py +1 -1
- toil/test/utils/utilsTest.py +15 -14
- toil/test/wdl/wdltoil_test.py +247 -124
- toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
- toil/toilState.py +2 -3
- toil/utils/toilDebugFile.py +3 -8
- toil/utils/toilDebugJob.py +1 -2
- toil/utils/toilLaunchCluster.py +1 -2
- toil/utils/toilSshCluster.py +2 -0
- toil/utils/toilStats.py +19 -24
- toil/utils/toilStatus.py +11 -14
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +313 -209
- toil/worker.py +18 -12
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/METADATA +11 -14
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/RECORD +150 -153
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/WHEEL +1 -1
- toil/test/cwl/staging_cat.cwl +0 -27
- toil/test/cwl/staging_make_file.cwl +0 -25
- toil/test/cwl/staging_workflow.cwl +0 -43
- toil/test/cwl/zero_default.cwl +0 -61
- toil/test/utils/ABCWorkflowDebug/ABC.txt +0 -1
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/entry_points.txt +0 -0
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/licenses/LICENSE +0 -0
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/top_level.txt +0 -0
toil/wdl/wdltoil.py
CHANGED
|
@@ -33,35 +33,23 @@ import sys
|
|
|
33
33
|
import tempfile
|
|
34
34
|
import textwrap
|
|
35
35
|
import uuid
|
|
36
|
-
from collections.abc import Generator, Iterable, Iterator, Sequence
|
|
36
|
+
from collections.abc import Callable, Generator, Iterable, Iterator, Sequence
|
|
37
37
|
from contextlib import ExitStack, contextmanager
|
|
38
38
|
from graphlib import TopologicalSorter
|
|
39
39
|
from tempfile import mkstemp
|
|
40
40
|
from typing import (
|
|
41
|
+
IO,
|
|
41
42
|
Any,
|
|
42
|
-
Callable,
|
|
43
|
-
Dict,
|
|
44
|
-
Generator,
|
|
45
|
-
Iterable,
|
|
46
|
-
Iterator,
|
|
47
|
-
List,
|
|
48
43
|
Optional,
|
|
49
|
-
|
|
50
|
-
|
|
44
|
+
Protocol,
|
|
45
|
+
TypedDict,
|
|
46
|
+
TypeGuard,
|
|
51
47
|
TypeVar,
|
|
52
48
|
Union,
|
|
53
49
|
cast,
|
|
54
|
-
TypedDict,
|
|
55
|
-
IO,
|
|
56
|
-
Protocol,
|
|
57
50
|
overload,
|
|
58
51
|
)
|
|
59
52
|
|
|
60
|
-
if sys.version_info < (3, 10):
|
|
61
|
-
from typing_extensions import TypeGuard
|
|
62
|
-
else:
|
|
63
|
-
from typing import TypeGuard
|
|
64
|
-
|
|
65
53
|
if sys.version_info < (3, 11):
|
|
66
54
|
from typing_extensions import NotRequired
|
|
67
55
|
else:
|
|
@@ -73,11 +61,11 @@ from urllib.error import HTTPError
|
|
|
73
61
|
from urllib.parse import quote, unquote, urljoin, urlsplit
|
|
74
62
|
|
|
75
63
|
import WDL.Error
|
|
64
|
+
import WDL.Lint
|
|
76
65
|
import WDL.runtime.config
|
|
77
66
|
from configargparse import ArgParser, Namespace
|
|
78
67
|
from WDL._util import byte_size_units, chmod_R_plus
|
|
79
|
-
from WDL.CLI import
|
|
80
|
-
import WDL.Lint
|
|
68
|
+
from WDL.CLI import outline, print_error
|
|
81
69
|
from WDL.runtime.backend.docker_swarm import SwarmContainer
|
|
82
70
|
from WDL.runtime.backend.singularity import SingularityContainer
|
|
83
71
|
from WDL.runtime.error import DownloadFailed
|
|
@@ -91,45 +79,56 @@ from toil.fileStores import FileID
|
|
|
91
79
|
from toil.fileStores.abstractFileStore import AbstractFileStore
|
|
92
80
|
from toil.job import (
|
|
93
81
|
AcceleratorRequirement,
|
|
82
|
+
FileMetadata,
|
|
83
|
+
ImportsJob,
|
|
94
84
|
Job,
|
|
85
|
+
ParseableIndivisibleResource,
|
|
95
86
|
Promise,
|
|
96
87
|
Promised,
|
|
97
88
|
TemporaryID,
|
|
89
|
+
get_file_sizes,
|
|
98
90
|
parse_accelerator,
|
|
91
|
+
potential_absolute_uris,
|
|
99
92
|
unwrap,
|
|
100
93
|
unwrap_all,
|
|
101
|
-
ParseableIndivisibleResource,
|
|
102
|
-
ImportsJob,
|
|
103
|
-
FileMetadata,
|
|
104
|
-
potential_absolute_uris,
|
|
105
|
-
get_file_sizes
|
|
106
94
|
)
|
|
107
95
|
from toil.jobStores.abstractJobStore import (
|
|
108
96
|
AbstractJobStore,
|
|
109
97
|
InvalidImportExportUrlException,
|
|
110
98
|
LocatorException,
|
|
111
99
|
)
|
|
112
|
-
from toil.lib.exceptions import UnimplementedURLException
|
|
113
100
|
from toil.lib.accelerators import get_individual_local_accelerators
|
|
114
101
|
from toil.lib.conversions import VALID_PREFIXES, convert_units, human2bytes
|
|
115
102
|
from toil.lib.directory import (
|
|
116
103
|
DirectoryContents,
|
|
117
104
|
decode_directory,
|
|
118
|
-
|
|
105
|
+
directory_contents_items,
|
|
119
106
|
directory_item_exists,
|
|
107
|
+
directory_items,
|
|
108
|
+
encode_directory,
|
|
120
109
|
get_directory_contents_item,
|
|
121
110
|
get_directory_item,
|
|
122
|
-
directory_items,
|
|
123
|
-
directory_contents_items,
|
|
124
111
|
)
|
|
125
|
-
from toil.lib.
|
|
126
|
-
from toil.lib.io import
|
|
112
|
+
from toil.lib.exceptions import UnimplementedURLException
|
|
113
|
+
from toil.lib.io import (
|
|
114
|
+
TOIL_URI_SCHEME,
|
|
115
|
+
is_any_url,
|
|
116
|
+
is_directory_url,
|
|
117
|
+
is_file_url,
|
|
118
|
+
is_remote_url,
|
|
119
|
+
is_standard_url,
|
|
120
|
+
is_toil_dir_url,
|
|
121
|
+
is_toil_file_url,
|
|
122
|
+
is_toil_url,
|
|
123
|
+
mkdtemp,
|
|
124
|
+
)
|
|
127
125
|
from toil.lib.memoize import memoize
|
|
128
126
|
from toil.lib.misc import get_user_name
|
|
129
127
|
from toil.lib.resources import ResourceMonitor
|
|
130
128
|
from toil.lib.threading import global_mutex
|
|
131
|
-
from toil.
|
|
129
|
+
from toil.lib.trs import resolve_workflow
|
|
132
130
|
from toil.lib.url import URLAccess
|
|
131
|
+
from toil.provisioners.clusterScaler import JobTooBigError
|
|
133
132
|
|
|
134
133
|
logger = logging.getLogger(__name__)
|
|
135
134
|
|
|
@@ -141,6 +140,7 @@ WDLINode = Union[WDL.Value.File, WDL.Value.Directory]
|
|
|
141
140
|
# Some functions take either a File or Directory and return the same type.
|
|
142
141
|
AnyINode = TypeVar("AnyINode", bound=WDLINode)
|
|
143
142
|
|
|
143
|
+
|
|
144
144
|
# TODO: Is there a way to get out of needing this? Or make this support N types?
|
|
145
145
|
class INodeTransform(Protocol):
|
|
146
146
|
"""
|
|
@@ -152,12 +152,14 @@ class INodeTransform(Protocol):
|
|
|
152
152
|
complicated type for functions that transform inodes to the same type of
|
|
153
153
|
inodes.
|
|
154
154
|
"""
|
|
155
|
+
|
|
155
156
|
@overload
|
|
156
|
-
def __call__(self, __file: WDL.Value.File) -> WDL.Value.File | None:
|
|
157
|
-
...
|
|
157
|
+
def __call__(self, __file: WDL.Value.File) -> WDL.Value.File | None: ...
|
|
158
158
|
@overload
|
|
159
|
-
def __call__(
|
|
160
|
-
|
|
159
|
+
def __call__(
|
|
160
|
+
self, __directory: WDL.Value.Directory
|
|
161
|
+
) -> WDL.Value.Directory | None: ...
|
|
162
|
+
|
|
161
163
|
|
|
162
164
|
def is_inode(value: WDL.Value.Base) -> TypeGuard[WDLINode]:
|
|
163
165
|
"""
|
|
@@ -169,12 +171,14 @@ def is_inode(value: WDL.Value.Base) -> TypeGuard[WDLINode]:
|
|
|
169
171
|
"""
|
|
170
172
|
return isinstance(value, WDL.Value.File) or isinstance(value, WDL.Value.Directory)
|
|
171
173
|
|
|
174
|
+
|
|
172
175
|
# In regards to "toilfile:" URIs:
|
|
173
176
|
# We define a URI scheme kind of like but not actually compatible with the one
|
|
174
177
|
# we use for CWL. CWL brings along the file basename in its file and directory
|
|
175
178
|
# types, but WDL inode types don't. So we need to make sure we stash that
|
|
176
179
|
# somewhere in the URI.
|
|
177
180
|
|
|
181
|
+
|
|
178
182
|
# We want to use hashlib.file_digest to avoid a 3-line hashing loop like
|
|
179
183
|
# MiniWDL has. But it is only in 3.11+
|
|
180
184
|
#
|
|
@@ -213,22 +217,12 @@ class FileDigester(Protocol):
|
|
|
213
217
|
def __call__(self, __f: ReadableFileObj, __alg_name: str) -> hashlib._Hash: ...
|
|
214
218
|
|
|
215
219
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
# the polyfill needs *exactly* the signature of file_digest, and not just
|
|
219
|
-
# one that can accept all calls we make in the file, or MyPy will complain.
|
|
220
|
-
#
|
|
221
|
-
# We need to tell MyPy we expect this import to fail, when typechecking on
|
|
222
|
-
# pythons that don't have it. But we also need to tell it that it is fine
|
|
223
|
-
# if it succeeds, for Pythons that do have it.
|
|
224
|
-
#
|
|
225
|
-
# TODO: Change to checking sys.version_info because MyPy understands that
|
|
226
|
-
# better?
|
|
227
|
-
from hashlib import file_digest as file_digest_impl # type: ignore[attr-defined,unused-ignore]
|
|
220
|
+
if sys.version_info >= (3, 11):
|
|
221
|
+
from hashlib import file_digest as file_digest_impl
|
|
228
222
|
|
|
229
223
|
file_digest: FileDigester = file_digest_impl
|
|
230
|
-
|
|
231
|
-
|
|
224
|
+
else: # Polyfill file_digest from 3.11+
|
|
225
|
+
|
|
232
226
|
def file_digest_fallback_impl(f: ReadableFileObj, alg_name: str) -> hashlib._Hash:
|
|
233
227
|
BUFFER_SIZE = 1024 * 1024
|
|
234
228
|
hasher = hashlib.new(alg_name)
|
|
@@ -240,22 +234,20 @@ except ImportError:
|
|
|
240
234
|
|
|
241
235
|
file_digest = file_digest_fallback_impl
|
|
242
236
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
"
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
},
|
|
258
|
-
)
|
|
237
|
+
|
|
238
|
+
class WDLContext(TypedDict):
|
|
239
|
+
"""WDL options to pass into the WDL jobs and standard libraries"""
|
|
240
|
+
|
|
241
|
+
execution_dir: NotRequired[str]
|
|
242
|
+
"""Directory to use as the working directory for workflow code"""
|
|
243
|
+
container: NotRequired[str]
|
|
244
|
+
"""The type of container to use when executing a WDL task. Carries through the value of the commandline --container option."""
|
|
245
|
+
task_path: str
|
|
246
|
+
"""Dotted WDL name of the part of the workflow this library is working for"""
|
|
247
|
+
namespace: str
|
|
248
|
+
"""Namespace of the WDL that the current job is in"""
|
|
249
|
+
all_call_outputs: bool
|
|
250
|
+
"""Whether a job should include all calls outputs"""
|
|
259
251
|
|
|
260
252
|
|
|
261
253
|
class InsufficientMountDiskSpace(Exception):
|
|
@@ -357,7 +349,9 @@ async def toil_read_source(
|
|
|
357
349
|
# We track our own failures for debugging
|
|
358
350
|
tried = []
|
|
359
351
|
|
|
360
|
-
for candidate_uri in potential_absolute_uris(
|
|
352
|
+
for candidate_uri in potential_absolute_uris(
|
|
353
|
+
uri, path, importer=importer.pos.abspath if importer else None
|
|
354
|
+
):
|
|
361
355
|
# For each place to try in order
|
|
362
356
|
destination_buffer = io.BytesIO()
|
|
363
357
|
logger.debug("Fetching %s", candidate_uri)
|
|
@@ -373,7 +367,13 @@ async def toil_read_source(
|
|
|
373
367
|
# TODO: we need to assume in general that an error is just a
|
|
374
368
|
# not-found, because the exceptions thrown by read_from_url()
|
|
375
369
|
# implementations are not specified.
|
|
376
|
-
logger.debug(
|
|
370
|
+
logger.debug(
|
|
371
|
+
"Tried to fetch %s from %s but got %s: %s",
|
|
372
|
+
uri,
|
|
373
|
+
candidate_uri,
|
|
374
|
+
type(e),
|
|
375
|
+
e,
|
|
376
|
+
)
|
|
377
377
|
continue
|
|
378
378
|
# If we get here, we got it probably.
|
|
379
379
|
try:
|
|
@@ -699,7 +699,9 @@ def clone_metadata(old_inode: AnyINode, new_inode: AnyINode) -> None:
|
|
|
699
699
|
setattr(new_inode, attribute, getattr(old_inode, attribute))
|
|
700
700
|
|
|
701
701
|
|
|
702
|
-
def make_inode(
|
|
702
|
+
def make_inode(
|
|
703
|
+
example_inode: AnyINode, value: str, expr: WDL.Expr.Base | None
|
|
704
|
+
) -> AnyINode:
|
|
703
705
|
"""
|
|
704
706
|
Make a new File or Directory of the same type as the example with the given arguments.
|
|
705
707
|
|
|
@@ -709,6 +711,7 @@ def make_inode(example_inode: AnyINode, value: str, expr: Optional[WDL.Expr.Base
|
|
|
709
711
|
|
|
710
712
|
return cast(AnyINode, type(example_inode)(value, expr))
|
|
711
713
|
|
|
714
|
+
|
|
712
715
|
def set_inode_value(inode: AnyINode, new_value: str) -> AnyINode:
|
|
713
716
|
"""
|
|
714
717
|
Return a copy of a WDL File/Directory with the value changed.
|
|
@@ -740,9 +743,7 @@ def get_inode_nonexistent(inode: WDLINode) -> bool:
|
|
|
740
743
|
return cast(bool, getattr(inode, "nonexistent", False))
|
|
741
744
|
|
|
742
745
|
|
|
743
|
-
def set_inode_virtualized_value(
|
|
744
|
-
inode: AnyINode, virtualized_value: str
|
|
745
|
-
) -> AnyINode:
|
|
746
|
+
def set_inode_virtualized_value(inode: AnyINode, virtualized_value: str) -> AnyINode:
|
|
746
747
|
"""
|
|
747
748
|
Return a copy of a WDL File/Directory with the virtualized_value attribute set.
|
|
748
749
|
|
|
@@ -754,14 +755,14 @@ def set_inode_virtualized_value(
|
|
|
754
755
|
return new_inode
|
|
755
756
|
|
|
756
757
|
|
|
757
|
-
def get_inode_virtualized_value(inode: WDLINode) ->
|
|
758
|
+
def get_inode_virtualized_value(inode: WDLINode) -> str | None:
|
|
758
759
|
"""
|
|
759
760
|
Get the virtualized storage location for a File/Directory.
|
|
760
761
|
"""
|
|
761
762
|
return cast(Optional[str], getattr(inode, "virtualized_value", None))
|
|
762
763
|
|
|
763
764
|
|
|
764
|
-
def get_shared_fs_path(inode: WDLINode) ->
|
|
765
|
+
def get_shared_fs_path(inode: WDLINode) -> str | None:
|
|
765
766
|
"""
|
|
766
767
|
If a File/Directory has a shared filesystem path, get that path.
|
|
767
768
|
|
|
@@ -814,7 +815,7 @@ def view_shared_fs_paths(
|
|
|
814
815
|
|
|
815
816
|
|
|
816
817
|
def poll_execution_cache(
|
|
817
|
-
node:
|
|
818
|
+
node: WDL.Tree.Workflow | WDL.Tree.Task, bindings: WDLBindings
|
|
818
819
|
) -> tuple[WDLBindings | None, str]:
|
|
819
820
|
"""
|
|
820
821
|
Return the cached result of calling this workflow or task, and its key.
|
|
@@ -832,7 +833,7 @@ def poll_execution_cache(
|
|
|
832
833
|
# TODO: Ship config from leader? It might not see the right environment.
|
|
833
834
|
miniwdl_config = WDL.runtime.config.Loader(miniwdl_logger)
|
|
834
835
|
miniwdl_cache = WDL.runtime.cache.new(miniwdl_config, miniwdl_logger)
|
|
835
|
-
cached_result:
|
|
836
|
+
cached_result: WDLBindings | None = miniwdl_cache.get(
|
|
836
837
|
cache_key, transformed_bindings, node.effective_outputs
|
|
837
838
|
)
|
|
838
839
|
if cached_result is not None:
|
|
@@ -848,8 +849,8 @@ def fill_execution_cache(
|
|
|
848
849
|
output_bindings: WDLBindings,
|
|
849
850
|
file_store: AbstractFileStore,
|
|
850
851
|
wdl_options: WDLContext,
|
|
851
|
-
miniwdl_logger:
|
|
852
|
-
miniwdl_config:
|
|
852
|
+
miniwdl_logger: logging.Logger | None = None,
|
|
853
|
+
miniwdl_config: WDL.runtime.config.Loader | None = None,
|
|
853
854
|
) -> WDLBindings:
|
|
854
855
|
"""
|
|
855
856
|
Cache the result of calling a workflow or task.
|
|
@@ -908,9 +909,7 @@ def fill_execution_cache(
|
|
|
908
909
|
if virtualized is None:
|
|
909
910
|
# TODO: If we're passing things around by URL reference and
|
|
910
911
|
# some of them are file: is this actually allowed?
|
|
911
|
-
raise RuntimeError(
|
|
912
|
-
f"{inode} caught escaping from task unvirtualized"
|
|
913
|
-
)
|
|
912
|
+
raise RuntimeError(f"{inode} caught escaping from task unvirtualized")
|
|
914
913
|
|
|
915
914
|
# We need to save this somewhere.
|
|
916
915
|
# This needs to exist before we can export to it. And now we know
|
|
@@ -933,7 +932,9 @@ def fill_execution_cache(
|
|
|
933
932
|
|
|
934
933
|
return inode
|
|
935
934
|
|
|
936
|
-
output_bindings = map_over_inodes_in_bindings(
|
|
935
|
+
output_bindings = map_over_inodes_in_bindings(
|
|
936
|
+
output_bindings, assign_shared_fs_path
|
|
937
|
+
)
|
|
937
938
|
|
|
938
939
|
# Save the bindings to the cache, representing all files with their shared filesystem paths.
|
|
939
940
|
miniwdl_cache.put(cache_key, view_shared_fs_paths(output_bindings))
|
|
@@ -943,6 +944,7 @@ def fill_execution_cache(
|
|
|
943
944
|
# the cached files in their input digests.
|
|
944
945
|
return output_bindings
|
|
945
946
|
|
|
947
|
+
|
|
946
948
|
def choose_human_readable_directory(
|
|
947
949
|
root_dir: str,
|
|
948
950
|
source_task_path: str,
|
|
@@ -1042,9 +1044,7 @@ def evaluate_decls_to_bindings(
|
|
|
1042
1044
|
each_decl, all_bindings, standard_library
|
|
1043
1045
|
)
|
|
1044
1046
|
else:
|
|
1045
|
-
output_value = evaluate_decl(
|
|
1046
|
-
each_decl, all_bindings, standard_library
|
|
1047
|
-
)
|
|
1047
|
+
output_value = evaluate_decl(each_decl, all_bindings, standard_library)
|
|
1048
1048
|
if drop_missing_files:
|
|
1049
1049
|
dropped_output_value = map_over_typed_inodes_in_value(
|
|
1050
1050
|
output_value, missing_inode_dropper(standard_library)
|
|
@@ -1142,6 +1142,7 @@ def extract_inode_values(environment: WDLBindings) -> list[str]:
|
|
|
1142
1142
|
map_over_inodes_in_bindings(environment, add_value)
|
|
1143
1143
|
return values
|
|
1144
1144
|
|
|
1145
|
+
|
|
1145
1146
|
def extract_inode_virtualized_values(environment: WDLBindings) -> list[str]:
|
|
1146
1147
|
"""
|
|
1147
1148
|
Get a list of all File/Directory object virtualized values in the bindings.
|
|
@@ -1159,6 +1160,7 @@ def extract_inode_virtualized_values(environment: WDLBindings) -> list[str]:
|
|
|
1159
1160
|
map_over_inodes_in_bindings(environment, add_value)
|
|
1160
1161
|
return values
|
|
1161
1162
|
|
|
1163
|
+
|
|
1162
1164
|
def extract_toil_file_uris(environment: WDLBindings) -> Iterable[str]:
|
|
1163
1165
|
"""
|
|
1164
1166
|
Get the toilfile: URIs in the given bindings.
|
|
@@ -1181,8 +1183,8 @@ def extract_toil_file_uris(environment: WDLBindings) -> Iterable[str]:
|
|
|
1181
1183
|
|
|
1182
1184
|
def virtualize_inodes_in_bindings(
|
|
1183
1185
|
environment: WDLBindings,
|
|
1184
|
-
file_to_id:
|
|
1185
|
-
file_to_metadata:
|
|
1186
|
+
file_to_id: dict[str, FileID],
|
|
1187
|
+
file_to_metadata: dict[str, FileMetadata],
|
|
1186
1188
|
task_path: str,
|
|
1187
1189
|
) -> WDLBindings:
|
|
1188
1190
|
"""
|
|
@@ -1235,9 +1237,9 @@ def convert_remote_files(
|
|
|
1235
1237
|
environment: WDLBindings,
|
|
1236
1238
|
file_source: AbstractJobStore,
|
|
1237
1239
|
task_path: str,
|
|
1238
|
-
search_paths:
|
|
1240
|
+
search_paths: list[str] | None = None,
|
|
1239
1241
|
import_remote_files: bool = True,
|
|
1240
|
-
execution_dir:
|
|
1242
|
+
execution_dir: str | None = None,
|
|
1241
1243
|
) -> WDLBindings:
|
|
1242
1244
|
"""
|
|
1243
1245
|
Resolve relative-URI files in the given environment and import all files.
|
|
@@ -1529,7 +1531,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1529
1531
|
file, self._virtualize_filename(file.value)
|
|
1530
1532
|
)
|
|
1531
1533
|
with open(
|
|
1532
|
-
self._devirtualize_filename(get_inode_virtualized_value(file))
|
|
1534
|
+
self._devirtualize_filename(get_inode_virtualized_value(file))
|
|
1533
1535
|
) as infile:
|
|
1534
1536
|
return parse(infile.read())
|
|
1535
1537
|
|
|
@@ -1607,9 +1609,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1607
1609
|
# Mark the inode nonexistent.
|
|
1608
1610
|
return set_inode_nonexistent(inode, True)
|
|
1609
1611
|
|
|
1610
|
-
logger.debug(
|
|
1611
|
-
"For %s got virtualized value %s", inode, virtualized_filename
|
|
1612
|
-
)
|
|
1612
|
+
logger.debug("For %s got virtualized value %s", inode, virtualized_filename)
|
|
1613
1613
|
marked_inode = set_inode_virtualized_value(inode, virtualized_filename)
|
|
1614
1614
|
return marked_inode
|
|
1615
1615
|
|
|
@@ -1635,8 +1635,8 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1635
1635
|
filename: str,
|
|
1636
1636
|
dest_path: str,
|
|
1637
1637
|
file_source: AbstractFileStore | Toil,
|
|
1638
|
-
export:
|
|
1639
|
-
symlink:
|
|
1638
|
+
export: bool | None = None,
|
|
1639
|
+
symlink: bool | None = None,
|
|
1640
1640
|
) -> None:
|
|
1641
1641
|
"""
|
|
1642
1642
|
Given a filename/URI, write it to the given dest_path.
|
|
@@ -1674,7 +1674,9 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1674
1674
|
)
|
|
1675
1675
|
if result != dest_path:
|
|
1676
1676
|
# We definitely want this to be put where we asked.
|
|
1677
|
-
raise RuntimeError(
|
|
1677
|
+
raise RuntimeError(
|
|
1678
|
+
f"Tried to read file to {dest_path} but it went to {result} instead"
|
|
1679
|
+
)
|
|
1678
1680
|
else:
|
|
1679
1681
|
raise RuntimeError(f"Unsupported file source: {file_source}")
|
|
1680
1682
|
else:
|
|
@@ -1741,7 +1743,11 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1741
1743
|
)
|
|
1742
1744
|
return result
|
|
1743
1745
|
else:
|
|
1744
|
-
logger.debug(
|
|
1746
|
+
logger.debug(
|
|
1747
|
+
"Virtualized filename %s is not any of the %s cached items",
|
|
1748
|
+
filename,
|
|
1749
|
+
len(virtualized_to_devirtualized),
|
|
1750
|
+
)
|
|
1745
1751
|
|
|
1746
1752
|
if is_directory_url(filename):
|
|
1747
1753
|
# This points to a directory, so handle it as a tree.
|
|
@@ -1754,12 +1760,20 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1754
1760
|
|
|
1755
1761
|
if is_toil_dir_url(filename):
|
|
1756
1762
|
# This is a Toil directory URL directory.
|
|
1757
|
-
|
|
1763
|
+
(
|
|
1764
|
+
base_dir_decoded,
|
|
1765
|
+
remaining_path,
|
|
1766
|
+
_,
|
|
1767
|
+
base_dir_source_uri,
|
|
1768
|
+
source_task,
|
|
1769
|
+
) = decode_directory(filename)
|
|
1758
1770
|
# We always set the directory URI and source task.
|
|
1759
1771
|
assert base_dir_source_uri is not None
|
|
1760
1772
|
assert source_task is not None
|
|
1761
1773
|
|
|
1762
|
-
contents = get_directory_contents_item(
|
|
1774
|
+
contents = get_directory_contents_item(
|
|
1775
|
+
base_dir_decoded, remaining_path
|
|
1776
|
+
)
|
|
1763
1777
|
|
|
1764
1778
|
# This is a directory and we have its decoded structure.
|
|
1765
1779
|
assert not isinstance(contents, str)
|
|
@@ -1767,12 +1781,19 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1767
1781
|
# Work out where the root uploaded directory would go
|
|
1768
1782
|
dir_basename = os.path.basename(urlsplit(base_dir_source_uri).path)
|
|
1769
1783
|
parent_url = urljoin(base_dir_source_uri, ".")
|
|
1770
|
-
parent_path = os.path.join(
|
|
1771
|
-
|
|
1772
|
-
|
|
1784
|
+
parent_path = os.path.join(
|
|
1785
|
+
choose_human_readable_directory(
|
|
1786
|
+
dest_dir, source_task, parent_url
|
|
1787
|
+
),
|
|
1788
|
+
dir_basename,
|
|
1789
|
+
)
|
|
1773
1790
|
|
|
1774
1791
|
# And where this particular subdirectory we're fetching goes
|
|
1775
|
-
dest_path =
|
|
1792
|
+
dest_path = (
|
|
1793
|
+
os.path.join(parent_path, remaining_path)
|
|
1794
|
+
if remaining_path is not None
|
|
1795
|
+
else parent_path
|
|
1796
|
+
)
|
|
1776
1797
|
|
|
1777
1798
|
# contents is already a dict from basename to sub-dict or full URL.
|
|
1778
1799
|
else:
|
|
@@ -1793,7 +1814,9 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1793
1814
|
# Synthesize a contents dict
|
|
1794
1815
|
contents = {}
|
|
1795
1816
|
|
|
1796
|
-
def list_recursively(
|
|
1817
|
+
def list_recursively(
|
|
1818
|
+
url: str, contents_to_fill: DirectoryContents
|
|
1819
|
+
) -> None:
|
|
1797
1820
|
"""
|
|
1798
1821
|
Recursively list the given URL into the given dict.
|
|
1799
1822
|
|
|
@@ -1818,7 +1841,10 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1818
1841
|
# Now we know we have filename (the directory), dest_path (the
|
|
1819
1842
|
# desired local path), and contents (all the files and
|
|
1820
1843
|
# subdirectories we need to materialize).
|
|
1821
|
-
logger.debug(
|
|
1844
|
+
logger.debug(
|
|
1845
|
+
"Devirtualizing %s directly contained items, and their children",
|
|
1846
|
+
len(contents),
|
|
1847
|
+
)
|
|
1822
1848
|
|
|
1823
1849
|
for relative_path, item_value in directory_contents_items(contents):
|
|
1824
1850
|
# Recursively visit the directory itself and its contents.
|
|
@@ -1834,22 +1860,39 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1834
1860
|
item_devirtualized_path = os.path.join(dest_path, relative_path)
|
|
1835
1861
|
if item_virtualized_path in virtualized_to_devirtualized:
|
|
1836
1862
|
# This has been downloaded already
|
|
1837
|
-
assert
|
|
1863
|
+
assert (
|
|
1864
|
+
virtualized_to_devirtualized[item_virtualized_path]
|
|
1865
|
+
== item_devirtualized_path
|
|
1866
|
+
), f"Devirtualized version of {item_virtualized_path} expected at {item_devirtualized_path} but is actually already at {virtualized_to_devirtualized[item_virtualized_path]}"
|
|
1838
1867
|
# We don't do the back-check because we will have
|
|
1839
1868
|
# entries with the directory URL *and* the base file ID
|
|
1840
1869
|
# URL for files.
|
|
1841
1870
|
assert os.path.exists(item_devirtualized_path)
|
|
1842
|
-
elif
|
|
1871
|
+
elif (
|
|
1872
|
+
item_value is not None
|
|
1873
|
+
and item_value in virtualized_to_devirtualized
|
|
1874
|
+
):
|
|
1843
1875
|
# The target file is already downloaded.
|
|
1844
1876
|
# TODO: Are there circumstances where we're going to
|
|
1845
1877
|
# need multiple copies, such as distinct base
|
|
1846
1878
|
# directories that can't be nested?
|
|
1847
|
-
logger.debug(
|
|
1848
|
-
|
|
1879
|
+
logger.debug(
|
|
1880
|
+
"%s points to %s which is already cached",
|
|
1881
|
+
item_virtualized_path,
|
|
1882
|
+
item_value,
|
|
1883
|
+
)
|
|
1884
|
+
assert (
|
|
1885
|
+
virtualized_to_devirtualized[item_value]
|
|
1886
|
+
== item_devirtualized_path
|
|
1887
|
+
), f"Directory item {item_virtualized_path} points to file {item_value}, which was already devirtualized to {virtualized_to_devirtualized[item_value]}, but for the directory we need it to be at {item_devirtualized_path} instead!"
|
|
1849
1888
|
assert os.path.exists(item_devirtualized_path)
|
|
1850
1889
|
# Cache the file's devirtualized version also under the directory-based path.
|
|
1851
|
-
virtualized_to_devirtualized[item_virtualized_path] =
|
|
1852
|
-
|
|
1890
|
+
virtualized_to_devirtualized[item_virtualized_path] = (
|
|
1891
|
+
virtualized_to_devirtualized[item_value]
|
|
1892
|
+
)
|
|
1893
|
+
logger.debug(
|
|
1894
|
+
"Cache now has %s items", len(virtualized_to_devirtualized)
|
|
1895
|
+
)
|
|
1853
1896
|
else:
|
|
1854
1897
|
# We need to download this now and cache it.
|
|
1855
1898
|
if item_value is None:
|
|
@@ -1860,12 +1903,22 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1860
1903
|
os.makedirs(item_devirtualized_path, exist_ok=True)
|
|
1861
1904
|
|
|
1862
1905
|
# Cache the directory
|
|
1863
|
-
logger.debug(
|
|
1864
|
-
|
|
1865
|
-
|
|
1906
|
+
logger.debug(
|
|
1907
|
+
"Add %s to cache at %s",
|
|
1908
|
+
item_virtualized_path,
|
|
1909
|
+
item_devirtualized_path,
|
|
1910
|
+
)
|
|
1911
|
+
virtualized_to_devirtualized[item_virtualized_path] = (
|
|
1912
|
+
item_devirtualized_path
|
|
1913
|
+
)
|
|
1914
|
+
devirtualized_to_virtualized[item_devirtualized_path] = (
|
|
1915
|
+
item_virtualized_path
|
|
1916
|
+
)
|
|
1866
1917
|
else:
|
|
1867
1918
|
# Download files from their stored locations.
|
|
1868
|
-
assert not os.path.exists(
|
|
1919
|
+
assert not os.path.exists(
|
|
1920
|
+
item_devirtualized_path
|
|
1921
|
+
), f"Virtualized file {item_virtualized_path} pointing to {item_value} already exists at {item_devirtualized_path}, but is not in cache. Back-cache says: {devirtualized_to_virtualized.get(item_devirtualized_path)}"
|
|
1869
1922
|
|
|
1870
1923
|
# Download, not allowing a symlink.
|
|
1871
1924
|
#
|
|
@@ -1883,21 +1936,38 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1883
1936
|
item_devirtualized_path,
|
|
1884
1937
|
file_source,
|
|
1885
1938
|
export,
|
|
1886
|
-
symlink=False
|
|
1939
|
+
symlink=False,
|
|
1887
1940
|
)
|
|
1888
1941
|
|
|
1889
|
-
logger.debug(
|
|
1942
|
+
logger.debug(
|
|
1943
|
+
"Add %s pointing to %s to cache at %s",
|
|
1944
|
+
item_virtualized_path,
|
|
1945
|
+
item_value,
|
|
1946
|
+
item_devirtualized_path,
|
|
1947
|
+
)
|
|
1890
1948
|
# Cache the file in its own right
|
|
1891
|
-
virtualized_to_devirtualized[item_value] =
|
|
1892
|
-
|
|
1949
|
+
virtualized_to_devirtualized[item_value] = (
|
|
1950
|
+
item_devirtualized_path
|
|
1951
|
+
)
|
|
1952
|
+
devirtualized_to_virtualized[item_devirtualized_path] = (
|
|
1953
|
+
item_value
|
|
1954
|
+
)
|
|
1893
1955
|
# And the directory entry as pointing to the file.
|
|
1894
|
-
virtualized_to_devirtualized[item_virtualized_path] =
|
|
1956
|
+
virtualized_to_devirtualized[item_virtualized_path] = (
|
|
1957
|
+
virtualized_to_devirtualized[item_value]
|
|
1958
|
+
)
|
|
1895
1959
|
|
|
1896
|
-
logger.debug(
|
|
1960
|
+
logger.debug(
|
|
1961
|
+
"Cache now has %s items", len(virtualized_to_devirtualized)
|
|
1962
|
+
)
|
|
1897
1963
|
|
|
1898
1964
|
# We should now have it in the cache.
|
|
1899
|
-
assert
|
|
1900
|
-
|
|
1965
|
+
assert (
|
|
1966
|
+
virtualized_to_devirtualized[filename] == dest_path
|
|
1967
|
+
), f"Cached devirtualized path for {filename} should be {dest_path} but is {virtualized_to_devirtualized[filename]} instead!"
|
|
1968
|
+
logger.debug(
|
|
1969
|
+
"Devirtualized %s as local directory %s", filename, dest_path
|
|
1970
|
+
)
|
|
1901
1971
|
# Return where we put it.
|
|
1902
1972
|
return dest_path
|
|
1903
1973
|
|
|
@@ -1919,7 +1989,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1919
1989
|
wdl_options,
|
|
1920
1990
|
devirtualized_to_virtualized,
|
|
1921
1991
|
virtualized_to_devirtualized,
|
|
1922
|
-
export
|
|
1992
|
+
export,
|
|
1923
1993
|
)
|
|
1924
1994
|
# Otherwise, we have a direct URL to a file to get. Base case.
|
|
1925
1995
|
|
|
@@ -1952,12 +2022,16 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
1952
2022
|
# Download the file into it.
|
|
1953
2023
|
cls._write_uri_to(filename, dest_path, file_source, export)
|
|
1954
2024
|
|
|
1955
|
-
logger.debug(
|
|
2025
|
+
logger.debug(
|
|
2026
|
+
"Devirtualized %s as openable file %s", filename, dest_path
|
|
2027
|
+
)
|
|
1956
2028
|
|
|
1957
2029
|
# Store it in the cache
|
|
1958
2030
|
virtualized_to_devirtualized[filename] = dest_path
|
|
1959
2031
|
devirtualized_to_virtualized[dest_path] = filename
|
|
1960
|
-
logger.debug(
|
|
2032
|
+
logger.debug(
|
|
2033
|
+
"Cache now has %s items", len(virtualized_to_devirtualized)
|
|
2034
|
+
)
|
|
1961
2035
|
return dest_path
|
|
1962
2036
|
else:
|
|
1963
2037
|
# This is a local file or file URL
|
|
@@ -2013,7 +2087,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
2013
2087
|
|
|
2014
2088
|
if real_path.startswith(execution_prefix):
|
|
2015
2089
|
# This is a task working firectory relative file
|
|
2016
|
-
return real_path[len(execution_prefix):]
|
|
2090
|
+
return real_path[len(execution_prefix) :]
|
|
2017
2091
|
|
|
2018
2092
|
if real_path.startswith(ltd_prefix):
|
|
2019
2093
|
# This file is relative to the Toil working directory.
|
|
@@ -2023,11 +2097,10 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
2023
2097
|
#
|
|
2024
2098
|
# We already inject _miniwdl_inputs in there, so just inject
|
|
2025
2099
|
# another underscore-prefixed thing.
|
|
2026
|
-
return "_toil_job/" + real_path[len(ltd_prefix):]
|
|
2100
|
+
return "_toil_job/" + real_path[len(ltd_prefix) :]
|
|
2027
2101
|
|
|
2028
2102
|
return path
|
|
2029
2103
|
|
|
2030
|
-
|
|
2031
2104
|
@memoize
|
|
2032
2105
|
def _virtualize_filename(self, filename: str) -> str:
|
|
2033
2106
|
"""
|
|
@@ -2117,7 +2190,9 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
2117
2190
|
# Satisfy mypy. This should never happen though as we don't
|
|
2118
2191
|
# pass a shared file name (which is the only way import_file
|
|
2119
2192
|
# returns None)
|
|
2120
|
-
raise RuntimeError(
|
|
2193
|
+
raise RuntimeError(
|
|
2194
|
+
"Failed to import URL %s into jobstore." % normalized_uri
|
|
2195
|
+
)
|
|
2121
2196
|
file_basename = os.path.basename(urlsplit(normalized_uri).path)
|
|
2122
2197
|
# Get the URL to the parent directory and use that.
|
|
2123
2198
|
parent_dir = urljoin(normalized_uri, ".")
|
|
@@ -2144,9 +2219,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
2144
2219
|
# This is a previously devirtualized thing so we can just use the
|
|
2145
2220
|
# virtual version we remembered instead of reuploading it.
|
|
2146
2221
|
result = self._devirtualized_to_virtualized[abs_filename]
|
|
2147
|
-
logger.debug(
|
|
2148
|
-
"Re-using virtualized WDL %s for %s", result, filename
|
|
2149
|
-
)
|
|
2222
|
+
logger.debug("Re-using virtualized WDL %s for %s", result, filename)
|
|
2150
2223
|
return result
|
|
2151
2224
|
|
|
2152
2225
|
if not os.path.exists(abs_filename):
|
|
@@ -2181,7 +2254,7 @@ class ToilWDLStdLibWorkflow(ToilWDLStdLibBase):
|
|
|
2181
2254
|
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
|
2182
2255
|
super().__init__(*args, **kwargs)
|
|
2183
2256
|
|
|
2184
|
-
self._miniwdl_cache:
|
|
2257
|
+
self._miniwdl_cache: WDL.runtime.cache.CallCache | None = None
|
|
2185
2258
|
|
|
2186
2259
|
def _virtualize_inode(
|
|
2187
2260
|
self, inode: AnyINode, enforce_existence: bool = True
|
|
@@ -2192,10 +2265,7 @@ class ToilWDLStdLibWorkflow(ToilWDLStdLibBase):
|
|
|
2192
2265
|
if (
|
|
2193
2266
|
get_inode_virtualized_value(inode) is None
|
|
2194
2267
|
and get_shared_fs_path(inode) is None
|
|
2195
|
-
and (
|
|
2196
|
-
not is_any_url(inode.value)
|
|
2197
|
-
or is_file_url(inode.value)
|
|
2198
|
-
)
|
|
2268
|
+
and (not is_any_url(inode.value) or is_file_url(inode.value))
|
|
2199
2269
|
):
|
|
2200
2270
|
# This is a never-virtualized inode that is a path or URI and
|
|
2201
2271
|
# has no shared FS path associated with it. We just made it at
|
|
@@ -2219,7 +2289,7 @@ class ToilWDLStdLibWorkflow(ToilWDLStdLibBase):
|
|
|
2219
2289
|
"Applied shared filesystem path %s to %s that appears to "
|
|
2220
2290
|
"have been coerced from String at workflow scope.",
|
|
2221
2291
|
cache_path,
|
|
2222
|
-
inode
|
|
2292
|
+
inode,
|
|
2223
2293
|
)
|
|
2224
2294
|
|
|
2225
2295
|
# Do the virtualization
|
|
@@ -2690,7 +2760,9 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
2690
2760
|
raise FileNotFoundError(filename)
|
|
2691
2761
|
filename = here
|
|
2692
2762
|
|
|
2693
|
-
logger.debug(
|
|
2763
|
+
logger.debug(
|
|
2764
|
+
"WDL task outputs stdlib thinks we really need to virtualize %s", filename
|
|
2765
|
+
)
|
|
2694
2766
|
return super()._virtualize_filename(filename)
|
|
2695
2767
|
|
|
2696
2768
|
|
|
@@ -2755,6 +2827,7 @@ def evaluate_decl(
|
|
|
2755
2827
|
log_bindings(logger.error, "Statement was evaluated in:", [environment])
|
|
2756
2828
|
raise
|
|
2757
2829
|
|
|
2830
|
+
|
|
2758
2831
|
def evaluate_call_inputs(
|
|
2759
2832
|
context: WDL.Error.SourceNode | WDL.Error.SourcePosition,
|
|
2760
2833
|
expressions: dict[str, WDL.Expr.Base],
|
|
@@ -2800,8 +2873,7 @@ def evaluate_defaultable_decl(
|
|
|
2800
2873
|
node.name in environment
|
|
2801
2874
|
and not isinstance(environment[node.name], WDL.Value.Null)
|
|
2802
2875
|
) or (
|
|
2803
|
-
isinstance(environment.get(node.name), WDL.Value.Null)
|
|
2804
|
-
and node.type.optional
|
|
2876
|
+
isinstance(environment.get(node.name), WDL.Value.Null) and node.type.optional
|
|
2805
2877
|
):
|
|
2806
2878
|
logger.debug("Name %s is already defined, not using default", node.name)
|
|
2807
2879
|
if not isinstance(environment[node.name].type, type(node.type)):
|
|
@@ -2819,7 +2891,6 @@ def evaluate_defaultable_decl(
|
|
|
2819
2891
|
return evaluate_decl(node, environment, stdlib)
|
|
2820
2892
|
|
|
2821
2893
|
|
|
2822
|
-
|
|
2823
2894
|
# TODO: make these stdlib methods???
|
|
2824
2895
|
def devirtualize_inodes(
|
|
2825
2896
|
environment: WDLBindings, stdlib: ToilWDLStdLibBase
|
|
@@ -2843,14 +2914,16 @@ def virtualize_inodes(
|
|
|
2843
2914
|
logger.debug("Virtualizing files and directories")
|
|
2844
2915
|
virtualize_func = cast(
|
|
2845
2916
|
INodeTransform,
|
|
2846
|
-
partial(
|
|
2847
|
-
stdlib._virtualize_inode,
|
|
2848
|
-
enforce_existence=enforce_existence
|
|
2849
|
-
)
|
|
2917
|
+
partial(stdlib._virtualize_inode, enforce_existence=enforce_existence),
|
|
2850
2918
|
)
|
|
2851
2919
|
return map_over_inodes_in_bindings(environment, virtualize_func)
|
|
2852
2920
|
|
|
2853
|
-
|
|
2921
|
+
|
|
2922
|
+
def delete_dead_files(
|
|
2923
|
+
internal_bindings: WDLBindings,
|
|
2924
|
+
live_bindings_list: list[WDLBindings],
|
|
2925
|
+
file_store: AbstractFileStore,
|
|
2926
|
+
) -> None:
|
|
2854
2927
|
"""
|
|
2855
2928
|
Delete any files that are in the given bindings but not in the live list.
|
|
2856
2929
|
|
|
@@ -2859,22 +2932,20 @@ def delete_dead_files(internal_bindings: WDLBindings, live_bindings_list: list[W
|
|
|
2859
2932
|
"""
|
|
2860
2933
|
|
|
2861
2934
|
# Get all the files in the first bindings and not any of the others.
|
|
2862
|
-
unused_files = set(
|
|
2863
|
-
extract_toil_file_uris(
|
|
2864
|
-
).difference(
|
|
2865
|
-
*(
|
|
2866
|
-
extract_toil_file_uris(bindings)
|
|
2867
|
-
for bindings in live_bindings_list
|
|
2868
|
-
)
|
|
2935
|
+
unused_files = set(extract_toil_file_uris(internal_bindings)).difference(
|
|
2936
|
+
*(extract_toil_file_uris(bindings) for bindings in live_bindings_list)
|
|
2869
2937
|
)
|
|
2870
2938
|
|
|
2871
2939
|
for file_uri in unused_files:
|
|
2872
2940
|
# Delete them
|
|
2873
|
-
assert is_toil_url(
|
|
2941
|
+
assert is_toil_url(
|
|
2942
|
+
file_uri
|
|
2943
|
+
), f"Trying to clean up file {file_uri} not managed by Toil"
|
|
2874
2944
|
logger.debug("Delete file %s that is not needed", file_uri)
|
|
2875
2945
|
file_id, _, _, _ = unpack_toil_uri(file_uri)
|
|
2876
2946
|
file_store.deleteGlobalFile(file_id)
|
|
2877
2947
|
|
|
2948
|
+
|
|
2878
2949
|
def all_parents(path: str) -> Iterable[str]:
|
|
2879
2950
|
"""
|
|
2880
2951
|
Yield all parents of the given path, up to the filesystem root.
|
|
@@ -2900,6 +2971,7 @@ def all_parents(path: str) -> Iterable[str]:
|
|
|
2900
2971
|
here = os.path.dirname(here).rstrip("/")
|
|
2901
2972
|
yield here + "/"
|
|
2902
2973
|
|
|
2974
|
+
|
|
2903
2975
|
def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:
|
|
2904
2976
|
"""
|
|
2905
2977
|
Based off of WDL.runtime.task_container.add_paths from miniwdl
|
|
@@ -2929,7 +3001,14 @@ def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:
|
|
|
2929
3001
|
#
|
|
2930
3002
|
# TODO: I wish I had a BWT here but that seems fiddly.
|
|
2931
3003
|
|
|
2932
|
-
paths_with_slashes = (
|
|
3004
|
+
paths_with_slashes = (
|
|
3005
|
+
(
|
|
3006
|
+
host_path + "/"
|
|
3007
|
+
if not host_path.endswith("/") and os.path.isdir(host_path)
|
|
3008
|
+
else host_path
|
|
3009
|
+
)
|
|
3010
|
+
for host_path in host_paths
|
|
3011
|
+
)
|
|
2933
3012
|
paths_by_length = list(sorted(paths_with_slashes, key=len))
|
|
2934
3013
|
|
|
2935
3014
|
# This stores all the paths that need to be mounted, organized by top
|
|
@@ -2954,7 +3033,9 @@ def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:
|
|
|
2954
3033
|
# We need to preserve sibling relationships among top items. So organize them by parents.
|
|
2955
3034
|
top_items_by_parent = collections.defaultdict(list)
|
|
2956
3035
|
for top_item in paths_by_top_item.keys():
|
|
2957
|
-
top_items_by_parent[os.path.dirname(top_item.rstrip("/")) + "/"].append(
|
|
3036
|
+
top_items_by_parent[os.path.dirname(top_item.rstrip("/")) + "/"].append(
|
|
3037
|
+
top_item
|
|
3038
|
+
)
|
|
2958
3039
|
|
|
2959
3040
|
logger.debug("Top items by parent: %s", top_items_by_parent)
|
|
2960
3041
|
|
|
@@ -2982,7 +3063,9 @@ def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:
|
|
|
2982
3063
|
for host_path in paths_by_top_item[top_item]:
|
|
2983
3064
|
# Figure out where relative to the parent's assigned path
|
|
2984
3065
|
# in the container we should put this file/directory.
|
|
2985
|
-
container_path = os.path.join(
|
|
3066
|
+
container_path = os.path.join(
|
|
3067
|
+
parent_container_base, host_path[len(parent) :]
|
|
3068
|
+
)
|
|
2986
3069
|
|
|
2987
3070
|
# Put it there.
|
|
2988
3071
|
task_container.input_path_map[host_path] = container_path
|
|
@@ -2990,6 +3073,7 @@ def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:
|
|
|
2990
3073
|
|
|
2991
3074
|
logger.debug("Mount %s at %s", host_path, container_path)
|
|
2992
3075
|
|
|
3076
|
+
|
|
2993
3077
|
def drop_if_missing(
|
|
2994
3078
|
inode: WDLINode, standard_library: ToilWDLStdLibBase
|
|
2995
3079
|
) -> WDLINode | None:
|
|
@@ -3006,12 +3090,9 @@ def drop_if_missing(
|
|
|
3006
3090
|
if reference is not None and is_any_url(reference):
|
|
3007
3091
|
try:
|
|
3008
3092
|
if (
|
|
3009
|
-
is_toil_file_url(reference)
|
|
3010
|
-
(
|
|
3011
|
-
|
|
3012
|
-
directory_item_exists(reference)
|
|
3013
|
-
) or
|
|
3014
|
-
URLAccess.url_exists(reference)
|
|
3093
|
+
is_toil_file_url(reference)
|
|
3094
|
+
or (is_toil_dir_url(reference) and directory_item_exists(reference))
|
|
3095
|
+
or URLAccess.url_exists(reference)
|
|
3015
3096
|
):
|
|
3016
3097
|
# We assume anything in the filestore actually exists.
|
|
3017
3098
|
devirtualized_filename = standard_library._devirtualize_filename(
|
|
@@ -3037,9 +3118,7 @@ def drop_if_missing(
|
|
|
3037
3118
|
raise
|
|
3038
3119
|
else:
|
|
3039
3120
|
# Get the absolute path, not resolving symlinks
|
|
3040
|
-
effective_path = os.path.abspath(
|
|
3041
|
-
os.path.join(work_dir, reference)
|
|
3042
|
-
)
|
|
3121
|
+
effective_path = os.path.abspath(os.path.join(work_dir, reference))
|
|
3043
3122
|
if os.path.islink(effective_path) or os.path.exists(effective_path):
|
|
3044
3123
|
# This is a broken symlink or a working symlink or a file/directory.
|
|
3045
3124
|
return inode
|
|
@@ -3052,6 +3131,7 @@ def drop_if_missing(
|
|
|
3052
3131
|
)
|
|
3053
3132
|
return None
|
|
3054
3133
|
|
|
3134
|
+
|
|
3055
3135
|
def missing_inode_dropper(standard_library: ToilWDLStdLibBase) -> INodeTransform:
|
|
3056
3136
|
"""
|
|
3057
3137
|
Get a function to null out missing File/Directory values.
|
|
@@ -3063,13 +3143,10 @@ def missing_inode_dropper(standard_library: ToilWDLStdLibBase) -> INodeTransform
|
|
|
3063
3143
|
# We need this to wrap partial() because MyPy can't really understand the
|
|
3064
3144
|
# effects of partial() on making a function match a protocol.
|
|
3065
3145
|
return cast(
|
|
3066
|
-
INodeTransform,
|
|
3067
|
-
partial(
|
|
3068
|
-
drop_if_missing,
|
|
3069
|
-
standard_library=standard_library
|
|
3070
|
-
)
|
|
3146
|
+
INodeTransform, partial(drop_if_missing, standard_library=standard_library)
|
|
3071
3147
|
)
|
|
3072
3148
|
|
|
3149
|
+
|
|
3073
3150
|
def drop_missing_files(
|
|
3074
3151
|
environment: WDLBindings, standard_library: ToilWDLStdLibBase
|
|
3075
3152
|
) -> WDLBindings:
|
|
@@ -3080,7 +3157,9 @@ def drop_missing_files(
|
|
|
3080
3157
|
Files must not be virtualized.
|
|
3081
3158
|
"""
|
|
3082
3159
|
|
|
3083
|
-
return map_over_inodes_in_bindings(
|
|
3160
|
+
return map_over_inodes_in_bindings(
|
|
3161
|
+
environment, missing_inode_dropper(standard_library)
|
|
3162
|
+
)
|
|
3084
3163
|
|
|
3085
3164
|
|
|
3086
3165
|
def get_paths_in_bindings(environment: WDLBindings) -> list[str]:
|
|
@@ -3139,12 +3218,14 @@ def map_over_inodes_in_binding(
|
|
|
3139
3218
|
binding.info,
|
|
3140
3219
|
)
|
|
3141
3220
|
|
|
3221
|
+
|
|
3142
3222
|
def remove_expr_from_value(value: WDL.Value.Base) -> WDL.Value.Base:
|
|
3143
3223
|
"""
|
|
3144
3224
|
Remove the expression from a WDL value
|
|
3145
3225
|
:param value: Original WDL value
|
|
3146
3226
|
:return: New WDL value without the expr field
|
|
3147
3227
|
"""
|
|
3228
|
+
|
|
3148
3229
|
# TODO: This is an extra copy that we could get rid of by dropping the immutability idea
|
|
3149
3230
|
def predicate(value: WDL.Value.Base) -> WDL.Value.Base:
|
|
3150
3231
|
# Do a shallow copy to preserve immutability
|
|
@@ -3159,8 +3240,10 @@ def remove_expr_from_value(value: WDL.Value.Base) -> WDL.Value.Base:
|
|
|
3159
3240
|
else:
|
|
3160
3241
|
new_value._expr = value.expr
|
|
3161
3242
|
return new_value
|
|
3243
|
+
|
|
3162
3244
|
return map_over_typed_value(value, predicate)
|
|
3163
3245
|
|
|
3246
|
+
|
|
3164
3247
|
# TODO: We want to type this to say, for anything descended from a WDL type, we
|
|
3165
3248
|
# return something descended from the same WDL type or a null. But I can't
|
|
3166
3249
|
# quite do that with generics, since you could pass in some extended WDL value
|
|
@@ -3168,7 +3251,9 @@ def remove_expr_from_value(value: WDL.Value.Base) -> WDL.Value.Base:
|
|
|
3168
3251
|
#
|
|
3169
3252
|
# For now we assume that any types extending the WDL value types will implement
|
|
3170
3253
|
# compatible constructors.
|
|
3171
|
-
def map_over_typed_value(
|
|
3254
|
+
def map_over_typed_value(
|
|
3255
|
+
value: WDL.Value.Base, transform: Callable[[WDL.Value.Base], WDL.Value.Base]
|
|
3256
|
+
) -> WDL.Value.Base:
|
|
3172
3257
|
"""
|
|
3173
3258
|
Apply a transform to a WDL value and all contained WDL values.
|
|
3174
3259
|
:param value: WDL value to transform
|
|
@@ -3211,10 +3296,7 @@ def map_over_typed_value(value: WDL.Value.Base, transform: Callable[[WDL.Value.B
|
|
|
3211
3296
|
# This is a struct, so recurse on the values in the backing dict
|
|
3212
3297
|
value = WDL.Value.Struct(
|
|
3213
3298
|
cast(Union[WDL.Type.StructInstance, WDL.Type.Object], value.type),
|
|
3214
|
-
{
|
|
3215
|
-
k: map_over_typed_value(v, transform)
|
|
3216
|
-
for k, v in value.value.items()
|
|
3217
|
-
},
|
|
3299
|
+
{k: map_over_typed_value(v, transform) for k, v in value.value.items()},
|
|
3218
3300
|
value.expr,
|
|
3219
3301
|
)
|
|
3220
3302
|
# Run the predicate on the final value
|
|
@@ -3239,6 +3321,7 @@ def map_over_typed_inodes_in_value(
|
|
|
3239
3321
|
actually be used, to allow for scans. So error checking needs to be part of
|
|
3240
3322
|
the transform itself.
|
|
3241
3323
|
"""
|
|
3324
|
+
|
|
3242
3325
|
def predicate(value: WDL.Value.Base) -> WDL.Value.Base:
|
|
3243
3326
|
if is_inode(value):
|
|
3244
3327
|
# This is a File or Directory so we need to process it
|
|
@@ -3406,7 +3489,9 @@ class WDLBaseJob(Job):
|
|
|
3406
3489
|
def remove_expr_from_bindings(self, bindings: WDLBindings) -> WDLBindings:
|
|
3407
3490
|
# We have to throw out the expressions because they drag the entire WDL document into the WDL outputs
|
|
3408
3491
|
# which causes duplicate pickling and linear growth in scatter memory usage
|
|
3409
|
-
return bindings.map(
|
|
3492
|
+
return bindings.map(
|
|
3493
|
+
lambda b: WDL.Env.Binding(b.name, remove_expr_from_value(b.value), b.info)
|
|
3494
|
+
)
|
|
3410
3495
|
|
|
3411
3496
|
def postprocess(self, bindings: WDLBindings) -> WDLBindings:
|
|
3412
3497
|
"""
|
|
@@ -3557,15 +3642,11 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3557
3642
|
# Throw away anything input but not available outside the call or
|
|
3558
3643
|
# output.
|
|
3559
3644
|
delete_dead_files(
|
|
3560
|
-
bindings,
|
|
3561
|
-
[cached_bindings, self._enclosing_bindings],
|
|
3562
|
-
file_store
|
|
3645
|
+
bindings, [cached_bindings, self._enclosing_bindings], file_store
|
|
3563
3646
|
)
|
|
3564
3647
|
|
|
3565
3648
|
# Postprocess and ship the output bindings.
|
|
3566
|
-
return self.postprocess(
|
|
3567
|
-
cached_bindings
|
|
3568
|
-
)
|
|
3649
|
+
return self.postprocess(cached_bindings)
|
|
3569
3650
|
|
|
3570
3651
|
if self._task.inputs:
|
|
3571
3652
|
logger.debug("Evaluating task code")
|
|
@@ -3575,7 +3656,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
|
|
|
3575
3656
|
bindings,
|
|
3576
3657
|
standard_library,
|
|
3577
3658
|
include_previous=True,
|
|
3578
|
-
expressions_are_defaults=True
|
|
3659
|
+
expressions_are_defaults=True,
|
|
3579
3660
|
)
|
|
3580
3661
|
if self._task.postinputs:
|
|
3581
3662
|
# Evaluate all the postinput decls.
|
|
@@ -4139,8 +4220,10 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
4139
4220
|
"is not yet implemented in the MiniWDL Docker "
|
|
4140
4221
|
"containerization implementation."
|
|
4141
4222
|
)
|
|
4142
|
-
if runtime_bindings.has_binding("memory") and human2bytes(
|
|
4143
|
-
|
|
4223
|
+
if runtime_bindings.has_binding("memory") and human2bytes(
|
|
4224
|
+
runtime_bindings.resolve("memory").value
|
|
4225
|
+
) < human2bytes("4MiB"):
|
|
4226
|
+
runtime_bindings.resolve("memory").value = "4MiB"
|
|
4144
4227
|
else:
|
|
4145
4228
|
raise RuntimeError(
|
|
4146
4229
|
f"Could not find a working container engine to use; told to use {self._wdl_options.get('container')}"
|
|
@@ -4544,7 +4627,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
4544
4627
|
delete_dead_files(
|
|
4545
4628
|
combine_bindings([bindings, runtime_bindings]),
|
|
4546
4629
|
[output_bindings, self._enclosing_bindings],
|
|
4547
|
-
file_store
|
|
4630
|
+
file_store,
|
|
4548
4631
|
)
|
|
4549
4632
|
# If File objects somehow made it to the runtime block they shouldn't
|
|
4550
4633
|
# have been virtualized so don't bother with them.
|
|
@@ -4602,7 +4685,9 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
4602
4685
|
value = evaluate_decl(self._node, incoming_bindings, standard_library)
|
|
4603
4686
|
bindings = incoming_bindings.bind(self._node.name, value)
|
|
4604
4687
|
# TODO: Only virtualize the new binding
|
|
4605
|
-
return self.postprocess(
|
|
4688
|
+
return self.postprocess(
|
|
4689
|
+
virtualize_inodes(bindings, standard_library, enforce_existence=False)
|
|
4690
|
+
)
|
|
4606
4691
|
elif isinstance(self._node, WDL.Tree.Call):
|
|
4607
4692
|
# This is a call of a task or workflow
|
|
4608
4693
|
|
|
@@ -4624,7 +4709,9 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
4624
4709
|
inputs_mapping,
|
|
4625
4710
|
)
|
|
4626
4711
|
# Prepare call inputs to move to another node
|
|
4627
|
-
input_bindings = virtualize_inodes(
|
|
4712
|
+
input_bindings = virtualize_inodes(
|
|
4713
|
+
input_bindings, standard_library, enforce_existence=False
|
|
4714
|
+
)
|
|
4628
4715
|
|
|
4629
4716
|
# Bindings may also be added in from the enclosing workflow inputs
|
|
4630
4717
|
# TODO: this is letting us also inject them from the workflow body.
|
|
@@ -4756,7 +4843,11 @@ class WDLWorkflowNodeListJob(WDLBaseJob):
|
|
|
4756
4843
|
)
|
|
4757
4844
|
|
|
4758
4845
|
# TODO: Only virtualize the new bindings created
|
|
4759
|
-
return self.postprocess(
|
|
4846
|
+
return self.postprocess(
|
|
4847
|
+
virtualize_inodes(
|
|
4848
|
+
current_bindings, standard_library, enforce_existence=False
|
|
4849
|
+
)
|
|
4850
|
+
)
|
|
4760
4851
|
|
|
4761
4852
|
|
|
4762
4853
|
class WDLCombineBindingsJob(WDLBaseJob):
|
|
@@ -5611,7 +5702,9 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
5611
5702
|
[(p, p) for p in standard_library.get_local_paths()]
|
|
5612
5703
|
)
|
|
5613
5704
|
|
|
5614
|
-
bindings = virtualize_inodes(
|
|
5705
|
+
bindings = virtualize_inodes(
|
|
5706
|
+
bindings, standard_library, enforce_existence=False
|
|
5707
|
+
)
|
|
5615
5708
|
# Make jobs to run all the parts of the workflow
|
|
5616
5709
|
sink = self.create_subgraph(self._workflow.body, [], bindings)
|
|
5617
5710
|
|
|
@@ -5758,11 +5851,12 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
5758
5851
|
delete_dead_files(
|
|
5759
5852
|
unwrap(self._bindings),
|
|
5760
5853
|
[output_bindings, self._enclosing_bindings],
|
|
5761
|
-
file_store
|
|
5854
|
+
file_store,
|
|
5762
5855
|
)
|
|
5763
5856
|
|
|
5764
5857
|
return self.postprocess(output_bindings)
|
|
5765
5858
|
|
|
5859
|
+
|
|
5766
5860
|
class WDLStartJob(WDLSectionJob):
|
|
5767
5861
|
"""
|
|
5768
5862
|
Job that evaluates an entire WDL workflow, and returns the workflow outputs
|
|
@@ -5830,7 +5924,7 @@ class WDLInstallImportsJob(Job):
|
|
|
5830
5924
|
self,
|
|
5831
5925
|
task_path: str,
|
|
5832
5926
|
inputs: WDLBindings,
|
|
5833
|
-
import_data: Promised[
|
|
5927
|
+
import_data: Promised[tuple[dict[str, FileID], dict[str, FileMetadata]]],
|
|
5834
5928
|
**kwargs: Any,
|
|
5835
5929
|
) -> None:
|
|
5836
5930
|
"""
|
|
@@ -5851,7 +5945,9 @@ class WDLInstallImportsJob(Job):
|
|
|
5851
5945
|
"""
|
|
5852
5946
|
candidate_to_fileid = unwrap(self._import_data)[0]
|
|
5853
5947
|
file_to_metadata = unwrap(self._import_data)[1]
|
|
5854
|
-
return virtualize_inodes_in_bindings(
|
|
5948
|
+
return virtualize_inodes_in_bindings(
|
|
5949
|
+
self._inputs, candidate_to_fileid, file_to_metadata, self._task_path
|
|
5950
|
+
)
|
|
5855
5951
|
|
|
5856
5952
|
|
|
5857
5953
|
class WDLImportWrapper(WDLSectionJob):
|
|
@@ -5864,7 +5960,7 @@ class WDLImportWrapper(WDLSectionJob):
|
|
|
5864
5960
|
|
|
5865
5961
|
def __init__(
|
|
5866
5962
|
self,
|
|
5867
|
-
target:
|
|
5963
|
+
target: WDL.Tree.Workflow | WDL.Tree.Task,
|
|
5868
5964
|
inputs: WDLBindings,
|
|
5869
5965
|
wdl_options: WDLContext,
|
|
5870
5966
|
inputs_search_path: list[str],
|
|
@@ -5893,9 +5989,11 @@ class WDLImportWrapper(WDLSectionJob):
|
|
|
5893
5989
|
file_store.jobStore,
|
|
5894
5990
|
self._inputs_search_path,
|
|
5895
5991
|
include_remote_files=self._import_remote_files,
|
|
5896
|
-
execution_dir=self._wdl_options.get("execution_dir")
|
|
5992
|
+
execution_dir=self._wdl_options.get("execution_dir"),
|
|
5993
|
+
)
|
|
5994
|
+
imports_job = ImportsJob(
|
|
5995
|
+
file_to_metadata, self._import_workers_batchsize, self._import_workers_disk
|
|
5897
5996
|
)
|
|
5898
|
-
imports_job = ImportsJob(file_to_metadata, self._import_workers_batchsize, self._import_workers_disk)
|
|
5899
5997
|
self.addChild(imports_job)
|
|
5900
5998
|
install_imports_job = WDLInstallImportsJob(
|
|
5901
5999
|
self._target.name, self._inputs, imports_job.rv()
|
|
@@ -5928,7 +6026,7 @@ def make_root_job(
|
|
|
5928
6026
|
inputs_search_path=inputs_search_path,
|
|
5929
6027
|
import_remote_files=options.reference_inputs,
|
|
5930
6028
|
import_workers_batchsize=options.import_workers_batchsize,
|
|
5931
|
-
import_workers_disk=options.import_workers_disk
|
|
6029
|
+
import_workers_disk=options.import_workers_disk,
|
|
5932
6030
|
)
|
|
5933
6031
|
else:
|
|
5934
6032
|
# Run WDL imports on leader
|
|
@@ -5968,7 +6066,7 @@ def main() -> None:
|
|
|
5968
6066
|
raise RuntimeError(
|
|
5969
6067
|
f"Workflow inputs cannot be specified with both the -i/--input/--inputs flag "
|
|
5970
6068
|
f"and as a positional argument at the same time. Cannot use both "
|
|
5971
|
-
f"
|
|
6069
|
+
f'"{input_sources[0]}" and "{input_sources[1]}".'
|
|
5972
6070
|
)
|
|
5973
6071
|
|
|
5974
6072
|
# Make sure we have an output directory (or URL prefix) and we don't need
|
|
@@ -5981,9 +6079,13 @@ def main() -> None:
|
|
|
5981
6079
|
)
|
|
5982
6080
|
|
|
5983
6081
|
try:
|
|
5984
|
-
wdl_uri, trs_spec = resolve_workflow(
|
|
6082
|
+
wdl_uri, trs_spec = resolve_workflow(
|
|
6083
|
+
options.wdl_uri, supported_languages={"WDL"}
|
|
6084
|
+
)
|
|
5985
6085
|
|
|
5986
|
-
with Toil(
|
|
6086
|
+
with Toil(
|
|
6087
|
+
options, workflow_name=trs_spec or wdl_uri, trs_spec=trs_spec
|
|
6088
|
+
) as toil:
|
|
5987
6089
|
# TODO: Move all the input parsing outside the Toil context
|
|
5988
6090
|
# manager to avoid leaving a job store behind if the workflow
|
|
5989
6091
|
# can't start.
|
|
@@ -5999,9 +6101,7 @@ def main() -> None:
|
|
|
5999
6101
|
|
|
6000
6102
|
# Load the WDL document.
|
|
6001
6103
|
document: WDL.Tree.Document = WDL.load(
|
|
6002
|
-
wdl_uri,
|
|
6003
|
-
read_source=toil_read_source,
|
|
6004
|
-
check_quant=options.quant_check
|
|
6104
|
+
wdl_uri, read_source=toil_read_source, check_quant=options.quant_check
|
|
6005
6105
|
)
|
|
6006
6106
|
|
|
6007
6107
|
# See if we're going to run a workflow or a task
|
|
@@ -6057,12 +6157,16 @@ def main() -> None:
|
|
|
6057
6157
|
) # type: ignore[no-untyped-call]
|
|
6058
6158
|
|
|
6059
6159
|
if getattr(WDL.Lint, "_shellcheck_available", None) is False:
|
|
6060
|
-
logger.info(
|
|
6160
|
+
logger.info(
|
|
6161
|
+
"Suggestion: install shellcheck (www.shellcheck.net) to check task commands"
|
|
6162
|
+
)
|
|
6061
6163
|
|
|
6062
6164
|
if lint_warnings_counter[0]:
|
|
6063
|
-
logger.warning(
|
|
6165
|
+
logger.warning(
|
|
6166
|
+
"Workflow lint warnings:\n%s", lint_warnings_io.getvalue().rstrip()
|
|
6167
|
+
)
|
|
6064
6168
|
if options.strict:
|
|
6065
|
-
logger.critical(f
|
|
6169
|
+
logger.critical(f"Workflow did not pass linting in strict mode")
|
|
6066
6170
|
# MiniWDL uses exit code 2 to indicate linting errors, so replicate that behavior
|
|
6067
6171
|
sys.exit(2)
|
|
6068
6172
|
|