toil 9.1.1__py3-none-any.whl → 9.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +5 -9
- toil/batchSystems/abstractBatchSystem.py +23 -22
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -12
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/cleanup_support.py +4 -4
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/gridengine.py +3 -4
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +65 -63
- toil/batchSystems/local_support.py +2 -3
- toil/batchSystems/lsf.py +6 -7
- toil/batchSystems/mesos/batchSystem.py +11 -7
- toil/batchSystems/mesos/test/__init__.py +1 -2
- toil/batchSystems/options.py +9 -10
- toil/batchSystems/registry.py +3 -7
- toil/batchSystems/singleMachine.py +8 -11
- toil/batchSystems/slurm.py +49 -38
- toil/batchSystems/torque.py +3 -4
- toil/bus.py +36 -34
- toil/common.py +129 -89
- toil/cwl/cwltoil.py +857 -729
- toil/cwl/utils.py +44 -35
- toil/fileStores/__init__.py +3 -1
- toil/fileStores/abstractFileStore.py +28 -30
- toil/fileStores/cachingFileStore.py +8 -8
- toil/fileStores/nonCachingFileStore.py +10 -21
- toil/job.py +159 -158
- toil/jobStores/abstractJobStore.py +68 -69
- toil/jobStores/aws/jobStore.py +249 -213
- toil/jobStores/aws/utils.py +13 -24
- toil/jobStores/fileJobStore.py +28 -22
- toil/jobStores/googleJobStore.py +21 -17
- toil/jobStores/utils.py +3 -7
- toil/leader.py +17 -22
- toil/lib/accelerators.py +6 -4
- toil/lib/aws/__init__.py +9 -10
- toil/lib/aws/ami.py +33 -19
- toil/lib/aws/iam.py +6 -6
- toil/lib/aws/s3.py +259 -157
- toil/lib/aws/session.py +76 -76
- toil/lib/aws/utils.py +51 -43
- toil/lib/checksum.py +19 -15
- toil/lib/compatibility.py +3 -2
- toil/lib/conversions.py +45 -18
- toil/lib/directory.py +29 -26
- toil/lib/docker.py +93 -99
- toil/lib/dockstore.py +77 -50
- toil/lib/ec2.py +39 -38
- toil/lib/ec2nodes.py +11 -4
- toil/lib/exceptions.py +8 -5
- toil/lib/ftp_utils.py +9 -14
- toil/lib/generatedEC2Lists.py +161 -20
- toil/lib/history.py +141 -97
- toil/lib/history_submission.py +163 -72
- toil/lib/io.py +27 -17
- toil/lib/memoize.py +2 -1
- toil/lib/misc.py +15 -11
- toil/lib/pipes.py +40 -25
- toil/lib/plugins.py +12 -8
- toil/lib/resources.py +1 -0
- toil/lib/retry.py +32 -38
- toil/lib/threading.py +12 -12
- toil/lib/throttle.py +1 -2
- toil/lib/trs.py +113 -51
- toil/lib/url.py +14 -23
- toil/lib/web.py +7 -2
- toil/options/common.py +18 -15
- toil/options/cwl.py +2 -2
- toil/options/runner.py +9 -5
- toil/options/wdl.py +1 -3
- toil/provisioners/__init__.py +9 -9
- toil/provisioners/abstractProvisioner.py +22 -20
- toil/provisioners/aws/__init__.py +20 -14
- toil/provisioners/aws/awsProvisioner.py +10 -8
- toil/provisioners/clusterScaler.py +19 -18
- toil/provisioners/gceProvisioner.py +2 -3
- toil/provisioners/node.py +11 -13
- toil/realtimeLogger.py +4 -4
- toil/resource.py +5 -5
- toil/server/app.py +2 -2
- toil/server/cli/wes_cwl_runner.py +11 -11
- toil/server/utils.py +18 -21
- toil/server/wes/abstract_backend.py +9 -8
- toil/server/wes/amazon_wes_utils.py +3 -3
- toil/server/wes/tasks.py +3 -5
- toil/server/wes/toil_backend.py +17 -21
- toil/server/wsgi_app.py +3 -3
- toil/serviceManager.py +3 -4
- toil/statsAndLogging.py +12 -13
- toil/test/__init__.py +33 -24
- toil/test/batchSystems/batchSystemTest.py +12 -11
- toil/test/batchSystems/batch_system_plugin_test.py +3 -5
- toil/test/batchSystems/test_slurm.py +38 -24
- toil/test/cwl/conftest.py +5 -6
- toil/test/cwl/cwlTest.py +194 -78
- toil/test/cwl/download_file_uri.json +6 -0
- toil/test/cwl/download_file_uri_no_hostname.json +6 -0
- toil/test/docs/scripts/tutorial_staging.py +1 -0
- toil/test/jobStores/jobStoreTest.py +9 -7
- toil/test/lib/aws/test_iam.py +1 -3
- toil/test/lib/aws/test_s3.py +1 -1
- toil/test/lib/dockerTest.py +9 -9
- toil/test/lib/test_ec2.py +12 -11
- toil/test/lib/test_history.py +4 -4
- toil/test/lib/test_trs.py +16 -14
- toil/test/lib/test_url.py +7 -6
- toil/test/lib/url_plugin_test.py +12 -18
- toil/test/provisioners/aws/awsProvisionerTest.py +10 -8
- toil/test/provisioners/clusterScalerTest.py +2 -5
- toil/test/provisioners/clusterTest.py +1 -3
- toil/test/server/serverTest.py +13 -4
- toil/test/sort/restart_sort.py +2 -6
- toil/test/sort/sort.py +3 -8
- toil/test/src/deferredFunctionTest.py +7 -7
- toil/test/src/environmentTest.py +1 -2
- toil/test/src/fileStoreTest.py +5 -5
- toil/test/src/importExportFileTest.py +5 -6
- toil/test/src/jobServiceTest.py +22 -14
- toil/test/src/jobTest.py +121 -25
- toil/test/src/miscTests.py +5 -7
- toil/test/src/promisedRequirementTest.py +8 -7
- toil/test/src/regularLogTest.py +2 -3
- toil/test/src/resourceTest.py +5 -8
- toil/test/src/restartDAGTest.py +5 -6
- toil/test/src/resumabilityTest.py +2 -2
- toil/test/src/retainTempDirTest.py +3 -3
- toil/test/src/systemTest.py +3 -3
- toil/test/src/threadingTest.py +1 -1
- toil/test/src/workerTest.py +1 -2
- toil/test/utils/toilDebugTest.py +6 -4
- toil/test/utils/toilKillTest.py +1 -1
- toil/test/utils/utilsTest.py +15 -14
- toil/test/wdl/wdltoil_test.py +247 -124
- toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
- toil/toilState.py +2 -3
- toil/utils/toilDebugFile.py +3 -8
- toil/utils/toilDebugJob.py +1 -2
- toil/utils/toilLaunchCluster.py +1 -2
- toil/utils/toilSshCluster.py +2 -0
- toil/utils/toilStats.py +19 -24
- toil/utils/toilStatus.py +11 -14
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +313 -209
- toil/worker.py +18 -12
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/METADATA +11 -14
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/RECORD +150 -153
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/WHEEL +1 -1
- toil/test/cwl/staging_cat.cwl +0 -27
- toil/test/cwl/staging_make_file.cwl +0 -25
- toil/test/cwl/staging_workflow.cwl +0 -43
- toil/test/cwl/zero_default.cwl +0 -61
- toil/test/utils/ABCWorkflowDebug/ABC.txt +0 -1
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/entry_points.txt +0 -0
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/licenses/LICENSE +0 -0
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py
CHANGED
|
@@ -34,25 +34,13 @@ import stat
|
|
|
34
34
|
import sys
|
|
35
35
|
import textwrap
|
|
36
36
|
import uuid
|
|
37
|
+
|
|
38
|
+
# This is also in configargparse but MyPy doesn't know it
|
|
39
|
+
from argparse import RawDescriptionHelpFormatter
|
|
40
|
+
from collections.abc import Callable, Iterator, Mapping, MutableMapping, MutableSequence
|
|
37
41
|
from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
|
|
38
42
|
from threading import Thread
|
|
39
|
-
from typing import
|
|
40
|
-
IO,
|
|
41
|
-
Any,
|
|
42
|
-
Callable,
|
|
43
|
-
Iterator,
|
|
44
|
-
Mapping,
|
|
45
|
-
MutableMapping,
|
|
46
|
-
MutableSequence,
|
|
47
|
-
Optional,
|
|
48
|
-
TextIO,
|
|
49
|
-
Tuple,
|
|
50
|
-
TypeVar,
|
|
51
|
-
Union,
|
|
52
|
-
cast,
|
|
53
|
-
Literal,
|
|
54
|
-
Protocol,
|
|
55
|
-
)
|
|
43
|
+
from typing import IO, Any, Literal, Optional, Protocol, TextIO, TypeVar, Union, cast
|
|
56
44
|
from urllib.parse import quote, unquote, urlparse, urlsplit
|
|
57
45
|
|
|
58
46
|
import cwl_utils.errors
|
|
@@ -66,9 +54,6 @@ import cwltool.load_tool
|
|
|
66
54
|
import cwltool.main
|
|
67
55
|
import cwltool.resolver
|
|
68
56
|
import schema_salad.ref_resolver
|
|
69
|
-
|
|
70
|
-
# This is also in configargparse but MyPy doesn't know it
|
|
71
|
-
from argparse import RawDescriptionHelpFormatter
|
|
72
57
|
from configargparse import ArgParser, Namespace
|
|
73
58
|
from cwltool.loghandler import _logger as cwllogger
|
|
74
59
|
from cwltool.loghandler import defaultStreamHandler
|
|
@@ -110,13 +95,9 @@ from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
|
|
|
110
95
|
from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
|
|
111
96
|
from toil.common import Config, Toil, addOptions
|
|
112
97
|
from toil.cwl import check_cwltool_version
|
|
113
|
-
from toil.lib.directory import
|
|
114
|
-
DirectoryContents,
|
|
115
|
-
decode_directory,
|
|
116
|
-
encode_directory,
|
|
117
|
-
)
|
|
118
|
-
from toil.lib.trs import resolve_workflow
|
|
98
|
+
from toil.lib.directory import DirectoryContents, decode_directory, encode_directory
|
|
119
99
|
from toil.lib.misc import call_command
|
|
100
|
+
from toil.lib.trs import resolve_workflow
|
|
120
101
|
from toil.provisioners.clusterScaler import JobTooBigError
|
|
121
102
|
|
|
122
103
|
check_cwltool_version()
|
|
@@ -125,36 +106,36 @@ from toil.cwl.utils import (
|
|
|
125
106
|
CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
|
|
126
107
|
download_structure,
|
|
127
108
|
get_from_structure,
|
|
109
|
+
remove_redundant_mounts,
|
|
128
110
|
visit_cwl_class_and_reduce,
|
|
129
|
-
remove_redundant_mounts
|
|
130
111
|
)
|
|
131
112
|
from toil.exceptions import FailedJobsException
|
|
132
113
|
from toil.fileStores import FileID
|
|
133
114
|
from toil.fileStores.abstractFileStore import AbstractFileStore
|
|
134
115
|
from toil.job import (
|
|
135
116
|
AcceleratorRequirement,
|
|
117
|
+
FileMetadata,
|
|
118
|
+
ImportsJob,
|
|
136
119
|
Job,
|
|
137
120
|
Promise,
|
|
138
121
|
Promised,
|
|
139
|
-
unwrap,
|
|
140
|
-
ImportsJob,
|
|
141
|
-
get_file_sizes,
|
|
142
|
-
FileMetadata,
|
|
143
122
|
WorkerImportJob,
|
|
123
|
+
get_file_sizes,
|
|
124
|
+
unwrap,
|
|
144
125
|
)
|
|
145
126
|
from toil.jobStores.abstractJobStore import (
|
|
146
127
|
AbstractJobStore,
|
|
147
|
-
NoSuchFileException,
|
|
148
128
|
InvalidImportExportUrlException,
|
|
149
129
|
LocatorException,
|
|
130
|
+
NoSuchFileException,
|
|
150
131
|
)
|
|
151
|
-
from toil.lib.exceptions import UnimplementedURLException
|
|
152
132
|
from toil.jobStores.fileJobStore import FileJobStore
|
|
153
133
|
from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
|
|
134
|
+
from toil.lib.exceptions import UnimplementedURLException
|
|
154
135
|
from toil.lib.io import mkdtemp
|
|
155
136
|
from toil.lib.threading import ExceptionalThread, global_mutex
|
|
156
|
-
from toil.statsAndLogging import DEFAULT_LOGLEVEL
|
|
157
137
|
from toil.lib.url import URLAccess
|
|
138
|
+
from toil.statsAndLogging import DEFAULT_LOGLEVEL
|
|
158
139
|
|
|
159
140
|
logger = logging.getLogger(__name__)
|
|
160
141
|
|
|
@@ -229,18 +210,19 @@ def _filter_skip_null(value: Any, err_flag: list[bool]) -> Any:
|
|
|
229
210
|
allows us to flag, at any level of recursion, that we have
|
|
230
211
|
encountered a SkipNull.
|
|
231
212
|
"""
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
213
|
+
match value:
|
|
214
|
+
case SkipNull():
|
|
215
|
+
err_flag[0] = True
|
|
216
|
+
value = None
|
|
217
|
+
case list(val_list):
|
|
218
|
+
return [_filter_skip_null(v, err_flag) for v in val_list]
|
|
219
|
+
case dict(val_dict):
|
|
220
|
+
return {k: _filter_skip_null(v, err_flag) for k, v in val_dict.items()}
|
|
239
221
|
return value
|
|
240
222
|
|
|
241
223
|
|
|
242
224
|
def ensure_no_collisions(
|
|
243
|
-
directory: DirectoryType, dir_description:
|
|
225
|
+
directory: DirectoryType, dir_description: str | None = None
|
|
244
226
|
) -> None:
|
|
245
227
|
"""
|
|
246
228
|
Make sure no items in the given CWL Directory have the same name.
|
|
@@ -310,9 +292,9 @@ class Conditional:
|
|
|
310
292
|
|
|
311
293
|
def __init__(
|
|
312
294
|
self,
|
|
313
|
-
expression:
|
|
314
|
-
outputs:
|
|
315
|
-
requirements:
|
|
295
|
+
expression: str | None = None,
|
|
296
|
+
outputs: dict[str, CWLOutputType] | None = None,
|
|
297
|
+
requirements: list[CWLObjectType] | None = None,
|
|
316
298
|
container_engine: str = "docker",
|
|
317
299
|
):
|
|
318
300
|
"""
|
|
@@ -377,7 +359,7 @@ class Conditional:
|
|
|
377
359
|
class ResolveSource:
|
|
378
360
|
"""Apply linkMerge and pickValue operators to values coming into a port."""
|
|
379
361
|
|
|
380
|
-
promise_tuples:
|
|
362
|
+
promise_tuples: list[tuple[str, Promise]] | tuple[str, Promise]
|
|
381
363
|
|
|
382
364
|
def __init__(
|
|
383
365
|
self,
|
|
@@ -434,7 +416,7 @@ class ResolveSource:
|
|
|
434
416
|
def resolve(self) -> Any:
|
|
435
417
|
"""First apply linkMerge then pickValue if either present."""
|
|
436
418
|
|
|
437
|
-
result:
|
|
419
|
+
result: Any | None = None
|
|
438
420
|
if isinstance(self.promise_tuples, list):
|
|
439
421
|
result = self.link_merge(
|
|
440
422
|
cast(
|
|
@@ -449,9 +431,7 @@ class ResolveSource:
|
|
|
449
431
|
result = filter_skip_null(self.name, result)
|
|
450
432
|
return result
|
|
451
433
|
|
|
452
|
-
def link_merge(
|
|
453
|
-
self, values: CWLObjectType
|
|
454
|
-
) -> Union[list[CWLOutputType], CWLOutputType]:
|
|
434
|
+
def link_merge(self, values: CWLObjectType) -> list[CWLOutputType] | CWLOutputType:
|
|
455
435
|
"""
|
|
456
436
|
Apply linkMerge operator to `values` object.
|
|
457
437
|
|
|
@@ -477,7 +457,7 @@ class ResolveSource:
|
|
|
477
457
|
f"Unsupported linkMerge '{link_merge_type}' on {self.name}."
|
|
478
458
|
)
|
|
479
459
|
|
|
480
|
-
def pick_value(self, values:
|
|
460
|
+
def pick_value(self, values: list[str | SkipNull] | Any) -> Any:
|
|
481
461
|
"""
|
|
482
462
|
Apply pickValue operator to `values` object.
|
|
483
463
|
|
|
@@ -500,40 +480,39 @@ class ResolveSource:
|
|
|
500
480
|
|
|
501
481
|
result = [v for v in values if not isinstance(v, SkipNull) and v is not None]
|
|
502
482
|
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
483
|
+
match pick_value_type:
|
|
484
|
+
case "first_non_null":
|
|
485
|
+
if len(result) < 1:
|
|
486
|
+
logger.error(
|
|
487
|
+
"Could not find non-null entry for %s:\n%s",
|
|
488
|
+
self.name,
|
|
489
|
+
pprint.pformat(self.promise_tuples),
|
|
490
|
+
)
|
|
491
|
+
raise cwl_utils.errors.WorkflowException(
|
|
492
|
+
"%s: first_non_null operator found no non-null values"
|
|
493
|
+
% self.name
|
|
494
|
+
)
|
|
495
|
+
else:
|
|
496
|
+
return result[0]
|
|
497
|
+
case "the_only_non_null":
|
|
498
|
+
if len(result) == 0:
|
|
499
|
+
raise cwl_utils.errors.WorkflowException(
|
|
500
|
+
"%s: the_only_non_null operator found no non-null values"
|
|
501
|
+
% self.name
|
|
502
|
+
)
|
|
503
|
+
elif len(result) > 1:
|
|
504
|
+
raise cwl_utils.errors.WorkflowException(
|
|
505
|
+
"%s: the_only_non_null operator found more than one non-null values"
|
|
506
|
+
% self.name
|
|
507
|
+
)
|
|
508
|
+
else:
|
|
509
|
+
return result[0]
|
|
510
|
+
case "all_non_null":
|
|
511
|
+
return result
|
|
512
|
+
case _:
|
|
523
513
|
raise cwl_utils.errors.WorkflowException(
|
|
524
|
-
"
|
|
525
|
-
% self.name
|
|
514
|
+
f"Unsupported pickValue '{pick_value_type}' on {self.name}"
|
|
526
515
|
)
|
|
527
|
-
else:
|
|
528
|
-
return result[0]
|
|
529
|
-
|
|
530
|
-
elif pick_value_type == "all_non_null":
|
|
531
|
-
return result
|
|
532
|
-
|
|
533
|
-
else:
|
|
534
|
-
raise cwl_utils.errors.WorkflowException(
|
|
535
|
-
f"Unsupported pickValue '{pick_value_type}' on {self.name}"
|
|
536
|
-
)
|
|
537
516
|
|
|
538
517
|
|
|
539
518
|
class StepValueFrom:
|
|
@@ -676,10 +655,8 @@ class JustAValue:
|
|
|
676
655
|
|
|
677
656
|
|
|
678
657
|
def resolve_dict_w_promises(
|
|
679
|
-
dict_w_promises:
|
|
680
|
-
|
|
681
|
-
],
|
|
682
|
-
file_store: Optional[AbstractFileStore] = None,
|
|
658
|
+
dict_w_promises: UnresolvedDict | CWLObjectType | dict[str, str | StepValueFrom],
|
|
659
|
+
file_store: AbstractFileStore | None = None,
|
|
683
660
|
) -> CWLObjectType:
|
|
684
661
|
"""
|
|
685
662
|
Resolve a dictionary of promises and evaluate expressions to produce the actual values.
|
|
@@ -736,7 +713,7 @@ class ToilPathMapper(PathMapper):
|
|
|
736
713
|
basedir: str,
|
|
737
714
|
stagedir: str,
|
|
738
715
|
separateDirs: bool = True,
|
|
739
|
-
get_file:
|
|
716
|
+
get_file: Any | None = None,
|
|
740
717
|
stage_listing: bool = False,
|
|
741
718
|
streaming_allowed: bool = True,
|
|
742
719
|
):
|
|
@@ -881,179 +858,182 @@ class ToilPathMapper(PathMapper):
|
|
|
881
858
|
)
|
|
882
859
|
tgt = new_tgt
|
|
883
860
|
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
logger.debug("ToilPathMapper visiting directory %s", location)
|
|
889
|
-
|
|
890
|
-
# We want to check the directory to make sure it is not
|
|
891
|
-
# self-contradictory in its immediate children and their names.
|
|
892
|
-
ensure_no_collisions(cast(DirectoryType, obj))
|
|
893
|
-
|
|
894
|
-
# We may need to copy this directory even if we don't copy things inside it.
|
|
895
|
-
copy_here = False
|
|
896
|
-
|
|
897
|
-
# Try and resolve the location to a local path
|
|
898
|
-
if location.startswith("file://"):
|
|
899
|
-
# This is still from the local machine, so go find where it is
|
|
900
|
-
resolved = schema_salad.ref_resolver.uri_file_path(location)
|
|
901
|
-
elif location.startswith("toildir:"):
|
|
902
|
-
# We need to download this directory (or subdirectory)
|
|
903
|
-
if self.get_file:
|
|
904
|
-
# We can actually go get it and its contents
|
|
905
|
-
resolved = schema_salad.ref_resolver.uri_file_path(
|
|
906
|
-
self.get_file(location)
|
|
907
|
-
)
|
|
908
|
-
else:
|
|
909
|
-
# We are probably staging final outputs on the leader. We
|
|
910
|
-
# can't go get the directory. Just pass it through.
|
|
911
|
-
resolved = location
|
|
912
|
-
elif location.startswith("_:"):
|
|
913
|
-
# cwltool made this up for an empty/synthetic directory it
|
|
914
|
-
# wants to make.
|
|
915
|
-
|
|
916
|
-
# If we let cwltool make the directory and stage it, and then
|
|
917
|
-
# stage files inside it, we can end up with Docker creating
|
|
918
|
-
# root-owned files in whatever we mounted for the Docker work
|
|
919
|
-
# directory, somehow. So make a directory ourselves instead.
|
|
920
|
-
if self.get_file:
|
|
921
|
-
# Ask for an empty directory
|
|
922
|
-
new_dir_uri = self.get_file("_:")
|
|
923
|
-
# And get a path for it
|
|
924
|
-
resolved = schema_salad.ref_resolver.uri_file_path(new_dir_uri)
|
|
925
|
-
|
|
926
|
-
if "listing" in obj and obj["listing"] != []:
|
|
927
|
-
# If there's stuff inside here to stage, we need to copy
|
|
928
|
-
# this directory here, because we can't Docker mount things
|
|
929
|
-
# over top of immutable directories.
|
|
930
|
-
copy_here = True
|
|
931
|
-
else:
|
|
932
|
-
# We can't really make the directory. Maybe we are
|
|
933
|
-
# exporting from the leader and it doesn't matter.
|
|
934
|
-
resolved = location
|
|
935
|
-
elif location.startswith("/"):
|
|
936
|
-
# Test if path is an absolute local path
|
|
937
|
-
# Does not check if the path is relative
|
|
938
|
-
# While Toil encodes paths into a URL with ToilPathMapper,
|
|
939
|
-
# something called internally in cwltool may return an absolute path
|
|
940
|
-
# ex: if cwltool calls itself internally in command_line_tool.py,
|
|
941
|
-
# it collects outputs with collect_output, and revmap_file will use its own internal pathmapper
|
|
942
|
-
resolved = location
|
|
943
|
-
else:
|
|
944
|
-
raise RuntimeError("Unsupported location: " + location)
|
|
861
|
+
match obj:
|
|
862
|
+
case {"class": "Directory"}:
|
|
863
|
+
# Whether or not we've already mapped this path, we need to map all
|
|
864
|
+
# children recursively.
|
|
945
865
|
|
|
946
|
-
|
|
947
|
-
# Don't map the same directory twice
|
|
948
|
-
logger.debug(
|
|
949
|
-
"ToilPathMapper stopping recursion because we have already "
|
|
950
|
-
"mapped directory: %s",
|
|
951
|
-
location,
|
|
952
|
-
)
|
|
953
|
-
return
|
|
866
|
+
logger.debug("ToilPathMapper visiting directory %s", location)
|
|
954
867
|
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
self._pathmap[location] = MapperEnt(
|
|
959
|
-
resolved,
|
|
960
|
-
tgt,
|
|
961
|
-
"WritableDirectory" if (copy or copy_here) else "Directory",
|
|
962
|
-
staged,
|
|
963
|
-
)
|
|
868
|
+
# We want to check the directory to make sure it is not
|
|
869
|
+
# self-contradictory in its immediate children and their names.
|
|
870
|
+
ensure_no_collisions(cast(DirectoryType, obj))
|
|
964
871
|
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
# to copy the whole directory from somewhere and we can't
|
|
968
|
-
# stage files over themselves.
|
|
969
|
-
staged = False
|
|
872
|
+
# We may need to copy this directory even if we don't copy things inside it.
|
|
873
|
+
copy_here = False
|
|
970
874
|
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
875
|
+
# Try and resolve the location to a local path
|
|
876
|
+
if location.startswith("file://"):
|
|
877
|
+
# This is still from the local machine, so go find where it is
|
|
878
|
+
resolved = schema_salad.ref_resolver.uri_file_path(location)
|
|
879
|
+
elif location.startswith("toildir:"):
|
|
880
|
+
# We need to download this directory (or subdirectory)
|
|
881
|
+
if self.get_file:
|
|
882
|
+
# We can actually go get it and its contents
|
|
883
|
+
resolved = schema_salad.ref_resolver.uri_file_path(
|
|
884
|
+
self.get_file(location)
|
|
885
|
+
)
|
|
886
|
+
else:
|
|
887
|
+
# We are probably staging final outputs on the leader. We
|
|
888
|
+
# can't go get the directory. Just pass it through.
|
|
889
|
+
resolved = location
|
|
890
|
+
elif location.startswith("_:"):
|
|
891
|
+
# cwltool made this up for an empty/synthetic directory it
|
|
892
|
+
# wants to make.
|
|
893
|
+
|
|
894
|
+
# If we let cwltool make the directory and stage it, and then
|
|
895
|
+
# stage files inside it, we can end up with Docker creating
|
|
896
|
+
# root-owned files in whatever we mounted for the Docker work
|
|
897
|
+
# directory, somehow. So make a directory ourselves instead.
|
|
898
|
+
if self.get_file:
|
|
899
|
+
# Ask for an empty directory
|
|
900
|
+
new_dir_uri = self.get_file("_:")
|
|
901
|
+
# And get a path for it
|
|
902
|
+
resolved = schema_salad.ref_resolver.uri_file_path(new_dir_uri)
|
|
903
|
+
|
|
904
|
+
if "listing" in obj and obj["listing"] != []:
|
|
905
|
+
# If there's stuff inside here to stage, we need to copy
|
|
906
|
+
# this directory here, because we can't Docker mount things
|
|
907
|
+
# over top of immutable directories.
|
|
908
|
+
copy_here = True
|
|
909
|
+
else:
|
|
910
|
+
# We can't really make the directory. Maybe we are
|
|
911
|
+
# exporting from the leader and it doesn't matter.
|
|
912
|
+
resolved = location
|
|
913
|
+
elif location.startswith("/"):
|
|
914
|
+
# Test if path is an absolute local path
|
|
915
|
+
# Does not check if the path is relative
|
|
916
|
+
# While Toil encodes paths into a URL with ToilPathMapper,
|
|
917
|
+
# something called internally in cwltool may return an absolute path
|
|
918
|
+
# ex: if cwltool calls itself internally in command_line_tool.py,
|
|
919
|
+
# it collects outputs with collect_output, and revmap_file will use its own internal pathmapper
|
|
920
|
+
resolved = location
|
|
921
|
+
else:
|
|
922
|
+
raise RuntimeError("Unsupported location: " + location)
|
|
979
923
|
|
|
980
|
-
|
|
981
|
-
|
|
924
|
+
if location in self._pathmap:
|
|
925
|
+
# Don't map the same directory twice
|
|
926
|
+
logger.debug(
|
|
927
|
+
"ToilPathMapper stopping recursion because we have already "
|
|
928
|
+
"mapped directory: %s",
|
|
929
|
+
location,
|
|
930
|
+
)
|
|
931
|
+
return
|
|
982
932
|
|
|
983
|
-
if location in self._pathmap:
|
|
984
|
-
# Don't map the same file twice
|
|
985
933
|
logger.debug(
|
|
986
|
-
"ToilPathMapper
|
|
987
|
-
"mapped file: %s",
|
|
988
|
-
location,
|
|
934
|
+
"ToilPathMapper adding directory mapping %s -> %s", resolved, tgt
|
|
989
935
|
)
|
|
990
|
-
return
|
|
991
|
-
|
|
992
|
-
ab = abspath(location, basedir)
|
|
993
|
-
if "contents" in obj and location.startswith("_:"):
|
|
994
|
-
# We are supposed to create this file
|
|
995
936
|
self._pathmap[location] = MapperEnt(
|
|
996
|
-
|
|
937
|
+
resolved,
|
|
997
938
|
tgt,
|
|
998
|
-
"
|
|
939
|
+
"WritableDirectory" if (copy or copy_here) else "Directory",
|
|
999
940
|
staged,
|
|
1000
941
|
)
|
|
1001
|
-
else:
|
|
1002
|
-
with SourceLine(
|
|
1003
|
-
obj,
|
|
1004
|
-
"location",
|
|
1005
|
-
ValidationException,
|
|
1006
|
-
logger.isEnabledFor(logging.DEBUG),
|
|
1007
|
-
):
|
|
1008
|
-
# If we have access to the Toil file store, we will have a
|
|
1009
|
-
# get_file set, and it will convert this path to a file:
|
|
1010
|
-
# URI for a local file it downloaded.
|
|
1011
|
-
if self.get_file:
|
|
1012
|
-
deref = self.get_file(
|
|
1013
|
-
location,
|
|
1014
|
-
obj.get("streamable", False),
|
|
1015
|
-
self.streaming_allowed,
|
|
1016
|
-
)
|
|
1017
|
-
else:
|
|
1018
|
-
deref = ab
|
|
1019
|
-
if deref.startswith("file:"):
|
|
1020
|
-
deref = schema_salad.ref_resolver.uri_file_path(deref)
|
|
1021
|
-
if urlsplit(deref).scheme in ["http", "https"]:
|
|
1022
|
-
deref = downloadHttpFile(location)
|
|
1023
|
-
elif urlsplit(deref).scheme != "toilfile":
|
|
1024
|
-
# Dereference symbolic links
|
|
1025
|
-
st = os.lstat(deref)
|
|
1026
|
-
while stat.S_ISLNK(st.st_mode):
|
|
1027
|
-
logger.debug("ToilPathMapper following symlink %s", deref)
|
|
1028
|
-
rl = os.readlink(deref)
|
|
1029
|
-
deref = (
|
|
1030
|
-
rl
|
|
1031
|
-
if os.path.isabs(rl)
|
|
1032
|
-
else os.path.join(os.path.dirname(deref), rl)
|
|
1033
|
-
)
|
|
1034
|
-
st = os.lstat(deref)
|
|
1035
942
|
|
|
1036
|
-
|
|
1037
|
-
#
|
|
943
|
+
if not location.startswith("_:") and not self.stage_listing:
|
|
944
|
+
# Don't stage anything below here separately, since we are able
|
|
945
|
+
# to copy the whole directory from somewhere and we can't
|
|
946
|
+
# stage files over themselves.
|
|
947
|
+
staged = False
|
|
1038
948
|
|
|
1039
|
-
|
|
949
|
+
# Keep recursing
|
|
950
|
+
self.visitlisting(
|
|
951
|
+
cast(list[CWLObjectType], obj.get("listing", [])),
|
|
952
|
+
tgt,
|
|
953
|
+
basedir,
|
|
954
|
+
copy=copy,
|
|
955
|
+
staged=staged,
|
|
956
|
+
)
|
|
1040
957
|
|
|
958
|
+
case {"class": "File"}:
|
|
959
|
+
logger.debug("ToilPathMapper visiting file %s", location)
|
|
960
|
+
|
|
961
|
+
if location in self._pathmap:
|
|
962
|
+
# Don't map the same file twice
|
|
1041
963
|
logger.debug(
|
|
1042
|
-
"ToilPathMapper
|
|
964
|
+
"ToilPathMapper stopping recursion because we have already "
|
|
965
|
+
"mapped file: %s",
|
|
966
|
+
location,
|
|
1043
967
|
)
|
|
968
|
+
return
|
|
1044
969
|
|
|
970
|
+
ab = abspath(location, basedir)
|
|
971
|
+
if "contents" in obj and location.startswith("_:"):
|
|
972
|
+
# We are supposed to create this file
|
|
1045
973
|
self._pathmap[location] = MapperEnt(
|
|
1046
|
-
|
|
974
|
+
cast(str, obj["contents"]),
|
|
975
|
+
tgt,
|
|
976
|
+
"CreateWritableFile" if copy else "CreateFile",
|
|
977
|
+
staged,
|
|
1047
978
|
)
|
|
979
|
+
else:
|
|
980
|
+
with SourceLine(
|
|
981
|
+
obj,
|
|
982
|
+
"location",
|
|
983
|
+
ValidationException,
|
|
984
|
+
logger.isEnabledFor(logging.DEBUG),
|
|
985
|
+
):
|
|
986
|
+
# If we have access to the Toil file store, we will have a
|
|
987
|
+
# get_file set, and it will convert this path to a file:
|
|
988
|
+
# URI for a local file it downloaded.
|
|
989
|
+
if self.get_file:
|
|
990
|
+
deref = self.get_file(
|
|
991
|
+
location,
|
|
992
|
+
obj.get("streamable", False),
|
|
993
|
+
self.streaming_allowed,
|
|
994
|
+
)
|
|
995
|
+
else:
|
|
996
|
+
deref = ab
|
|
997
|
+
if deref.startswith("file:"):
|
|
998
|
+
deref = schema_salad.ref_resolver.uri_file_path(deref)
|
|
999
|
+
if urlsplit(deref).scheme in ["http", "https"]:
|
|
1000
|
+
deref = downloadHttpFile(location)
|
|
1001
|
+
elif urlsplit(deref).scheme != "toilfile":
|
|
1002
|
+
# Dereference symbolic links
|
|
1003
|
+
st = os.lstat(deref)
|
|
1004
|
+
while stat.S_ISLNK(st.st_mode):
|
|
1005
|
+
logger.debug(
|
|
1006
|
+
"ToilPathMapper following symlink %s", deref
|
|
1007
|
+
)
|
|
1008
|
+
rl = os.readlink(deref)
|
|
1009
|
+
deref = (
|
|
1010
|
+
rl
|
|
1011
|
+
if os.path.isabs(rl)
|
|
1012
|
+
else os.path.join(os.path.dirname(deref), rl)
|
|
1013
|
+
)
|
|
1014
|
+
st = os.lstat(deref)
|
|
1048
1015
|
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1016
|
+
# If we didn't download something that is a toilfile:
|
|
1017
|
+
# reference, we just pass that along.
|
|
1018
|
+
|
|
1019
|
+
"""Link or copy files to their targets. Create them as needed."""
|
|
1020
|
+
|
|
1021
|
+
logger.debug(
|
|
1022
|
+
"ToilPathMapper adding file mapping %s -> %s", deref, tgt
|
|
1023
|
+
)
|
|
1024
|
+
|
|
1025
|
+
self._pathmap[location] = MapperEnt(
|
|
1026
|
+
deref, tgt, "WritableFile" if copy else "File", staged
|
|
1027
|
+
)
|
|
1028
|
+
|
|
1029
|
+
# Handle all secondary files that need to be next to this one.
|
|
1030
|
+
self.visitlisting(
|
|
1031
|
+
cast(list[CWLObjectType], obj.get("secondaryFiles", [])),
|
|
1032
|
+
stagedir,
|
|
1033
|
+
basedir,
|
|
1034
|
+
copy=copy,
|
|
1035
|
+
staged=staged,
|
|
1036
|
+
)
|
|
1057
1037
|
|
|
1058
1038
|
|
|
1059
1039
|
class ToilSingleJobExecutor(cwltool.executors.SingleJobExecutor):
|
|
@@ -1112,7 +1092,7 @@ class ToilTool:
|
|
|
1112
1092
|
"""
|
|
1113
1093
|
super().__init__(*args, **kwargs)
|
|
1114
1094
|
# Reserve a spot for the Toil job that ends up executing this tool.
|
|
1115
|
-
self._toil_job:
|
|
1095
|
+
self._toil_job: Job | None = None
|
|
1116
1096
|
# Remember path mappers we have used so we can interrogate them later to find out what the job mapped.
|
|
1117
1097
|
self._path_mappers: list[cwltool.pathmapper.PathMapper] = []
|
|
1118
1098
|
|
|
@@ -1161,7 +1141,7 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
|
|
|
1161
1141
|
"""Subclass the cwltool command line tool to provide the custom ToilPathMapper."""
|
|
1162
1142
|
|
|
1163
1143
|
def _initialworkdir(
|
|
1164
|
-
self, j:
|
|
1144
|
+
self, j: cwltool.job.JobBase | None, builder: cwltool.builder.Builder
|
|
1165
1145
|
) -> None:
|
|
1166
1146
|
"""
|
|
1167
1147
|
Hook the InitialWorkDirRequirement setup to make sure that there are no
|
|
@@ -1227,6 +1207,7 @@ def toil_make_tool(
|
|
|
1227
1207
|
# URI instead of raising an error right away, in case it is optional.
|
|
1228
1208
|
MISSING_FILE = "missing://"
|
|
1229
1209
|
|
|
1210
|
+
|
|
1230
1211
|
class ToilFsAccess(StdFsAccess):
|
|
1231
1212
|
"""
|
|
1232
1213
|
Custom filesystem access class which handles toil filestore references.
|
|
@@ -1240,7 +1221,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1240
1221
|
def __init__(
|
|
1241
1222
|
self,
|
|
1242
1223
|
basedir: str,
|
|
1243
|
-
file_store:
|
|
1224
|
+
file_store: AbstractFileStore | None = None,
|
|
1244
1225
|
) -> None:
|
|
1245
1226
|
"""Create a FsAccess object for the given Toil Filestore and basedir."""
|
|
1246
1227
|
self.file_store = file_store
|
|
@@ -1271,103 +1252,104 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1271
1252
|
# See: https://github.com/common-workflow-language/cwltool/blob/beab66d649dd3ee82a013322a5e830875e8556ba/cwltool/stdfsaccess.py#L43 # noqa B950
|
|
1272
1253
|
|
|
1273
1254
|
parse = urlparse(path)
|
|
1274
|
-
|
|
1275
|
-
# Is a Toil file
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
raise RuntimeError("URL requires a file store: " + path)
|
|
1255
|
+
match parse.scheme:
|
|
1256
|
+
case "toilfile": # Is a Toil file
|
|
1257
|
+
if self.file_store is None:
|
|
1258
|
+
raise RuntimeError("URL requires a file store: " + path)
|
|
1279
1259
|
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
)
|
|
1283
|
-
logger.debug("Downloaded %s to %s", path, destination)
|
|
1284
|
-
if not os.path.exists(destination):
|
|
1285
|
-
raise RuntimeError(
|
|
1286
|
-
f"{destination} does not exist after filestore read."
|
|
1260
|
+
destination = self.file_store.readGlobalFile(
|
|
1261
|
+
FileID.unpack(path[len("toilfile:") :]), symlink=True
|
|
1287
1262
|
)
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1263
|
+
logger.debug("Downloaded %s to %s", path, destination)
|
|
1264
|
+
if not os.path.exists(destination):
|
|
1265
|
+
raise RuntimeError(
|
|
1266
|
+
f"{destination} does not exist after filestore read."
|
|
1267
|
+
)
|
|
1268
|
+
case "toildir": # Is a directory or relative to it
|
|
1269
|
+
if self.file_store is None:
|
|
1270
|
+
raise RuntimeError("URL requires a file store: " + path)
|
|
1293
1271
|
|
|
1294
|
-
|
|
1272
|
+
# We will download the whole directory and then look inside it
|
|
1295
1273
|
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1274
|
+
# Decode its contents, the path inside it to the file (if any), and
|
|
1275
|
+
# the key to use for caching the directory.
|
|
1276
|
+
contents, subpath, cache_key, _, _ = decode_directory(path)
|
|
1277
|
+
logger.debug("Decoded directory contents: %s", contents)
|
|
1300
1278
|
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1279
|
+
if cache_key not in self.dir_to_download:
|
|
1280
|
+
# Download to a temp directory.
|
|
1281
|
+
temp_dir = self.file_store.getLocalTempDir()
|
|
1282
|
+
temp_dir += "/toildownload"
|
|
1283
|
+
os.makedirs(temp_dir)
|
|
1306
1284
|
|
|
1307
|
-
|
|
1285
|
+
logger.debug(
|
|
1286
|
+
"ToilFsAccess downloading %s to %s", cache_key, temp_dir
|
|
1287
|
+
)
|
|
1308
1288
|
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1289
|
+
# Save it all into this new temp directory.
|
|
1290
|
+
# Guaranteed to fill it with real files and not symlinks.
|
|
1291
|
+
download_structure(self.file_store, {}, {}, contents, temp_dir)
|
|
1312
1292
|
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1293
|
+
# Make sure we use the same temp directory if we go traversing
|
|
1294
|
+
# around this thing.
|
|
1295
|
+
self.dir_to_download[cache_key] = temp_dir
|
|
1296
|
+
else:
|
|
1297
|
+
logger.debug("ToilFsAccess already has %s", cache_key)
|
|
1318
1298
|
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
#
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
)
|
|
1350
|
-
else:
|
|
1351
|
-
URLAccess.read_from_url(url, open(dest, "wb"))
|
|
1299
|
+
if subpath is None:
|
|
1300
|
+
# We didn't have any subdirectory, so just give back
|
|
1301
|
+
# the path to the root
|
|
1302
|
+
destination = self.dir_to_download[cache_key]
|
|
1303
|
+
else:
|
|
1304
|
+
# Navigate to the right subdirectory
|
|
1305
|
+
destination = self.dir_to_download[cache_key] + "/" + subpath
|
|
1306
|
+
case "file": # This is a File URL. Decode it to an actual path.
|
|
1307
|
+
destination = unquote(parse.path)
|
|
1308
|
+
case "": # This is just a local file and not a URL
|
|
1309
|
+
destination = path
|
|
1310
|
+
case _: # The destination is something else.
|
|
1311
|
+
if URLAccess.get_is_directory(path):
|
|
1312
|
+
# Treat this as a directory
|
|
1313
|
+
if path not in self.dir_to_download:
|
|
1314
|
+
logger.debug(
|
|
1315
|
+
"ToilFsAccess fetching directory %s from a JobStore", path
|
|
1316
|
+
)
|
|
1317
|
+
dest_dir = mkdtemp()
|
|
1318
|
+
|
|
1319
|
+
# Recursively fetch all the files in the directory.
|
|
1320
|
+
def download_to(url: str, dest: str) -> None:
|
|
1321
|
+
if URLAccess.get_is_directory(url):
|
|
1322
|
+
os.mkdir(dest)
|
|
1323
|
+
for part in URLAccess.list_url(url):
|
|
1324
|
+
download_to(
|
|
1325
|
+
os.path.join(url, part),
|
|
1326
|
+
os.path.join(dest, part),
|
|
1327
|
+
)
|
|
1328
|
+
else:
|
|
1329
|
+
URLAccess.read_from_url(url, open(dest, "wb"))
|
|
1352
1330
|
|
|
1353
|
-
|
|
1354
|
-
|
|
1331
|
+
download_to(path, dest_dir)
|
|
1332
|
+
self.dir_to_download[path] = dest_dir
|
|
1355
1333
|
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1334
|
+
destination = self.dir_to_download[path]
|
|
1335
|
+
else:
|
|
1336
|
+
# Treat this as a file.
|
|
1337
|
+
if path not in self.dir_to_download:
|
|
1338
|
+
logger.debug(
|
|
1339
|
+
"ToilFsAccess fetching file %s from a JobStore", path
|
|
1340
|
+
)
|
|
1341
|
+
# Try to grab it with a jobstore implementation, and save it
|
|
1342
|
+
# somewhere arbitrary.
|
|
1343
|
+
dest_file = NamedTemporaryFile(delete=False)
|
|
1344
|
+
URLAccess.read_from_url(path, dest_file)
|
|
1345
|
+
dest_file.close()
|
|
1346
|
+
self.dir_to_download[path] = dest_file.name
|
|
1347
|
+
destination = self.dir_to_download[path]
|
|
1348
|
+
logger.debug(
|
|
1349
|
+
"ToilFsAccess has JobStore-supported URL %s at %s",
|
|
1350
|
+
path,
|
|
1351
|
+
destination,
|
|
1352
|
+
)
|
|
1371
1353
|
|
|
1372
1354
|
# Now destination is a local file, so make sure we really do have an
|
|
1373
1355
|
# absolute path
|
|
@@ -1376,14 +1358,15 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1376
1358
|
|
|
1377
1359
|
def glob(self, pattern: str) -> list[str]:
|
|
1378
1360
|
parse = urlparse(pattern)
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1361
|
+
match parse.scheme:
|
|
1362
|
+
case "file":
|
|
1363
|
+
pattern = os.path.abspath(unquote(parse.path))
|
|
1364
|
+
case "":
|
|
1365
|
+
pattern = os.path.abspath(pattern)
|
|
1366
|
+
case _:
|
|
1367
|
+
raise RuntimeError(
|
|
1368
|
+
f"Cannot efficiently support globbing on {parse.scheme} URIs"
|
|
1369
|
+
)
|
|
1387
1370
|
|
|
1388
1371
|
# Actually do the glob
|
|
1389
1372
|
return [schema_salad.ref_resolver.file_uri(f) for f in glob.glob(pattern)]
|
|
@@ -1393,144 +1376,142 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1393
1376
|
raise RuntimeError(f"Mode {mode} for opening {fn} involves writing")
|
|
1394
1377
|
|
|
1395
1378
|
parse = urlparse(fn)
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
elif parse.scheme == "toildir":
|
|
1400
|
-
contents, subpath, cache_key, _, _ = decode_directory(fn)
|
|
1401
|
-
if cache_key in self.dir_to_download:
|
|
1402
|
-
# This is already available locally, so fall back on the local copy
|
|
1379
|
+
match parse.scheme:
|
|
1380
|
+
case "" | "file":
|
|
1381
|
+
# Handle local files
|
|
1403
1382
|
return open(self._abs(fn), mode)
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
if
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1383
|
+
case "toildir":
|
|
1384
|
+
contents, subpath, cache_key, _, _ = decode_directory(fn)
|
|
1385
|
+
if cache_key in self.dir_to_download:
|
|
1386
|
+
# This is already available locally, so fall back on the local copy
|
|
1387
|
+
return open(self._abs(fn), mode)
|
|
1388
|
+
else:
|
|
1389
|
+
# We need to get the URI out of the virtual directory
|
|
1390
|
+
if subpath is None:
|
|
1391
|
+
raise RuntimeError(f"{fn} is a toildir directory")
|
|
1392
|
+
uri = get_from_structure(contents, subpath)
|
|
1393
|
+
if not isinstance(uri, str):
|
|
1394
|
+
raise RuntimeError(f"{fn} does not point to a file")
|
|
1395
|
+
# Recurse on that URI
|
|
1396
|
+
return self.open(uri, mode)
|
|
1397
|
+
case "toilfile":
|
|
1398
|
+
if self.file_store is None:
|
|
1399
|
+
raise RuntimeError("URL requires a file store: " + fn)
|
|
1400
|
+
# Streaming access to Toil file store files requires being inside a
|
|
1401
|
+
# context manager, which we can't require. So we need to download
|
|
1402
|
+
# the file.
|
|
1403
|
+
return open(self._abs(fn), mode)
|
|
1404
|
+
# This should be supported by a job store.
|
|
1405
|
+
byte_stream = URLAccess.open_url(fn)
|
|
1406
|
+
if "b" in mode:
|
|
1407
|
+
# Pass stream along in binary
|
|
1408
|
+
return byte_stream
|
|
1420
1409
|
else:
|
|
1421
|
-
#
|
|
1422
|
-
byte_stream =
|
|
1423
|
-
if "b" in mode:
|
|
1424
|
-
# Pass stream along in binary
|
|
1425
|
-
return byte_stream
|
|
1426
|
-
else:
|
|
1427
|
-
# Wrap it in a text decoder
|
|
1428
|
-
return io.TextIOWrapper(byte_stream, encoding="utf-8")
|
|
1410
|
+
# Wrap it in a text decoder
|
|
1411
|
+
return io.TextIOWrapper(byte_stream, encoding="utf-8")
|
|
1429
1412
|
|
|
1430
1413
|
def exists(self, path: str) -> bool:
|
|
1431
1414
|
"""Test for file existence."""
|
|
1432
1415
|
parse = urlparse(path)
|
|
1433
|
-
|
|
1434
|
-
# Handle local files
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1416
|
+
match parse.scheme:
|
|
1417
|
+
case "" | "file": # Handle local files
|
|
1418
|
+
# toil's _abs() throws errors when files are not found and cwltool's _abs() does not
|
|
1419
|
+
try:
|
|
1420
|
+
return os.path.exists(self._abs(path))
|
|
1421
|
+
except NoSuchFileException:
|
|
1422
|
+
return False
|
|
1423
|
+
case "toildir":
|
|
1424
|
+
contents, subpath, cache_key, _, _ = decode_directory(path)
|
|
1425
|
+
if subpath is None:
|
|
1426
|
+
# The toildir directory itself exists
|
|
1427
|
+
return True
|
|
1428
|
+
uri = get_from_structure(contents, subpath)
|
|
1429
|
+
if uri is None:
|
|
1430
|
+
# It's not in the virtual directory, so it doesn't exist
|
|
1431
|
+
return False
|
|
1432
|
+
if isinstance(uri, dict):
|
|
1433
|
+
# Actually it's a subdirectory, so it exists.
|
|
1434
|
+
return True
|
|
1435
|
+
# We recurse and poll the URI directly to make sure it really exists
|
|
1436
|
+
return self.exists(uri)
|
|
1437
|
+
case "toilfile":
|
|
1438
|
+
# TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
|
|
1451
1439
|
return True
|
|
1452
|
-
|
|
1453
|
-
return self.exists(uri)
|
|
1454
|
-
elif parse.scheme == "toilfile":
|
|
1455
|
-
# TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
|
|
1456
|
-
return True
|
|
1457
|
-
else:
|
|
1458
|
-
# This should be supported by a job store.
|
|
1459
|
-
return URLAccess.url_exists(path)
|
|
1440
|
+
return URLAccess.url_exists(path) # This should be supported by a job store.
|
|
1460
1441
|
|
|
1461
1442
|
def size(self, path: str) -> int:
|
|
1462
1443
|
parse = urlparse(path)
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1444
|
+
match parse.scheme:
|
|
1445
|
+
case "" | "file":
|
|
1446
|
+
return os.stat(self._abs(path)).st_size
|
|
1447
|
+
case "toildir":
|
|
1448
|
+
# Decode its contents, the path inside it to the file (if any), and
|
|
1449
|
+
# the key to use for caching the directory.
|
|
1450
|
+
contents, subpath, cache_key, _, _ = decode_directory(path)
|
|
1451
|
+
|
|
1452
|
+
# We can't get the size of just a directory.
|
|
1453
|
+
if subpath is None:
|
|
1454
|
+
raise RuntimeError(f"Attempted to check size of directory {path}")
|
|
1455
|
+
|
|
1456
|
+
uri = get_from_structure(contents, subpath)
|
|
1457
|
+
|
|
1458
|
+
# We ought to end up with a URI.
|
|
1459
|
+
if not isinstance(uri, str):
|
|
1460
|
+
raise RuntimeError(f"Did not find a file at {path}")
|
|
1461
|
+
return self.size(uri)
|
|
1462
|
+
case "toilfile":
|
|
1463
|
+
if self.file_store is None:
|
|
1464
|
+
raise RuntimeError("URL requires a file store: " + path)
|
|
1465
|
+
return self.file_store.getGlobalFileSize(
|
|
1466
|
+
FileID.unpack(path[len("toilfile:") :])
|
|
1467
|
+
)
|
|
1468
|
+
# This should be supported by a job store.
|
|
1469
|
+
size = URLAccess.get_size(path)
|
|
1470
|
+
if size is None:
|
|
1471
|
+
# get_size can be unimplemented or unavailable
|
|
1472
|
+
raise RuntimeError(f"Could not get size of {path}")
|
|
1473
|
+
return size
|
|
1493
1474
|
|
|
1494
1475
|
def isfile(self, fn: str) -> bool:
|
|
1495
1476
|
parse = urlparse(fn)
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1477
|
+
match parse.scheme:
|
|
1478
|
+
case "file" | "":
|
|
1479
|
+
return os.path.isfile(self._abs(fn))
|
|
1480
|
+
case "toilfile":
|
|
1481
|
+
# TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
|
|
1482
|
+
return True
|
|
1483
|
+
case "toildir":
|
|
1484
|
+
contents, subpath, cache_key, _, _ = decode_directory(fn)
|
|
1485
|
+
if subpath is None:
|
|
1486
|
+
# This is the toildir directory itself
|
|
1487
|
+
return False
|
|
1488
|
+
found = get_from_structure(contents, subpath)
|
|
1489
|
+
# If we find a string, that's a file
|
|
1490
|
+
# TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
|
|
1491
|
+
return isinstance(found, str)
|
|
1492
|
+
return self.exists(fn) and not URLAccess.get_is_directory(fn)
|
|
1512
1493
|
|
|
1513
1494
|
def isdir(self, fn: str) -> bool:
|
|
1514
1495
|
logger.debug("ToilFsAccess checking type of %s", fn)
|
|
1515
1496
|
parse = urlparse(fn)
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1497
|
+
match parse.scheme:
|
|
1498
|
+
case "file" | "":
|
|
1499
|
+
return os.path.isdir(self._abs(fn))
|
|
1500
|
+
case "toilfile":
|
|
1501
|
+
return False
|
|
1502
|
+
case "toildir":
|
|
1503
|
+
contents, subpath, cache_key, _, _ = decode_directory(fn)
|
|
1504
|
+
if subpath is None:
|
|
1505
|
+
# This is the toildir directory itself.
|
|
1506
|
+
# TODO: We assume directories can't be deleted.
|
|
1507
|
+
return True
|
|
1508
|
+
found = get_from_structure(contents, subpath)
|
|
1509
|
+
# If we find a dict, that's a directory.
|
|
1524
1510
|
# TODO: We assume directories can't be deleted.
|
|
1525
|
-
return
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
return isinstance(found, dict)
|
|
1530
|
-
else:
|
|
1531
|
-
status = URLAccess.get_is_directory(fn)
|
|
1532
|
-
logger.debug("AbstractJobStore said: %s", status)
|
|
1533
|
-
return status
|
|
1511
|
+
return isinstance(found, dict)
|
|
1512
|
+
status = URLAccess.get_is_directory(fn)
|
|
1513
|
+
logger.debug("AbstractJobStore said: %s", status)
|
|
1514
|
+
return status
|
|
1534
1515
|
|
|
1535
1516
|
def listdir(self, fn: str) -> list[str]:
|
|
1536
1517
|
# This needs to return full URLs for everything in the directory.
|
|
@@ -1538,32 +1519,29 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1538
1519
|
logger.debug("ToilFsAccess listing %s", fn)
|
|
1539
1520
|
|
|
1540
1521
|
parse = urlparse(fn)
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
os.path.join(fn, entry.rstrip("/"))
|
|
1565
|
-
for entry in URLAccess.list_url(fn)
|
|
1566
|
-
]
|
|
1522
|
+
match parse.scheme:
|
|
1523
|
+
case "file" | "":
|
|
1524
|
+
# Find the local path
|
|
1525
|
+
directory = self._abs(fn)
|
|
1526
|
+
# Now list it (it is probably a directory)
|
|
1527
|
+
return [abspath(quote(entry), fn) for entry in os.listdir(directory)]
|
|
1528
|
+
case "toilfile":
|
|
1529
|
+
raise RuntimeError(f"Cannot list a file: {fn}")
|
|
1530
|
+
case "toildir":
|
|
1531
|
+
contents, subpath, cache_key, _, _ = decode_directory(fn)
|
|
1532
|
+
here = contents
|
|
1533
|
+
if subpath is not None:
|
|
1534
|
+
got = get_from_structure(contents, subpath)
|
|
1535
|
+
if got is None:
|
|
1536
|
+
raise RuntimeError(f"Cannot list nonexistent directory: {fn}")
|
|
1537
|
+
if isinstance(got, str):
|
|
1538
|
+
raise RuntimeError(
|
|
1539
|
+
f"Cannot list file or dubdirectory of a file: {fn}"
|
|
1540
|
+
)
|
|
1541
|
+
here = got
|
|
1542
|
+
# List all the things in here and make full URIs to them
|
|
1543
|
+
return [os.path.join(fn, k) for k in here.keys()]
|
|
1544
|
+
return [os.path.join(fn, entry.rstrip("/")) for entry in URLAccess.list_url(fn)]
|
|
1567
1545
|
|
|
1568
1546
|
def join(self, path: str, *paths: str) -> str:
|
|
1569
1547
|
# This falls back on os.path.join
|
|
@@ -1583,7 +1561,7 @@ def toil_get_file(
|
|
|
1583
1561
|
uri: str,
|
|
1584
1562
|
streamable: bool = False,
|
|
1585
1563
|
streaming_allowed: bool = True,
|
|
1586
|
-
pipe_threads:
|
|
1564
|
+
pipe_threads: list[tuple[Thread, int]] | None = None,
|
|
1587
1565
|
) -> str:
|
|
1588
1566
|
"""
|
|
1589
1567
|
Set up the given file or directory from the Toil jobstore at a file URI
|
|
@@ -1725,21 +1703,31 @@ def toil_get_file(
|
|
|
1725
1703
|
return schema_salad.ref_resolver.file_uri(src_path)
|
|
1726
1704
|
|
|
1727
1705
|
|
|
1728
|
-
def
|
|
1729
|
-
|
|
1706
|
+
def import_file_through_cache(
|
|
1707
|
+
import_func: Callable[[str], FileID],
|
|
1730
1708
|
index: dict[str, str],
|
|
1731
1709
|
existing: dict[str, str],
|
|
1732
1710
|
file_uri: str,
|
|
1733
1711
|
) -> str:
|
|
1734
1712
|
"""
|
|
1735
|
-
Given a file URI, convert it to a toil file URI
|
|
1713
|
+
Given a file URI, convert it to a toil file URI using the given caches.
|
|
1714
|
+
|
|
1715
|
+
Uses import_func to do any required new imports. Runs import_func once on
|
|
1716
|
+
every unique URI for a given set of caches.
|
|
1736
1717
|
|
|
1737
|
-
|
|
1718
|
+
:param index: Mapping from file URI to imported Toil URI.
|
|
1738
1719
|
|
|
1739
|
-
|
|
1740
|
-
|
|
1720
|
+
:param existing: Reverse mapping from imported Toil URI or other remote URI
|
|
1721
|
+
to file URI or local path. Allows integration with toil_get_file so
|
|
1722
|
+
that a URI downloaded to a local file by get_toil_file will re-import
|
|
1723
|
+
back to the same original URI.
|
|
1741
1724
|
|
|
1742
|
-
|
|
1725
|
+
:param file_uri: URI to the file to import. Not necessarily a file:// URI.
|
|
1726
|
+
|
|
1727
|
+
:raises FileNotFoundError: if the input RUI is a MISSING_FILE URI.
|
|
1728
|
+
|
|
1729
|
+
:returns: A toilfile: URI (or passed-through _: or toildir: URI) to the
|
|
1730
|
+
imported file.
|
|
1743
1731
|
"""
|
|
1744
1732
|
# Toil fileStore reference
|
|
1745
1733
|
if file_uri.startswith("toilfile:") or file_uri.startswith("toildir:"):
|
|
@@ -1755,7 +1743,8 @@ def convert_file_uri_to_toil_uri(
|
|
|
1755
1743
|
file_uri = existing.get(file_uri, file_uri)
|
|
1756
1744
|
if file_uri not in index:
|
|
1757
1745
|
try:
|
|
1758
|
-
index[file_uri] = "toilfile:" +
|
|
1746
|
+
index[file_uri] = "toilfile:" + import_func(file_uri).pack()
|
|
1747
|
+
# TODO: Won't this put URIs in existing that toil_get_file() will expect to be local paths?
|
|
1759
1748
|
existing[index[file_uri]] = file_uri
|
|
1760
1749
|
except Exception as e:
|
|
1761
1750
|
logger.error("Got exception '%s' while copying '%s'", e, file_uri)
|
|
@@ -1775,26 +1764,38 @@ def path_to_loc(obj: CWLObjectType) -> None:
|
|
|
1775
1764
|
|
|
1776
1765
|
|
|
1777
1766
|
def extract_file_uri_once(
|
|
1778
|
-
fileindex: dict[str, str],
|
|
1779
|
-
existing: dict[str, str],
|
|
1780
1767
|
file_metadata: CWLObjectType,
|
|
1768
|
+
fileindex: dict[str, str],
|
|
1781
1769
|
mark_broken: bool = False,
|
|
1782
1770
|
skip_remote: bool = False,
|
|
1783
|
-
) ->
|
|
1771
|
+
) -> str | None:
|
|
1784
1772
|
"""
|
|
1785
|
-
Extract the filename from a CWL file record.
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
:param
|
|
1797
|
-
|
|
1773
|
+
Extract the filename that needs to be downloaded from a CWL file record.
|
|
1774
|
+
|
|
1775
|
+
Updates the FileMetadata.
|
|
1776
|
+
|
|
1777
|
+
This function matches the predefined function signature in visit_files,
|
|
1778
|
+
which should be used to run it for all files inside a CWL object.
|
|
1779
|
+
|
|
1780
|
+
Ensures no duplicate files are returned according to fileindex. If a file
|
|
1781
|
+
has not been resolved already (and had file:// prepended) then resolve
|
|
1782
|
+
symlinks.
|
|
1783
|
+
|
|
1784
|
+
:param file_metadata: CWL file record to operate on.
|
|
1785
|
+
|
|
1786
|
+
:param fileindex: Forward mapping of filename to downloaded file path. If
|
|
1787
|
+
the file's location already appears here, uses the cached value and
|
|
1788
|
+
returns None.
|
|
1789
|
+
|
|
1790
|
+
:param mark_broken: If True, when files can't be imported because they e.g.
|
|
1791
|
+
don't exist, set their locations to MISSING_FILE rather than failing
|
|
1792
|
+
with an error.
|
|
1793
|
+
|
|
1794
|
+
:param skp_remote: If True, return None for remote URIs.
|
|
1795
|
+
|
|
1796
|
+
:return: The URI or local file path that needs to be dowlnoaded for this
|
|
1797
|
+
file, given the ones already scheduled to be downloaded in existing and
|
|
1798
|
+
the settings passed about what files need to be downloaded.
|
|
1798
1799
|
"""
|
|
1799
1800
|
location = cast(str, file_metadata["location"])
|
|
1800
1801
|
if (
|
|
@@ -1810,16 +1811,28 @@ def extract_file_uri_once(
|
|
|
1810
1811
|
file_metadata["location"] = location = schema_salad.ref_resolver.file_uri(
|
|
1811
1812
|
cast(str, file_metadata["path"])
|
|
1812
1813
|
)
|
|
1813
|
-
if location.startswith("file://")
|
|
1814
|
-
schema_salad.ref_resolver.uri_file_path(location)
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1814
|
+
if location.startswith("file://"):
|
|
1815
|
+
file_path = schema_salad.ref_resolver.uri_file_path(location)
|
|
1816
|
+
if not os.path.exists(file_path):
|
|
1817
|
+
if mark_broken:
|
|
1818
|
+
logger.debug("File %s is missing", file_metadata)
|
|
1819
|
+
file_metadata["location"] = location = MISSING_FILE + location
|
|
1820
|
+
else:
|
|
1821
|
+
raise cwl_utils.errors.WorkflowException(
|
|
1822
|
+
"File is missing: %s" % file_metadata
|
|
1823
|
+
)
|
|
1824
|
+
elif os.path.isdir(file_path):
|
|
1820
1825
|
raise cwl_utils.errors.WorkflowException(
|
|
1821
|
-
"
|
|
1826
|
+
f"Cannot import directory as a file: {file_path}"
|
|
1822
1827
|
)
|
|
1828
|
+
elif not os.path.isfile(file_path):
|
|
1829
|
+
# It exists but is not a regular file or directory
|
|
1830
|
+
# Allow /dev/null specifically as it's safe to read (returns EOF immediately)
|
|
1831
|
+
if file_path != "/dev/null":
|
|
1832
|
+
raise cwl_utils.errors.WorkflowException(
|
|
1833
|
+
f"Cannot import {file_path} as a file: not a regular file. "
|
|
1834
|
+
f"Only regular files and /dev/null are supported."
|
|
1835
|
+
)
|
|
1823
1836
|
if location.startswith("file://") or not skip_remote:
|
|
1824
1837
|
# This is a local file or a remote file
|
|
1825
1838
|
if location not in fileindex:
|
|
@@ -1840,25 +1853,29 @@ def extract_file_uri_once(
|
|
|
1840
1853
|
V = TypeVar("V", covariant=True)
|
|
1841
1854
|
|
|
1842
1855
|
|
|
1843
|
-
class
|
|
1856
|
+
class FileVisitFunc(Protocol[V]):
|
|
1844
1857
|
def __call__(
|
|
1845
1858
|
self,
|
|
1846
|
-
fileindex: dict[str, str],
|
|
1847
|
-
existing: dict[str, str],
|
|
1848
1859
|
file_metadata: CWLObjectType,
|
|
1849
|
-
mark_broken: bool,
|
|
1850
|
-
skip_remote: bool,
|
|
1851
1860
|
) -> V: ...
|
|
1852
1861
|
|
|
1853
1862
|
|
|
1863
|
+
class DirectoryVisitFunc(Protocol[V]):
|
|
1864
|
+
def __call__(
|
|
1865
|
+
self,
|
|
1866
|
+
directory_metadata: CWLObjectType,
|
|
1867
|
+
directory_contents: DirectoryContents,
|
|
1868
|
+
) -> V: ...
|
|
1869
|
+
|
|
1870
|
+
|
|
1871
|
+
V2 = TypeVar("V2", covariant=True)
|
|
1872
|
+
|
|
1873
|
+
|
|
1854
1874
|
def visit_files(
|
|
1855
|
-
|
|
1875
|
+
file_func: FileVisitFunc[V],
|
|
1876
|
+
directory_func: DirectoryVisitFunc[V2],
|
|
1856
1877
|
fs_access: StdFsAccess,
|
|
1857
|
-
|
|
1858
|
-
existing: dict[str, str],
|
|
1859
|
-
cwl_object: Optional[CWLObjectType],
|
|
1860
|
-
mark_broken: bool = False,
|
|
1861
|
-
skip_remote: bool = False,
|
|
1878
|
+
cwl_object: CWLObjectType | None,
|
|
1862
1879
|
bypass_file_store: bool = False,
|
|
1863
1880
|
) -> list[V]:
|
|
1864
1881
|
"""
|
|
@@ -1880,37 +1897,38 @@ def visit_files(
|
|
|
1880
1897
|
|
|
1881
1898
|
Also does some miscellaneous normalization.
|
|
1882
1899
|
|
|
1883
|
-
:param
|
|
1884
|
-
|
|
1900
|
+
:param file_func: Function to run on each file's URI. This might
|
|
1901
|
+
do something like uploading a URI and filling in the file's location
|
|
1902
|
+
and/or returning an uploaded FileID. Any return values are aggregated
|
|
1903
|
+
and returned.
|
|
1904
|
+
|
|
1905
|
+
:param directory_func: Function to run on each directory's contents. This
|
|
1906
|
+
might fill in the directory's location based on its already-processed
|
|
1907
|
+
contents. Any return values are ignored.
|
|
1885
1908
|
|
|
1886
1909
|
:param fs_access: the CWL FS access object we use to access the filesystem
|
|
1887
1910
|
to find files to import. Needs to support the URI schemes used.
|
|
1888
1911
|
|
|
1889
|
-
:param fileindex: Forward map to fill in from file URI to Toil storage
|
|
1890
|
-
location, used by write_file to deduplicate writes.
|
|
1891
|
-
|
|
1892
|
-
:param existing: Reverse map to fill in from Toil storage location to file
|
|
1893
|
-
URI. Not read from.
|
|
1894
|
-
|
|
1895
1912
|
:param cwl_object: CWL tool (or workflow order) we are importing files for
|
|
1896
1913
|
|
|
1897
|
-
:param
|
|
1898
|
-
|
|
1899
|
-
with an error.
|
|
1900
|
-
|
|
1901
|
-
:param skp_remote: If True, leave remote URIs in place instead of importing
|
|
1902
|
-
files.
|
|
1903
|
-
|
|
1904
|
-
:param bypass_file_store: If True, leave file:// URIs in place instead of
|
|
1914
|
+
:param bypass_file_store: If True, only do the normalization, and don't
|
|
1915
|
+
actually visit. This will leave file:// URIs in place instead of
|
|
1905
1916
|
importing files and directories.
|
|
1906
1917
|
|
|
1907
1918
|
:param log_level: Log imported files at the given level.
|
|
1919
|
+
|
|
1920
|
+
:returns: A list of all return values from file_func calls.
|
|
1908
1921
|
"""
|
|
1922
|
+
# TODO: This function used to be very specific to coordinating the actual
|
|
1923
|
+
# upload of all the files, and has only been half-converted to a more
|
|
1924
|
+
# generic scan. Some of the comments and structure only make sense in its
|
|
1925
|
+
# original application.
|
|
1926
|
+
|
|
1909
1927
|
func_return: list[Any] = list()
|
|
1910
1928
|
tool_id = cwl_object.get("id", str(cwl_object)) if cwl_object else ""
|
|
1911
1929
|
|
|
1912
|
-
logger.debug("
|
|
1913
|
-
logger.debug("
|
|
1930
|
+
logger.debug("Visiting files for %s", tool_id)
|
|
1931
|
+
logger.debug("Visiting files in %s", cwl_object)
|
|
1914
1932
|
|
|
1915
1933
|
# We need to upload all files to the Toil filestore, and encode structure
|
|
1916
1934
|
# recursively into all Directories' locations. But we cannot safely alter
|
|
@@ -1935,7 +1953,7 @@ def visit_files(
|
|
|
1935
1953
|
|
|
1936
1954
|
def visit_file_or_directory_down(
|
|
1937
1955
|
rec: CWLObjectType,
|
|
1938
|
-
) ->
|
|
1956
|
+
) -> list[CWLObjectType] | None:
|
|
1939
1957
|
"""
|
|
1940
1958
|
Visit each CWL File or Directory on the way down.
|
|
1941
1959
|
|
|
@@ -1984,7 +2002,7 @@ def visit_files(
|
|
|
1984
2002
|
|
|
1985
2003
|
def visit_file_or_directory_up(
|
|
1986
2004
|
rec: CWLObjectType,
|
|
1987
|
-
down_result:
|
|
2005
|
+
down_result: list[CWLObjectType] | None,
|
|
1988
2006
|
child_results: list[DirectoryContents],
|
|
1989
2007
|
) -> DirectoryContents:
|
|
1990
2008
|
"""
|
|
@@ -2006,17 +2024,12 @@ def visit_files(
|
|
|
2006
2024
|
if rec.get("class", None) == "File":
|
|
2007
2025
|
# This is a CWL File
|
|
2008
2026
|
|
|
2027
|
+
# We want to track it and any of its associated secondary files in
|
|
2028
|
+
# this pseudo-Directory.
|
|
2009
2029
|
result: DirectoryContents = {}
|
|
2010
|
-
|
|
2011
|
-
|
|
2012
|
-
|
|
2013
|
-
fileindex,
|
|
2014
|
-
existing,
|
|
2015
|
-
rec,
|
|
2016
|
-
mark_broken=mark_broken,
|
|
2017
|
-
skip_remote=skip_remote,
|
|
2018
|
-
)
|
|
2019
|
-
)
|
|
2030
|
+
|
|
2031
|
+
# Run the vsitor function on the file and store the return
|
|
2032
|
+
func_return.append(file_func(rec))
|
|
2020
2033
|
|
|
2021
2034
|
# Make a record for this file under its name
|
|
2022
2035
|
result[cast(str, rec["basename"])] = cast(str, rec["location"])
|
|
@@ -2043,8 +2056,8 @@ def visit_files(
|
|
|
2043
2056
|
# file under its name
|
|
2044
2057
|
contents.update(child_result)
|
|
2045
2058
|
|
|
2046
|
-
#
|
|
2047
|
-
|
|
2059
|
+
# Visit the directory itself (which will probably adjust its location).
|
|
2060
|
+
directory_func(rec, contents)
|
|
2048
2061
|
|
|
2049
2062
|
# Show those contents as being under our name in our parent.
|
|
2050
2063
|
return {cast(str, rec["basename"]): contents}
|
|
@@ -2118,18 +2131,20 @@ def upload_directory(
|
|
|
2118
2131
|
directory_metadata["location"] = encode_directory(directory_contents)
|
|
2119
2132
|
|
|
2120
2133
|
|
|
2121
|
-
def
|
|
2122
|
-
|
|
2134
|
+
def ensure_file_imported(
|
|
2135
|
+
import_func: Callable[[str], FileID],
|
|
2136
|
+
file_metadata: CWLObjectType,
|
|
2123
2137
|
fileindex: dict[str, str],
|
|
2124
2138
|
existing: dict[str, str],
|
|
2125
|
-
file_metadata: CWLObjectType,
|
|
2126
2139
|
mark_broken: bool = False,
|
|
2127
2140
|
skip_remote: bool = False,
|
|
2128
2141
|
) -> None:
|
|
2129
2142
|
"""
|
|
2130
2143
|
Extract the file URI out of a file object and convert it to a Toil URI.
|
|
2131
2144
|
|
|
2132
|
-
|
|
2145
|
+
Stores the Toil URI in file_metadata.
|
|
2146
|
+
|
|
2147
|
+
Runs import_func to actually import new URIs.
|
|
2133
2148
|
|
|
2134
2149
|
Is used to handle importing files into the jobstore.
|
|
2135
2150
|
|
|
@@ -2139,12 +2154,10 @@ def extract_and_convert_file_to_toil_uri(
|
|
|
2139
2154
|
Unless skip_remote is set, also run on remote files and sets their locations
|
|
2140
2155
|
to toil URIs as well.
|
|
2141
2156
|
"""
|
|
2142
|
-
location = extract_file_uri_once(
|
|
2143
|
-
fileindex, existing, file_metadata, mark_broken, skip_remote
|
|
2144
|
-
)
|
|
2157
|
+
location = extract_file_uri_once(file_metadata, fileindex, mark_broken, skip_remote)
|
|
2145
2158
|
if location is not None:
|
|
2146
|
-
file_metadata["location"] =
|
|
2147
|
-
|
|
2159
|
+
file_metadata["location"] = import_file_through_cache(
|
|
2160
|
+
import_func, fileindex, existing, location
|
|
2148
2161
|
)
|
|
2149
2162
|
|
|
2150
2163
|
logger.debug("Sending file at: %s", file_metadata["location"])
|
|
@@ -2175,15 +2188,15 @@ class CWLNamedJob(Job):
|
|
|
2175
2188
|
|
|
2176
2189
|
def __init__(
|
|
2177
2190
|
self,
|
|
2178
|
-
cores:
|
|
2179
|
-
memory:
|
|
2180
|
-
disk:
|
|
2181
|
-
accelerators:
|
|
2182
|
-
preemptible:
|
|
2183
|
-
tool_id:
|
|
2184
|
-
parent_name:
|
|
2185
|
-
subjob_name:
|
|
2186
|
-
local:
|
|
2191
|
+
cores: float | None = 1,
|
|
2192
|
+
memory: int | str | None = "1GiB",
|
|
2193
|
+
disk: int | str | None = "1MiB",
|
|
2194
|
+
accelerators: list[AcceleratorRequirement] | None = None,
|
|
2195
|
+
preemptible: bool | None = None,
|
|
2196
|
+
tool_id: str | None = None,
|
|
2197
|
+
parent_name: str | None = None,
|
|
2198
|
+
subjob_name: str | None = None,
|
|
2199
|
+
local: bool | None = None,
|
|
2187
2200
|
) -> None:
|
|
2188
2201
|
"""
|
|
2189
2202
|
Make a new job and set up its requirements and naming.
|
|
@@ -2239,9 +2252,7 @@ class ResolveIndirect(CWLNamedJob):
|
|
|
2239
2252
|
of actual values.
|
|
2240
2253
|
"""
|
|
2241
2254
|
|
|
2242
|
-
def __init__(
|
|
2243
|
-
self, cwljob: Promised[CWLObjectType], parent_name: Optional[str] = None
|
|
2244
|
-
):
|
|
2255
|
+
def __init__(self, cwljob: Promised[CWLObjectType], parent_name: str | None = None):
|
|
2245
2256
|
"""Store the dictionary of promises for later resolution."""
|
|
2246
2257
|
super().__init__(parent_name=parent_name, subjob_name="_resolve", local=True)
|
|
2247
2258
|
self.cwljob = cwljob
|
|
@@ -2253,9 +2264,9 @@ class ResolveIndirect(CWLNamedJob):
|
|
|
2253
2264
|
|
|
2254
2265
|
def toilStageFiles(
|
|
2255
2266
|
toil: Toil,
|
|
2256
|
-
cwljob:
|
|
2267
|
+
cwljob: CWLObjectType | list[CWLObjectType],
|
|
2257
2268
|
outdir: str,
|
|
2258
|
-
destBucket:
|
|
2269
|
+
destBucket: str | None = None,
|
|
2259
2270
|
log_level: int = logging.DEBUG,
|
|
2260
2271
|
) -> None:
|
|
2261
2272
|
"""
|
|
@@ -2268,7 +2279,7 @@ def toilStageFiles(
|
|
|
2268
2279
|
"""
|
|
2269
2280
|
|
|
2270
2281
|
def _collectDirEntries(
|
|
2271
|
-
obj:
|
|
2282
|
+
obj: CWLObjectType | list[CWLObjectType],
|
|
2272
2283
|
) -> Iterator[CWLObjectType]:
|
|
2273
2284
|
if isinstance(obj, dict):
|
|
2274
2285
|
if obj.get("class") in ("File", "Directory"):
|
|
@@ -2450,8 +2461,8 @@ class CWLJobWrapper(CWLNamedJob):
|
|
|
2450
2461
|
tool: Process,
|
|
2451
2462
|
cwljob: CWLObjectType,
|
|
2452
2463
|
runtime_context: cwltool.context.RuntimeContext,
|
|
2453
|
-
parent_name:
|
|
2454
|
-
conditional:
|
|
2464
|
+
parent_name: str | None,
|
|
2465
|
+
conditional: Conditional | None = None,
|
|
2455
2466
|
):
|
|
2456
2467
|
"""Store our context for later evaluation."""
|
|
2457
2468
|
super().__init__(
|
|
@@ -2498,8 +2509,8 @@ class CWLJob(CWLNamedJob):
|
|
|
2498
2509
|
tool: Process,
|
|
2499
2510
|
cwljob: CWLObjectType,
|
|
2500
2511
|
runtime_context: cwltool.context.RuntimeContext,
|
|
2501
|
-
parent_name:
|
|
2502
|
-
conditional:
|
|
2512
|
+
parent_name: str | None = None,
|
|
2513
|
+
conditional: Conditional | None = None,
|
|
2503
2514
|
):
|
|
2504
2515
|
"""Store the context for later execution."""
|
|
2505
2516
|
self.cwltool = tool
|
|
@@ -2549,14 +2560,14 @@ class CWLJob(CWLNamedJob):
|
|
|
2549
2560
|
else:
|
|
2550
2561
|
# We use a None requirement and the Toil default applies.
|
|
2551
2562
|
memory = None
|
|
2552
|
-
|
|
2563
|
+
|
|
2553
2564
|
# Imposing a minimum memory limit
|
|
2554
2565
|
min_ram = getattr(runtime_context, "cwl_min_ram")
|
|
2555
2566
|
if min_ram is not None and memory is not None:
|
|
2556
2567
|
# Note: if the job is using the toil default memory, it won't be increased
|
|
2557
2568
|
memory = max(memory, min_ram)
|
|
2558
2569
|
|
|
2559
|
-
accelerators:
|
|
2570
|
+
accelerators: list[AcceleratorRequirement] | None = None
|
|
2560
2571
|
if req.get("cudaDeviceCount", 0) > 0:
|
|
2561
2572
|
# There's a CUDARequirement, which cwltool processed for us
|
|
2562
2573
|
# TODO: How is cwltool deciding what value to use between min and max?
|
|
@@ -2579,7 +2590,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2579
2590
|
# https://github.com/common-workflow-language/cwltool/blob/1573509eea2faa3cd1dc959224e52ff1d796d3eb/cwltool/extensions.yml#L221
|
|
2580
2591
|
#
|
|
2581
2592
|
# By default we have default preemptibility.
|
|
2582
|
-
preemptible:
|
|
2593
|
+
preemptible: bool | None = None
|
|
2583
2594
|
preemptible_req, _ = tool.get_requirement(
|
|
2584
2595
|
"http://arvados.org/cwl#UsePreemptible"
|
|
2585
2596
|
)
|
|
@@ -2858,17 +2869,19 @@ class CWLJob(CWLNamedJob):
|
|
|
2858
2869
|
logger.log(log_level, "Loading %s...", url)
|
|
2859
2870
|
return writeGlobalFileWrapper(file_store, url)
|
|
2860
2871
|
|
|
2861
|
-
|
|
2862
|
-
|
|
2872
|
+
file_visitor = functools.partial(
|
|
2873
|
+
ensure_file_imported,
|
|
2874
|
+
file_import_function,
|
|
2875
|
+
fileindex=index,
|
|
2876
|
+
existing=existing,
|
|
2863
2877
|
)
|
|
2864
2878
|
|
|
2865
2879
|
# Upload all the Files and set their and the Directories' locations, if
|
|
2866
2880
|
# needed.
|
|
2867
2881
|
visit_files(
|
|
2868
|
-
|
|
2882
|
+
file_visitor,
|
|
2883
|
+
upload_directory,
|
|
2869
2884
|
fs_access,
|
|
2870
|
-
index,
|
|
2871
|
-
existing,
|
|
2872
2885
|
output,
|
|
2873
2886
|
bypass_file_store=getattr(runtime_context, "bypass_file_store", False),
|
|
2874
2887
|
)
|
|
@@ -2912,19 +2925,51 @@ def makeRootJob(
|
|
|
2912
2925
|
:return:
|
|
2913
2926
|
"""
|
|
2914
2927
|
if options.run_imports_on_workers:
|
|
2915
|
-
|
|
2916
|
-
|
|
2917
|
-
|
|
2928
|
+
input_filenames, tool_filenames = extract_workflow_inputs(
|
|
2929
|
+
options, initialized_job_order, tool
|
|
2930
|
+
)
|
|
2931
|
+
|
|
2932
|
+
# We need to import the tool files on the leader without symlinking,
|
|
2933
|
+
# because they might not be available on shared storage.
|
|
2934
|
+
|
|
2935
|
+
# We need to make sure that if a workflow input and a tool input
|
|
2936
|
+
# resolve to the same real file, it only gets imported once, without
|
|
2937
|
+
# symlinking.
|
|
2938
|
+
|
|
2939
|
+
# Get metadata for non-tool input files
|
|
2940
|
+
input_metadata = get_file_sizes(
|
|
2941
|
+
input_filenames,
|
|
2942
|
+
toil._jobStore,
|
|
2943
|
+
include_remote_files=options.reference_inputs,
|
|
2944
|
+
)
|
|
2945
|
+
|
|
2946
|
+
# Also get metadata for tool input files, so we can resolve them to candidate URIs
|
|
2947
|
+
tool_metadata = get_file_sizes(
|
|
2948
|
+
input_filenames,
|
|
2949
|
+
toil._jobStore,
|
|
2950
|
+
include_remote_files=options.reference_inputs,
|
|
2951
|
+
)
|
|
2952
|
+
|
|
2953
|
+
# Import all the tool files right away, because a file that's both a
|
|
2954
|
+
# tool file and an input needs to be imported without symlinking (since
|
|
2955
|
+
# they might not be accessible from workers), and this builds the dict
|
|
2956
|
+
# we can use to see if a resolved URI was a tool file.
|
|
2957
|
+
logger.info("Importing tool-associated files...")
|
|
2958
|
+
tool_path_to_fileid = WorkerImportJob.import_files(
|
|
2959
|
+
tool_filenames, toil._jobStore, symlink=False
|
|
2918
2960
|
)
|
|
2919
2961
|
|
|
2920
2962
|
# Mapping of files to metadata for files that will be imported on the worker
|
|
2921
|
-
# This will consist of files that we were able to get a file size for
|
|
2963
|
+
# This will consist of input files that we were able to get a file size for
|
|
2922
2964
|
worker_metadata: dict[str, FileMetadata] = dict()
|
|
2923
|
-
# Mapping of files to metadata for files that will be imported on the leader
|
|
2924
|
-
# This will consist of files that we were not able to get a file size for
|
|
2925
|
-
leader_metadata = dict()
|
|
2926
|
-
for filename, file_data in
|
|
2927
|
-
if file_data
|
|
2965
|
+
# Mapping of files to metadata for input files that will be imported on the leader
|
|
2966
|
+
# This will consist of input files that we were not able to get a file size for
|
|
2967
|
+
leader_metadata: dict[str, FileMetadata] = dict()
|
|
2968
|
+
for filename, file_data in input_metadata.items():
|
|
2969
|
+
if file_data.source in tool_path_to_fileid:
|
|
2970
|
+
# This input is also a tool file and is already imported.
|
|
2971
|
+
continue
|
|
2972
|
+
if file_data.size is None:
|
|
2928
2973
|
leader_metadata[filename] = file_data
|
|
2929
2974
|
else:
|
|
2930
2975
|
worker_metadata[filename] = file_data
|
|
@@ -2935,20 +2980,32 @@ def makeRootJob(
|
|
|
2935
2980
|
len(worker_metadata),
|
|
2936
2981
|
)
|
|
2937
2982
|
|
|
2938
|
-
#
|
|
2983
|
+
# Import other leader files (those without size info) with symlink=True
|
|
2984
|
+
logger.info("Importing unknown-size files...")
|
|
2939
2985
|
path_to_fileid = WorkerImportJob.import_files(
|
|
2940
2986
|
list(leader_metadata.keys()), toil._jobStore
|
|
2941
2987
|
)
|
|
2942
2988
|
|
|
2989
|
+
# Combine leader imports
|
|
2990
|
+
path_to_fileid.update(tool_path_to_fileid)
|
|
2991
|
+
|
|
2943
2992
|
# Because installing the imported files expects all files to have been
|
|
2944
2993
|
# imported, we don't do that here; we combine the leader imports and
|
|
2945
2994
|
# the worker imports and install them all at once.
|
|
2946
2995
|
|
|
2947
2996
|
import_job = CWLImportWrapper(
|
|
2948
|
-
initialized_job_order,
|
|
2997
|
+
initialized_job_order,
|
|
2998
|
+
tool,
|
|
2999
|
+
runtime_context,
|
|
3000
|
+
worker_metadata,
|
|
3001
|
+
path_to_fileid,
|
|
3002
|
+
options,
|
|
2949
3003
|
)
|
|
2950
3004
|
return import_job
|
|
2951
3005
|
else:
|
|
3006
|
+
# Use a separate codepath to do all the imports on the leader.
|
|
3007
|
+
# TODO: Can we combine the two codepaths and just do 0 worker imports
|
|
3008
|
+
# in all-leader mode?
|
|
2952
3009
|
import_workflow_inputs(
|
|
2953
3010
|
toil._jobStore,
|
|
2954
3011
|
options,
|
|
@@ -2966,13 +3023,13 @@ def makeJob(
|
|
|
2966
3023
|
tool: Process,
|
|
2967
3024
|
jobobj: CWLObjectType,
|
|
2968
3025
|
runtime_context: cwltool.context.RuntimeContext,
|
|
2969
|
-
parent_name:
|
|
2970
|
-
conditional:
|
|
2971
|
-
) ->
|
|
2972
|
-
tuple["CWLWorkflow", ResolveIndirect]
|
|
2973
|
-
tuple[CWLJob, CWLJob]
|
|
2974
|
-
tuple[CWLJobWrapper, CWLJobWrapper]
|
|
2975
|
-
|
|
3026
|
+
parent_name: str | None,
|
|
3027
|
+
conditional: Conditional | None,
|
|
3028
|
+
) -> (
|
|
3029
|
+
tuple["CWLWorkflow", ResolveIndirect]
|
|
3030
|
+
| tuple[CWLJob, CWLJob]
|
|
3031
|
+
| tuple[CWLJobWrapper, CWLJobWrapper]
|
|
3032
|
+
):
|
|
2976
3033
|
"""
|
|
2977
3034
|
Create the correct Toil Job object for the CWL tool.
|
|
2978
3035
|
|
|
@@ -3044,8 +3101,8 @@ class CWLScatter(Job):
|
|
|
3044
3101
|
step: cwltool.workflow.WorkflowStep,
|
|
3045
3102
|
cwljob: CWLObjectType,
|
|
3046
3103
|
runtime_context: cwltool.context.RuntimeContext,
|
|
3047
|
-
parent_name:
|
|
3048
|
-
conditional:
|
|
3104
|
+
parent_name: str | None,
|
|
3105
|
+
conditional: Conditional | None,
|
|
3049
3106
|
):
|
|
3050
3107
|
"""Store our context for later execution."""
|
|
3051
3108
|
super().__init__(cores=1, memory="1GiB", disk="1MiB", local=True)
|
|
@@ -3205,7 +3262,7 @@ class CWLGather(Job):
|
|
|
3205
3262
|
def __init__(
|
|
3206
3263
|
self,
|
|
3207
3264
|
step: cwltool.workflow.WorkflowStep,
|
|
3208
|
-
outputs: Promised[
|
|
3265
|
+
outputs: Promised[CWLObjectType | list[CWLObjectType]],
|
|
3209
3266
|
):
|
|
3210
3267
|
"""Collect our context for later gathering."""
|
|
3211
3268
|
super().__init__(cores=1, memory="1GiB", disk="1MiB", local=True)
|
|
@@ -3214,8 +3271,8 @@ class CWLGather(Job):
|
|
|
3214
3271
|
|
|
3215
3272
|
@staticmethod
|
|
3216
3273
|
def extract(
|
|
3217
|
-
obj:
|
|
3218
|
-
) ->
|
|
3274
|
+
obj: CWLObjectType | list[CWLObjectType], k: str
|
|
3275
|
+
) -> CWLOutputType | list[CWLObjectType]:
|
|
3219
3276
|
"""
|
|
3220
3277
|
Extract the given key from the obj.
|
|
3221
3278
|
|
|
@@ -3235,14 +3292,14 @@ class CWLGather(Job):
|
|
|
3235
3292
|
"""Gather all the outputs of the scatter."""
|
|
3236
3293
|
outobj = {}
|
|
3237
3294
|
|
|
3238
|
-
def sn(n:
|
|
3295
|
+
def sn(n: Mapping[str, Any] | str) -> str:
|
|
3239
3296
|
if isinstance(n, Mapping):
|
|
3240
3297
|
return shortname(n["id"])
|
|
3241
3298
|
if isinstance(n, str):
|
|
3242
3299
|
return shortname(n)
|
|
3243
3300
|
|
|
3244
3301
|
# TODO: MyPy can't understand that this is the type we should get by unwrapping the promise
|
|
3245
|
-
outputs:
|
|
3302
|
+
outputs: CWLObjectType | list[CWLObjectType] = cast(
|
|
3246
3303
|
Union[CWLObjectType, list[CWLObjectType]], unwrap(self.outputs)
|
|
3247
3304
|
)
|
|
3248
3305
|
for k in [sn(i) for i in self.step.tool["out"]]:
|
|
@@ -3311,8 +3368,8 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
3311
3368
|
cwlwf: cwltool.workflow.Workflow,
|
|
3312
3369
|
cwljob: CWLObjectType,
|
|
3313
3370
|
runtime_context: cwltool.context.RuntimeContext,
|
|
3314
|
-
parent_name:
|
|
3315
|
-
conditional:
|
|
3371
|
+
parent_name: str | None = None,
|
|
3372
|
+
conditional: Conditional | None = None,
|
|
3316
3373
|
):
|
|
3317
3374
|
"""Gather our context for later execution."""
|
|
3318
3375
|
super().__init__(
|
|
@@ -3325,7 +3382,7 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
3325
3382
|
|
|
3326
3383
|
def run(
|
|
3327
3384
|
self, file_store: AbstractFileStore
|
|
3328
|
-
) ->
|
|
3385
|
+
) -> UnresolvedDict | dict[str, SkipNull]:
|
|
3329
3386
|
"""
|
|
3330
3387
|
Convert a CWL Workflow graph into a Toil job graph.
|
|
3331
3388
|
|
|
@@ -3376,7 +3433,7 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
3376
3433
|
if stepinputs_fufilled:
|
|
3377
3434
|
logger.debug("Ready to make job for workflow step %s", step_id)
|
|
3378
3435
|
jobobj: dict[
|
|
3379
|
-
str,
|
|
3436
|
+
str, ResolveSource | DefaultWithSource | StepValueFrom
|
|
3380
3437
|
] = {}
|
|
3381
3438
|
|
|
3382
3439
|
for inp in step.tool["inputs"]:
|
|
@@ -3415,18 +3472,18 @@ class CWLWorkflow(CWLNamedJob):
|
|
|
3415
3472
|
)
|
|
3416
3473
|
|
|
3417
3474
|
if "scatter" in step.tool:
|
|
3418
|
-
wfjob:
|
|
3419
|
-
CWLScatter
|
|
3420
|
-
|
|
3421
|
-
|
|
3422
|
-
|
|
3423
|
-
|
|
3424
|
-
|
|
3425
|
-
|
|
3475
|
+
wfjob: CWLScatter | CWLWorkflow | CWLJob | CWLJobWrapper = (
|
|
3476
|
+
CWLScatter(
|
|
3477
|
+
step,
|
|
3478
|
+
UnresolvedDict(jobobj),
|
|
3479
|
+
self.runtime_context,
|
|
3480
|
+
parent_name=parent_name,
|
|
3481
|
+
conditional=conditional,
|
|
3482
|
+
)
|
|
3426
3483
|
)
|
|
3427
|
-
followOn:
|
|
3428
|
-
CWLGather
|
|
3429
|
-
|
|
3484
|
+
followOn: (
|
|
3485
|
+
CWLGather | ResolveIndirect | CWLJob | CWLJobWrapper
|
|
3486
|
+
) = CWLGather(step, wfjob.rv())
|
|
3430
3487
|
wfjob.addFollowOn(followOn)
|
|
3431
3488
|
logger.debug(
|
|
3432
3489
|
"Is scatter with job %s and follow-on %s",
|
|
@@ -3517,7 +3574,10 @@ class CWLInstallImportsJob(Job):
|
|
|
3517
3574
|
basedir: str,
|
|
3518
3575
|
skip_remote: bool,
|
|
3519
3576
|
bypass_file_store: bool,
|
|
3520
|
-
|
|
3577
|
+
leader_imports: dict[str, FileID],
|
|
3578
|
+
worker_imports: None | (
|
|
3579
|
+
Promised[tuple[dict[str, FileID], dict[str, FileMetadata]]]
|
|
3580
|
+
) = None,
|
|
3521
3581
|
**kwargs: Any,
|
|
3522
3582
|
) -> None:
|
|
3523
3583
|
"""
|
|
@@ -3526,7 +3586,9 @@ class CWLInstallImportsJob(Job):
|
|
|
3526
3586
|
|
|
3527
3587
|
This class is only used when runImportsOnWorkers is enabled.
|
|
3528
3588
|
|
|
3529
|
-
:param
|
|
3589
|
+
:param leader_imports: Direct mapping from file URI to FileID for files imported on the leader.
|
|
3590
|
+
:param worker_imports: Promise of (candidate_uri->FileID, filename->FileMetadata) tuple from worker imports.
|
|
3591
|
+
These two dicts must be used together for lookups.
|
|
3530
3592
|
"""
|
|
3531
3593
|
super().__init__(local=True, **kwargs)
|
|
3532
3594
|
self.initialized_job_order = initialized_job_order
|
|
@@ -3534,7 +3596,8 @@ class CWLInstallImportsJob(Job):
|
|
|
3534
3596
|
self.basedir = basedir
|
|
3535
3597
|
self.skip_remote = skip_remote
|
|
3536
3598
|
self.bypass_file_store = bypass_file_store
|
|
3537
|
-
self.
|
|
3599
|
+
self.leader_imports = leader_imports
|
|
3600
|
+
self.worker_imports = worker_imports
|
|
3538
3601
|
|
|
3539
3602
|
# TODO: Since we only call this from the class itself now it doesn't really
|
|
3540
3603
|
# need to be static anymore.
|
|
@@ -3542,52 +3605,74 @@ class CWLInstallImportsJob(Job):
|
|
|
3542
3605
|
def fill_in_files(
|
|
3543
3606
|
initialized_job_order: CWLObjectType,
|
|
3544
3607
|
tool: Process,
|
|
3545
|
-
|
|
3608
|
+
leader_imports: dict[str, FileID],
|
|
3609
|
+
worker_candidate_to_fileid: dict[str, FileID] | None,
|
|
3610
|
+
file_to_metadata: dict[str, FileMetadata] | None,
|
|
3546
3611
|
basedir: str,
|
|
3547
3612
|
skip_remote: bool,
|
|
3548
3613
|
bypass_file_store: bool,
|
|
3549
3614
|
) -> tuple[Process, CWLObjectType]:
|
|
3550
3615
|
"""
|
|
3551
|
-
Given
|
|
3616
|
+
Given mappings of filenames to Toil file IDs, replace the filename with
|
|
3617
|
+
the file IDs throughout the CWL object.
|
|
3618
|
+
|
|
3619
|
+
:param leader_imports: Direct mapping from file URI to FileID for files
|
|
3620
|
+
imported on the leader.
|
|
3621
|
+
:param worker_candidate_to_fileid: Mapping from normalized candidate
|
|
3622
|
+
URI to FileID for worker imports.
|
|
3623
|
+
:param file_to_metadata: Mapping from original filename to FileMetadata (which contains
|
|
3624
|
+
the normalized candidate URI in .source). Must be provided
|
|
3625
|
+
together with worker_candidate_to_fileid.
|
|
3552
3626
|
"""
|
|
3553
3627
|
|
|
3554
3628
|
def fill_in_file(filename: str) -> FileID:
|
|
3555
3629
|
"""
|
|
3556
3630
|
Return the file name's associated Toil file ID
|
|
3557
3631
|
"""
|
|
3558
|
-
|
|
3559
|
-
|
|
3560
|
-
|
|
3561
|
-
|
|
3562
|
-
|
|
3563
|
-
|
|
3564
|
-
|
|
3565
|
-
|
|
3566
|
-
|
|
3567
|
-
|
|
3568
|
-
|
|
3632
|
+
# Try worker imports first
|
|
3633
|
+
if (
|
|
3634
|
+
worker_candidate_to_fileid is not None
|
|
3635
|
+
and file_to_metadata is not None
|
|
3636
|
+
and filename in file_to_metadata
|
|
3637
|
+
):
|
|
3638
|
+
# Get the full candidate URI we used for this file
|
|
3639
|
+
candidate_uri = file_to_metadata[filename].source
|
|
3640
|
+
# Get the FileID we got from that URI
|
|
3641
|
+
return worker_candidate_to_fileid[candidate_uri]
|
|
3642
|
+
|
|
3643
|
+
# Fall back to direct lookup in leader imports
|
|
3644
|
+
if filename in leader_imports:
|
|
3645
|
+
return leader_imports[filename]
|
|
3646
|
+
|
|
3647
|
+
# If it wasn't imported on a worker or on the leader, it is missing.
|
|
3648
|
+
raise RuntimeError(f'File at "{filename}" was never imported.')
|
|
3649
|
+
|
|
3569
3650
|
fileindex: dict[str, str] = {}
|
|
3570
3651
|
existing: dict[str, str] = {}
|
|
3652
|
+
file_visitor = functools.partial(
|
|
3653
|
+
ensure_file_imported,
|
|
3654
|
+
fill_in_file,
|
|
3655
|
+
fileindex=fileindex,
|
|
3656
|
+
existing=existing,
|
|
3657
|
+
mark_broken=True,
|
|
3658
|
+
skip_remote=skip_remote,
|
|
3659
|
+
)
|
|
3660
|
+
directory_visitor = functools.partial(upload_directory, mark_broken=True)
|
|
3661
|
+
fs_access = ToilFsAccess(basedir)
|
|
3571
3662
|
visit_files(
|
|
3572
|
-
|
|
3663
|
+
file_visitor,
|
|
3664
|
+
directory_visitor,
|
|
3573
3665
|
fs_access,
|
|
3574
|
-
fileindex,
|
|
3575
|
-
existing,
|
|
3576
3666
|
initialized_job_order,
|
|
3577
|
-
mark_broken=True,
|
|
3578
|
-
skip_remote=skip_remote,
|
|
3579
3667
|
bypass_file_store=bypass_file_store,
|
|
3580
3668
|
)
|
|
3581
3669
|
visitSteps(
|
|
3582
3670
|
tool,
|
|
3583
3671
|
functools.partial(
|
|
3584
3672
|
visit_files,
|
|
3585
|
-
|
|
3673
|
+
file_visitor,
|
|
3674
|
+
directory_visitor,
|
|
3586
3675
|
fs_access,
|
|
3587
|
-
fileindex,
|
|
3588
|
-
existing,
|
|
3589
|
-
mark_broken=True,
|
|
3590
|
-
skip_remote=skip_remote,
|
|
3591
3676
|
bypass_file_store=bypass_file_store,
|
|
3592
3677
|
),
|
|
3593
3678
|
)
|
|
@@ -3602,27 +3687,28 @@ class CWLInstallImportsJob(Job):
|
|
|
3602
3687
|
rm_unprocessed_secondary_files(param_value)
|
|
3603
3688
|
return tool, initialized_job_order
|
|
3604
3689
|
|
|
3605
|
-
def run(self, file_store: AbstractFileStore) ->
|
|
3690
|
+
def run(self, file_store: AbstractFileStore) -> tuple[Process, CWLObjectType]:
|
|
3606
3691
|
"""
|
|
3607
3692
|
Convert the filenames in the workflow inputs into the URIs
|
|
3608
3693
|
:return: Promise of transformed workflow inputs. A tuple of the job order and process
|
|
3609
3694
|
"""
|
|
3610
3695
|
|
|
3611
|
-
# Merge all the input dicts down to one to check.
|
|
3612
|
-
candidate_to_fileid: dict[str, FileID] = {
|
|
3613
|
-
k: v for mapping in unwrap(
|
|
3614
|
-
self.import_data
|
|
3615
|
-
) for k, v in unwrap(mapping).items()
|
|
3616
|
-
}
|
|
3617
|
-
|
|
3618
3696
|
initialized_job_order = unwrap(self.initialized_job_order)
|
|
3619
3697
|
tool = unwrap(self.tool)
|
|
3620
3698
|
|
|
3699
|
+
# Unpack worker imports if present
|
|
3700
|
+
worker_candidate_to_fileid: dict[str, FileID] | None = None
|
|
3701
|
+
file_to_metadata: dict[str, FileMetadata] | None = None
|
|
3702
|
+
if self.worker_imports is not None:
|
|
3703
|
+
worker_candidate_to_fileid, file_to_metadata = unwrap(self.worker_imports)
|
|
3704
|
+
|
|
3621
3705
|
# Install the imported files in the tool and job order
|
|
3622
3706
|
return self.fill_in_files(
|
|
3623
3707
|
initialized_job_order,
|
|
3624
3708
|
tool,
|
|
3625
|
-
|
|
3709
|
+
self.leader_imports,
|
|
3710
|
+
worker_candidate_to_fileid,
|
|
3711
|
+
file_to_metadata,
|
|
3626
3712
|
self.basedir,
|
|
3627
3713
|
self.skip_remote,
|
|
3628
3714
|
self.bypass_file_store,
|
|
@@ -3677,7 +3763,8 @@ class CWLImportWrapper(CWLNamedJob):
|
|
|
3677
3763
|
basedir=self.options.basedir,
|
|
3678
3764
|
skip_remote=self.options.reference_inputs,
|
|
3679
3765
|
bypass_file_store=self.options.bypass_file_store,
|
|
3680
|
-
|
|
3766
|
+
leader_imports=self.imported_files,
|
|
3767
|
+
worker_imports=imports_job.rv(),
|
|
3681
3768
|
)
|
|
3682
3769
|
self.addChild(install_imports_job)
|
|
3683
3770
|
imports_job.addFollowOn(install_imports_job)
|
|
@@ -3727,28 +3814,40 @@ class CWLStartJob(CWLNamedJob):
|
|
|
3727
3814
|
|
|
3728
3815
|
def extract_workflow_inputs(
|
|
3729
3816
|
options: Namespace, initialized_job_order: CWLObjectType, tool: Process
|
|
3730
|
-
) -> list[str]:
|
|
3817
|
+
) -> tuple[list[str], list[str]]:
|
|
3731
3818
|
"""
|
|
3732
|
-
Collect all the workflow input files to import later.
|
|
3819
|
+
Collect all the workflow input files and tool-associated files to import later.
|
|
3820
|
+
|
|
3821
|
+
Tool-associated files need to be imported without symlinks since they might be
|
|
3822
|
+
coming from storage not accessible to all nodes.
|
|
3823
|
+
|
|
3733
3824
|
:param options: namespace
|
|
3734
3825
|
:param initialized_job_order: cwl object
|
|
3735
3826
|
:param tool: tool object
|
|
3736
|
-
:return:
|
|
3827
|
+
:return: tuple of (input_files, tool_files)
|
|
3737
3828
|
"""
|
|
3738
3829
|
fileindex: dict[str, str] = {}
|
|
3739
3830
|
existing: dict[str, str] = {}
|
|
3740
3831
|
|
|
3832
|
+
# TODO: These visit passes do normalization, and when we install the
|
|
3833
|
+
# imports we'll do the normalization again. We should refactor to just do
|
|
3834
|
+
# the normalization once!
|
|
3835
|
+
|
|
3741
3836
|
# Extract out all the input files' filenames
|
|
3742
3837
|
logger.info("Collecting input files...")
|
|
3743
3838
|
fs_access = ToilFsAccess(options.basedir)
|
|
3744
|
-
|
|
3839
|
+
file_visitor = functools.partial(
|
|
3745
3840
|
extract_file_uri_once,
|
|
3746
|
-
|
|
3747
|
-
fileindex,
|
|
3748
|
-
existing,
|
|
3749
|
-
initialized_job_order,
|
|
3841
|
+
fileindex=fileindex,
|
|
3750
3842
|
mark_broken=True,
|
|
3751
3843
|
skip_remote=options.reference_inputs,
|
|
3844
|
+
)
|
|
3845
|
+
directory_visitor = functools.partial(upload_directory, mark_broken=True)
|
|
3846
|
+
input_filenames = visit_files(
|
|
3847
|
+
file_visitor,
|
|
3848
|
+
directory_visitor,
|
|
3849
|
+
fs_access,
|
|
3850
|
+
initialized_job_order,
|
|
3752
3851
|
bypass_file_store=options.bypass_file_store,
|
|
3753
3852
|
)
|
|
3754
3853
|
# Extract filenames of all the files associated with tools (binaries, etc.).
|
|
@@ -3757,17 +3856,16 @@ def extract_workflow_inputs(
|
|
|
3757
3856
|
tool,
|
|
3758
3857
|
functools.partial(
|
|
3759
3858
|
visit_files,
|
|
3760
|
-
|
|
3859
|
+
file_visitor,
|
|
3860
|
+
directory_visitor,
|
|
3761
3861
|
fs_access,
|
|
3762
|
-
fileindex,
|
|
3763
|
-
existing,
|
|
3764
|
-
mark_broken=True,
|
|
3765
|
-
skip_remote=options.reference_inputs,
|
|
3766
3862
|
bypass_file_store=options.bypass_file_store,
|
|
3767
3863
|
),
|
|
3768
3864
|
)
|
|
3769
|
-
|
|
3770
|
-
|
|
3865
|
+
return (
|
|
3866
|
+
[file for file in input_filenames if file is not None],
|
|
3867
|
+
[file for file in tool_filenames if file is not None],
|
|
3868
|
+
)
|
|
3771
3869
|
|
|
3772
3870
|
|
|
3773
3871
|
def import_workflow_inputs(
|
|
@@ -3788,6 +3886,11 @@ def import_workflow_inputs(
|
|
|
3788
3886
|
:param log_level: log level
|
|
3789
3887
|
:return:
|
|
3790
3888
|
"""
|
|
3889
|
+
|
|
3890
|
+
# Work out how to access files
|
|
3891
|
+
fs_access = ToilFsAccess(options.basedir)
|
|
3892
|
+
|
|
3893
|
+
# Create a cache for importing files
|
|
3791
3894
|
fileindex: dict[str, str] = {}
|
|
3792
3895
|
existing: dict[str, str] = {}
|
|
3793
3896
|
|
|
@@ -3797,61 +3900,70 @@ def import_workflow_inputs(
|
|
|
3797
3900
|
logger.log(log_level, "Loading %s...", url)
|
|
3798
3901
|
return jobstore.import_file(url, symlink=True)
|
|
3799
3902
|
|
|
3800
|
-
|
|
3801
|
-
|
|
3802
|
-
|
|
3803
|
-
|
|
3804
|
-
|
|
3805
|
-
|
|
3806
|
-
|
|
3807
|
-
visit_files(
|
|
3808
|
-
import_function,
|
|
3809
|
-
fs_access,
|
|
3810
|
-
fileindex,
|
|
3811
|
-
existing,
|
|
3812
|
-
initialized_job_order,
|
|
3903
|
+
# Make a visiting function for importing workflow input files, which may
|
|
3904
|
+
# allow symlinking
|
|
3905
|
+
file_visitor = functools.partial(
|
|
3906
|
+
ensure_file_imported,
|
|
3907
|
+
file_import_function,
|
|
3908
|
+
fileindex=fileindex,
|
|
3909
|
+
existing=existing,
|
|
3813
3910
|
mark_broken=True,
|
|
3814
3911
|
skip_remote=options.reference_inputs,
|
|
3815
|
-
bypass_file_store=options.bypass_file_store,
|
|
3816
3912
|
)
|
|
3913
|
+
# And a function for packing up directories of imported files.
|
|
3914
|
+
directory_visitor = functools.partial(upload_directory, mark_broken=True)
|
|
3817
3915
|
|
|
3818
3916
|
# Make another function for importing tool files. This one doesn't allow
|
|
3819
3917
|
# symlinking, since the tools might be coming from storage not accessible
|
|
3820
3918
|
# to all nodes.
|
|
3821
|
-
|
|
3822
|
-
|
|
3919
|
+
tool_file_visitor = functools.partial(
|
|
3920
|
+
ensure_file_imported,
|
|
3823
3921
|
cast(
|
|
3824
3922
|
Callable[[str], FileID],
|
|
3825
3923
|
functools.partial(jobstore.import_file, symlink=False),
|
|
3826
3924
|
),
|
|
3925
|
+
fileindex=fileindex,
|
|
3926
|
+
existing=existing,
|
|
3927
|
+
mark_broken=True,
|
|
3928
|
+
skip_remote=options.reference_inputs,
|
|
3827
3929
|
)
|
|
3828
3930
|
|
|
3829
|
-
# Import all the files associated with tools (binaries, etc.)
|
|
3830
|
-
#
|
|
3831
|
-
#
|
|
3931
|
+
# Import all the files associated with tools (binaries, etc.) FIRST, so
|
|
3932
|
+
# that they can be imported without symlinking even if they are also
|
|
3933
|
+
# workflow inputs.
|
|
3832
3934
|
logger.info("Importing tool-associated files...")
|
|
3833
3935
|
visitSteps(
|
|
3834
3936
|
tool,
|
|
3835
3937
|
functools.partial(
|
|
3836
3938
|
visit_files,
|
|
3837
|
-
|
|
3939
|
+
tool_file_visitor,
|
|
3940
|
+
directory_visitor,
|
|
3838
3941
|
fs_access,
|
|
3839
|
-
fileindex,
|
|
3840
|
-
existing,
|
|
3841
|
-
mark_broken=True,
|
|
3842
|
-
skip_remote=options.reference_inputs,
|
|
3843
3942
|
bypass_file_store=options.bypass_file_store,
|
|
3844
3943
|
),
|
|
3845
3944
|
)
|
|
3846
3945
|
|
|
3847
|
-
#
|
|
3848
|
-
|
|
3849
|
-
|
|
3850
|
-
|
|
3851
|
-
|
|
3852
|
-
|
|
3853
|
-
|
|
3854
|
-
|
|
3946
|
+
# Not sure why you would have an optional secondary file here, but
|
|
3947
|
+
# the spec probably needs us to support them.
|
|
3948
|
+
visitSteps(tool, rm_unprocessed_secondary_files)
|
|
3949
|
+
|
|
3950
|
+
# Import all the input files, some of which may be missing optional
|
|
3951
|
+
# files.
|
|
3952
|
+
logger.info("Importing input files...")
|
|
3953
|
+
visit_files(
|
|
3954
|
+
file_visitor,
|
|
3955
|
+
directory_visitor,
|
|
3956
|
+
fs_access,
|
|
3957
|
+
initialized_job_order,
|
|
3958
|
+
bypass_file_store=options.bypass_file_store,
|
|
3959
|
+
)
|
|
3960
|
+
|
|
3961
|
+
# We always expect to have processed all files that exist.
|
|
3962
|
+
# Drop any files that aren't either imported (for when we use
|
|
3963
|
+
# the file store) or available on disk (for when we don't).
|
|
3964
|
+
# This will properly make them cause an error later if they
|
|
3965
|
+
# were required.
|
|
3966
|
+
rm_unprocessed_secondary_files(initialized_job_order)
|
|
3855
3967
|
|
|
3856
3968
|
|
|
3857
3969
|
T = TypeVar("T")
|
|
@@ -3859,7 +3971,7 @@ T = TypeVar("T")
|
|
|
3859
3971
|
|
|
3860
3972
|
def visitSteps(
|
|
3861
3973
|
cmdline_tool: Process,
|
|
3862
|
-
op: Callable[[CommentedMap], list[T]],
|
|
3974
|
+
op: Callable[[CommentedMap], list[T] | None],
|
|
3863
3975
|
) -> list[T]:
|
|
3864
3976
|
"""
|
|
3865
3977
|
Iterate over a CWL Process object, running the op on each tool description
|
|
@@ -3867,10 +3979,10 @@ def visitSteps(
|
|
|
3867
3979
|
"""
|
|
3868
3980
|
if isinstance(cmdline_tool, cwltool.workflow.Workflow):
|
|
3869
3981
|
# For workflows we need to dispatch on steps
|
|
3870
|
-
ret = []
|
|
3982
|
+
ret: list[T] = []
|
|
3871
3983
|
for step in cmdline_tool.steps:
|
|
3872
3984
|
# Handle the step's tool
|
|
3873
|
-
ret.extend(op(step.tool))
|
|
3985
|
+
ret.extend(op(step.tool) or [])
|
|
3874
3986
|
# Recurse on the embedded tool; maybe it's a workflow.
|
|
3875
3987
|
recurse_ret = visitSteps(step.embedded_tool, op)
|
|
3876
3988
|
ret.extend(recurse_ret)
|
|
@@ -3878,17 +3990,33 @@ def visitSteps(
|
|
|
3878
3990
|
elif isinstance(cmdline_tool, cwltool.process.Process):
|
|
3879
3991
|
# All CWL Process objects (including CommandLineTool) will have tools
|
|
3880
3992
|
# if they bothered to run the Process __init__.
|
|
3881
|
-
return op(cmdline_tool.tool)
|
|
3993
|
+
return op(cmdline_tool.tool) or []
|
|
3882
3994
|
raise RuntimeError(
|
|
3883
3995
|
f"Unsupported type encountered in workflow " f"traversal: {type(cmdline_tool)}"
|
|
3884
3996
|
)
|
|
3885
3997
|
|
|
3886
3998
|
|
|
3887
3999
|
def rm_unprocessed_secondary_files(job_params: Any) -> None:
|
|
4000
|
+
"""
|
|
4001
|
+
Scan a CWL object or collection and drop missing secondary files.
|
|
4002
|
+
"""
|
|
3888
4003
|
if isinstance(job_params, list):
|
|
3889
4004
|
for j in job_params:
|
|
4005
|
+
# Recurse on list entries
|
|
3890
4006
|
rm_unprocessed_secondary_files(j)
|
|
3891
|
-
if isinstance(job_params, dict)
|
|
4007
|
+
if isinstance(job_params, dict):
|
|
4008
|
+
for v in job_params.values():
|
|
4009
|
+
# Recurse on dict values (maybe a secondary file has its own
|
|
4010
|
+
# secondary files? Is that allowed?)
|
|
4011
|
+
rm_unprocessed_secondary_files(v)
|
|
4012
|
+
|
|
4013
|
+
if (
|
|
4014
|
+
isinstance(job_params, dict)
|
|
4015
|
+
and job_params.get("class", None) in ("File", "Directory")
|
|
4016
|
+
and "secondaryFiles" in job_params
|
|
4017
|
+
):
|
|
4018
|
+
# When we actually find a File or Directory (can directories have
|
|
4019
|
+
# these?) with secondary files, filter them.
|
|
3892
4020
|
job_params["secondaryFiles"] = filtered_secondary_files(job_params)
|
|
3893
4021
|
|
|
3894
4022
|
|
|
@@ -4048,8 +4176,8 @@ class NoAvailableJobStoreException(Exception):
|
|
|
4048
4176
|
|
|
4049
4177
|
|
|
4050
4178
|
def generate_default_job_store(
|
|
4051
|
-
batch_system_name:
|
|
4052
|
-
provisioner_name:
|
|
4179
|
+
batch_system_name: str | None,
|
|
4180
|
+
provisioner_name: str | None,
|
|
4053
4181
|
local_directory: str,
|
|
4054
4182
|
) -> str:
|
|
4055
4183
|
"""
|
|
@@ -4170,7 +4298,7 @@ def get_options(args: list[str]) -> Namespace:
|
|
|
4170
4298
|
return options
|
|
4171
4299
|
|
|
4172
4300
|
|
|
4173
|
-
def main(args:
|
|
4301
|
+
def main(args: list[str] | None = None, stdout: TextIO = sys.stdout) -> int:
|
|
4174
4302
|
"""Run the main loop for toil-cwl-runner."""
|
|
4175
4303
|
# Remove cwltool logger's stream handler so it uses Toil's
|
|
4176
4304
|
cwllogger.removeHandler(defaultStreamHandler)
|
|
@@ -4286,7 +4414,7 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
4286
4414
|
try:
|
|
4287
4415
|
|
|
4288
4416
|
# We might have workflow metadata to pass to Toil
|
|
4289
|
-
workflow_name=None
|
|
4417
|
+
workflow_name = None
|
|
4290
4418
|
trs_spec = None
|
|
4291
4419
|
|
|
4292
4420
|
if not options.restart:
|
|
@@ -4573,7 +4701,7 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
4573
4701
|
InvalidImportExportUrlException,
|
|
4574
4702
|
UnimplementedURLException,
|
|
4575
4703
|
JobTooBigError,
|
|
4576
|
-
FileNotFoundError
|
|
4704
|
+
FileNotFoundError,
|
|
4577
4705
|
) as err:
|
|
4578
4706
|
logging.error(err)
|
|
4579
4707
|
return 1
|
|
@@ -4583,7 +4711,7 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
4583
4711
|
|
|
4584
4712
|
def find_default_container(
|
|
4585
4713
|
args: Namespace, builder: cwltool.builder.Builder
|
|
4586
|
-
) ->
|
|
4714
|
+
) -> str | None:
|
|
4587
4715
|
"""Find the default container by consulting a Toil.options object."""
|
|
4588
4716
|
if args.default_container:
|
|
4589
4717
|
return str(args.default_container)
|