toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (197)
  1. toil/__init__.py +124 -86
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +137 -77
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
  5. toil/batchSystems/awsBatch.py +237 -128
  6. toil/batchSystems/cleanup_support.py +22 -16
  7. toil/batchSystems/contained_executor.py +30 -26
  8. toil/batchSystems/gridengine.py +85 -49
  9. toil/batchSystems/htcondor.py +164 -87
  10. toil/batchSystems/kubernetes.py +622 -386
  11. toil/batchSystems/local_support.py +17 -12
  12. toil/batchSystems/lsf.py +132 -79
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +288 -149
  16. toil/batchSystems/mesos/executor.py +77 -49
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +39 -29
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +293 -123
  21. toil/batchSystems/slurm.py +651 -155
  22. toil/batchSystems/torque.py +46 -32
  23. toil/bus.py +141 -73
  24. toil/common.py +784 -397
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1137 -534
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +62 -41
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +88 -57
  32. toil/fileStores/cachingFileStore.py +711 -247
  33. toil/fileStores/nonCachingFileStore.py +113 -75
  34. toil/job.py +1031 -349
  35. toil/jobStores/abstractJobStore.py +387 -243
  36. toil/jobStores/aws/jobStore.py +772 -412
  37. toil/jobStores/aws/utils.py +161 -109
  38. toil/jobStores/conftest.py +1 -0
  39. toil/jobStores/fileJobStore.py +289 -151
  40. toil/jobStores/googleJobStore.py +137 -70
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +614 -269
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +55 -28
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +204 -58
  49. toil/lib/aws/utils.py +290 -213
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +83 -49
  53. toil/lib/docker.py +131 -103
  54. toil/lib/dockstore.py +379 -0
  55. toil/lib/ec2.py +322 -209
  56. toil/lib/ec2nodes.py +174 -105
  57. toil/lib/encryption/_dummy.py +5 -3
  58. toil/lib/encryption/_nacl.py +10 -6
  59. toil/lib/encryption/conftest.py +1 -0
  60. toil/lib/exceptions.py +26 -7
  61. toil/lib/expando.py +4 -2
  62. toil/lib/ftp_utils.py +217 -0
  63. toil/lib/generatedEC2Lists.py +127 -19
  64. toil/lib/history.py +1271 -0
  65. toil/lib/history_submission.py +681 -0
  66. toil/lib/humanize.py +6 -2
  67. toil/lib/io.py +121 -12
  68. toil/lib/iterables.py +4 -2
  69. toil/lib/memoize.py +12 -8
  70. toil/lib/misc.py +83 -18
  71. toil/lib/objects.py +2 -2
  72. toil/lib/resources.py +19 -7
  73. toil/lib/retry.py +125 -87
  74. toil/lib/threading.py +282 -80
  75. toil/lib/throttle.py +15 -14
  76. toil/lib/trs.py +390 -0
  77. toil/lib/web.py +38 -0
  78. toil/options/common.py +850 -402
  79. toil/options/cwl.py +185 -90
  80. toil/options/runner.py +50 -0
  81. toil/options/wdl.py +70 -19
  82. toil/provisioners/__init__.py +111 -46
  83. toil/provisioners/abstractProvisioner.py +322 -157
  84. toil/provisioners/aws/__init__.py +62 -30
  85. toil/provisioners/aws/awsProvisioner.py +980 -627
  86. toil/provisioners/clusterScaler.py +541 -279
  87. toil/provisioners/gceProvisioner.py +283 -180
  88. toil/provisioners/node.py +147 -79
  89. toil/realtimeLogger.py +34 -22
  90. toil/resource.py +137 -75
  91. toil/server/app.py +127 -61
  92. toil/server/celery_app.py +3 -1
  93. toil/server/cli/wes_cwl_runner.py +84 -55
  94. toil/server/utils.py +56 -31
  95. toil/server/wes/abstract_backend.py +64 -26
  96. toil/server/wes/amazon_wes_utils.py +21 -15
  97. toil/server/wes/tasks.py +121 -63
  98. toil/server/wes/toil_backend.py +142 -107
  99. toil/server/wsgi_app.py +4 -3
  100. toil/serviceManager.py +58 -22
  101. toil/statsAndLogging.py +183 -65
  102. toil/test/__init__.py +263 -179
  103. toil/test/batchSystems/batchSystemTest.py +438 -195
  104. toil/test/batchSystems/batch_system_plugin_test.py +18 -7
  105. toil/test/batchSystems/test_gridengine.py +173 -0
  106. toil/test/batchSystems/test_lsf_helper.py +67 -58
  107. toil/test/batchSystems/test_slurm.py +265 -49
  108. toil/test/cactus/test_cactus_integration.py +20 -22
  109. toil/test/cwl/conftest.py +39 -0
  110. toil/test/cwl/cwlTest.py +375 -72
  111. toil/test/cwl/measure_default_memory.cwl +12 -0
  112. toil/test/cwl/not_run_required_input.cwl +29 -0
  113. toil/test/cwl/optional-file.cwl +18 -0
  114. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  115. toil/test/docs/scriptsTest.py +60 -34
  116. toil/test/jobStores/jobStoreTest.py +412 -235
  117. toil/test/lib/aws/test_iam.py +116 -48
  118. toil/test/lib/aws/test_s3.py +16 -9
  119. toil/test/lib/aws/test_utils.py +5 -6
  120. toil/test/lib/dockerTest.py +118 -141
  121. toil/test/lib/test_conversions.py +113 -115
  122. toil/test/lib/test_ec2.py +57 -49
  123. toil/test/lib/test_history.py +212 -0
  124. toil/test/lib/test_misc.py +12 -5
  125. toil/test/lib/test_trs.py +161 -0
  126. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  127. toil/test/mesos/helloWorld.py +7 -6
  128. toil/test/mesos/stress.py +25 -20
  129. toil/test/options/options.py +7 -2
  130. toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
  131. toil/test/provisioners/clusterScalerTest.py +440 -250
  132. toil/test/provisioners/clusterTest.py +81 -42
  133. toil/test/provisioners/gceProvisionerTest.py +174 -100
  134. toil/test/provisioners/provisionerTest.py +25 -13
  135. toil/test/provisioners/restartScript.py +5 -4
  136. toil/test/server/serverTest.py +188 -141
  137. toil/test/sort/restart_sort.py +137 -68
  138. toil/test/sort/sort.py +134 -66
  139. toil/test/sort/sortTest.py +91 -49
  140. toil/test/src/autoDeploymentTest.py +140 -100
  141. toil/test/src/busTest.py +20 -18
  142. toil/test/src/checkpointTest.py +8 -2
  143. toil/test/src/deferredFunctionTest.py +49 -35
  144. toil/test/src/dockerCheckTest.py +33 -26
  145. toil/test/src/environmentTest.py +20 -10
  146. toil/test/src/fileStoreTest.py +538 -271
  147. toil/test/src/helloWorldTest.py +7 -4
  148. toil/test/src/importExportFileTest.py +61 -31
  149. toil/test/src/jobDescriptionTest.py +32 -17
  150. toil/test/src/jobEncapsulationTest.py +2 -0
  151. toil/test/src/jobFileStoreTest.py +74 -50
  152. toil/test/src/jobServiceTest.py +187 -73
  153. toil/test/src/jobTest.py +120 -70
  154. toil/test/src/miscTests.py +19 -18
  155. toil/test/src/promisedRequirementTest.py +82 -36
  156. toil/test/src/promisesTest.py +7 -6
  157. toil/test/src/realtimeLoggerTest.py +6 -6
  158. toil/test/src/regularLogTest.py +71 -37
  159. toil/test/src/resourceTest.py +80 -49
  160. toil/test/src/restartDAGTest.py +36 -22
  161. toil/test/src/resumabilityTest.py +9 -2
  162. toil/test/src/retainTempDirTest.py +45 -14
  163. toil/test/src/systemTest.py +12 -8
  164. toil/test/src/threadingTest.py +44 -25
  165. toil/test/src/toilContextManagerTest.py +10 -7
  166. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  167. toil/test/src/workerTest.py +33 -16
  168. toil/test/utils/toilDebugTest.py +70 -58
  169. toil/test/utils/toilKillTest.py +4 -5
  170. toil/test/utils/utilsTest.py +239 -102
  171. toil/test/wdl/wdltoil_test.py +789 -148
  172. toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
  173. toil/toilState.py +52 -26
  174. toil/utils/toilConfig.py +13 -4
  175. toil/utils/toilDebugFile.py +44 -27
  176. toil/utils/toilDebugJob.py +85 -25
  177. toil/utils/toilDestroyCluster.py +11 -6
  178. toil/utils/toilKill.py +8 -3
  179. toil/utils/toilLaunchCluster.py +251 -145
  180. toil/utils/toilMain.py +37 -16
  181. toil/utils/toilRsyncCluster.py +27 -14
  182. toil/utils/toilSshCluster.py +45 -22
  183. toil/utils/toilStats.py +75 -36
  184. toil/utils/toilStatus.py +226 -119
  185. toil/utils/toilUpdateEC2Instances.py +3 -1
  186. toil/version.py +6 -6
  187. toil/wdl/utils.py +5 -5
  188. toil/wdl/wdltoil.py +3528 -1053
  189. toil/worker.py +370 -149
  190. toil-8.1.0b1.dist-info/METADATA +178 -0
  191. toil-8.1.0b1.dist-info/RECORD +259 -0
  192. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
  193. toil-7.0.0.dist-info/METADATA +0 -158
  194. toil-7.0.0.dist-info/RECORD +0 -244
  195. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
  196. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
  197. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Implemented support for Common Workflow Language (CWL) for Toil."""
2
+
2
3
  # Copyright (C) 2015 Curoverse, Inc
3
4
  # Copyright (C) 2015-2021 Regents of the University of California
4
5
  # Copyright (C) 2019-2020 Seven Bridges
@@ -33,25 +34,26 @@ import stat
33
34
  import sys
34
35
  import textwrap
35
36
  import uuid
37
+ from collections.abc import Iterator, Mapping, MutableMapping, MutableSequence
36
38
  from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
37
39
  from threading import Thread
38
- from typing import (IO,
39
- Any,
40
- Callable,
41
- Dict,
42
- Iterator,
43
- List,
44
- Mapping,
45
- MutableMapping,
46
- MutableSequence,
47
- Optional,
48
- Sequence,
49
- TextIO,
50
- Tuple,
51
- Type,
52
- TypeVar,
53
- Union,
54
- cast)
40
+ from typing import (
41
+ IO,
42
+ Any,
43
+ Callable,
44
+ Iterator,
45
+ Mapping,
46
+ MutableMapping,
47
+ MutableSequence,
48
+ Optional,
49
+ TextIO,
50
+ Tuple,
51
+ TypeVar,
52
+ Union,
53
+ cast,
54
+ Literal,
55
+ Protocol,
56
+ )
55
57
  from urllib.parse import quote, unquote, urlparse, urlsplit
56
58
 
57
59
  import cwl_utils.errors
@@ -65,59 +67,87 @@ import cwltool.load_tool
65
67
  import cwltool.main
66
68
  import cwltool.resolver
67
69
  import schema_salad.ref_resolver
70
+
71
+ # This is also in configargparse but MyPy doesn't know it
72
+ from argparse import RawDescriptionHelpFormatter
68
73
  from configargparse import ArgParser, Namespace
69
74
  from cwltool.loghandler import _logger as cwllogger
70
75
  from cwltool.loghandler import defaultStreamHandler
71
76
  from cwltool.mpi import MpiConfig
72
77
  from cwltool.mutation import MutationManager
73
78
  from cwltool.pathmapper import MapperEnt, PathMapper
74
- from cwltool.process import (Process,
75
- add_sizes,
76
- compute_checksums,
77
- fill_in_defaults,
78
- shortname)
79
+ from cwltool.process import (
80
+ Process,
81
+ add_sizes,
82
+ compute_checksums,
83
+ fill_in_defaults,
84
+ shortname,
85
+ )
79
86
  from cwltool.secrets import SecretStore
80
- from cwltool.software_requirements import (DependenciesConfiguration,
81
- get_container_from_software_requirements)
87
+ from cwltool.singularity import SingularityCommandLineJob
88
+ from cwltool.software_requirements import (
89
+ DependenciesConfiguration,
90
+ get_container_from_software_requirements,
91
+ )
82
92
  from cwltool.stdfsaccess import StdFsAccess, abspath
83
- from cwltool.utils import (CWLObjectType,
84
- CWLOutputType,
85
- DirectoryType,
86
- adjustDirObjs,
87
- aslist,
88
- downloadHttpFile,
89
- get_listing,
90
- normalizeFilesDirs,
91
- visit_class)
93
+ from cwltool.utils import (
94
+ CWLObjectType,
95
+ CWLOutputType,
96
+ DirectoryType,
97
+ adjustDirObjs,
98
+ aslist,
99
+ downloadHttpFile,
100
+ get_listing,
101
+ normalizeFilesDirs,
102
+ visit_class,
103
+ )
92
104
  from ruamel.yaml.comments import CommentedMap, CommentedSeq
93
105
  from schema_salad.avro.schema import Names
94
106
  from schema_salad.exceptions import ValidationException
95
107
  from schema_salad.ref_resolver import file_uri, uri_file_path
96
108
  from schema_salad.sourceline import SourceLine
97
- from typing_extensions import Literal
98
109
 
99
110
  from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
100
111
  from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
101
- from toil.common import Toil, addOptions
112
+ from toil.common import Config, Toil, addOptions
102
113
  from toil.cwl import check_cwltool_version
114
+ from toil.lib.trs import resolve_workflow
115
+ from toil.lib.misc import call_command
103
116
  from toil.provisioners.clusterScaler import JobTooBigError
104
117
 
105
118
  check_cwltool_version()
106
- from toil.cwl.utils import (CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
107
- CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
108
- download_structure,
109
- get_from_structure,
110
- visit_cwl_class_and_reduce)
119
+ from toil.cwl.utils import (
120
+ CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
121
+ CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
122
+ download_structure,
123
+ get_from_structure,
124
+ visit_cwl_class_and_reduce,
125
+ )
111
126
  from toil.exceptions import FailedJobsException
112
127
  from toil.fileStores import FileID
113
128
  from toil.fileStores.abstractFileStore import AbstractFileStore
114
- from toil.job import AcceleratorRequirement, Job, Promise, Promised, unwrap
115
- from toil.jobStores.abstractJobStore import (AbstractJobStore, NoSuchFileException, LocatorException,
116
- InvalidImportExportUrlException, UnimplementedURLException)
129
+ from toil.job import (
130
+ AcceleratorRequirement,
131
+ Job,
132
+ Promise,
133
+ Promised,
134
+ unwrap,
135
+ ImportsJob,
136
+ get_file_sizes,
137
+ FileMetadata,
138
+ WorkerImportJob,
139
+ )
140
+ from toil.jobStores.abstractJobStore import (
141
+ AbstractJobStore,
142
+ NoSuchFileException,
143
+ InvalidImportExportUrlException,
144
+ LocatorException,
145
+ )
146
+ from toil.lib.exceptions import UnimplementedURLException
117
147
  from toil.jobStores.fileJobStore import FileJobStore
118
148
  from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
119
149
  from toil.lib.io import mkdtemp
120
- from toil.lib.threading import ExceptionalThread
150
+ from toil.lib.threading import ExceptionalThread, global_mutex
121
151
  from toil.statsAndLogging import DEFAULT_LOGLEVEL
122
152
 
123
153
  logger = logging.getLogger(__name__)
@@ -149,7 +179,7 @@ def cwltoil_was_removed() -> None:
149
179
  # output object to the correct key of the input object.
150
180
 
151
181
 
152
- class UnresolvedDict(Dict[Any, Any]):
182
+ class UnresolvedDict(dict[Any, Any]):
153
183
  """Tag to indicate a dict contains promises that must be resolved."""
154
184
 
155
185
 
@@ -184,7 +214,7 @@ def filter_skip_null(name: str, value: Any) -> Any:
184
214
  return value
185
215
 
186
216
 
187
- def _filter_skip_null(value: Any, err_flag: List[bool]) -> Any:
217
+ def _filter_skip_null(value: Any, err_flag: list[bool]) -> Any:
188
218
  """
189
219
  Private implementation for recursively filtering out SkipNull objects from 'value'.
190
220
 
@@ -233,18 +263,50 @@ def ensure_no_collisions(
233
263
  seen_names.add(wanted_name)
234
264
 
235
265
 
266
+ def try_prepull(
267
+ cwl_tool_uri: str, runtime_context: cwltool.context.RuntimeContext, batchsystem: str
268
+ ) -> None:
269
+ """
270
+ Try to prepull all containers in a CWL workflow with Singularity or Docker.
271
+ This will not prepull the default container specified on the command line.
272
+ :param cwl_tool_uri: CWL workflow URL. Fragments are accepted as well
273
+ :param runtime_context: runtime context of cwltool
274
+ :param batchsystem: type of Toil batchsystem
275
+ :return:
276
+ """
277
+ if runtime_context.singularity:
278
+ if "CWL_SINGULARITY_CACHE" in os.environ:
279
+ logger.info("Prepulling the workflow's containers with Singularity...")
280
+ call_command(
281
+ [
282
+ "cwl-docker-extract",
283
+ "--singularity",
284
+ "--dir",
285
+ os.environ["CWL_SINGULARITY_CACHE"],
286
+ cwl_tool_uri,
287
+ ]
288
+ )
289
+ elif not runtime_context.user_space_docker_cmd and not runtime_context.podman:
290
+ # For udocker and podman prefetching is unimplemented
291
+ # This is docker
292
+ if batchsystem == "single_machine":
293
+ # Only on single machine will the docker daemon be accessible by all workers and the leader
294
+ logger.info("Prepulling the workflow's containers with Docker...")
295
+ call_command(["cwl-docker-extract", cwl_tool_uri])
296
+
297
+
236
298
  class Conditional:
237
299
  """
238
300
  Object holding conditional expression until we are ready to evaluate it.
239
301
 
240
- Evaluation occurs at the moment the encloses step is ready to run.
302
+ Evaluation occurs before the enclosing step's inputs are type-checked.
241
303
  """
242
304
 
243
305
  def __init__(
244
306
  self,
245
307
  expression: Optional[str] = None,
246
- outputs: Union[Dict[str, CWLOutputType], None] = None,
247
- requirements: Optional[List[CWLObjectType]] = None,
308
+ outputs: Union[dict[str, CWLOutputType], None] = None,
309
+ requirements: Optional[list[CWLObjectType]] = None,
248
310
  container_engine: str = "docker",
249
311
  ):
250
312
  """
@@ -289,7 +351,7 @@ class Conditional:
289
351
  "'%s' evaluated to a non-boolean value" % self.expression
290
352
  )
291
353
 
292
- def skipped_outputs(self) -> Dict[str, SkipNull]:
354
+ def skipped_outputs(self) -> dict[str, SkipNull]:
293
355
  """Generate a dict of SkipNull objects corresponding to the output structure."""
294
356
  outobj = {}
295
357
 
@@ -309,14 +371,14 @@ class Conditional:
309
371
  class ResolveSource:
310
372
  """Apply linkMerge and pickValue operators to values coming into a port."""
311
373
 
312
- promise_tuples: Union[List[Tuple[str, Promise]], Tuple[str, Promise]]
374
+ promise_tuples: Union[list[tuple[str, Promise]], tuple[str, Promise]]
313
375
 
314
376
  def __init__(
315
377
  self,
316
378
  name: str,
317
- input: Dict[str, CWLObjectType],
379
+ input: dict[str, CWLObjectType],
318
380
  source_key: str,
319
- promises: Dict[str, Job],
381
+ promises: dict[str, Job],
320
382
  ):
321
383
  """
322
384
  Construct a container object.
@@ -375,7 +437,7 @@ class ResolveSource:
375
437
  )
376
438
  else:
377
439
  name, rv = self.promise_tuples
378
- result = cast(Dict[str, Any], rv).get(name)
440
+ result = cast(dict[str, Any], rv).get(name)
379
441
 
380
442
  result = self.pick_value(result)
381
443
  result = filter_skip_null(self.name, result)
@@ -383,7 +445,7 @@ class ResolveSource:
383
445
 
384
446
  def link_merge(
385
447
  self, values: CWLObjectType
386
- ) -> Union[List[CWLOutputType], CWLOutputType]:
448
+ ) -> Union[list[CWLOutputType], CWLOutputType]:
387
449
  """
388
450
  Apply linkMerge operator to `values` object.
389
451
 
@@ -396,7 +458,7 @@ class ResolveSource:
396
458
  return values
397
459
 
398
460
  elif link_merge_type == "merge_flattened":
399
- result: List[CWLOutputType] = []
461
+ result: list[CWLOutputType] = []
400
462
  for v in values:
401
463
  if isinstance(v, MutableSequence):
402
464
  result.extend(v)
@@ -409,7 +471,7 @@ class ResolveSource:
409
471
  f"Unsupported linkMerge '{link_merge_type}' on {self.name}."
410
472
  )
411
473
 
412
- def pick_value(self, values: Union[List[Union[str, SkipNull]], Any]) -> Any:
474
+ def pick_value(self, values: Union[list[Union[str, SkipNull]], Any]) -> Any:
413
475
  """
414
476
  Apply pickValue operator to `values` object.
415
477
 
@@ -477,7 +539,7 @@ class StepValueFrom:
477
539
  """
478
540
 
479
541
  def __init__(
480
- self, expr: str, source: Any, req: List[CWLObjectType], container_engine: str
542
+ self, expr: str, source: Any, req: list[CWLObjectType], container_engine: str
481
543
  ):
482
544
  """
483
545
  Instantiate an object to carry all know about this valueFrom expression.
@@ -609,7 +671,7 @@ class JustAValue:
609
671
 
610
672
  def resolve_dict_w_promises(
611
673
  dict_w_promises: Union[
612
- UnresolvedDict, CWLObjectType, Dict[str, Union[str, StepValueFrom]]
674
+ UnresolvedDict, CWLObjectType, dict[str, Union[str, StepValueFrom]]
613
675
  ],
614
676
  file_store: Optional[AbstractFileStore] = None,
615
677
  ) -> CWLObjectType:
@@ -664,7 +726,7 @@ class ToilPathMapper(PathMapper):
664
726
 
665
727
  def __init__(
666
728
  self,
667
- referenced_files: List[CWLObjectType],
729
+ referenced_files: list[CWLObjectType],
668
730
  basedir: str,
669
731
  stagedir: str,
670
732
  separateDirs: bool = True,
@@ -779,19 +841,44 @@ class ToilPathMapper(PathMapper):
779
841
  # TODO: why would we do that?
780
842
  stagedir = cast(Optional[str], obj.get("dirname")) or stagedir
781
843
 
782
- # Decide where to put the file or directory, as an absolute path.
783
- tgt = os.path.join(
784
- stagedir,
785
- cast(str, obj["basename"]),
786
- )
844
+ if obj["class"] not in ("File", "Directory"):
845
+ # We only handle files and directories; only they have locations.
846
+ return
847
+
848
+ location = cast(str, obj["location"])
849
+ if location in self:
850
+ # If we've already mapped this, map it consistently.
851
+ tgt = self._pathmap[location].target
852
+ logger.debug(
853
+ "ToilPathMapper re-using target %s for path %s",
854
+ tgt,
855
+ location,
856
+ )
857
+ else:
858
+ # Decide where to put the file or directory, as an absolute path.
859
+ tgt = os.path.join(
860
+ stagedir,
861
+ cast(str, obj["basename"]),
862
+ )
863
+ if self.reversemap(tgt) is not None:
864
+ # If the target already exists in the pathmap, but we haven't yet
865
+ # mapped this, it means we have a conflict.
866
+ i = 2
867
+ new_tgt = f"{tgt}_{i}"
868
+ while self.reversemap(new_tgt) is not None:
869
+ i += 1
870
+ new_tgt = f"{tgt}_{i}"
871
+ logger.debug(
872
+ "ToilPathMapper resolving mapping conflict: %s is now %s",
873
+ tgt,
874
+ new_tgt,
875
+ )
876
+ tgt = new_tgt
787
877
 
788
878
  if obj["class"] == "Directory":
789
879
  # Whether or not we've already mapped this path, we need to map all
790
880
  # children recursively.
791
881
 
792
- # Grab its location
793
- location = cast(str, obj["location"])
794
-
795
882
  logger.debug("ToilPathMapper visiting directory %s", location)
796
883
 
797
884
  # We want to check the directory to make sure it is not
@@ -877,7 +964,7 @@ class ToilPathMapper(PathMapper):
877
964
 
878
965
  # Keep recursing
879
966
  self.visitlisting(
880
- cast(List[CWLObjectType], obj.get("listing", [])),
967
+ cast(list[CWLObjectType], obj.get("listing", [])),
881
968
  tgt,
882
969
  basedir,
883
970
  copy=copy,
@@ -885,23 +972,21 @@ class ToilPathMapper(PathMapper):
885
972
  )
886
973
 
887
974
  elif obj["class"] == "File":
888
- path = cast(str, obj["location"])
975
+ logger.debug("ToilPathMapper visiting file %s", location)
889
976
 
890
- logger.debug("ToilPathMapper visiting file %s", path)
891
-
892
- if path in self._pathmap:
977
+ if location in self._pathmap:
893
978
  # Don't map the same file twice
894
979
  logger.debug(
895
980
  "ToilPathMapper stopping recursion because we have already "
896
981
  "mapped file: %s",
897
- path,
982
+ location,
898
983
  )
899
984
  return
900
985
 
901
- ab = abspath(path, basedir)
902
- if "contents" in obj and path.startswith("_:"):
986
+ ab = abspath(location, basedir)
987
+ if "contents" in obj and location.startswith("_:"):
903
988
  # We are supposed to create this file
904
- self._pathmap[path] = MapperEnt(
989
+ self._pathmap[location] = MapperEnt(
905
990
  cast(str, obj["contents"]),
906
991
  tgt,
907
992
  "CreateWritableFile" if copy else "CreateFile",
@@ -919,14 +1004,16 @@ class ToilPathMapper(PathMapper):
919
1004
  # URI for a local file it downloaded.
920
1005
  if self.get_file:
921
1006
  deref = self.get_file(
922
- path, obj.get("streamable", False), self.streaming_allowed
1007
+ location,
1008
+ obj.get("streamable", False),
1009
+ self.streaming_allowed,
923
1010
  )
924
1011
  else:
925
1012
  deref = ab
926
1013
  if deref.startswith("file:"):
927
1014
  deref = schema_salad.ref_resolver.uri_file_path(deref)
928
1015
  if urlsplit(deref).scheme in ["http", "https"]:
929
- deref = downloadHttpFile(path)
1016
+ deref = downloadHttpFile(location)
930
1017
  elif urlsplit(deref).scheme != "toilfile":
931
1018
  # Dereference symbolic links
932
1019
  st = os.lstat(deref)
@@ -944,42 +1031,18 @@ class ToilPathMapper(PathMapper):
944
1031
  # reference, we just pass that along.
945
1032
 
946
1033
  """Link or copy files to their targets. Create them as needed."""
947
- targets: Dict[str, str] = {}
948
- for _, value in self._pathmap.items():
949
- # If the target already exists in the pathmap, it means we have a conflict. But we didn't change tgt to reflect new name.
950
- if value.target == tgt: # Conflict detected in the pathmap
951
- i = 2
952
- new_tgt = f"{tgt}_{i}"
953
- while new_tgt in targets:
954
- i += 1
955
- new_tgt = f"{tgt}_{i}"
956
- targets[new_tgt] = new_tgt
957
-
958
- for _, value_conflict in targets.items():
959
- logger.debug(
960
- "ToilPathMapper adding file mapping for conflict %s -> %s",
961
- deref,
962
- value_conflict,
963
- )
964
- self._pathmap[path] = MapperEnt(
965
- deref,
966
- value_conflict,
967
- "WritableFile" if copy else "File",
968
- staged,
969
- )
970
- # No conflicts detected so we can write out the original name.
971
- if not targets:
972
- logger.debug(
973
- "ToilPathMapper adding file mapping %s -> %s", deref, tgt
974
- )
975
1034
 
976
- self._pathmap[path] = MapperEnt(
977
- deref, tgt, "WritableFile" if copy else "File", staged
978
- )
1035
+ logger.debug(
1036
+ "ToilPathMapper adding file mapping %s -> %s", deref, tgt
1037
+ )
1038
+
1039
+ self._pathmap[location] = MapperEnt(
1040
+ deref, tgt, "WritableFile" if copy else "File", staged
1041
+ )
979
1042
 
980
1043
  # Handle all secondary files that need to be next to this one.
981
1044
  self.visitlisting(
982
- cast(List[CWLObjectType], obj.get("secondaryFiles", [])),
1045
+ cast(list[CWLObjectType], obj.get("secondaryFiles", [])),
983
1046
  stagedir,
984
1047
  basedir,
985
1048
  copy=copy,
@@ -1005,6 +1068,32 @@ class ToilSingleJobExecutor(cwltool.executors.SingleJobExecutor):
1005
1068
  ) -> None:
1006
1069
  """run_jobs from SingleJobExecutor, but not in a top level runtime context."""
1007
1070
  runtime_context.toplevel = False
1071
+ if isinstance(
1072
+ process, cwltool.command_line_tool.CommandLineTool
1073
+ ) and isinstance(
1074
+ process.make_job_runner(runtime_context), SingularityCommandLineJob
1075
+ ):
1076
+ # Set defaults for singularity cache environment variables, similar to what we do in wdltoil
1077
+ # Use the same place as the default singularity cache directory
1078
+ singularity_cache = os.path.join(os.path.expanduser("~"), ".singularity")
1079
+ os.environ["SINGULARITY_CACHEDIR"] = os.environ.get(
1080
+ "SINGULARITY_CACHEDIR", singularity_cache
1081
+ )
1082
+
1083
+ # If singularity is detected, prepull the image to ensure locking
1084
+ (docker_req, docker_is_req) = process.get_requirement(
1085
+ feature="DockerRequirement"
1086
+ )
1087
+ with global_mutex(
1088
+ os.environ["SINGULARITY_CACHEDIR"], "toil_singularity_cache_mutex"
1089
+ ):
1090
+ SingularityCommandLineJob.get_image(
1091
+ dockerRequirement=cast(dict[str, str], docker_req),
1092
+ pull_image=runtime_context.pull_image,
1093
+ force_pull=runtime_context.force_docker_pull,
1094
+ tmp_outdir_prefix=runtime_context.tmp_outdir_prefix,
1095
+ )
1096
+
1008
1097
  return super().run_jobs(process, job_order_object, logger, runtime_context)
1009
1098
 
1010
1099
 
@@ -1019,7 +1108,7 @@ class ToilTool:
1019
1108
  # Reserve a spot for the Toil job that ends up executing this tool.
1020
1109
  self._toil_job: Optional[Job] = None
1021
1110
  # Remember path mappers we have used so we can interrogate them later to find out what the job mapped.
1022
- self._path_mappers: List[cwltool.pathmapper.PathMapper] = []
1111
+ self._path_mappers: list[cwltool.pathmapper.PathMapper] = []
1023
1112
 
1024
1113
  def connect_toil_job(self, job: Job) -> None:
1025
1114
  """
@@ -1031,7 +1120,7 @@ class ToilTool:
1031
1120
 
1032
1121
  def make_path_mapper(
1033
1122
  self,
1034
- reffiles: List[Any],
1123
+ reffiles: list[Any],
1035
1124
  stagedir: str,
1036
1125
  runtimeContext: cwltool.context.RuntimeContext,
1037
1126
  separateDirs: bool,
@@ -1089,13 +1178,15 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
1089
1178
  # Make a table of all the places we mapped files to when downloading the inputs.
1090
1179
 
1091
1180
  # We want to hint which host paths and container (if any) paths correspond
1092
- host_and_job_paths: List[Tuple[str, str]] = []
1181
+ host_and_job_paths: list[tuple[str, str]] = []
1093
1182
 
1094
1183
  for pm in self._path_mappers:
1095
1184
  for _, mapper_entry in pm.items_exclude_children():
1096
1185
  # We know that mapper_entry.target as seen by the task is
1097
1186
  # mapper_entry.resolved on the host.
1098
- host_and_job_paths.append((mapper_entry.resolved, mapper_entry.target))
1187
+ host_and_job_paths.append(
1188
+ (mapper_entry.resolved, mapper_entry.target)
1189
+ )
1099
1190
 
1100
1191
  # Notice that we have downloaded our inputs. Explain which files
1101
1192
  # those are here and what the task will expect to call them.
@@ -1123,11 +1214,11 @@ def toil_make_tool(
1123
1214
  return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)
1124
1215
 
1125
1216
 
1126
- # When a file we want to have is missing, we can give it this sentinal location
1217
+ # When a file we want to have is missing, we can give it this sentinel location
1127
1218
  # URI instead of raising an error right away, in case it is optional.
1128
1219
  MISSING_FILE = "missing://"
1129
1220
 
1130
- DirectoryContents = Dict[str, Union[str, "DirectoryContents"]]
1221
+ DirectoryContents = dict[str, Union[str, "DirectoryContents"]]
1131
1222
 
1132
1223
 
1133
1224
  def check_directory_dict_invariants(contents: DirectoryContents) -> None:
@@ -1149,7 +1240,7 @@ def check_directory_dict_invariants(contents: DirectoryContents) -> None:
1149
1240
 
1150
1241
  def decode_directory(
1151
1242
  dir_path: str,
1152
- ) -> Tuple[DirectoryContents, Optional[str], str]:
1243
+ ) -> tuple[DirectoryContents, Optional[str], str]:
1153
1244
  """
1154
1245
  Decode a directory from a "toildir:" path to a directory (or a file in it).
1155
1246
 
@@ -1224,7 +1315,7 @@ class ToilFsAccess(StdFsAccess):
1224
1315
  # they know what will happen.
1225
1316
  # Also maps files and directories from external URLs to downloaded
1226
1317
  # locations.
1227
- self.dir_to_download: Dict[str, str] = {}
1318
+ self.dir_to_download: dict[str, str] = {}
1228
1319
 
1229
1320
  super().__init__(basedir)
1230
1321
 
@@ -1347,14 +1438,16 @@ class ToilFsAccess(StdFsAccess):
1347
1438
  destination = super()._abs(destination)
1348
1439
  return destination
1349
1440
 
1350
- def glob(self, pattern: str) -> List[str]:
1441
+ def glob(self, pattern: str) -> list[str]:
1351
1442
  parse = urlparse(pattern)
1352
1443
  if parse.scheme == "file":
1353
1444
  pattern = os.path.abspath(unquote(parse.path))
1354
1445
  elif parse.scheme == "":
1355
1446
  pattern = os.path.abspath(pattern)
1356
1447
  else:
1357
- raise RuntimeError(f"Cannot efficiently support globbing on {parse.scheme} URIs")
1448
+ raise RuntimeError(
1449
+ f"Cannot efficiently support globbing on {parse.scheme} URIs"
1450
+ )
1358
1451
 
1359
1452
  # Actually do the glob
1360
1453
  return [schema_salad.ref_resolver.file_uri(f) for f in glob.glob(pattern)]
@@ -1391,12 +1484,12 @@ class ToilFsAccess(StdFsAccess):
1391
1484
  else:
1392
1485
  # This should be supported by a job store.
1393
1486
  byte_stream = AbstractJobStore.open_url(fn)
1394
- if 'b' in mode:
1487
+ if "b" in mode:
1395
1488
  # Pass stream along in binary
1396
1489
  return byte_stream
1397
1490
  else:
1398
1491
  # Wrap it in a text decoder
1399
- return io.TextIOWrapper(byte_stream, encoding='utf-8')
1492
+ return io.TextIOWrapper(byte_stream, encoding="utf-8")
1400
1493
 
1401
1494
  def exists(self, path: str) -> bool:
1402
1495
  """Test for file existence."""
@@ -1503,7 +1596,7 @@ class ToilFsAccess(StdFsAccess):
1503
1596
  logger.debug("AbstractJobStore said: %s", status)
1504
1597
  return status
1505
1598
 
1506
- def listdir(self, fn: str) -> List[str]:
1599
+ def listdir(self, fn: str) -> list[str]:
1507
1600
  # This needs to return full URLs for everything in the directory.
1508
1601
  # URLs are not allowed to end in '/', even for subdirectories.
1509
1602
  logger.debug("ToilFsAccess listing %s", fn)
@@ -1524,7 +1617,9 @@ class ToilFsAccess(StdFsAccess):
1524
1617
  if got is None:
1525
1618
  raise RuntimeError(f"Cannot list nonexistent directory: {fn}")
1526
1619
  if isinstance(got, str):
1527
- raise RuntimeError(f"Cannot list file or dubdirectory of a file: {fn}")
1620
+ raise RuntimeError(
1621
+ f"Cannot list file or dubdirectory of a file: {fn}"
1622
+ )
1528
1623
  here = got
1529
1624
  # List all the things in here and make full URIs to them
1530
1625
  return [os.path.join(fn, k) for k in here.keys()]
@@ -1534,7 +1629,7 @@ class ToilFsAccess(StdFsAccess):
1534
1629
  for entry in AbstractJobStore.list_url(fn)
1535
1630
  ]
1536
1631
 
1537
- def join(self, path, *paths): # type: (str, *str) -> str
1632
+ def join(self, path: str, *paths: str) -> str:
1538
1633
  # This falls back on os.path.join
1539
1634
  return super().join(path, *paths)
1540
1635
 
@@ -1547,12 +1642,12 @@ class ToilFsAccess(StdFsAccess):
1547
1642
 
1548
1643
  def toil_get_file(
1549
1644
  file_store: AbstractFileStore,
1550
- index: Dict[str, str],
1551
- existing: Dict[str, str],
1645
+ index: dict[str, str],
1646
+ existing: dict[str, str],
1552
1647
  uri: str,
1553
1648
  streamable: bool = False,
1554
1649
  streaming_allowed: bool = True,
1555
- pipe_threads: Optional[List[Tuple[Thread, int]]] = None,
1650
+ pipe_threads: Optional[list[tuple[Thread, int]]] = None,
1556
1651
  ) -> str:
1557
1652
  """
1558
1653
  Set up the given file or directory from the Toil jobstore at a file URI
@@ -1653,9 +1748,7 @@ def toil_get_file(
1653
1748
  and streamable
1654
1749
  and not isinstance(file_store.jobStore, FileJobStore)
1655
1750
  ):
1656
- logger.debug(
1657
- "Streaming file %s", uri
1658
- )
1751
+ logger.debug("Streaming file %s", uri)
1659
1752
  src_path = file_store.getLocalTempFileName()
1660
1753
  os.mkfifo(src_path)
1661
1754
  th = ExceptionalThread(
@@ -1677,34 +1770,35 @@ def toil_get_file(
1677
1770
  if uri.startswith("toilfile:"):
1678
1771
  # Download from the file store
1679
1772
  file_store_id = FileID.unpack(uri[len("toilfile:") :])
1680
- src_path = file_store.readGlobalFile(
1681
- file_store_id, symlink=True
1682
- )
1773
+ src_path = file_store.readGlobalFile(file_store_id, symlink=True)
1683
1774
  else:
1684
1775
  # Download from the URI via the job store.
1685
1776
 
1686
1777
  # Figure out where it goes.
1687
1778
  src_path = file_store.getLocalTempFileName()
1688
1779
  # Open that path exclusively to make sure we created it
1689
- with open(src_path, 'xb') as fh:
1780
+ with open(src_path, "xb") as fh:
1690
1781
  # Download into the file
1691
- size, executable = AbstractJobStore.read_from_url(uri, fh)
1692
- if executable:
1693
- # Set the execute bit in the file's permissions
1694
- os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
1782
+ size, executable = AbstractJobStore.read_from_url(uri, fh)
1783
+ if executable:
1784
+ # Set the execute bit in the file's permissions
1785
+ os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
1695
1786
 
1696
1787
  index[src_path] = uri
1697
1788
  existing[uri] = src_path
1698
1789
  return schema_salad.ref_resolver.file_uri(src_path)
1699
1790
 
1700
- def write_file(
1701
- writeFunc: Callable[[str], FileID],
1702
- index: Dict[str, str],
1703
- existing: Dict[str, str],
1791
+
1792
+ def convert_file_uri_to_toil_uri(
1793
+ applyFunc: Callable[[str], FileID],
1794
+ index: dict[str, str],
1795
+ existing: dict[str, str],
1704
1796
  file_uri: str,
1705
1797
  ) -> str:
1706
1798
  """
1707
- Write a file into the Toil jobstore.
1799
+ Given a file URI, convert it to a toil file URI. Uses applyFunc to handle the conversion.
1800
+
1801
+ Runs once on every unique file URI.
1708
1802
 
1709
1803
  'existing' is a set of files retrieved as inputs from toil_get_file. This
1710
1804
  ensures they are mapped back as the same name if passed through.
@@ -1718,15 +1812,14 @@ def write_file(
1718
1812
  # with unsupportedRequirement when retrieving later with getFile
1719
1813
  elif file_uri.startswith("_:"):
1720
1814
  return file_uri
1815
+ elif file_uri.startswith(MISSING_FILE):
1816
+ # We cannot import a missing file
1817
+ raise FileNotFoundError(f"Could not find {file_uri[len(MISSING_FILE):]}")
1721
1818
  else:
1722
1819
  file_uri = existing.get(file_uri, file_uri)
1723
1820
  if file_uri not in index:
1724
- if not urlparse(file_uri).scheme:
1725
- rp = os.path.realpath(file_uri)
1726
- else:
1727
- rp = file_uri
1728
1821
  try:
1729
- index[file_uri] = "toilfile:" + writeFunc(rp).pack()
1822
+ index[file_uri] = "toilfile:" + applyFunc(file_uri).pack()
1730
1823
  existing[index[file_uri]] = file_uri
1731
1824
  except Exception as e:
1732
1825
  logger.error("Got exception '%s' while copying '%s'", e, file_uri)
@@ -1745,17 +1838,93 @@ def path_to_loc(obj: CWLObjectType) -> None:
1745
1838
  del obj["path"]
1746
1839
 
1747
1840
 
1748
- def import_files(
1749
- import_function: Callable[[str], FileID],
1841
+ def extract_file_uri_once(
1842
+ fileindex: dict[str, str],
1843
+ existing: dict[str, str],
1844
+ file_metadata: CWLObjectType,
1845
+ mark_broken: bool = False,
1846
+ skip_remote: bool = False,
1847
+ ) -> Optional[str]:
1848
+ """
1849
+ Extract the filename from a CWL file record.
1850
+
1851
+ This function matches the predefined function signature in visit_files, which ensures
1852
+ that this function is called on all files inside a CWL object.
1853
+
1854
+ Ensures no duplicate files are returned according to fileindex. If a file has not been resolved already (and had file:// prepended)
1855
+ then resolve symlinks.
1856
+ :param fileindex: Forward mapping of filename
1857
+ :param existing: Reverse mapping of filename. This function does not use this
1858
+ :param file_metadata: CWL file record
1859
+ :param mark_broken: Whether files should be marked as missing
1860
+ :param skip_remote: Whether to skip remote files
1861
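+ :return: The file URI to import (with symlinks resolved if it has no scheme), or None if there is nothing to import for this file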
1862
+ """
1863
+ location = cast(str, file_metadata["location"])
1864
+ if (
1865
+ location.startswith("toilfile:")
1866
+ or location.startswith("toildir:")
1867
+ or location.startswith("_:")
1868
+ ):
1869
+ return None
1870
+ if location in fileindex:
1871
+ file_metadata["location"] = fileindex[location]
1872
+ return None
1873
+ if not location and file_metadata["path"]:
1874
+ file_metadata["location"] = location = schema_salad.ref_resolver.file_uri(
1875
+ cast(str, file_metadata["path"])
1876
+ )
1877
+ if location.startswith("file://") and not os.path.isfile(
1878
+ schema_salad.ref_resolver.uri_file_path(location)
1879
+ ):
1880
+ if mark_broken:
1881
+ logger.debug("File %s is missing", file_metadata)
1882
+ file_metadata["location"] = location = MISSING_FILE + location
1883
+ else:
1884
+ raise cwl_utils.errors.WorkflowException(
1885
+ "File is missing: %s" % file_metadata
1886
+ )
1887
+ if location.startswith("file://") or not skip_remote:
1888
+ # This is a local file or a remote file
1889
+ if location not in fileindex:
1890
+ # These dictionaries are meant to keep track of what we're going to import
1891
+ # In the actual import, this is used as a bidirectional mapping from unvirtualized to virtualized
1892
+ # For this case, keep track of the files to prevent returning duplicate files
1893
+ # see convert_file_uri_to_toil_uri
1894
+
1895
+ # If there is not a scheme, this file has not been resolved yet or is a URL.
1896
+ if not urlparse(location).scheme:
1897
+ rp = os.path.realpath(location)
1898
+ else:
1899
+ rp = location
1900
+ return rp
1901
+ return None
1902
+
1903
+
1904
+ V = TypeVar("V", covariant=True)
1905
+
1906
+
1907
+ class VisitFunc(Protocol[V]):
1908
+ def __call__(
1909
+ self,
1910
+ fileindex: dict[str, str],
1911
+ existing: dict[str, str],
1912
+ file_metadata: CWLObjectType,
1913
+ mark_broken: bool,
1914
+ skip_remote: bool,
1915
+ ) -> V: ...
1916
+
1917
+
1918
+ def visit_files(
1919
+ func: VisitFunc[V],
1750
1920
  fs_access: StdFsAccess,
1751
- fileindex: Dict[str, str],
1752
- existing: Dict[str, str],
1921
+ fileindex: dict[str, str],
1922
+ existing: dict[str, str],
1753
1923
  cwl_object: Optional[CWLObjectType],
1754
1924
  mark_broken: bool = False,
1755
1925
  skip_remote: bool = False,
1756
1926
  bypass_file_store: bool = False,
1757
- log_level: int = logging.DEBUG
1758
- ) -> None:
1927
+ ) -> list[V]:
1759
1928
  """
1760
1929
  Prepare all files and directories.
1761
1930
 
@@ -1801,18 +1970,12 @@ def import_files(
1801
1970
 
1802
1971
  :param log_level: Log imported files at the given level.
1803
1972
  """
1973
+ func_return: list[Any] = list()
1804
1974
  tool_id = cwl_object.get("id", str(cwl_object)) if cwl_object else ""
1805
1975
 
1806
1976
  logger.debug("Importing files for %s", tool_id)
1807
1977
  logger.debug("Importing files in %s", cwl_object)
1808
1978
 
1809
- def import_and_log(url: str) -> FileID:
1810
- """
1811
- Upload a file and log that we are doing so.
1812
- """
1813
- logger.log(log_level, "Loading %s...", url)
1814
- return import_function(url)
1815
-
1816
1979
  # We need to upload all files to the Toil filestore, and encode structure
1817
1980
  # recursively into all Directories' locations. But we cannot safely alter
1818
1981
  # the listing fields of Directory objects, because the handling required by
@@ -1830,13 +1993,13 @@ def import_files(
1830
1993
  if bypass_file_store:
1831
1994
  # Don't go on to actually import files or encode contents for
1832
1995
  # directories.
1833
- return
1996
+ return func_return
1834
1997
 
1835
1998
  # Otherwise we actually want to put the things in the file store.
1836
1999
 
1837
2000
  def visit_file_or_directory_down(
1838
2001
  rec: CWLObjectType,
1839
- ) -> Optional[List[CWLObjectType]]:
2002
+ ) -> Optional[list[CWLObjectType]]:
1840
2003
  """
1841
2004
  Visit each CWL File or Directory on the way down.
1842
2005
 
@@ -1863,7 +2026,7 @@ def import_files(
1863
2026
  ensure_no_collisions(cast(DirectoryType, rec))
1864
2027
 
1865
2028
  # Pull out the old listing, if any
1866
- old_listing = cast(Optional[List[CWLObjectType]], rec.get("listing", None))
2029
+ old_listing = cast(Optional[list[CWLObjectType]], rec.get("listing", None))
1867
2030
 
1868
2031
  if not cast(str, rec["location"]).startswith("_:"):
1869
2032
  # This is a thing we can list and not just a literal, so we
@@ -1885,8 +2048,8 @@ def import_files(
1885
2048
 
1886
2049
  def visit_file_or_directory_up(
1887
2050
  rec: CWLObjectType,
1888
- down_result: Optional[List[CWLObjectType]],
1889
- child_results: List[DirectoryContents],
2051
+ down_result: Optional[list[CWLObjectType]],
2052
+ child_results: list[DirectoryContents],
1890
2053
  ) -> DirectoryContents:
1891
2054
  """
1892
2055
  For a CWL File or Directory, make sure it is uploaded and it has a
@@ -1908,10 +2071,15 @@ def import_files(
1908
2071
  # This is a CWL File
1909
2072
 
1910
2073
  result: DirectoryContents = {}
1911
-
1912
- # Upload the file itself, which will adjust its location.
1913
- upload_file(
1914
- import_and_log, fileindex, existing, rec, mark_broken=mark_broken, skip_remote=skip_remote
2074
+ # Run a function on the file and store the return
2075
+ func_return.append(
2076
+ func(
2077
+ fileindex,
2078
+ existing,
2079
+ rec,
2080
+ mark_broken=mark_broken,
2081
+ skip_remote=skip_remote,
2082
+ )
1915
2083
  )
1916
2084
 
1917
2085
  # Make a record for this file under its name
@@ -1955,6 +2123,7 @@ def import_files(
1955
2123
  visit_file_or_directory_down,
1956
2124
  visit_file_or_directory_up,
1957
2125
  )
2126
+ return func_return
1958
2127
 
1959
2128
 
1960
2129
  def upload_directory(
@@ -2013,52 +2182,34 @@ def upload_directory(
2013
2182
  directory_metadata["location"] = encode_directory(directory_contents)
2014
2183
 
2015
2184
 
2016
- def upload_file(
2017
- uploadfunc: Callable[[str], FileID],
2018
- fileindex: Dict[str, str],
2019
- existing: Dict[str, str],
2185
+ def extract_and_convert_file_to_toil_uri(
2186
+ convertfunc: Callable[[str], FileID],
2187
+ fileindex: dict[str, str],
2188
+ existing: dict[str, str],
2020
2189
  file_metadata: CWLObjectType,
2021
2190
  mark_broken: bool = False,
2022
- skip_remote: bool = False
2191
+ skip_remote: bool = False,
2023
2192
  ) -> None:
2024
2193
  """
2025
- Update a file object so that the file will be accessible from another machine.
2194
+ Extract the file URI out of a file object and convert it to a Toil URI.
2195
+
2196
+ Runs convertfunc on the file URI to handle conversion.
2026
2197
 
2027
- Uploads local files to the Toil file store, and sets their location to a
2028
- reference to the toil file store.
2198
+ Is used to handle importing files into the jobstore.
2029
2199
 
2030
2200
  If a file doesn't exist, fails with an error, unless mark_broken is set, in
2031
2201
  which case the missing file is given a special sentinel location.
2032
2202
 
2033
- Unless skip_remote is set, downloads remote files into the file store and
2034
- sets their locations to references into the file store as well.
2203
+ Unless skip_remote is set, also runs on remote files and sets their locations
2204
+ to toil URIs as well.
2035
2205
  """
2036
- location = cast(str, file_metadata["location"])
2037
- if (
2038
- location.startswith("toilfile:")
2039
- or location.startswith("toildir:")
2040
- or location.startswith("_:")
2041
- ):
2042
- return
2043
- if location in fileindex:
2044
- file_metadata["location"] = fileindex[location]
2045
- return
2046
- if not location and file_metadata["path"]:
2047
- file_metadata["location"] = location = schema_salad.ref_resolver.file_uri(
2048
- cast(str, file_metadata["path"])
2206
+ location = extract_file_uri_once(
2207
+ fileindex, existing, file_metadata, mark_broken, skip_remote
2208
+ )
2209
+ if location is not None:
2210
+ file_metadata["location"] = convert_file_uri_to_toil_uri(
2211
+ convertfunc, fileindex, existing, location
2049
2212
  )
2050
- if location.startswith("file://") and not os.path.isfile(
2051
- schema_salad.ref_resolver.uri_file_path(location)
2052
- ):
2053
- if mark_broken:
2054
- logger.debug("File %s is missing", file_metadata)
2055
- file_metadata["location"] = location = MISSING_FILE
2056
- else:
2057
- raise cwl_utils.errors.WorkflowException("File is missing: %s" % file_metadata)
2058
-
2059
- if location.startswith("file://") or not skip_remote:
2060
- # This is a local file, or we also need to download and re-upload remote files
2061
- file_metadata["location"] = write_file(uploadfunc, fileindex, existing, location)
2062
2213
 
2063
2214
  logger.debug("Sending file at: %s", file_metadata["location"])
2064
2215
 
@@ -2071,7 +2222,7 @@ def writeGlobalFileWrapper(file_store: AbstractFileStore, fileuri: str) -> FileI
2071
2222
 
2072
2223
  def remove_empty_listings(rec: CWLObjectType) -> None:
2073
2224
  if rec.get("class") != "Directory":
2074
- finddirs = [] # type: List[CWLObjectType]
2225
+ finddirs: list[CWLObjectType] = []
2075
2226
  visit_class(rec, ("Directory",), finddirs.append)
2076
2227
  for f in finddirs:
2077
2228
  remove_empty_listings(f)
@@ -2091,7 +2242,7 @@ class CWLNamedJob(Job):
2091
2242
  cores: Union[float, None] = 1,
2092
2243
  memory: Union[int, str, None] = "1GiB",
2093
2244
  disk: Union[int, str, None] = "1MiB",
2094
- accelerators: Optional[List[AcceleratorRequirement]] = None,
2245
+ accelerators: Optional[list[AcceleratorRequirement]] = None,
2095
2246
  preemptible: Optional[bool] = None,
2096
2247
  tool_id: Optional[str] = None,
2097
2248
  parent_name: Optional[str] = None,
@@ -2166,10 +2317,10 @@ class ResolveIndirect(CWLNamedJob):
2166
2317
 
2167
2318
  def toilStageFiles(
2168
2319
  toil: Toil,
2169
- cwljob: Union[CWLObjectType, List[CWLObjectType]],
2320
+ cwljob: Union[CWLObjectType, list[CWLObjectType]],
2170
2321
  outdir: str,
2171
2322
  destBucket: Union[str, None] = None,
2172
- log_level: int = logging.DEBUG
2323
+ log_level: int = logging.DEBUG,
2173
2324
  ) -> None:
2174
2325
  """
2175
2326
  Copy input files out of the global file store and update location and path.
@@ -2181,7 +2332,7 @@ def toilStageFiles(
2181
2332
  """
2182
2333
 
2183
2334
  def _collectDirEntries(
2184
- obj: Union[CWLObjectType, List[CWLObjectType]]
2335
+ obj: Union[CWLObjectType, list[CWLObjectType]]
2185
2336
  ) -> Iterator[CWLObjectType]:
2186
2337
  if isinstance(obj, dict):
2187
2338
  if obj.get("class") in ("File", "Directory"):
@@ -2263,13 +2414,17 @@ def toilStageFiles(
2263
2414
  # TODO: Use direct S3 to S3 copy on exports as well
2264
2415
  file_id_or_contents = (
2265
2416
  "toilfile:"
2266
- + toil.import_file(file_id_or_contents, symlink=False).pack()
2417
+ + toil.import_file(
2418
+ file_id_or_contents, symlink=False
2419
+ ).pack()
2267
2420
  )
2268
2421
 
2269
2422
  if file_id_or_contents.startswith("toilfile:"):
2270
2423
  # This is something we can export
2271
2424
  # TODO: Do we need to urlencode the parts before sending them to S3?
2272
- dest_url = "/".join(s.strip("/") for s in [destBucket, baseName])
2425
+ dest_url = "/".join(
2426
+ s.strip("/") for s in [destBucket, baseName]
2427
+ )
2273
2428
  logger.log(log_level, "Saving %s...", dest_url)
2274
2429
  toil.export_file(
2275
2430
  FileID.unpack(file_id_or_contents[len("toilfile:") :]),
@@ -2291,7 +2446,12 @@ def toilStageFiles(
2291
2446
  # Probably staging and bypassing file store. Just copy.
2292
2447
  logger.log(log_level, "Saving %s...", dest_url)
2293
2448
  os.makedirs(os.path.dirname(p.target), exist_ok=True)
2294
- shutil.copyfile(p.resolved, p.target)
2449
+ try:
2450
+ shutil.copyfile(p.resolved, p.target)
2451
+ except shutil.SameFileError:
2452
+ # If outdir isn't set and we're passing through an input file/directory as the output,
2453
+ # the file doesn't need to be copied because it is already there
2454
+ pass
2295
2455
  else:
2296
2456
  uri = p.resolved
2297
2457
  if not uri.startswith("toilfile:"):
@@ -2364,26 +2524,31 @@ class CWLJobWrapper(CWLNamedJob):
2364
2524
  subjob_name="_wrapper",
2365
2525
  local=True,
2366
2526
  )
2367
- self.cwltool = remove_pickle_problems(tool)
2527
+ self.cwltool = tool
2368
2528
  self.cwljob = cwljob
2369
2529
  self.runtime_context = runtime_context
2370
- self.conditional = conditional
2530
+ self.conditional = conditional or Conditional()
2371
2531
  self.parent_name = parent_name
2372
2532
 
2373
2533
  def run(self, file_store: AbstractFileStore) -> Any:
2374
2534
  """Create a child job with the correct resource requirements set."""
2375
2535
  cwljob = resolve_dict_w_promises(self.cwljob, file_store)
2536
+
2537
+ # Check conditional to license full evaluation of job inputs.
2538
+ if self.conditional.is_false(cwljob):
2539
+ return self.conditional.skipped_outputs()
2540
+
2376
2541
  fill_in_defaults(
2377
2542
  self.cwltool.tool["inputs"],
2378
2543
  cwljob,
2379
2544
  self.runtime_context.make_fs_access(self.runtime_context.basedir or ""),
2380
2545
  )
2546
+ # Don't forward the conditional. We checked it already.
2381
2547
  realjob = CWLJob(
2382
2548
  tool=self.cwltool,
2383
2549
  cwljob=cwljob,
2384
2550
  runtime_context=self.runtime_context,
2385
2551
  parent_name=self.parent_name,
2386
- conditional=self.conditional,
2387
2552
  )
2388
2553
  self.addChild(realjob)
2389
2554
  return realjob.rv()
@@ -2401,7 +2566,7 @@ class CWLJob(CWLNamedJob):
2401
2566
  conditional: Union[Conditional, None] = None,
2402
2567
  ):
2403
2568
  """Store the context for later execution."""
2404
- self.cwltool = remove_pickle_problems(tool)
2569
+ self.cwltool = tool
2405
2570
  self.conditional = conditional or Conditional()
2406
2571
 
2407
2572
  if runtime_context.builder:
@@ -2418,7 +2583,7 @@ class CWLJob(CWLNamedJob):
2418
2583
  resources={},
2419
2584
  mutation_manager=runtime_context.mutation_manager,
2420
2585
  formatgraph=tool.formatgraph,
2421
- make_fs_access=cast(Type[StdFsAccess], runtime_context.make_fs_access),
2586
+ make_fs_access=cast(type[StdFsAccess], runtime_context.make_fs_access),
2422
2587
  fs_access=runtime_context.make_fs_access(""),
2423
2588
  job_script_provider=runtime_context.job_script_provider,
2424
2589
  timeout=runtime_context.eval_timeout,
@@ -2435,7 +2600,21 @@ class CWLJob(CWLNamedJob):
2435
2600
 
2436
2601
  req = tool.evalResources(self.builder, runtime_context)
2437
2602
 
2438
- accelerators: Optional[List[AcceleratorRequirement]] = None
2603
+ tool_own_resources = tool.get_requirement("ResourceRequirement")[0] or {}
2604
+ if "ramMin" in tool_own_resources or "ramMax" in tool_own_resources:
2605
+ # The tool is actually asking for memory.
2606
+ memory = int(req["ram"] * (2**20))
2607
+ else:
2608
+ # The tool is getting a default ram allocation.
2609
+ if getattr(runtime_context, "cwl_default_ram"):
2610
+ # We will respect the CWL spec and apply the default cwltool
2611
+ # computed, which might be different than Toil's default.
2612
+ memory = int(req["ram"] * (2**20))
2613
+ else:
2614
+ # We use a None requirement and the Toil default applies.
2615
+ memory = None
2616
+
2617
+ accelerators: Optional[list[AcceleratorRequirement]] = None
2439
2618
  if req.get("cudaDeviceCount", 0) > 0:
2440
2619
  # There's a CUDARequirement, which cwltool processed for us
2441
2620
  # TODO: How is cwltool deciding what value to use between min and max?
@@ -2499,7 +2678,7 @@ class CWLJob(CWLNamedJob):
2499
2678
 
2500
2679
  super().__init__(
2501
2680
  cores=req["cores"],
2502
- memory=int(req["ram"] * (2**20)),
2681
+ memory=memory,
2503
2682
  disk=int(total_disk),
2504
2683
  accelerators=accelerators,
2505
2684
  preemptible=preemptible,
@@ -2513,7 +2692,7 @@ class CWLJob(CWLNamedJob):
2513
2692
  self.step_inputs = self.cwltool.tool["inputs"]
2514
2693
  self.workdir: str = runtime_context.workdir # type: ignore[attr-defined]
2515
2694
 
2516
- def required_env_vars(self, cwljob: Any) -> Iterator[Tuple[str, str]]:
2695
+ def required_env_vars(self, cwljob: Any) -> Iterator[tuple[str, str]]:
2517
2696
  """Yield environment variables from EnvVarRequirement."""
2518
2697
  if isinstance(cwljob, dict):
2519
2698
  if cwljob.get("class") == "EnvVarRequirement":
@@ -2525,7 +2704,7 @@ class CWLJob(CWLNamedJob):
2525
2704
  for env_var in cwljob:
2526
2705
  yield from self.required_env_vars(env_var)
2527
2706
 
2528
- def populate_env_vars(self, cwljob: CWLObjectType) -> Dict[str, str]:
2707
+ def populate_env_vars(self, cwljob: CWLObjectType) -> dict[str, str]:
2529
2708
  """
2530
2709
  Prepare environment variables necessary at runtime for the job.
2531
2710
 
@@ -2541,9 +2720,9 @@ class CWLJob(CWLNamedJob):
2541
2720
  required_env_vars = {}
2542
2721
  # iterate over EnvVarRequirement env vars, if any
2543
2722
  for k, v in self.required_env_vars(cwljob):
2544
- required_env_vars[
2545
- k
2546
- ] = v # will tell cwltool which env vars to take from the environment
2723
+ required_env_vars[k] = (
2724
+ v # will tell cwltool which env vars to take from the environment
2725
+ )
2547
2726
  os.environ[k] = v
2548
2727
  # needs to actually be populated in the environment as well or
2549
2728
  # they're not used
@@ -2553,7 +2732,7 @@ class CWLJob(CWLNamedJob):
2553
2732
  # env var with the same name is found
2554
2733
  for req in self.cwltool.requirements:
2555
2734
  if req["class"] == "EnvVarRequirement":
2556
- envDefs = cast(List[Dict[str, str]], req["envDef"])
2735
+ envDefs = cast(list[dict[str, str]], req["envDef"])
2557
2736
  for env_def in envDefs:
2558
2737
  env_name = env_def["envName"]
2559
2738
  if env_name in required_env_vars:
@@ -2585,7 +2764,7 @@ class CWLJob(CWLNamedJob):
2585
2764
  for inp_id in immobile_cwljob_dict.keys():
2586
2765
  found = False
2587
2766
  for field in cast(
2588
- List[Dict[str, str]], self.cwltool.inputs_record_schema["fields"]
2767
+ list[dict[str, str]], self.cwltool.inputs_record_schema["fields"]
2589
2768
  ):
2590
2769
  if field["name"] == inp_id:
2591
2770
  found = True
@@ -2600,8 +2779,8 @@ class CWLJob(CWLNamedJob):
2600
2779
  functools.partial(remove_empty_listings),
2601
2780
  )
2602
2781
 
2603
- index: Dict[str, str] = {}
2604
- existing: Dict[str, str] = {}
2782
+ index: dict[str, str] = {}
2783
+ existing: dict[str, str] = {}
2605
2784
 
2606
2785
  # Prepare the run instructions for cwltool
2607
2786
  runtime_context = self.runtime_context.copy()
@@ -2613,7 +2792,7 @@ class CWLJob(CWLNamedJob):
2613
2792
  # will come and grab this function for fetching files from the Toil
2614
2793
  # file store. pipe_threads is used for keeping track of separate
2615
2794
  # threads launched to stream files around.
2616
- pipe_threads: List[Tuple[Thread, int]] = []
2795
+ pipe_threads: list[tuple[Thread, int]] = []
2617
2796
  setattr(
2618
2797
  runtime_context,
2619
2798
  "toil_get_file",
@@ -2647,7 +2826,7 @@ class CWLJob(CWLNamedJob):
2647
2826
  # function and a path_mapper type or factory function.
2648
2827
 
2649
2828
  runtime_context.make_fs_access = cast(
2650
- Type[StdFsAccess],
2829
+ type[StdFsAccess],
2651
2830
  functools.partial(ToilFsAccess, file_store=file_store),
2652
2831
  )
2653
2832
 
@@ -2660,9 +2839,13 @@ class CWLJob(CWLNamedJob):
2660
2839
  # Collect standard output and standard error somewhere if they don't go to files.
2661
2840
  # We need to keep two FDs to these because cwltool will close what we give it.
2662
2841
  default_stdout = TemporaryFile()
2663
- runtime_context.default_stdout = os.fdopen(os.dup(default_stdout.fileno()), 'wb')
2842
+ runtime_context.default_stdout = os.fdopen(
2843
+ os.dup(default_stdout.fileno()), "wb"
2844
+ )
2664
2845
  default_stderr = TemporaryFile()
2665
- runtime_context.default_stderr = os.fdopen(os.dup(default_stderr.fileno()), 'wb')
2846
+ runtime_context.default_stderr = os.fdopen(
2847
+ os.dup(default_stderr.fileno()), "wb"
2848
+ )
2666
2849
 
2667
2850
  process_uuid = uuid.uuid4() # noqa F841
2668
2851
  started_at = datetime.datetime.now() # noqa F841
@@ -2693,17 +2876,27 @@ class CWLJob(CWLNamedJob):
2693
2876
  default_stdout.seek(0, os.SEEK_END)
2694
2877
  if default_stdout.tell() > 0:
2695
2878
  default_stdout.seek(0)
2696
- file_store.log_user_stream(self.description.unitName + '.stdout', default_stdout)
2879
+ file_store.log_user_stream(
2880
+ self.description.unitName + ".stdout", default_stdout
2881
+ )
2697
2882
  if status != "success":
2698
2883
  default_stdout.seek(0)
2699
- logger.error("Failed command standard output:\n%s", default_stdout.read().decode("utf-8", errors="replace"))
2884
+ logger.error(
2885
+ "Failed command standard output:\n%s",
2886
+ default_stdout.read().decode("utf-8", errors="replace"),
2887
+ )
2700
2888
  default_stderr.seek(0, os.SEEK_END)
2701
2889
  if default_stderr.tell():
2702
2890
  default_stderr.seek(0)
2703
- file_store.log_user_stream(self.description.unitName + '.stderr', default_stderr)
2891
+ file_store.log_user_stream(
2892
+ self.description.unitName + ".stderr", default_stderr
2893
+ )
2704
2894
  if status != "success":
2705
2895
  default_stderr.seek(0)
2706
- logger.error("Failed command standard error:\n%s", default_stderr.read().decode("utf-8", errors="replace"))
2896
+ logger.error(
2897
+ "Failed command standard error:\n%s",
2898
+ default_stderr.read().decode("utf-8", errors="replace"),
2899
+ )
2707
2900
 
2708
2901
  if status != "success":
2709
2902
  raise cwl_utils.errors.WorkflowException(status)
@@ -2716,12 +2909,18 @@ class CWLJob(CWLNamedJob):
2716
2909
  fs_access = runtime_context.make_fs_access(runtime_context.basedir)
2717
2910
 
2718
2911
  # And a file importer that can go from a file:// URI to a Toil FileID
2719
- file_import_function = functools.partial(writeGlobalFileWrapper, file_store)
2912
+ def file_import_function(url: str, log_level: int = logging.DEBUG) -> FileID:
2913
+ logger.log(log_level, "Loading %s...", url)
2914
+ return writeGlobalFileWrapper(file_store, url)
2915
+
2916
+ file_upload_function = functools.partial(
2917
+ extract_and_convert_file_to_toil_uri, file_import_function
2918
+ )
2720
2919
 
2721
2920
  # Upload all the Files and set their and the Directories' locations, if
2722
2921
  # needed.
2723
- import_files(
2724
- file_import_function,
2922
+ visit_files(
2923
+ file_upload_function,
2725
2924
  fs_access,
2726
2925
  index,
2727
2926
  existing,
@@ -2751,6 +2950,74 @@ def get_container_engine(runtime_context: cwltool.context.RuntimeContext) -> str
2751
2950
  return "docker"
2752
2951
 
2753
2952
 
2953
+ def makeRootJob(
2954
+ tool: Process,
2955
+ jobobj: CWLObjectType,
2956
+ runtime_context: cwltool.context.RuntimeContext,
2957
+ initialized_job_order: CWLObjectType,
2958
+ options: Namespace,
2959
+ toil: Toil,
2960
+ ) -> CWLNamedJob:
2961
+ """
2962
+ Create the Toil root Job object for the CWL tool. Is the same as makeJob() except this also handles import logic.
2963
+
2964
+ Actually creates what might be a subgraph of two jobs. The second of which may be the follow on of the first.
2965
+ Only the root job is returned; a follow-on job, if one is created, is not.
2966
+
2967
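+ :return: The root job: the import wrapper job if imports run on workers, otherwise the job created by makeJob()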
2968
+ """
2969
+ if options.run_imports_on_workers:
2970
+ filenames = extract_workflow_inputs(options, initialized_job_order, tool)
2971
+ metadata = get_file_sizes(
2972
+ filenames, toil._jobStore, include_remote_files=options.reference_inputs
2973
+ )
2974
+
2975
+ # Mapping of files to metadata for files that will be imported on the worker
2976
+ # This will consist of files that we were able to get a file size for
2977
+ worker_metadata: dict[str, FileMetadata] = dict()
2978
+ # Mapping of files to metadata for files that will be imported on the leader
2979
+ # This will consist of files that we were not able to get a file size for
2980
+ leader_metadata = dict()
2981
+ for filename, file_data in metadata.items():
2982
+ if file_data.size is None:
2983
+ leader_metadata[filename] = file_data
2984
+ else:
2985
+ worker_metadata[filename] = file_data
2986
+
2987
+ # import the files for the leader first
2988
+ path_to_fileid = WorkerImportJob.import_files(
2989
+ list(leader_metadata.keys()), toil._jobStore
2990
+ )
2991
+
2992
+ # then install the imported files before importing the other files
2993
+ # this way the control flow can fall from the leader to workers
2994
+ tool, initialized_job_order = CWLInstallImportsJob.fill_in_files(
2995
+ initialized_job_order,
2996
+ tool,
2997
+ path_to_fileid,
2998
+ options.basedir,
2999
+ options.reference_inputs,
3000
+ options.bypass_file_store,
3001
+ )
3002
+
3003
+ import_job = CWLImportWrapper(
3004
+ initialized_job_order, tool, runtime_context, worker_metadata, options
3005
+ )
3006
+ return import_job
3007
+ else:
3008
+ import_workflow_inputs(
3009
+ toil._jobStore,
3010
+ options,
3011
+ initialized_job_order=initialized_job_order,
3012
+ tool=tool,
3013
+ )
3014
+ root_job, followOn = makeJob(
3015
+ tool, jobobj, runtime_context, None, None
3016
+ ) # toplevel, no name needed
3017
+ root_job.cwljob = initialized_job_order
3018
+ return root_job
3019
+
3020
+
2754
3021
  def makeJob(
2755
3022
  tool: Process,
2756
3023
  jobobj: CWLObjectType,
@@ -2758,13 +3025,16 @@ def makeJob(
2758
3025
  parent_name: Optional[str],
2759
3026
  conditional: Union[Conditional, None],
2760
3027
  ) -> Union[
2761
- Tuple["CWLWorkflow", ResolveIndirect],
2762
- Tuple[CWLJob, CWLJob],
2763
- Tuple[CWLJobWrapper, CWLJobWrapper],
3028
+ tuple["CWLWorkflow", ResolveIndirect],
3029
+ tuple[CWLJob, CWLJob],
3030
+ tuple[CWLJobWrapper, CWLJobWrapper],
2764
3031
  ]:
2765
3032
  """
2766
3033
  Create the correct Toil Job object for the CWL tool.
2767
3034
 
3035
+ Actually creates what might be a subgraph of two jobs. The second of which may be the follow on of the first.
3036
+ If only one job is created, it is returned twice.
3037
+
2768
3038
  Types: workflow, job, or job wrapper for dynamic resource requirements.
2769
3039
 
2770
3040
  :return: "wfjob, followOn" if the input tool is a workflow, and "job, job" otherwise
@@ -2844,16 +3114,16 @@ class CWLScatter(Job):
2844
3114
  def flat_crossproduct_scatter(
2845
3115
  self,
2846
3116
  joborder: CWLObjectType,
2847
- scatter_keys: List[str],
2848
- outputs: List[Promised[CWLObjectType]],
3117
+ scatter_keys: list[str],
3118
+ outputs: list[Promised[CWLObjectType]],
2849
3119
  postScatterEval: Callable[[CWLObjectType], CWLObjectType],
2850
3120
  ) -> None:
2851
3121
  """Cartesian product of the inputs, then flattened."""
2852
3122
  scatter_key = shortname(scatter_keys[0])
2853
- for n in range(0, len(cast(List[CWLObjectType], joborder[scatter_key]))):
3123
+ for n in range(0, len(cast(list[CWLObjectType], joborder[scatter_key]))):
2854
3124
  updated_joborder = copy.copy(joborder)
2855
3125
  updated_joborder[scatter_key] = cast(
2856
- List[CWLObjectType], joborder[scatter_key]
3126
+ list[CWLObjectType], joborder[scatter_key]
2857
3127
  )[n]
2858
3128
  if len(scatter_keys) == 1:
2859
3129
  updated_joborder = postScatterEval(updated_joborder)
@@ -2874,16 +3144,16 @@ class CWLScatter(Job):
2874
3144
  def nested_crossproduct_scatter(
2875
3145
  self,
2876
3146
  joborder: CWLObjectType,
2877
- scatter_keys: List[str],
3147
+ scatter_keys: list[str],
2878
3148
  postScatterEval: Callable[[CWLObjectType], CWLObjectType],
2879
- ) -> List[Promised[CWLObjectType]]:
3149
+ ) -> list[Promised[CWLObjectType]]:
2880
3150
  """Cartesian product of the inputs."""
2881
3151
  scatter_key = shortname(scatter_keys[0])
2882
- outputs: List[Promised[CWLObjectType]] = []
2883
- for n in range(0, len(cast(List[CWLObjectType], joborder[scatter_key]))):
3152
+ outputs: list[Promised[CWLObjectType]] = []
3153
+ for n in range(0, len(cast(list[CWLObjectType], joborder[scatter_key]))):
2884
3154
  updated_joborder = copy.copy(joborder)
2885
3155
  updated_joborder[scatter_key] = cast(
2886
- List[CWLObjectType], joborder[scatter_key]
3156
+ list[CWLObjectType], joborder[scatter_key]
2887
3157
  )[n]
2888
3158
  if len(scatter_keys) == 1:
2889
3159
  updated_joborder = postScatterEval(updated_joborder)
@@ -2904,7 +3174,7 @@ class CWLScatter(Job):
2904
3174
  )
2905
3175
  return outputs
2906
3176
 
2907
- def run(self, file_store: AbstractFileStore) -> List[Promised[CWLObjectType]]:
3177
+ def run(self, file_store: AbstractFileStore) -> list[Promised[CWLObjectType]]:
2908
3178
  """Generate the follow on scatter jobs."""
2909
3179
  cwljob = resolve_dict_w_promises(self.cwljob, file_store)
2910
3180
 
@@ -2916,7 +3186,7 @@ class CWLScatter(Job):
2916
3186
  scatterMethod = self.step.tool.get("scatterMethod", None)
2917
3187
  if len(scatter) == 1:
2918
3188
  scatterMethod = "dotproduct"
2919
- outputs: List[Promised[CWLObjectType]] = []
3189
+ outputs: list[Promised[CWLObjectType]] = []
2920
3190
 
2921
3191
  valueFrom = {
2922
3192
  shortname(i["id"]): i["valueFrom"]
@@ -2948,11 +3218,11 @@ class CWLScatter(Job):
2948
3218
 
2949
3219
  if scatterMethod == "dotproduct":
2950
3220
  for i in range(
2951
- 0, len(cast(List[CWLObjectType], cwljob[shortname(scatter[0])]))
3221
+ 0, len(cast(list[CWLObjectType], cwljob[shortname(scatter[0])]))
2952
3222
  ):
2953
3223
  copyjob = copy.copy(cwljob)
2954
3224
  for sc in [shortname(x) for x in scatter]:
2955
- copyjob[sc] = cast(List[CWLObjectType], cwljob[sc])[i]
3225
+ copyjob[sc] = cast(list[CWLObjectType], cwljob[sc])[i]
2956
3226
  copyjob = postScatterEval(copyjob)
2957
3227
  subjob, follow_on = makeJob(
2958
3228
  tool=self.step.embedded_tool,
@@ -2991,7 +3261,7 @@ class CWLGather(Job):
2991
3261
  def __init__(
2992
3262
  self,
2993
3263
  step: cwltool.workflow.WorkflowStep,
2994
- outputs: Promised[Union[CWLObjectType, List[CWLObjectType]]],
3264
+ outputs: Promised[Union[CWLObjectType, list[CWLObjectType]]],
2995
3265
  ):
2996
3266
  """Collect our context for later gathering."""
2997
3267
  super().__init__(cores=1, memory="1GiB", disk="1MiB", local=True)
@@ -3000,24 +3270,24 @@ class CWLGather(Job):
3000
3270
 
3001
3271
  @staticmethod
3002
3272
  def extract(
3003
- obj: Union[CWLObjectType, List[CWLObjectType]], k: str
3004
- ) -> Union[CWLOutputType, List[CWLObjectType]]:
3273
+ obj: Union[CWLObjectType, list[CWLObjectType]], k: str
3274
+ ) -> Union[CWLOutputType, list[CWLObjectType]]:
3005
3275
  """
3006
3276
  Extract the given key from the obj.
3007
3277
 
3008
3278
  If the object is a list, extract it from all members of the list.
3009
3279
  """
3010
3280
  if isinstance(obj, Mapping):
3011
- return cast(Union[CWLOutputType, List[CWLObjectType]], obj.get(k))
3281
+ return cast(Union[CWLOutputType, list[CWLObjectType]], obj.get(k))
3012
3282
  elif isinstance(obj, MutableSequence):
3013
- cp: List[CWLObjectType] = []
3283
+ cp: list[CWLObjectType] = []
3014
3284
  for item in obj:
3015
3285
  cp.append(cast(CWLObjectType, CWLGather.extract(item, k)))
3016
3286
  return cp
3017
3287
  else:
3018
- return cast(List[CWLObjectType], [])
3288
+ return cast(list[CWLObjectType], [])
3019
3289
 
3020
- def run(self, file_store: AbstractFileStore) -> Dict[str, Any]:
3290
+ def run(self, file_store: AbstractFileStore) -> dict[str, Any]:
3021
3291
  """Gather all the outputs of the scatter."""
3022
3292
  outobj = {}
3023
3293
 
@@ -3028,8 +3298,8 @@ class CWLGather(Job):
3028
3298
  return shortname(n)
3029
3299
 
3030
3300
  # TODO: MyPy can't understand that this is the type we should get by unwrapping the promise
3031
- outputs: Union[CWLObjectType, List[CWLObjectType]] = cast(
3032
- Union[CWLObjectType, List[CWLObjectType]], unwrap(self.outputs)
3301
+ outputs: Union[CWLObjectType, list[CWLObjectType]] = cast(
3302
+ Union[CWLObjectType, list[CWLObjectType]], unwrap(self.outputs)
3033
3303
  )
3034
3304
  for k in [sn(i) for i in self.step.tool["out"]]:
3035
3305
  outobj[k] = self.extract(outputs, k)
@@ -3071,7 +3341,11 @@ ProcessType = TypeVar(
3071
3341
 
3072
3342
 
3073
3343
  def remove_pickle_problems(obj: ProcessType) -> ProcessType:
3074
- """Doc_loader does not pickle correctly, causing Toil errors, remove from objects."""
3344
+ """
3345
+ Doc_loader does not pickle correctly, causing Toil errors, remove from objects.
3346
+
3347
+ See github issue: https://github.com/mypyc/mypyc/issues/804
3348
+ """
3075
3349
  if hasattr(obj, "doc_loader"):
3076
3350
  obj.doc_loader = None
3077
3351
  if isinstance(obj, cwltool.workflow.WorkflowStep):
@@ -3103,12 +3377,11 @@ class CWLWorkflow(CWLNamedJob):
3103
3377
  self.cwlwf = cwlwf
3104
3378
  self.cwljob = cwljob
3105
3379
  self.runtime_context = runtime_context
3106
- self.cwlwf = remove_pickle_problems(self.cwlwf)
3107
3380
  self.conditional = conditional or Conditional()
3108
3381
 
3109
3382
  def run(
3110
3383
  self, file_store: AbstractFileStore
3111
- ) -> Union[UnresolvedDict, Dict[str, SkipNull]]:
3384
+ ) -> Union[UnresolvedDict, dict[str, SkipNull]]:
3112
3385
  """
3113
3386
  Convert a CWL Workflow graph into a Toil job graph.
3114
3387
 
@@ -3129,7 +3402,7 @@ class CWLWorkflow(CWLNamedJob):
3129
3402
  # that may be used as a "source" for a step input workflow output
3130
3403
  # parameter
3131
3404
  # to: the job that will produce that value.
3132
- promises: Dict[str, Job] = {}
3405
+ promises: dict[str, Job] = {}
3133
3406
 
3134
3407
  parent_name = shortname(self.cwlwf.tool["id"])
3135
3408
 
@@ -3158,7 +3431,7 @@ class CWLWorkflow(CWLNamedJob):
3158
3431
  stepinputs_fufilled = False
3159
3432
  if stepinputs_fufilled:
3160
3433
  logger.debug("Ready to make job for workflow step %s", step_id)
3161
- jobobj: Dict[
3434
+ jobobj: dict[
3162
3435
  str, Union[ResolveSource, DefaultWithSource, StepValueFrom]
3163
3436
  ] = {}
3164
3437
 
@@ -3292,30 +3565,349 @@ class CWLWorkflow(CWLNamedJob):
3292
3565
  return UnresolvedDict(outobj)
3293
3566
 
3294
3567
 
3568
+ class CWLInstallImportsJob(Job):
3569
+ def __init__(
3570
+ self,
3571
+ initialized_job_order: Promised[CWLObjectType],
3572
+ tool: Promised[Process],
3573
+ basedir: str,
3574
+ skip_remote: bool,
3575
+ bypass_file_store: bool,
3576
+ import_data: Promised[dict[str, FileID]],
3577
+ **kwargs: Any,
3578
+ ) -> None:
3579
+ """
3580
+ Job to take the entire CWL object and a mapping of filenames to the imported URIs
3581
+ to convert all file locations to URIs.
3582
+
3583
+ This class is only used when runImportsOnWorkers is enabled.
3584
+ """
3585
+ super().__init__(local=True, **kwargs)
3586
+ self.initialized_job_order = initialized_job_order
3587
+ self.tool = tool
3588
+ self.basedir = basedir
3589
+ self.skip_remote = skip_remote
3590
+ self.bypass_file_store = bypass_file_store
3591
+ self.import_data = import_data
3592
+
3593
+ @staticmethod
3594
+ def fill_in_files(
3595
+ initialized_job_order: CWLObjectType,
3596
+ tool: Process,
3597
+ candidate_to_fileid: dict[str, FileID],
3598
+ basedir: str,
3599
+ skip_remote: bool,
3600
+ bypass_file_store: bool,
3601
+ ) -> tuple[Process, CWLObjectType]:
3602
+ """
3603
+ Given a mapping of filenames to Toil file IDs, replace the filename with the file IDs throughout the CWL object.
3604
+ """
3605
+
3606
+ def fill_in_file(filename: str) -> FileID:
3607
+ """
3608
+ Return the file name's associated Toil file ID
3609
+ """
3610
+ return candidate_to_fileid[filename]
3611
+
3612
+ file_convert_function = functools.partial(
3613
+ extract_and_convert_file_to_toil_uri, fill_in_file
3614
+ )
3615
+ fs_access = ToilFsAccess(basedir)
3616
+ fileindex: dict[str, str] = {}
3617
+ existing: dict[str, str] = {}
3618
+ visit_files(
3619
+ file_convert_function,
3620
+ fs_access,
3621
+ fileindex,
3622
+ existing,
3623
+ initialized_job_order,
3624
+ mark_broken=True,
3625
+ skip_remote=skip_remote,
3626
+ bypass_file_store=bypass_file_store,
3627
+ )
3628
+ visitSteps(
3629
+ tool,
3630
+ functools.partial(
3631
+ visit_files,
3632
+ file_convert_function,
3633
+ fs_access,
3634
+ fileindex,
3635
+ existing,
3636
+ mark_broken=True,
3637
+ skip_remote=skip_remote,
3638
+ bypass_file_store=bypass_file_store,
3639
+ ),
3640
+ )
3641
+
3642
+ # We always expect to have processed all files that exist
3643
+ for param_name, param_value in initialized_job_order.items():
3644
+ # Loop through all the parameters for the workflow overall.
3645
+ # Drop any files that aren't either imported (for when we use
3646
+ # the file store) or available on disk (for when we don't).
3647
+ # This will properly make them cause an error later if they
3648
+ # were required.
3649
+ rm_unprocessed_secondary_files(param_value)
3650
+ return tool, initialized_job_order
3651
+
3652
+ def run(self, file_store: AbstractFileStore) -> Tuple[Process, CWLObjectType]:
3653
+ """
3654
+ Convert the filenames in the workflow inputs into the URIs
3655
+ :return: Promise of transformed workflow inputs. A tuple of the job order and process
3656
+ """
3657
+ candidate_to_fileid: dict[str, FileID] = unwrap(self.import_data)
3658
+
3659
+ initialized_job_order = unwrap(self.initialized_job_order)
3660
+ tool = unwrap(self.tool)
3661
+ return CWLInstallImportsJob.fill_in_files(
3662
+ initialized_job_order,
3663
+ tool,
3664
+ candidate_to_fileid,
3665
+ self.basedir,
3666
+ self.skip_remote,
3667
+ self.bypass_file_store,
3668
+ )
3669
+
3670
+
3671
+ class CWLImportWrapper(CWLNamedJob):
3672
+ """
3673
+ Job to organize importing files on workers instead of the leader. Responsible for extracting filenames and metadata,
3674
+ calling ImportsJob, applying imports to the job objects, and scheduling the start workflow job
3675
+
3676
+ This class is only used when runImportsOnWorkers is enabled.
3677
+ """
3678
+
3679
+ def __init__(
3680
+ self,
3681
+ initialized_job_order: CWLObjectType,
3682
+ tool: Process,
3683
+ runtime_context: cwltool.context.RuntimeContext,
3684
+ file_to_data: dict[str, FileMetadata],
3685
+ options: Namespace,
3686
+ ):
3687
+ super().__init__(local=False, disk=options.import_workers_threshold)
3688
+ self.initialized_job_order = initialized_job_order
3689
+ self.tool = tool
3690
+ self.options = options
3691
+ self.runtime_context = runtime_context
3692
+ self.file_to_data = file_to_data
3693
+
3694
+ def run(self, file_store: AbstractFileStore) -> Any:
3695
+ imports_job = ImportsJob(
3696
+ self.file_to_data,
3697
+ self.options.import_workers_threshold,
3698
+ self.options.import_workers_disk,
3699
+ )
3700
+ self.addChild(imports_job)
3701
+ install_imports_job = CWLInstallImportsJob(
3702
+ initialized_job_order=self.initialized_job_order,
3703
+ tool=self.tool,
3704
+ basedir=self.options.basedir,
3705
+ skip_remote=self.options.reference_inputs,
3706
+ bypass_file_store=self.options.bypass_file_store,
3707
+ import_data=imports_job.rv(0),
3708
+ )
3709
+ self.addChild(install_imports_job)
3710
+ imports_job.addFollowOn(install_imports_job)
3711
+
3712
+ start_job = CWLStartJob(
3713
+ install_imports_job.rv(0),
3714
+ install_imports_job.rv(1),
3715
+ runtime_context=self.runtime_context,
3716
+ )
3717
+ self.addChild(start_job)
3718
+ install_imports_job.addFollowOn(start_job)
3719
+
3720
+ return start_job.rv()
3721
+
3722
+
3723
+ class CWLStartJob(CWLNamedJob):
3724
+ """
3725
+ Job responsible for starting the CWL workflow.
3726
+
3727
+ Takes in the workflow/tool and inputs after all files are imported
3728
+ and creates jobs to run those workflows.
3729
+ """
3730
+
3731
+ def __init__(
3732
+ self,
3733
+ tool: Promised[Process],
3734
+ initialized_job_order: Promised[CWLObjectType],
3735
+ runtime_context: cwltool.context.RuntimeContext,
3736
+ **kwargs: Any,
3737
+ ) -> None:
3738
+ super().__init__(**kwargs)
3739
+ self.tool = tool
3740
+ self.initialized_job_order = initialized_job_order
3741
+ self.runtime_context = runtime_context
3742
+
3743
+ def run(self, file_store: AbstractFileStore) -> Any:
3744
+ initialized_job_order = unwrap(self.initialized_job_order)
3745
+ tool = unwrap(self.tool)
3746
+ cwljob, _ = makeJob(
3747
+ tool, initialized_job_order, self.runtime_context, None, None
3748
+ ) # toplevel, no name needed
3749
+ cwljob.cwljob = initialized_job_order
3750
+ self.addChild(cwljob)
3751
+ return cwljob.rv()
3752
+
3753
+
3754
+ def extract_workflow_inputs(
3755
+ options: Namespace, initialized_job_order: CWLObjectType, tool: Process
3756
+ ) -> list[str]:
3757
+ """
3758
+ Collect all the workflow input files to import later.
3759
+ :param options: namespace
3760
+ :param initialized_job_order: cwl object
3761
+ :param tool: tool object
3762
+ :return:
3763
+ """
3764
+ fileindex: dict[str, str] = {}
3765
+ existing: dict[str, str] = {}
3766
+
3767
+ # Extract out all the input files' filenames
3768
+ logger.info("Collecting input files...")
3769
+ fs_access = ToilFsAccess(options.basedir)
3770
+ filenames = visit_files(
3771
+ extract_file_uri_once,
3772
+ fs_access,
3773
+ fileindex,
3774
+ existing,
3775
+ initialized_job_order,
3776
+ mark_broken=True,
3777
+ skip_remote=options.reference_inputs,
3778
+ bypass_file_store=options.bypass_file_store,
3779
+ )
3780
+ # Extract filenames of all the files associated with tools (binaries, etc.).
3781
+ logger.info("Collecting tool-associated files...")
3782
+ tool_filenames = visitSteps(
3783
+ tool,
3784
+ functools.partial(
3785
+ visit_files,
3786
+ extract_file_uri_once,
3787
+ fs_access,
3788
+ fileindex,
3789
+ existing,
3790
+ mark_broken=True,
3791
+ skip_remote=options.reference_inputs,
3792
+ bypass_file_store=options.bypass_file_store,
3793
+ ),
3794
+ )
3795
+ filenames.extend(tool_filenames)
3796
+ return [file for file in filenames if file is not None]
3797
+
3798
+
3799
+ def import_workflow_inputs(
3800
+ jobstore: AbstractJobStore,
3801
+ options: Namespace,
3802
+ initialized_job_order: CWLObjectType,
3803
+ tool: Process,
3804
+ log_level: int = logging.DEBUG,
3805
+ ) -> None:
3806
+ """
3807
+ Import all workflow inputs on the leader.
3808
+
3809
+ Ran when not importing on workers.
3810
+ :param jobstore: Toil jobstore
3811
+ :param options: Namespace
3812
+ :param initialized_job_order: CWL object
3813
+ :param tool: CWL tool
3814
+ :param log_level: log level
3815
+ :return:
3816
+ """
3817
+ fileindex: dict[str, str] = {}
3818
+ existing: dict[str, str] = {}
3819
+
3820
+ # Define something we can call to import a file and get its file
3821
+ # ID.
3822
+ def file_import_function(url: str) -> FileID:
3823
+ logger.log(log_level, "Loading %s...", url)
3824
+ return jobstore.import_file(url, symlink=True)
3825
+
3826
+ import_function = functools.partial(
3827
+ extract_and_convert_file_to_toil_uri, file_import_function
3828
+ )
3829
+ # Import all the input files, some of which may be missing optional
3830
+ # files.
3831
+ logger.info("Importing input files...")
3832
+ fs_access = ToilFsAccess(options.basedir)
3833
+ visit_files(
3834
+ import_function,
3835
+ fs_access,
3836
+ fileindex,
3837
+ existing,
3838
+ initialized_job_order,
3839
+ mark_broken=True,
3840
+ skip_remote=options.reference_inputs,
3841
+ bypass_file_store=options.bypass_file_store,
3842
+ )
3843
+
3844
+ # Make another function for importing tool files. This one doesn't allow
3845
+ # symlinking, since the tools might be coming from storage not accessible
3846
+ # to all nodes.
3847
+ tool_import_function = functools.partial(
3848
+ extract_and_convert_file_to_toil_uri,
3849
+ cast(
3850
+ Callable[[str], FileID],
3851
+ functools.partial(jobstore.import_file, symlink=False),
3852
+ ),
3853
+ )
3854
+
3855
+ # Import all the files associated with tools (binaries, etc.).
3856
+ # Not sure why you would have an optional secondary file here, but
3857
+ # the spec probably needs us to support them.
3858
+ logger.info("Importing tool-associated files...")
3859
+ visitSteps(
3860
+ tool,
3861
+ functools.partial(
3862
+ visit_files,
3863
+ tool_import_function,
3864
+ fs_access,
3865
+ fileindex,
3866
+ existing,
3867
+ mark_broken=True,
3868
+ skip_remote=options.reference_inputs,
3869
+ bypass_file_store=options.bypass_file_store,
3870
+ ),
3871
+ )
3872
+
3873
+ # We always expect to have processed all files that exist
3874
+ for param_name, param_value in initialized_job_order.items():
3875
+ # Loop through all the parameters for the workflow overall.
3876
+ # Drop any files that aren't either imported (for when we use
3877
+ # the file store) or available on disk (for when we don't).
3878
+ # This will properly make them cause an error later if they
3879
+ # were required.
3880
+ rm_unprocessed_secondary_files(param_value)
3881
+
3882
+
3883
+ T = TypeVar("T")
3884
+
3885
+
3295
3886
  def visitSteps(
3296
3887
  cmdline_tool: Process,
3297
- op: Callable[[CommentedMap], None],
3298
- ) -> None:
3888
+ op: Callable[[CommentedMap], list[T]],
3889
+ ) -> list[T]:
3299
3890
  """
3300
3891
  Iterate over a CWL Process object, running the op on each tool description
3301
3892
  CWL object.
3302
3893
  """
3303
3894
  if isinstance(cmdline_tool, cwltool.workflow.Workflow):
3304
3895
  # For workflows we need to dispatch on steps
3896
+ ret = []
3305
3897
  for step in cmdline_tool.steps:
3306
3898
  # Handle the step's tool
3307
- op(step.tool)
3899
+ ret.extend(op(step.tool))
3308
3900
  # Recures on the embedded tool; maybe it's a workflow.
3309
- visitSteps(step.embedded_tool, op)
3901
+ recurse_ret = visitSteps(step.embedded_tool, op)
3902
+ ret.extend(recurse_ret)
3903
+ return ret
3310
3904
  elif isinstance(cmdline_tool, cwltool.process.Process):
3311
3905
  # All CWL Process objects (including CommandLineTool) will have tools
3312
3906
  # if they bothered to run the Process __init__.
3313
- op(cmdline_tool.tool)
3314
- else:
3315
- raise RuntimeError(
3316
- f"Unsupported type encountered in workflow "
3317
- f"traversal: {type(cmdline_tool)}"
3318
- )
3907
+ return op(cmdline_tool.tool)
3908
+ raise RuntimeError(
3909
+ f"Unsupported type encountered in workflow " f"traversal: {type(cmdline_tool)}"
3910
+ )
3319
3911
 
3320
3912
 
3321
3913
  def rm_unprocessed_secondary_files(job_params: Any) -> None:
@@ -3328,7 +3920,7 @@ def rm_unprocessed_secondary_files(job_params: Any) -> None:
3328
3920
 
3329
3921
  def filtered_secondary_files(
3330
3922
  unfiltered_secondary_files: CWLObjectType,
3331
- ) -> List[CWLObjectType]:
3923
+ ) -> list[CWLObjectType]:
3332
3924
  """
3333
3925
  Remove unprocessed secondary files.
3334
3926
 
@@ -3349,28 +3941,33 @@ def filtered_secondary_files(
3349
3941
  intermediate_secondary_files = []
3350
3942
  final_secondary_files = []
3351
3943
  # remove secondary files still containing interpolated strings
3352
- for sf in cast(List[CWLObjectType], unfiltered_secondary_files["secondaryFiles"]):
3944
+ for sf in cast(list[CWLObjectType], unfiltered_secondary_files["secondaryFiles"]):
3353
3945
  sf_bn = cast(str, sf.get("basename", ""))
3354
3946
  sf_loc = cast(str, sf.get("location", ""))
3355
3947
  if ("$(" not in sf_bn) and ("${" not in sf_bn):
3356
3948
  if ("$(" not in sf_loc) and ("${" not in sf_loc):
3357
3949
  intermediate_secondary_files.append(sf)
3358
3950
  else:
3359
- logger.debug("Secondary file %s is dropped because it has an uninterpolated location", sf)
3951
+ logger.debug(
3952
+ "Secondary file %s is dropped because it has an uninterpolated location",
3953
+ sf,
3954
+ )
3360
3955
  else:
3361
- logger.debug("Secondary file %s is dropped because it has an uninterpolated basename", sf)
3956
+ logger.debug(
3957
+ "Secondary file %s is dropped because it has an uninterpolated basename",
3958
+ sf,
3959
+ )
3362
3960
  # remove secondary files that are not present in the filestore or pointing
3363
- # to existant things on disk
3961
+ # to existent things on disk
3364
3962
  for sf in intermediate_secondary_files:
3365
3963
  sf_loc = cast(str, sf.get("location", ""))
3366
- if (
3367
- sf_loc != MISSING_FILE
3368
- or sf.get("class", "") == "Directory"
3369
- ):
3964
+ if not sf_loc.startswith(MISSING_FILE) or sf.get("class", "") == "Directory":
3370
3965
  # Pass imported files, and all Directories
3371
3966
  final_secondary_files.append(sf)
3372
3967
  else:
3373
- logger.debug("Secondary file %s is dropped because it is known to be missing", sf)
3968
+ logger.debug(
3969
+ "Secondary file %s is dropped because it is known to be missing", sf
3970
+ )
3374
3971
  return final_secondary_files
3375
3972
 
3376
3973
 
@@ -3475,8 +4072,6 @@ def determine_load_listing(
3475
4072
  class NoAvailableJobStoreException(Exception):
3476
4073
  """Indicates that no job store name is available."""
3477
4074
 
3478
- pass
3479
-
3480
4075
 
3481
4076
  def generate_default_job_store(
3482
4077
  batch_system_name: Optional[str],
@@ -3544,37 +4139,64 @@ def generate_default_job_store(
3544
4139
 
3545
4140
  usage_message = "\n\n" + textwrap.dedent(
3546
4141
  """
3547
- * All positional arguments [cwl, yml_or_json] must always be specified last for toil-cwl-runner.
3548
- Note: If you're trying to specify a jobstore, please use --jobStore.
3549
-
3550
- Usage: toil-cwl-runner [options] example.cwl example-job.yaml
3551
- Example: toil-cwl-runner \\
3552
- --jobStore aws:us-west-2:jobstore \\
3553
- --realTimeLogging \\
3554
- --logInfo \\
3555
- example.cwl \\
3556
- example-job.yaml
3557
- """[
4142
+ NOTE: If you're trying to specify a jobstore, you must use --jobStore, not a positional argument.
4143
+
4144
+ Usage: toil-cwl-runner [options] <workflow> [<input file>] [workflow options]
4145
+
4146
+ Example: toil-cwl-runner \\
4147
+ --jobStore aws:us-west-2:jobstore \\
4148
+ --realTimeLogging \\
4149
+ --logInfo \\
4150
+ example.cwl \\
4151
+ example-job.yaml \\
4152
+ --wf_input="hello world"
4153
+ """[
3558
4154
  1:
3559
4155
  ]
3560
4156
  )
3561
4157
 
3562
- def get_options(args: List[str]) -> Namespace:
4158
+
4159
+ def get_options(args: list[str]) -> Namespace:
3563
4160
  """
3564
4161
  Parse given args and properly add non-Toil arguments into the cwljob of the Namespace.
3565
4162
  :param args: List of args from command line
3566
4163
  :return: options namespace
3567
4164
  """
3568
- parser = ArgParser()
4165
+ # We can't allow abbreviations in case the workflow defines an option that
4166
+ # is a prefix of a Toil option.
4167
+ parser = ArgParser(
4168
+ allow_abbrev=False,
4169
+ usage="%(prog)s [options] WORKFLOW [INFILE] [WF_OPTIONS...]",
4170
+ description=textwrap.dedent(
4171
+ """
4172
+ positional arguments:
4173
+
4174
+ WORKFLOW CWL file to run.
4175
+
4176
+ INFILE YAML or JSON file of workflow inputs.
4177
+
4178
+ WF_OPTIONS Additional inputs to the workflow as command-line
4179
+ flags. If CWL workflow takes an input, the name of the
4180
+ input can be used as an option. For example:
4181
+
4182
+ %(prog)s workflow.cwl --file1 file
4183
+
4184
+ If an input has the same name as a Toil option, pass
4185
+ '--' before it.
4186
+ """
4187
+ ),
4188
+ formatter_class=RawDescriptionHelpFormatter,
4189
+ )
4190
+
3569
4191
  addOptions(parser, jobstore_as_flag=True, cwl=True)
3570
4192
  options: Namespace
3571
- options, cwl_options = parser.parse_known_args(args)
3572
- options.cwljob.extend(cwl_options)
4193
+ options, extra = parser.parse_known_args(args)
4194
+ options.cwljob = extra
3573
4195
 
3574
4196
  return options
3575
4197
 
3576
4198
 
3577
- def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
4199
+ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
3578
4200
  """Run the main loop for toil-cwl-runner."""
3579
4201
  # Remove cwltool logger's stream handler so it uses Toil's
3580
4202
  cwllogger.removeHandler(defaultStreamHandler)
@@ -3586,25 +4208,21 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3586
4208
 
3587
4209
  # Do cwltool setup
3588
4210
  cwltool.main.setup_schema(args=options, custom_schema_callback=None)
3589
- tmpdir_prefix = options.tmpdir_prefix = options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
3590
-
3591
- # We need a workdir for the CWL runtime contexts.
3592
- if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX:
3593
- # if tmpdir_prefix is not the default value, move
3594
- # workdir and the default job store under it
3595
- workdir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3596
- else:
3597
- # Use a directory in the default tmpdir
3598
- workdir = mkdtemp()
3599
- # Make sure workdir doesn't exist so it can be a job store
3600
- os.rmdir(workdir)
4211
+ tmpdir_prefix = options.tmpdir_prefix = (
4212
+ options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
4213
+ )
4214
+ tmp_outdir_prefix = options.tmp_outdir_prefix or tmpdir_prefix
4215
+ workdir = options.workDir or tmp_outdir_prefix
3601
4216
 
3602
4217
  if options.jobStore is None:
4218
+ jobstore = cwltool.utils.create_tmp_dir(tmp_outdir_prefix)
4219
+ # Make sure directory doesn't exist so it can be a job store
4220
+ os.rmdir(jobstore)
3603
4221
  # Pick a default job store specifier appropriate to our choice of batch
3604
4222
  # system and provisioner and installed modules, given this available
3605
4223
  # local directory name. Fail if no good default can be used.
3606
4224
  options.jobStore = generate_default_job_store(
3607
- options.batchSystem, options.provisioner, workdir
4225
+ options.batchSystem, options.provisioner, jobstore
3608
4226
  )
3609
4227
 
3610
4228
  options.doc_cache = True
@@ -3612,17 +4230,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3612
4230
  options.do_validate = True
3613
4231
  options.pack = False
3614
4232
  options.print_subgraph = False
3615
- if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.workDir is None:
3616
- # We need to override workDir because by default Toil will pick
3617
- # somewhere under the system temp directory if unset, ignoring
3618
- # --tmpdir-prefix.
3619
- #
3620
- # If set, workDir needs to exist, so we directly use the prefix
3621
- options.workDir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3622
- if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.coordination_dir is None:
3623
- # override coordination_dir as default Toil will pick somewhere else
3624
- # ignoring --tmpdir_prefix
3625
- options.coordination_dir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3626
4233
 
3627
4234
  if options.batchSystem == "kubernetes":
3628
4235
  # Containers under Kubernetes can only run in Singularity
@@ -3640,12 +4247,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3640
4247
  logger.debug(f"Final job store {options.jobStore} and workDir {options.workDir}")
3641
4248
 
3642
4249
  outdir = os.path.abspath(options.outdir or os.getcwd())
3643
- tmp_outdir_prefix = os.path.abspath(
3644
- options.tmp_outdir_prefix or DEFAULT_TMPDIR_PREFIX
3645
- )
3646
-
3647
- fileindex: Dict[str, str] = {}
3648
- existing: Dict[str, str] = {}
3649
4250
  conf_file = getattr(options, "beta_dependency_resolvers_configuration", None)
3650
4251
  use_conda_dependencies = getattr(options, "beta_conda_dependencies", None)
3651
4252
  job_script_provider = None
@@ -3660,11 +4261,22 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3660
4261
  )
3661
4262
  runtime_context.workdir = workdir # type: ignore[attr-defined]
3662
4263
  runtime_context.outdir = outdir
4264
+ setattr(runtime_context, "cwl_default_ram", options.cwl_default_ram)
3663
4265
  runtime_context.move_outputs = "leave"
3664
4266
  runtime_context.rm_tmpdir = False
3665
4267
  runtime_context.streaming_allowed = not options.disable_streaming
4268
+ if options.cachedir is not None:
4269
+ runtime_context.cachedir = os.path.abspath(options.cachedir)
4270
+ # Automatically bypass the file store to be compatible with cwltool caching
4271
+ # Otherwise, the CWL caching code makes links to temporary local copies
4272
+ # of filestore files and caches those.
4273
+ logger.debug("CWL task caching is turned on. Bypassing file store.")
4274
+ options.bypass_file_store = True
3666
4275
  if options.mpi_config_file is not None:
3667
4276
  runtime_context.mpi_config = MpiConfig.load(options.mpi_config_file)
4277
+ if cwltool.main.check_working_directories(runtime_context) is not None:
4278
+ logger.error("Failed to create directory. If using tmpdir_prefix, tmpdir_outdir_prefix, or cachedir, consider changing directory locations.")
4279
+ return 1
3668
4280
  setattr(runtime_context, "bypass_file_store", options.bypass_file_store)
3669
4281
  if options.bypass_file_store and options.destBucket:
3670
4282
  # We use the file store to write to buckets, so we can't do this (yet?)
@@ -3694,225 +4306,210 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3694
4306
  runtime_context.research_obj = research_obj
3695
4307
 
3696
4308
  try:
3697
- with Toil(options) as toil:
3698
- if options.restart:
3699
- outobj = toil.restart()
3700
- else:
3701
- loading_context.hints = [
3702
- {
3703
- "class": "ResourceRequirement",
3704
- "coresMin": toil.config.defaultCores,
3705
- "ramMin": toil.config.defaultMemory / (2**20),
3706
- "outdirMin": toil.config.defaultDisk / (2**20),
3707
- "tmpdirMin": 0,
3708
- }
3709
- ]
3710
- loading_context.construct_tool_object = toil_make_tool
3711
- loading_context.strict = not options.not_strict
3712
- options.workflow = options.cwltool
3713
- options.job_order = options.cwljob
3714
4309
 
3715
- try:
3716
- uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
3717
- options.cwltool,
3718
- loading_context.resolver,
3719
- loading_context.fetcher_constructor,
3720
- )
3721
- except ValidationException:
3722
- print(
3723
- "\nYou may be getting this error because your arguments are incorrect or out of order."
3724
- + usage_message,
3725
- file=sys.stderr,
3726
- )
3727
- raise
4310
+ # We might have workflow metadata to pass to Toil
4311
+ workflow_name=None
4312
+ trs_spec = None
4313
+
4314
+ if not options.restart:
4315
+ # Make a version of the config based on the initial options, for
4316
+ # setting up CWL option stuff
4317
+ expected_config = Config()
4318
+ expected_config.setOptions(options)
4319
+
4320
+ # Before showing the options to any cwltool stuff that wants to
4321
+ # load the workflow, transform options.cwltool, where our
4322
+ # argument for what to run is, to handle Dockstore workflows.
4323
+ options.cwltool, trs_spec = resolve_workflow(options.cwltool)
4324
+ # Figure out what to call the workflow
4325
+ workflow_name = trs_spec or options.cwltool
4326
+
4327
+ # TODO: why are we doing this? Does this get applied to all
4328
+ # tools as a default or something?
4329
+ loading_context.hints = [
4330
+ {
4331
+ "class": "ResourceRequirement",
4332
+ "coresMin": expected_config.defaultCores,
4333
+ # Don't include any RAM requirement because we want to
4334
+ # know when tools don't manually ask for RAM.
4335
+ "outdirMin": expected_config.defaultDisk / (2**20),
4336
+ "tmpdirMin": 0,
4337
+ }
4338
+ ]
4339
+ loading_context.construct_tool_object = toil_make_tool
4340
+ loading_context.strict = not options.not_strict
4341
+ options.workflow = options.cwltool
4342
+ options.job_order = options.cwljob
3728
4343
 
3729
- options.tool_help = None
3730
- options.debug = options.logLevel == "DEBUG"
3731
- job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
3732
- options,
3733
- sys.stdin,
4344
+ try:
4345
+ uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
4346
+ options.cwltool,
4347
+ loading_context.resolver,
3734
4348
  loading_context.fetcher_constructor,
3735
- loading_context.overrides_list,
3736
- tool_file_uri,
3737
4349
  )
3738
- if options.overrides:
3739
- loading_context.overrides_list.extend(
3740
- cwltool.load_tool.load_overrides(
3741
- schema_salad.ref_resolver.file_uri(
3742
- os.path.abspath(options.overrides)
3743
- ),
3744
- tool_file_uri,
3745
- )
3746
- )
3747
-
3748
- loading_context, workflowobj, uri = cwltool.load_tool.fetch_document(
3749
- uri, loading_context
4350
+ except ValidationException:
4351
+ print(
4352
+ "\nYou may be getting this error because your arguments are incorrect or out of order."
4353
+ + usage_message,
4354
+ file=sys.stderr,
3750
4355
  )
3751
- loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
3752
- loading_context, workflowobj, uri
4356
+ raise
4357
+
4358
+ # Attempt to prepull the containers
4359
+ if not options.no_prepull and not options.no_container:
4360
+ try_prepull(uri, runtime_context, expected_config.batchSystem)
4361
+
4362
+ options.tool_help = None
4363
+ options.debug = options.logLevel == "DEBUG"
4364
+ job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
4365
+ options,
4366
+ sys.stdin,
4367
+ loading_context.fetcher_constructor,
4368
+ loading_context.overrides_list,
4369
+ tool_file_uri,
4370
+ )
4371
+ if options.overrides:
4372
+ loading_context.overrides_list.extend(
4373
+ cwltool.load_tool.load_overrides(
4374
+ schema_salad.ref_resolver.file_uri(
4375
+ os.path.abspath(options.overrides)
4376
+ ),
4377
+ tool_file_uri,
4378
+ )
3753
4379
  )
3754
- if not loading_context.loader:
3755
- raise RuntimeError("cwltool loader is not set.")
3756
- processobj, metadata = loading_context.loader.resolve_ref(uri)
3757
- processobj = cast(Union[CommentedMap, CommentedSeq], processobj)
3758
4380
 
3759
- document_loader = loading_context.loader
4381
+ loading_context, workflowobj, uri = cwltool.load_tool.fetch_document(
4382
+ uri, loading_context
4383
+ )
4384
+ loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
4385
+ loading_context, workflowobj, uri
4386
+ )
4387
+ if not loading_context.loader:
4388
+ raise RuntimeError("cwltool loader is not set.")
4389
+ processobj, metadata = loading_context.loader.resolve_ref(uri)
4390
+ processobj = cast(Union[CommentedMap, CommentedSeq], processobj)
3760
4391
 
3761
- if options.provenance and runtime_context.research_obj:
3762
- cwltool.cwlprov.writablebagfile.packed_workflow(
3763
- runtime_context.research_obj,
3764
- cwltool.main.print_pack(loading_context, uri),
3765
- )
4392
+ document_loader = loading_context.loader
3766
4393
 
3767
- try:
3768
- tool = cwltool.load_tool.make_tool(uri, loading_context)
3769
- scan_for_unsupported_requirements(
3770
- tool, bypass_file_store=options.bypass_file_store
3771
- )
3772
- except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
3773
- logging.error(err)
3774
- return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3775
- runtime_context.secret_store = SecretStore()
4394
+ if options.provenance and runtime_context.research_obj:
4395
+ cwltool.cwlprov.writablebagfile.packed_workflow(
4396
+ runtime_context.research_obj,
4397
+ cwltool.main.print_pack(loading_context, uri),
4398
+ )
3776
4399
 
3777
- try:
3778
- # Get the "order" for the execution of the root job. CWLTool
3779
- # doesn't document this much, but this is an "order" in the
3780
- # sense of a "specification" for running a single job. It
3781
- # describes the inputs to the workflow.
3782
- initialized_job_order = cwltool.main.init_job_order(
3783
- job_order_object,
3784
- options,
3785
- tool,
3786
- jobloader,
3787
- sys.stdout,
3788
- make_fs_access=runtime_context.make_fs_access,
3789
- input_basedir=options.basedir,
3790
- secret_store=runtime_context.secret_store,
3791
- input_required=True,
4400
+ try:
4401
+ tool = cwltool.load_tool.make_tool(uri, loading_context)
4402
+ scan_for_unsupported_requirements(
4403
+ tool, bypass_file_store=options.bypass_file_store
4404
+ )
4405
+ except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
4406
+ logging.error(err)
4407
+ return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
4408
+ runtime_context.secret_store = SecretStore()
4409
+
4410
+ try:
4411
+ # Get the "order" for the execution of the root job. CWLTool
4412
+ # doesn't document this much, but this is an "order" in the
4413
+ # sense of a "specification" for running a single job. It
4414
+ # describes the inputs to the workflow.
4415
+ initialized_job_order = cwltool.main.init_job_order(
4416
+ job_order_object,
4417
+ options,
4418
+ tool,
4419
+ jobloader,
4420
+ sys.stdout,
4421
+ make_fs_access=runtime_context.make_fs_access,
4422
+ input_basedir=options.basedir,
4423
+ secret_store=runtime_context.secret_store,
4424
+ input_required=True,
4425
+ )
4426
+ except SystemExit as err:
4427
+ if err.code == 2: # raised by argparse's parse_args() function
4428
+ print(
4429
+ "\nIf both a CWL file and an input object (YAML/JSON) file were "
4430
+ "provided, the problem may be the argument order."
4431
+ + usage_message,
4432
+ file=sys.stderr,
3792
4433
  )
3793
- except SystemExit as err:
3794
- if err.code == 2: # raised by argparse's parse_args() function
3795
- print(
3796
- "\nIf both a CWL file and an input object (YAML/JSON) file were "
3797
- "provided, this may be the argument order." + usage_message,
3798
- file=sys.stderr,
3799
- )
3800
- raise
4434
+ raise
3801
4435
 
3802
- # Leave the defaults un-filled in the top-level order. The tool or
3803
- # workflow will fill them when it runs
3804
-
3805
- for inp in tool.tool["inputs"]:
3806
- if (
3807
- shortname(inp["id"]) in initialized_job_order
3808
- and inp["type"] == "File"
3809
- ):
3810
- cast(CWLObjectType, initialized_job_order[shortname(inp["id"])])[
3811
- "streamable"
3812
- ] = inp.get("streamable", False)
3813
- # TODO also for nested types that contain streamable Files
3814
-
3815
- runtime_context.use_container = not options.no_container
3816
- runtime_context.tmp_outdir_prefix = os.path.realpath(tmp_outdir_prefix)
3817
- runtime_context.job_script_provider = job_script_provider
3818
- runtime_context.force_docker_pull = options.force_docker_pull
3819
- runtime_context.no_match_user = options.no_match_user
3820
- runtime_context.no_read_only = options.no_read_only
3821
- runtime_context.basedir = options.basedir
3822
- if not options.bypass_file_store:
3823
- # If we're using the file store we need to start moving output
3824
- # files now.
3825
- runtime_context.move_outputs = "move"
3826
-
3827
- # We instantiate an early builder object here to populate indirect
3828
- # secondaryFile references using cwltool's library because we need
3829
- # to resolve them before toil imports them into the filestore.
3830
- # A second builder will be built in the job's run method when toil
3831
- # actually starts the cwl job.
3832
- # Note that this accesses input files for tools, so the
3833
- # ToilFsAccess needs to be set up if we want to be able to use
3834
- # URLs.
3835
- builder = tool._init_job(initialized_job_order, runtime_context)
4436
+ # Leave the defaults un-filled in the top-level order. The tool or
4437
+ # workflow will fill them when it runs
3836
4438
 
4439
+ for inp in tool.tool["inputs"]:
4440
+ if (
4441
+ shortname(inp["id"]) in initialized_job_order
4442
+ and inp["type"] == "File"
4443
+ ):
4444
+ cast(CWLObjectType, initialized_job_order[shortname(inp["id"])])[
4445
+ "streamable"
4446
+ ] = inp.get("streamable", False)
4447
+ # TODO also for nested types that contain streamable Files
4448
+
4449
+ runtime_context.use_container = not options.no_container
4450
+ runtime_context.tmp_outdir_prefix = os.path.realpath(tmp_outdir_prefix)
4451
+ runtime_context.job_script_provider = job_script_provider
4452
+ runtime_context.force_docker_pull = options.force_docker_pull
4453
+ runtime_context.no_match_user = options.no_match_user
4454
+ runtime_context.no_read_only = options.no_read_only
4455
+ runtime_context.basedir = options.basedir
4456
+ if not options.bypass_file_store:
4457
+ # If we're using the file store we need to start moving output
4458
+ # files now.
4459
+ runtime_context.move_outputs = "move"
4460
+
4461
+ # We instantiate an early builder object here to populate indirect
4462
+ # secondaryFile references using cwltool's library because we need
4463
+ # to resolve them before toil imports them into the filestore.
4464
+ # A second builder will be built in the job's run method when toil
4465
+ # actually starts the cwl job.
4466
+ # Note that this accesses input files for tools, so the
4467
+ # ToilFsAccess needs to be set up if we want to be able to use
4468
+ # URLs.
4469
+ builder = tool._init_job(initialized_job_order, runtime_context)
4470
+ if not isinstance(tool, cwltool.workflow.Workflow):
3837
4471
  # make sure this doesn't add listing items; if shallow_listing is
3838
4472
  # selected, it will discover dirs one deep and then again later on
3839
- # (probably when the cwltool builder gets ahold of the job in the
3840
- # CWL job's run()), producing 2+ deep listings instead of only 1.
4473
+ # (when the cwltool builder gets constructed from the job in the
4474
+ # CommandLineTool's job() method,
4475
+ # see https://github.com/common-workflow-language/cwltool/blob/9cda157cb4380e9d30dec29f0452c56d0c10d064/cwltool/command_line_tool.py#L951),
4476
+ # producing 2+ deep listings instead of only 1.
4477
+ # ExpressionTool also uses a builder, see https://github.com/common-workflow-language/cwltool/blob/9cda157cb4380e9d30dec29f0452c56d0c10d064/cwltool/command_line_tool.py#L207
4478
+ # Workflows don't need this because they don't go through CommandLineTool or ExpressionTool
3841
4479
  builder.loadListing = "no_listing"
3842
4480
 
3843
- builder.bind_input(
3844
- tool.inputs_record_schema,
3845
- initialized_job_order,
3846
- discover_secondaryFiles=True,
3847
- )
4481
+ # make sure this doesn't add listing items; if shallow_listing is
4482
+ # selected, it will discover dirs one deep and then again later on
4483
+ # (probably when the cwltool builder gets ahold of the job in the
4484
+ # CWL job's run()), producing 2+ deep listings instead of only 1.
4485
+ builder.loadListing = "no_listing"
3848
4486
 
3849
- # Define something we can call to import a file and get its file
3850
- # ID.
3851
- # We cast this because import_file is overloaded depending on if we
3852
- # pass a shared file name or not, and we know the way we call it we
3853
- # always get a FileID out.
3854
- file_import_function = cast(
3855
- Callable[[str], FileID],
3856
- functools.partial(toil.import_file, symlink=True),
3857
- )
4487
+ builder.bind_input(
4488
+ tool.inputs_record_schema,
4489
+ initialized_job_order,
4490
+ discover_secondaryFiles=True,
4491
+ )
3858
4492
 
3859
- # Import all the input files, some of which may be missing optional
3860
- # files.
3861
- logger.info("Importing input files...")
3862
- fs_access = ToilFsAccess(options.basedir)
3863
- import_files(
3864
- file_import_function,
3865
- fs_access,
3866
- fileindex,
3867
- existing,
3868
- initialized_job_order,
3869
- mark_broken=True,
3870
- skip_remote=options.reference_inputs,
3871
- bypass_file_store=options.bypass_file_store,
3872
- log_level=logging.INFO,
3873
- )
3874
- # Import all the files associated with tools (binaries, etc.).
3875
- # Not sure why you would have an optional secondary file here, but
3876
- # the spec probably needs us to support them.
3877
- logger.info("Importing tool-associated files...")
3878
- visitSteps(
3879
- tool,
3880
- functools.partial(
3881
- import_files,
3882
- file_import_function,
3883
- fs_access,
3884
- fileindex,
3885
- existing,
3886
- mark_broken=True,
3887
- skip_remote=options.reference_inputs,
3888
- bypass_file_store=options.bypass_file_store,
3889
- log_level=logging.INFO,
3890
- ),
3891
- )
4493
+ logger.info("Creating root job")
4494
+ logger.debug("Root tool: %s", tool)
4495
+ tool = remove_pickle_problems(tool)
3892
4496
 
3893
- # We always expect to have processed all files that exist
3894
- for param_name, param_value in initialized_job_order.items():
3895
- # Loop through all the parameters for the workflow overall.
3896
- # Drop any files that aren't either imported (for when we use
3897
- # the file store) or available on disk (for when we don't).
3898
- # This will properly make them cause an error later if they
3899
- # were required.
3900
- rm_unprocessed_secondary_files(param_value)
3901
-
3902
- logger.info("Creating root job")
3903
- logger.debug("Root tool: %s", tool)
4497
+ with Toil(options, workflow_name=workflow_name, trs_spec=trs_spec) as toil:
4498
+ if options.restart:
4499
+ outobj = toil.restart()
4500
+ else:
3904
4501
  try:
3905
- wf1, _ = makeJob(
4502
+ wf1 = makeRootJob(
3906
4503
  tool=tool,
3907
4504
  jobobj={},
3908
4505
  runtime_context=runtime_context,
3909
- parent_name=None, # toplevel, no name needed
3910
- conditional=None,
4506
+ initialized_job_order=initialized_job_order,
4507
+ options=options,
4508
+ toil=toil,
3911
4509
  )
3912
4510
  except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
3913
4511
  logging.error(err)
3914
4512
  return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3915
- wf1.cwljob = initialized_job_order
3916
4513
  logger.info("Starting workflow")
3917
4514
  outobj = toil.start(wf1)
3918
4515
 
@@ -3929,7 +4526,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3929
4526
  outobj,
3930
4527
  outdir,
3931
4528
  destBucket=options.destBucket,
3932
- log_level=logging.INFO
4529
+ log_level=logging.INFO,
3933
4530
  )
3934
4531
  logger.info("Stored workflow outputs")
3935
4532
 
@@ -3992,8 +4589,14 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3992
4589
  else:
3993
4590
  logging.error(err)
3994
4591
  return 1
3995
- except (InsufficientSystemResources, LocatorException, InvalidImportExportUrlException, UnimplementedURLException,
3996
- JobTooBigError) as err:
4592
+ except (
4593
+ InsufficientSystemResources,
4594
+ LocatorException,
4595
+ InvalidImportExportUrlException,
4596
+ UnimplementedURLException,
4597
+ JobTooBigError,
4598
+ FileNotFoundError
4599
+ ) as err:
3997
4600
  logging.error(err)
3998
4601
  return 1
3999
4602