toil 7.0.0__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. toil/__init__.py +121 -83
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +137 -77
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
  5. toil/batchSystems/awsBatch.py +237 -128
  6. toil/batchSystems/cleanup_support.py +22 -16
  7. toil/batchSystems/contained_executor.py +30 -26
  8. toil/batchSystems/gridengine.py +85 -49
  9. toil/batchSystems/htcondor.py +164 -87
  10. toil/batchSystems/kubernetes.py +622 -386
  11. toil/batchSystems/local_support.py +17 -12
  12. toil/batchSystems/lsf.py +132 -79
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +288 -149
  16. toil/batchSystems/mesos/executor.py +77 -49
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +38 -29
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +293 -123
  21. toil/batchSystems/slurm.py +489 -137
  22. toil/batchSystems/torque.py +46 -32
  23. toil/bus.py +141 -73
  24. toil/common.py +630 -359
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1114 -532
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +62 -41
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +88 -57
  32. toil/fileStores/cachingFileStore.py +711 -247
  33. toil/fileStores/nonCachingFileStore.py +113 -75
  34. toil/job.py +988 -315
  35. toil/jobStores/abstractJobStore.py +387 -243
  36. toil/jobStores/aws/jobStore.py +727 -403
  37. toil/jobStores/aws/utils.py +161 -109
  38. toil/jobStores/conftest.py +1 -0
  39. toil/jobStores/fileJobStore.py +289 -151
  40. toil/jobStores/googleJobStore.py +137 -70
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +614 -269
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +55 -28
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +193 -58
  49. toil/lib/aws/utils.py +238 -218
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +83 -49
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +322 -209
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +4 -2
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +99 -11
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +65 -18
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +19 -7
  71. toil/lib/retry.py +115 -77
  72. toil/lib/threading.py +282 -80
  73. toil/lib/throttle.py +15 -14
  74. toil/options/common.py +834 -401
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +70 -19
  78. toil/provisioners/__init__.py +111 -46
  79. toil/provisioners/abstractProvisioner.py +322 -157
  80. toil/provisioners/aws/__init__.py +62 -30
  81. toil/provisioners/aws/awsProvisioner.py +980 -627
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +147 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +127 -61
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +148 -64
  98. toil/test/__init__.py +263 -179
  99. toil/test/batchSystems/batchSystemTest.py +438 -195
  100. toil/test/batchSystems/batch_system_plugin_test.py +18 -7
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +93 -47
  104. toil/test/cactus/test_cactus_integration.py +20 -22
  105. toil/test/cwl/cwlTest.py +271 -71
  106. toil/test/cwl/measure_default_memory.cwl +12 -0
  107. toil/test/cwl/not_run_required_input.cwl +29 -0
  108. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  109. toil/test/docs/scriptsTest.py +60 -34
  110. toil/test/jobStores/jobStoreTest.py +412 -235
  111. toil/test/lib/aws/test_iam.py +116 -48
  112. toil/test/lib/aws/test_s3.py +16 -9
  113. toil/test/lib/aws/test_utils.py +5 -6
  114. toil/test/lib/dockerTest.py +118 -141
  115. toil/test/lib/test_conversions.py +113 -115
  116. toil/test/lib/test_ec2.py +57 -49
  117. toil/test/lib/test_integration.py +104 -0
  118. toil/test/lib/test_misc.py +12 -5
  119. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  120. toil/test/mesos/helloWorld.py +7 -6
  121. toil/test/mesos/stress.py +25 -20
  122. toil/test/options/options.py +7 -2
  123. toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
  124. toil/test/provisioners/clusterScalerTest.py +440 -250
  125. toil/test/provisioners/clusterTest.py +81 -42
  126. toil/test/provisioners/gceProvisionerTest.py +174 -100
  127. toil/test/provisioners/provisionerTest.py +25 -13
  128. toil/test/provisioners/restartScript.py +5 -4
  129. toil/test/server/serverTest.py +188 -141
  130. toil/test/sort/restart_sort.py +137 -68
  131. toil/test/sort/sort.py +134 -66
  132. toil/test/sort/sortTest.py +91 -49
  133. toil/test/src/autoDeploymentTest.py +140 -100
  134. toil/test/src/busTest.py +20 -18
  135. toil/test/src/checkpointTest.py +8 -2
  136. toil/test/src/deferredFunctionTest.py +49 -35
  137. toil/test/src/dockerCheckTest.py +33 -26
  138. toil/test/src/environmentTest.py +20 -10
  139. toil/test/src/fileStoreTest.py +538 -271
  140. toil/test/src/helloWorldTest.py +7 -4
  141. toil/test/src/importExportFileTest.py +61 -31
  142. toil/test/src/jobDescriptionTest.py +32 -17
  143. toil/test/src/jobEncapsulationTest.py +2 -0
  144. toil/test/src/jobFileStoreTest.py +74 -50
  145. toil/test/src/jobServiceTest.py +187 -73
  146. toil/test/src/jobTest.py +120 -70
  147. toil/test/src/miscTests.py +19 -18
  148. toil/test/src/promisedRequirementTest.py +82 -36
  149. toil/test/src/promisesTest.py +7 -6
  150. toil/test/src/realtimeLoggerTest.py +6 -6
  151. toil/test/src/regularLogTest.py +71 -37
  152. toil/test/src/resourceTest.py +80 -49
  153. toil/test/src/restartDAGTest.py +36 -22
  154. toil/test/src/resumabilityTest.py +9 -2
  155. toil/test/src/retainTempDirTest.py +45 -14
  156. toil/test/src/systemTest.py +12 -8
  157. toil/test/src/threadingTest.py +44 -25
  158. toil/test/src/toilContextManagerTest.py +10 -7
  159. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  160. toil/test/src/workerTest.py +33 -16
  161. toil/test/utils/toilDebugTest.py +70 -58
  162. toil/test/utils/toilKillTest.py +4 -5
  163. toil/test/utils/utilsTest.py +239 -102
  164. toil/test/wdl/wdltoil_test.py +789 -148
  165. toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
  166. toil/toilState.py +52 -26
  167. toil/utils/toilConfig.py +13 -4
  168. toil/utils/toilDebugFile.py +44 -27
  169. toil/utils/toilDebugJob.py +85 -25
  170. toil/utils/toilDestroyCluster.py +11 -6
  171. toil/utils/toilKill.py +8 -3
  172. toil/utils/toilLaunchCluster.py +251 -145
  173. toil/utils/toilMain.py +37 -16
  174. toil/utils/toilRsyncCluster.py +27 -14
  175. toil/utils/toilSshCluster.py +45 -22
  176. toil/utils/toilStats.py +75 -36
  177. toil/utils/toilStatus.py +226 -119
  178. toil/utils/toilUpdateEC2Instances.py +3 -1
  179. toil/version.py +11 -11
  180. toil/wdl/utils.py +5 -5
  181. toil/wdl/wdltoil.py +3513 -1052
  182. toil/worker.py +269 -128
  183. toil-8.0.0.dist-info/METADATA +173 -0
  184. toil-8.0.0.dist-info/RECORD +253 -0
  185. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  186. toil-7.0.0.dist-info/METADATA +0 -158
  187. toil-7.0.0.dist-info/RECORD +0 -244
  188. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/LICENSE +0 -0
  189. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  190. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py CHANGED
@@ -1,4 +1,5 @@
 """Implemented support for Common Workflow Language (CWL) for Toil."""
+
 # Copyright (C) 2015 Curoverse, Inc
 # Copyright (C) 2015-2021 Regents of the University of California
 # Copyright (C) 2019-2020 Seven Bridges
@@ -33,25 +34,26 @@ import stat
 import sys
 import textwrap
 import uuid
+from collections.abc import Iterator, Mapping, MutableMapping, MutableSequence
 from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
 from threading import Thread
-from typing import (IO,
-                    Any,
-                    Callable,
-                    Dict,
-                    Iterator,
-                    List,
-                    Mapping,
-                    MutableMapping,
-                    MutableSequence,
-                    Optional,
-                    Sequence,
-                    TextIO,
-                    Tuple,
-                    Type,
-                    TypeVar,
-                    Union,
-                    cast)
+from typing import (
+    IO,
+    Any,
+    Callable,
+    Iterator,
+    Mapping,
+    MutableMapping,
+    MutableSequence,
+    Optional,
+    TextIO,
+    Tuple,
+    TypeVar,
+    Union,
+    cast,
+    Literal,
+    Protocol,
+)
 from urllib.parse import quote, unquote, urlparse, urlsplit
 
 import cwl_utils.errors
@@ -65,59 +67,87 @@ import cwltool.load_tool
 import cwltool.main
 import cwltool.resolver
 import schema_salad.ref_resolver
+
+# This is also in configargparse but MyPy doesn't know it
+from argparse import RawDescriptionHelpFormatter
 from configargparse import ArgParser, Namespace
 from cwltool.loghandler import _logger as cwllogger
 from cwltool.loghandler import defaultStreamHandler
 from cwltool.mpi import MpiConfig
 from cwltool.mutation import MutationManager
 from cwltool.pathmapper import MapperEnt, PathMapper
-from cwltool.process import (Process,
-                             add_sizes,
-                             compute_checksums,
-                             fill_in_defaults,
-                             shortname)
+from cwltool.process import (
+    Process,
+    add_sizes,
+    compute_checksums,
+    fill_in_defaults,
+    shortname,
+)
 from cwltool.secrets import SecretStore
-from cwltool.software_requirements import (DependenciesConfiguration,
-                                           get_container_from_software_requirements)
+from cwltool.singularity import SingularityCommandLineJob
+from cwltool.software_requirements import (
+    DependenciesConfiguration,
+    get_container_from_software_requirements,
+)
 from cwltool.stdfsaccess import StdFsAccess, abspath
-from cwltool.utils import (CWLObjectType,
-                           CWLOutputType,
-                           DirectoryType,
-                           adjustDirObjs,
-                           aslist,
-                           downloadHttpFile,
-                           get_listing,
-                           normalizeFilesDirs,
-                           visit_class)
+from cwltool.utils import (
+    CWLObjectType,
+    CWLOutputType,
+    DirectoryType,
+    adjustDirObjs,
+    aslist,
+    downloadHttpFile,
+    get_listing,
+    normalizeFilesDirs,
+    visit_class,
+)
 from ruamel.yaml.comments import CommentedMap, CommentedSeq
 from schema_salad.avro.schema import Names
 from schema_salad.exceptions import ValidationException
 from schema_salad.ref_resolver import file_uri, uri_file_path
 from schema_salad.sourceline import SourceLine
-from typing_extensions import Literal
 
 from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
 from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
-from toil.common import Toil, addOptions
+from toil.common import Config, Toil, addOptions
 from toil.cwl import check_cwltool_version
+from toil.lib.integration import resolve_workflow
+from toil.lib.misc import call_command
 from toil.provisioners.clusterScaler import JobTooBigError
 
 check_cwltool_version()
-from toil.cwl.utils import (CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
-                            CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
-                            download_structure,
-                            get_from_structure,
-                            visit_cwl_class_and_reduce)
+from toil.cwl.utils import (
+    CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
+    CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
+    download_structure,
+    get_from_structure,
+    visit_cwl_class_and_reduce,
+)
 from toil.exceptions import FailedJobsException
 from toil.fileStores import FileID
 from toil.fileStores.abstractFileStore import AbstractFileStore
-from toil.job import AcceleratorRequirement, Job, Promise, Promised, unwrap
-from toil.jobStores.abstractJobStore import (AbstractJobStore, NoSuchFileException, LocatorException,
-                                             InvalidImportExportUrlException, UnimplementedURLException)
+from toil.job import (
+    AcceleratorRequirement,
+    Job,
+    Promise,
+    Promised,
+    unwrap,
+    ImportsJob,
+    get_file_sizes,
+    FileMetadata,
+    WorkerImportJob,
+)
+from toil.jobStores.abstractJobStore import (
+    AbstractJobStore,
+    NoSuchFileException,
+    InvalidImportExportUrlException,
+    LocatorException,
+)
+from toil.lib.exceptions import UnimplementedURLException
 from toil.jobStores.fileJobStore import FileJobStore
 from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
 from toil.lib.io import mkdtemp
-from toil.lib.threading import ExceptionalThread
+from toil.lib.threading import ExceptionalThread, global_mutex
 from toil.statsAndLogging import DEFAULT_LOGLEVEL
 
 logger = logging.getLogger(__name__)
@@ -149,7 +179,7 @@ def cwltoil_was_removed() -> None:
 # output object to the correct key of the input object.
 
 
-class UnresolvedDict(Dict[Any, Any]):
+class UnresolvedDict(dict[Any, Any]):
     """Tag to indicate a dict contains promises that must be resolved."""
 
 
@@ -184,7 +214,7 @@ def filter_skip_null(name: str, value: Any) -> Any:
     return value
 
 
-def _filter_skip_null(value: Any, err_flag: List[bool]) -> Any:
+def _filter_skip_null(value: Any, err_flag: list[bool]) -> Any:
     """
     Private implementation for recursively filtering out SkipNull objects from 'value'.
 
@@ -233,18 +263,50 @@ def ensure_no_collisions(
         seen_names.add(wanted_name)
 
 
+def try_prepull(
+    cwl_tool_uri: str, runtime_context: cwltool.context.RuntimeContext, batchsystem: str
+) -> None:
+    """
+    Try to prepull all containers in a CWL workflow with Singularity or Docker.
+    This will not prepull the default container specified on the command line.
+    :param cwl_tool_uri: CWL workflow URL. Fragments are accepted as well
+    :param runtime_context: runtime context of cwltool
+    :param batchsystem: type of Toil batchsystem
+    :return:
+    """
+    if runtime_context.singularity:
+        if "CWL_SINGULARITY_CACHE" in os.environ:
+            logger.info("Prepulling the workflow's containers with Singularity...")
+            call_command(
+                [
+                    "cwl-docker-extract",
+                    "--singularity",
+                    "--dir",
+                    os.environ["CWL_SINGULARITY_CACHE"],
+                    cwl_tool_uri,
+                ]
+            )
+    elif not runtime_context.user_space_docker_cmd and not runtime_context.podman:
+        # For udocker and podman prefetching is unimplemented
+        # This is docker
+        if batchsystem == "single_machine":
+            # Only on single machine will the docker daemon be accessible by all workers and the leader
+            logger.info("Prepulling the workflow's containers with Docker...")
+            call_command(["cwl-docker-extract", cwl_tool_uri])
+
+
 class Conditional:
     """
     Object holding conditional expression until we are ready to evaluate it.
 
-    Evaluation occurs at the moment the encloses step is ready to run.
+    Evaluation occurs before the enclosing step's inputs are type-checked.
     """
 
     def __init__(
         self,
         expression: Optional[str] = None,
-        outputs: Union[Dict[str, CWLOutputType], None] = None,
-        requirements: Optional[List[CWLObjectType]] = None,
+        outputs: Union[dict[str, CWLOutputType], None] = None,
+        requirements: Optional[list[CWLObjectType]] = None,
         container_engine: str = "docker",
     ):
         """
@@ -289,7 +351,7 @@ class Conditional:
                 "'%s' evaluated to a non-boolean value" % self.expression
             )
 
-    def skipped_outputs(self) -> Dict[str, SkipNull]:
+    def skipped_outputs(self) -> dict[str, SkipNull]:
         """Generate a dict of SkipNull objects corresponding to the output structure."""
         outobj = {}
 
@@ -309,14 +371,14 @@ class Conditional:
 class ResolveSource:
     """Apply linkMerge and pickValue operators to values coming into a port."""
 
-    promise_tuples: Union[List[Tuple[str, Promise]], Tuple[str, Promise]]
+    promise_tuples: Union[list[tuple[str, Promise]], tuple[str, Promise]]
 
     def __init__(
         self,
         name: str,
-        input: Dict[str, CWLObjectType],
+        input: dict[str, CWLObjectType],
         source_key: str,
-        promises: Dict[str, Job],
+        promises: dict[str, Job],
     ):
         """
         Construct a container object.
@@ -375,7 +437,7 @@ class ResolveSource:
             )
         else:
             name, rv = self.promise_tuples
-            result = cast(Dict[str, Any], rv).get(name)
+            result = cast(dict[str, Any], rv).get(name)
 
         result = self.pick_value(result)
         result = filter_skip_null(self.name, result)
@@ -383,7 +445,7 @@
 
     def link_merge(
         self, values: CWLObjectType
-    ) -> Union[List[CWLOutputType], CWLOutputType]:
+    ) -> Union[list[CWLOutputType], CWLOutputType]:
         """
         Apply linkMerge operator to `values` object.
 
@@ -396,7 +458,7 @@
             return values
 
         elif link_merge_type == "merge_flattened":
-            result: List[CWLOutputType] = []
+            result: list[CWLOutputType] = []
             for v in values:
                 if isinstance(v, MutableSequence):
                     result.extend(v)
@@ -409,7 +471,7 @@
                 f"Unsupported linkMerge '{link_merge_type}' on {self.name}."
             )
 
-    def pick_value(self, values: Union[List[Union[str, SkipNull]], Any]) -> Any:
+    def pick_value(self, values: Union[list[Union[str, SkipNull]], Any]) -> Any:
         """
         Apply pickValue operator to `values` object.
 
@@ -477,7 +539,7 @@ class StepValueFrom:
     """
 
    def __init__(
-        self, expr: str, source: Any, req: List[CWLObjectType], container_engine: str
+        self, expr: str, source: Any, req: list[CWLObjectType], container_engine: str
    ):
         """
         Instantiate an object to carry all know about this valueFrom expression.
@@ -609,7 +671,7 @@ class JustAValue:
 
 def resolve_dict_w_promises(
     dict_w_promises: Union[
-        UnresolvedDict, CWLObjectType, Dict[str, Union[str, StepValueFrom]]
+        UnresolvedDict, CWLObjectType, dict[str, Union[str, StepValueFrom]]
     ],
     file_store: Optional[AbstractFileStore] = None,
 ) -> CWLObjectType:
@@ -664,7 +726,7 @@ class ToilPathMapper(PathMapper):
 
     def __init__(
         self,
-        referenced_files: List[CWLObjectType],
+        referenced_files: list[CWLObjectType],
         basedir: str,
         stagedir: str,
         separateDirs: bool = True,
@@ -779,19 +841,44 @@ class ToilPathMapper(PathMapper):
         # TODO: why would we do that?
         stagedir = cast(Optional[str], obj.get("dirname")) or stagedir
 
-        # Decide where to put the file or directory, as an absolute path.
-        tgt = os.path.join(
-            stagedir,
-            cast(str, obj["basename"]),
-        )
+        if obj["class"] not in ("File", "Directory"):
+            # We only handle files and directories; only they have locations.
+            return
+
+        location = cast(str, obj["location"])
+        if location in self:
+            # If we've already mapped this, map it consistently.
+            tgt = self._pathmap[location].target
+            logger.debug(
+                "ToilPathMapper re-using target %s for path %s",
+                tgt,
+                location,
+            )
+        else:
+            # Decide where to put the file or directory, as an absolute path.
+            tgt = os.path.join(
+                stagedir,
+                cast(str, obj["basename"]),
+            )
+            if self.reversemap(tgt) is not None:
+                # If the target already exists in the pathmap, but we haven't yet
+                # mapped this, it means we have a conflict.
+                i = 2
+                new_tgt = f"{tgt}_{i}"
+                while self.reversemap(new_tgt) is not None:
+                    i += 1
+                    new_tgt = f"{tgt}_{i}"
+                logger.debug(
+                    "ToilPathMapper resolving mapping conflict: %s is now %s",
+                    tgt,
+                    new_tgt,
+                )
+                tgt = new_tgt
 
         if obj["class"] == "Directory":
             # Whether or not we've already mapped this path, we need to map all
             # children recursively.
 
-            # Grab its location
-            location = cast(str, obj["location"])
-
             logger.debug("ToilPathMapper visiting directory %s", location)
 
             # We want to check the directory to make sure it is not
@@ -877,7 +964,7 @@ class ToilPathMapper(PathMapper):
 
             # Keep recursing
             self.visitlisting(
-                cast(List[CWLObjectType], obj.get("listing", [])),
+                cast(list[CWLObjectType], obj.get("listing", [])),
                 tgt,
                 basedir,
                 copy=copy,
@@ -885,23 +972,21 @@ class ToilPathMapper(PathMapper):
             )
 
         elif obj["class"] == "File":
-            path = cast(str, obj["location"])
+            logger.debug("ToilPathMapper visiting file %s", location)
 
-            logger.debug("ToilPathMapper visiting file %s", path)
-
-            if path in self._pathmap:
+            if location in self._pathmap:
                 # Don't map the same file twice
                 logger.debug(
                     "ToilPathMapper stopping recursion because we have already "
                     "mapped file: %s",
-                    path,
+                    location,
                 )
                 return
 
-            ab = abspath(path, basedir)
-            if "contents" in obj and path.startswith("_:"):
+            ab = abspath(location, basedir)
+            if "contents" in obj and location.startswith("_:"):
                 # We are supposed to create this file
-                self._pathmap[path] = MapperEnt(
+                self._pathmap[location] = MapperEnt(
                     cast(str, obj["contents"]),
                     tgt,
                     "CreateWritableFile" if copy else "CreateFile",
@@ -919,14 +1004,16 @@ class ToilPathMapper(PathMapper):
                 # URI for a local file it downloaded.
                 if self.get_file:
                     deref = self.get_file(
-                        path, obj.get("streamable", False), self.streaming_allowed
+                        location,
+                        obj.get("streamable", False),
+                        self.streaming_allowed,
                     )
                 else:
                     deref = ab
                 if deref.startswith("file:"):
                     deref = schema_salad.ref_resolver.uri_file_path(deref)
                 if urlsplit(deref).scheme in ["http", "https"]:
-                    deref = downloadHttpFile(path)
+                    deref = downloadHttpFile(location)
                 elif urlsplit(deref).scheme != "toilfile":
                     # Dereference symbolic links
                     st = os.lstat(deref)
@@ -944,42 +1031,18 @@ class ToilPathMapper(PathMapper):
            # reference, we just pass that along.
 
            """Link or copy files to their targets. Create them as needed."""
-            targets: Dict[str, str] = {}
-            for _, value in self._pathmap.items():
-                # If the target already exists in the pathmap, it means we have a conflict. But we didn't change tgt to reflect new name.
-                if value.target == tgt:  # Conflict detected in the pathmap
-                    i = 2
-                    new_tgt = f"{tgt}_{i}"
-                    while new_tgt in targets:
-                        i += 1
-                        new_tgt = f"{tgt}_{i}"
-                    targets[new_tgt] = new_tgt
-
-            for _, value_conflict in targets.items():
-                logger.debug(
-                    "ToilPathMapper adding file mapping for conflict %s -> %s",
-                    deref,
-                    value_conflict,
-                )
-                self._pathmap[path] = MapperEnt(
-                    deref,
-                    value_conflict,
-                    "WritableFile" if copy else "File",
-                    staged,
-                )
-            # No conflicts detected so we can write out the original name.
-            if not targets:
-                logger.debug(
-                    "ToilPathMapper adding file mapping %s -> %s", deref, tgt
-                )
 
-                self._pathmap[path] = MapperEnt(
-                    deref, tgt, "WritableFile" if copy else "File", staged
-                )
+            logger.debug(
+                "ToilPathMapper adding file mapping %s -> %s", deref, tgt
+            )
+
+            self._pathmap[location] = MapperEnt(
+                deref, tgt, "WritableFile" if copy else "File", staged
+            )
 
             # Handle all secondary files that need to be next to this one.
             self.visitlisting(
-                cast(List[CWLObjectType], obj.get("secondaryFiles", [])),
+                cast(list[CWLObjectType], obj.get("secondaryFiles", [])),
                 stagedir,
                 basedir,
                 copy=copy,
@@ -1005,6 +1068,32 @@ class ToilSingleJobExecutor(cwltool.executors.SingleJobExecutor):
     ) -> None:
         """run_jobs from SingleJobExecutor, but not in a top level runtime context."""
         runtime_context.toplevel = False
+        if isinstance(
+            process, cwltool.command_line_tool.CommandLineTool
+        ) and isinstance(
+            process.make_job_runner(runtime_context), SingularityCommandLineJob
+        ):
+            # Set defaults for singularity cache environment variables, similar to what we do in wdltoil
+            # Use the same place as the default singularity cache directory
+            singularity_cache = os.path.join(os.path.expanduser("~"), ".singularity")
+            os.environ["SINGULARITY_CACHEDIR"] = os.environ.get(
+                "SINGULARITY_CACHEDIR", singularity_cache
+            )
+
+            # If singularity is detected, prepull the image to ensure locking
+            (docker_req, docker_is_req) = process.get_requirement(
+                feature="DockerRequirement"
+            )
+            with global_mutex(
+                os.environ["SINGULARITY_CACHEDIR"], "toil_singularity_cache_mutex"
+            ):
+                SingularityCommandLineJob.get_image(
+                    dockerRequirement=cast(dict[str, str], docker_req),
+                    pull_image=runtime_context.pull_image,
+                    force_pull=runtime_context.force_docker_pull,
+                    tmp_outdir_prefix=runtime_context.tmp_outdir_prefix,
+                )
+
         return super().run_jobs(process, job_order_object, logger, runtime_context)
 
 
@@ -1019,7 +1108,7 @@ class ToilTool:
         # Reserve a spot for the Toil job that ends up executing this tool.
         self._toil_job: Optional[Job] = None
         # Remember path mappers we have used so we can interrogate them later to find out what the job mapped.
-        self._path_mappers: List[cwltool.pathmapper.PathMapper] = []
+        self._path_mappers: list[cwltool.pathmapper.PathMapper] = []
 
     def connect_toil_job(self, job: Job) -> None:
         """
@@ -1031,7 +1120,7 @@ class ToilTool:
 
     def make_path_mapper(
         self,
-        reffiles: List[Any],
+        reffiles: list[Any],
         stagedir: str,
         runtimeContext: cwltool.context.RuntimeContext,
         separateDirs: bool,
@@ -1089,13 +1178,15 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
         # Make a table of all the places we mapped files to when downloading the inputs.
 
         # We want to hint which host paths and container (if any) paths correspond
-        host_and_job_paths: List[Tuple[str, str]] = []
+        host_and_job_paths: list[tuple[str, str]] = []
 
         for pm in self._path_mappers:
             for _, mapper_entry in pm.items_exclude_children():
                 # We know that mapper_entry.target as seen by the task is
                 # mapper_entry.resolved on the host.
-                host_and_job_paths.append((mapper_entry.resolved, mapper_entry.target))
+                host_and_job_paths.append(
+                    (mapper_entry.resolved, mapper_entry.target)
+                )
 
         # Notice that we have downloaded our inputs. Explain which files
         # those are here and what the task will expect to call them.
@@ -1127,7 +1218,7 @@ def toil_make_tool(
 # URI instead of raising an error right away, in case it is optional.
 MISSING_FILE = "missing://"
 
-DirectoryContents = Dict[str, Union[str, "DirectoryContents"]]
+DirectoryContents = dict[str, Union[str, "DirectoryContents"]]
 
 
 def check_directory_dict_invariants(contents: DirectoryContents) -> None:
@@ -1149,7 +1240,7 @@ def check_directory_dict_invariants(contents: DirectoryContents) -> None:
 
 def decode_directory(
     dir_path: str,
-) -> Tuple[DirectoryContents, Optional[str], str]:
+) -> tuple[DirectoryContents, Optional[str], str]:
     """
     Decode a directory from a "toildir:" path to a directory (or a file in it).
 
@@ -1224,7 +1315,7 @@ class ToilFsAccess(StdFsAccess):
         # they know what will happen.
         # Also maps files and directories from external URLs to downloaded
        # locations.
-        self.dir_to_download: Dict[str, str] = {}
+        self.dir_to_download: dict[str, str] = {}
 
         super().__init__(basedir)
 
@@ -1347,14 +1438,16 @@ class ToilFsAccess(StdFsAccess):
             destination = super()._abs(destination)
         return destination
 
-    def glob(self, pattern: str) -> List[str]:
+    def glob(self, pattern: str) -> list[str]:
         parse = urlparse(pattern)
         if parse.scheme == "file":
             pattern = os.path.abspath(unquote(parse.path))
         elif parse.scheme == "":
             pattern = os.path.abspath(pattern)
         else:
-            raise RuntimeError(f"Cannot efficiently support globbing on {parse.scheme} URIs")
+            raise RuntimeError(
+                f"Cannot efficiently support globbing on {parse.scheme} URIs"
+            )
 
         # Actually do the glob
         return [schema_salad.ref_resolver.file_uri(f) for f in glob.glob(pattern)]
@@ -1391,12 +1484,12 @@ class ToilFsAccess(StdFsAccess):
         else:
             # This should be supported by a job store.
             byte_stream = AbstractJobStore.open_url(fn)
-            if 'b' in mode:
+            if "b" in mode:
                 # Pass stream along in binary
                 return byte_stream
             else:
                 # Wrap it in a text decoder
-                return io.TextIOWrapper(byte_stream, encoding='utf-8')
+                return io.TextIOWrapper(byte_stream, encoding="utf-8")
 
     def exists(self, path: str) -> bool:
         """Test for file existence."""
@@ -1503,7 +1596,7 @@ class ToilFsAccess(StdFsAccess):
             logger.debug("AbstractJobStore said: %s", status)
             return status
 
-    def listdir(self, fn: str) -> List[str]:
+    def listdir(self, fn: str) -> list[str]:
         # This needs to return full URLs for everything in the directory.
         # URLs are not allowed to end in '/', even for subdirectories.
         logger.debug("ToilFsAccess listing %s", fn)
@@ -1524,7 +1617,9 @@ class ToilFsAccess(StdFsAccess):
             if got is None:
                 raise RuntimeError(f"Cannot list nonexistent directory: {fn}")
             if isinstance(got, str):
-                raise RuntimeError(f"Cannot list file or dubdirectory of a file: {fn}")
+                raise RuntimeError(
+                    f"Cannot list file or dubdirectory of a file: {fn}"
+                )
             here = got
             # List all the things in here and make full URIs to them
             return [os.path.join(fn, k) for k in here.keys()]
@@ -1534,7 +1629,7 @@ class ToilFsAccess(StdFsAccess):
                 for entry in AbstractJobStore.list_url(fn)
             ]
 
-    def join(self, path, *paths):  # type: (str, *str) -> str
+    def join(self, path: str, *paths: str) -> str:
         # This falls back on os.path.join
         return super().join(path, *paths)
 
@@ -1547,12 +1642,12 @@
 
 def toil_get_file(
     file_store: AbstractFileStore,
-    index: Dict[str, str],
-    existing: Dict[str, str],
+    index: dict[str, str],
+    existing: dict[str, str],
     uri: str,
     streamable: bool = False,
     streaming_allowed: bool = True,
-    pipe_threads: Optional[List[Tuple[Thread, int]]] = None,
+    pipe_threads: Optional[list[tuple[Thread, int]]] = None,
 ) -> str:
     """
     Set up the given file or directory from the Toil jobstore at a file URI
@@ -1653,9 +1748,7 @@ def toil_get_file(
         and streamable
        and not isinstance(file_store.jobStore, FileJobStore)
     ):
-        logger.debug(
-            "Streaming file %s", uri
-        )
+        logger.debug("Streaming file %s", uri)
         src_path = file_store.getLocalTempFileName()
         os.mkfifo(src_path)
         th = ExceptionalThread(
@@ -1677,34 +1770,35 @@ def toil_get_file(
     if uri.startswith("toilfile:"):
         # Download from the file store
         file_store_id = FileID.unpack(uri[len("toilfile:") :])
-        src_path = file_store.readGlobalFile(
-            file_store_id, symlink=True
-        )
+        src_path = file_store.readGlobalFile(file_store_id, symlink=True)
     else:
         # Download from the URI via the job store.
 
         # Figure out where it goes.
         src_path = file_store.getLocalTempFileName()
         # Open that path exclusively to make sure we created it
-        with open(src_path, 'xb') as fh:
+        with open(src_path, "xb") as fh:
             # Download into the file
-            size, executable = AbstractJobStore.read_from_url(uri, fh)
-            if executable:
-                # Set the execute bit in the file's permissions
-                os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
+            size, executable = AbstractJobStore.read_from_url(uri, fh)
+        if executable:
+            # Set the execute bit in the file's permissions
+            os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
 
     index[src_path] = uri
     existing[uri] = src_path
     return schema_salad.ref_resolver.file_uri(src_path)
 
-def write_file(
-    writeFunc: Callable[[str], FileID],
-    index: Dict[str, str],
-    existing: Dict[str, str],
+
+def convert_file_uri_to_toil_uri(
+    applyFunc: Callable[[str], FileID],
+    index: dict[str, str],
+    existing: dict[str, str],
     file_uri: str,
 ) -> str:
     """
-    Write a file into the Toil jobstore.
+    Given a file URI, convert it to a toil file URI. Uses applyFunc to handle the conversion.
+
+    Runs once on every unique file URI.
 
     'existing' is a set of files retrieved as inputs from toil_get_file. This
     ensures they are mapped back as the same name if passed through.
@@ -1721,12 +1815,8 @@ def write_file(
     else:
         file_uri = existing.get(file_uri, file_uri)
     if file_uri not in index:
-        if not urlparse(file_uri).scheme:
-            rp = os.path.realpath(file_uri)
-        else:
-            rp = file_uri
         try:
-            index[file_uri] = "toilfile:" + writeFunc(rp).pack()
+            index[file_uri] = "toilfile:" + applyFunc(file_uri).pack()
             existing[index[file_uri]] = file_uri
         except Exception as e:
             logger.error("Got exception '%s' while copying '%s'", e, file_uri)
@@ -1745,17 +1835,93 @@ def path_to_loc(obj: CWLObjectType) -> None:
         del obj["path"]
 
 
-def import_files(
-    import_function: Callable[[str], FileID],
+def extract_file_uri_once(
+    fileindex: dict[str, str],
+    existing: dict[str, str],
+    file_metadata: CWLObjectType,
+    mark_broken: bool = False,
+    skip_remote: bool = False,
+) -> Optional[str]:
+    """
+    Extract the filename from a CWL file record.
+
+    This function matches the predefined function signature in visit_files, which ensures
+    that this function is called on all files inside a CWL object.
+
+    Ensures no duplicate files are returned according to fileindex. If a file has not been resolved already (and had file:// prepended)
+    then resolve symlinks.
+    :param fileindex: Forward mapping of filename
+    :param existing: Reverse mapping of filename. This function does not use this
+    :param file_metadata: CWL file record
+    :param mark_broken: Whether files should be marked as missing
+    :param skip_remote: Whether to skip remote files
+    :return:
+    """
+    location = cast(str, file_metadata["location"])
+    if (
+        location.startswith("toilfile:")
+        or location.startswith("toildir:")
+        or location.startswith("_:")
+    ):
+        return None
+    if location in fileindex:
+        file_metadata["location"] = fileindex[location]
+        return None
+    if not location and file_metadata["path"]:
+        file_metadata["location"] = location = schema_salad.ref_resolver.file_uri(
+            cast(str, file_metadata["path"])
+        )
+    if location.startswith("file://") and not os.path.isfile(
+        schema_salad.ref_resolver.uri_file_path(location)
+    ):
+        if mark_broken:
+            logger.debug("File %s is missing", file_metadata)
+            file_metadata["location"] = location = MISSING_FILE
+        else:
+            raise cwl_utils.errors.WorkflowException(
+                "File is missing: %s" % file_metadata
+            )
+    if location.startswith("file://") or not skip_remote:
+        # This is a local file or a remote file
+        if location not in fileindex:
+            # These dictionaries are meant to keep track of what we're going to import
+            # In the actual import, this is used as a bidirectional mapping from unvirtualized to virtualized
+            # For this case, keep track of the files to prevent returning duplicate files
+            # see write_file
+
+            # If there is not a scheme, this file has not been resolved yet or is a URL.
+            if not urlparse(location).scheme:
+                rp = os.path.realpath(location)
+            else:
+                rp = location
+            return rp
+    return None
+
+
+V = TypeVar("V", covariant=True)
+
+
+class VisitFunc(Protocol[V]):
+    def __call__(
+        self,
+        fileindex: dict[str, str],
+        existing: dict[str, str],
+        file_metadata: CWLObjectType,
+        mark_broken: bool,
+        skip_remote: bool,
+    ) -> V: ...
+
+
+def visit_files(
+    func: VisitFunc[V],
     fs_access: StdFsAccess,
-    fileindex: Dict[str, str],
-    existing: Dict[str, str],
+    fileindex: dict[str, str],
+    existing: dict[str, str],
     cwl_object: Optional[CWLObjectType],
     mark_broken: bool = False,
     skip_remote: bool = False,
     bypass_file_store: bool = False,
-    log_level: int = logging.DEBUG
-) -> None:
+) -> list[V]:
     """
     Prepare all files and directories.
 
@@ -1801,18 +1967,12 @@ def import_files(
 
     :param log_level: Log imported files at the given level.
     """
+    func_return: list[Any] = list()
    tool_id = cwl_object.get("id", str(cwl_object)) if cwl_object else ""
 
     logger.debug("Importing files for %s", tool_id)
     logger.debug("Importing files in %s", cwl_object)
 
-    def import_and_log(url: str) -> FileID:
-        """
-        Upload a file and log that we are doing so.
-        """
-        logger.log(log_level, "Loading %s...", url)
-        return import_function(url)
-
     # We need to upload all files to the Toil filestore, and encode structure
     # recursively into all Directories' locations. But we cannot safely alter
     # the listing fields of Directory objects, because the handling required by
@@ -1830,13 +1990,13 @@ def import_files(
     if bypass_file_store:
         # Don't go on to actually import files or encode contents for
         # directories.
-        return
+        return func_return
 
     # Otherwise we actually want to put the things in the file store.
 
     def visit_file_or_directory_down(
         rec: CWLObjectType,
-    ) -> Optional[List[CWLObjectType]]:
+    ) -> Optional[list[CWLObjectType]]:
         """
         Visit each CWL File or Directory on the way down.
 
@@ -1863,7 +2023,7 @@ def import_files(
            ensure_no_collisions(cast(DirectoryType, rec))
 
            # Pull out the old listing, if any
-            old_listing = cast(Optional[List[CWLObjectType]], rec.get("listing", None))
+            old_listing = cast(Optional[list[CWLObjectType]], rec.get("listing", None))
 
            if not cast(str, rec["location"]).startswith("_:"):
                # This is a thing we can list and not just a literal, so we
@@ -1885,8 +2045,8 @@ def import_files(
 
     def visit_file_or_directory_up(
         rec: CWLObjectType,
-        down_result: Optional[List[CWLObjectType]],
-        child_results: List[DirectoryContents],
+        down_result: Optional[list[CWLObjectType]],
+        child_results: list[DirectoryContents],
     ) -> DirectoryContents:
         """
         For a CWL File or Directory, make sure it is uploaded and it has a
@@ -1908,10 +2068,15 @@ def import_files(
             # This is a CWL File
 
             result: DirectoryContents = {}
-
-            # Upload the file itself, which will adjust its location.
-            upload_file(
-                import_and_log, fileindex, existing, rec, mark_broken=mark_broken, skip_remote=skip_remote
+            # Run a function on the file and store the return
+            func_return.append(
+                func(
+                    fileindex,
+                    existing,
+                    rec,
+                    mark_broken=mark_broken,
+                    skip_remote=skip_remote,
+                )
             )
 
             # Make a record for this file under its name
@@ -1955,6 +2120,7 @@ def import_files(
         visit_file_or_directory_down,
         visit_file_or_directory_up,
     )
+    return func_return
 
 
 def upload_directory(
  def upload_directory(
@@ -2013,52 +2179,34 @@ def upload_directory(
2013
2179
  directory_metadata["location"] = encode_directory(directory_contents)
2014
2180
 
2015
2181
 
2016
- def upload_file(
2017
- uploadfunc: Callable[[str], FileID],
2018
- fileindex: Dict[str, str],
2019
- existing: Dict[str, str],
2182
+ def extract_and_convert_file_to_toil_uri(
2183
+ convertfunc: Callable[[str], FileID],
2184
+ fileindex: dict[str, str],
2185
+ existing: dict[str, str],
2020
2186
  file_metadata: CWLObjectType,
2021
2187
  mark_broken: bool = False,
2022
- skip_remote: bool = False
2188
+ skip_remote: bool = False,
2023
2189
  ) -> None:
2024
2190
  """
2025
- Update a file object so that the file will be accessible from another machine.
2191
+ Extract the file URI out of a file object and convert it to a Toil URI.
2192
+
2193
+ Runs convertfunc on the file URI to handle conversion.
2026
2194
 
2027
- Uploads local files to the Toil file store, and sets their location to a
2028
- reference to the toil file store.
2195
+ Is used to handle importing files into the jobstore.
2029
2196
 
2030
2197
  If a file doesn't exist, fails with an error, unless mark_broken is set, in
2031
2198
  which case the missing file is given a special sentinel location.
2032
2199
 
2033
- Unless skip_remote is set, downloads remote files into the file store and
2034
- sets their locations to references into the file store as well.
2200
+ Unless skip_remote is set, also run on remote files and sets their locations
2201
+ to toil URIs as well.
2035
2202
  """
2036
- location = cast(str, file_metadata["location"])
2037
- if (
2038
- location.startswith("toilfile:")
2039
- or location.startswith("toildir:")
2040
- or location.startswith("_:")
2041
- ):
2042
- return
2043
- if location in fileindex:
2044
- file_metadata["location"] = fileindex[location]
2045
- return
2046
- if not location and file_metadata["path"]:
2047
- file_metadata["location"] = location = schema_salad.ref_resolver.file_uri(
2048
- cast(str, file_metadata["path"])
2203
+ location = extract_file_uri_once(
2204
+ fileindex, existing, file_metadata, mark_broken, skip_remote
2205
+ )
2206
+ if location is not None:
2207
+ file_metadata["location"] = convert_file_uri_to_toil_uri(
2208
+ convertfunc, fileindex, existing, location
2049
2209
  )
2050
- if location.startswith("file://") and not os.path.isfile(
2051
- schema_salad.ref_resolver.uri_file_path(location)
2052
- ):
2053
- if mark_broken:
2054
- logger.debug("File %s is missing", file_metadata)
2055
- file_metadata["location"] = location = MISSING_FILE
2056
- else:
2057
- raise cwl_utils.errors.WorkflowException("File is missing: %s" % file_metadata)
2058
-
2059
- if location.startswith("file://") or not skip_remote:
2060
- # This is a local file, or we also need to download and re-upload remote files
2061
- file_metadata["location"] = write_file(uploadfunc, fileindex, existing, location)
2062
2210
 
2063
2211
  logger.debug("Sending file at: %s", file_metadata["location"])
2064
2212
 
@@ -2071,7 +2219,7 @@ def writeGlobalFileWrapper(file_store: AbstractFileStore, fileuri: str) -> FileI
2071
2219
 
2072
2220
  def remove_empty_listings(rec: CWLObjectType) -> None:
2073
2221
  if rec.get("class") != "Directory":
2074
- finddirs = [] # type: List[CWLObjectType]
2222
+ finddirs: list[CWLObjectType] = []
2075
2223
  visit_class(rec, ("Directory",), finddirs.append)
2076
2224
  for f in finddirs:
2077
2225
  remove_empty_listings(f)
@@ -2091,7 +2239,7 @@ class CWLNamedJob(Job):
2091
2239
  cores: Union[float, None] = 1,
2092
2240
  memory: Union[int, str, None] = "1GiB",
2093
2241
  disk: Union[int, str, None] = "1MiB",
2094
- accelerators: Optional[List[AcceleratorRequirement]] = None,
2242
+ accelerators: Optional[list[AcceleratorRequirement]] = None,
2095
2243
  preemptible: Optional[bool] = None,
2096
2244
  tool_id: Optional[str] = None,
2097
2245
  parent_name: Optional[str] = None,
@@ -2166,10 +2314,10 @@ class ResolveIndirect(CWLNamedJob):
2166
2314
 
2167
2315
  def toilStageFiles(
2168
2316
  toil: Toil,
2169
- cwljob: Union[CWLObjectType, List[CWLObjectType]],
2317
+ cwljob: Union[CWLObjectType, list[CWLObjectType]],
2170
2318
  outdir: str,
2171
2319
  destBucket: Union[str, None] = None,
2172
- log_level: int = logging.DEBUG
2320
+ log_level: int = logging.DEBUG,
2173
2321
  ) -> None:
2174
2322
  """
2175
2323
  Copy input files out of the global file store and update location and path.
@@ -2181,7 +2329,7 @@ def toilStageFiles(
2181
2329
  """
2182
2330
 
2183
2331
  def _collectDirEntries(
2184
- obj: Union[CWLObjectType, List[CWLObjectType]]
2332
+ obj: Union[CWLObjectType, list[CWLObjectType]]
2185
2333
  ) -> Iterator[CWLObjectType]:
2186
2334
  if isinstance(obj, dict):
2187
2335
  if obj.get("class") in ("File", "Directory"):
@@ -2263,13 +2411,17 @@ def toilStageFiles(
2263
2411
  # TODO: Use direct S3 to S3 copy on exports as well
2264
2412
  file_id_or_contents = (
2265
2413
  "toilfile:"
2266
- + toil.import_file(file_id_or_contents, symlink=False).pack()
2414
+ + toil.import_file(
2415
+ file_id_or_contents, symlink=False
2416
+ ).pack()
2267
2417
  )
2268
2418
 
2269
2419
  if file_id_or_contents.startswith("toilfile:"):
2270
2420
  # This is something we can export
2271
2421
  # TODO: Do we need to urlencode the parts before sending them to S3?
2272
- dest_url = "/".join(s.strip("/") for s in [destBucket, baseName])
2422
+ dest_url = "/".join(
2423
+ s.strip("/") for s in [destBucket, baseName]
2424
+ )
2273
2425
  logger.log(log_level, "Saving %s...", dest_url)
2274
2426
  toil.export_file(
2275
2427
  FileID.unpack(file_id_or_contents[len("toilfile:") :]),
@@ -2291,7 +2443,12 @@ def toilStageFiles(
2291
2443
  # Probably staging and bypassing file store. Just copy.
2292
2444
  logger.log(log_level, "Saving %s...", dest_url)
2293
2445
  os.makedirs(os.path.dirname(p.target), exist_ok=True)
2294
- shutil.copyfile(p.resolved, p.target)
2446
+ try:
2447
+ shutil.copyfile(p.resolved, p.target)
2448
+ except shutil.SameFileError:
2449
+ # If outdir isn't set and we're passing through an input file/directory as the output,
2450
+ # the file doesn't need to be copied because it is already there
2451
+ pass
2295
2452
  else:
2296
2453
  uri = p.resolved
2297
2454
  if not uri.startswith("toilfile:"):
@@ -2364,26 +2521,31 @@ class CWLJobWrapper(CWLNamedJob):
2364
2521
  subjob_name="_wrapper",
2365
2522
  local=True,
2366
2523
  )
2367
- self.cwltool = remove_pickle_problems(tool)
2524
+ self.cwltool = tool
2368
2525
  self.cwljob = cwljob
2369
2526
  self.runtime_context = runtime_context
2370
- self.conditional = conditional
2527
+ self.conditional = conditional or Conditional()
2371
2528
  self.parent_name = parent_name
2372
2529
 
2373
2530
  def run(self, file_store: AbstractFileStore) -> Any:
2374
2531
  """Create a child job with the correct resource requirements set."""
2375
2532
  cwljob = resolve_dict_w_promises(self.cwljob, file_store)
2533
+
2534
+ # Check confitional to license full evaluation of job inputs.
2535
+ if self.conditional.is_false(cwljob):
2536
+ return self.conditional.skipped_outputs()
2537
+
2376
2538
  fill_in_defaults(
2377
2539
  self.cwltool.tool["inputs"],
2378
2540
  cwljob,
2379
2541
  self.runtime_context.make_fs_access(self.runtime_context.basedir or ""),
2380
2542
  )
2543
+ # Don't forward the conditional. We checked it already.
2381
2544
  realjob = CWLJob(
2382
2545
  tool=self.cwltool,
2383
2546
  cwljob=cwljob,
2384
2547
  runtime_context=self.runtime_context,
2385
2548
  parent_name=self.parent_name,
2386
- conditional=self.conditional,
2387
2549
  )
2388
2550
  self.addChild(realjob)
2389
2551
  return realjob.rv()
@@ -2401,7 +2563,7 @@ class CWLJob(CWLNamedJob):
2401
2563
  conditional: Union[Conditional, None] = None,
2402
2564
  ):
2403
2565
  """Store the context for later execution."""
2404
- self.cwltool = remove_pickle_problems(tool)
2566
+ self.cwltool = tool
2405
2567
  self.conditional = conditional or Conditional()
2406
2568
 
2407
2569
  if runtime_context.builder:
@@ -2418,7 +2580,7 @@ class CWLJob(CWLNamedJob):
2418
2580
  resources={},
2419
2581
  mutation_manager=runtime_context.mutation_manager,
2420
2582
  formatgraph=tool.formatgraph,
2421
- make_fs_access=cast(Type[StdFsAccess], runtime_context.make_fs_access),
2583
+ make_fs_access=cast(type[StdFsAccess], runtime_context.make_fs_access),
2422
2584
  fs_access=runtime_context.make_fs_access(""),
2423
2585
  job_script_provider=runtime_context.job_script_provider,
2424
2586
  timeout=runtime_context.eval_timeout,
@@ -2435,7 +2597,21 @@ class CWLJob(CWLNamedJob):
2435
2597
 
2436
2598
  req = tool.evalResources(self.builder, runtime_context)
2437
2599
 
2438
- accelerators: Optional[List[AcceleratorRequirement]] = None
2600
+ tool_own_resources = tool.get_requirement("ResourceRequirement")[0] or {}
2601
+ if "ramMin" in tool_own_resources or "ramMax" in tool_own_resources:
2602
+ # The tool is actually asking for memory.
2603
+ memory = int(req["ram"] * (2**20))
2604
+ else:
2605
+ # The tool is getting a default ram allocation.
2606
+ if getattr(runtime_context, "cwl_default_ram"):
2607
+ # We will respect the CWL spec and apply the default cwltool
2608
+ # computed, which might be different than Toil's default.
2609
+ memory = int(req["ram"] * (2**20))
2610
+ else:
2611
+ # We use a None requirement and the Toil default applies.
2612
+ memory = None
2613
+
2614
+ accelerators: Optional[list[AcceleratorRequirement]] = None
2439
2615
  if req.get("cudaDeviceCount", 0) > 0:
2440
2616
  # There's a CUDARequirement, which cwltool processed for us
2441
2617
  # TODO: How is cwltool deciding what value to use between min and max?
@@ -2499,7 +2675,7 @@ class CWLJob(CWLNamedJob):
2499
2675
 
2500
2676
  super().__init__(
2501
2677
  cores=req["cores"],
2502
- memory=int(req["ram"] * (2**20)),
2678
+ memory=memory,
2503
2679
  disk=int(total_disk),
2504
2680
  accelerators=accelerators,
2505
2681
  preemptible=preemptible,
@@ -2513,7 +2689,7 @@ class CWLJob(CWLNamedJob):
2513
2689
  self.step_inputs = self.cwltool.tool["inputs"]
2514
2690
  self.workdir: str = runtime_context.workdir # type: ignore[attr-defined]
2515
2691
 
2516
- def required_env_vars(self, cwljob: Any) -> Iterator[Tuple[str, str]]:
2692
+ def required_env_vars(self, cwljob: Any) -> Iterator[tuple[str, str]]:
2517
2693
  """Yield environment variables from EnvVarRequirement."""
2518
2694
  if isinstance(cwljob, dict):
2519
2695
  if cwljob.get("class") == "EnvVarRequirement":
@@ -2525,7 +2701,7 @@ class CWLJob(CWLNamedJob):
2525
2701
  for env_var in cwljob:
2526
2702
  yield from self.required_env_vars(env_var)
2527
2703
 
2528
- def populate_env_vars(self, cwljob: CWLObjectType) -> Dict[str, str]:
2704
+ def populate_env_vars(self, cwljob: CWLObjectType) -> dict[str, str]:
2529
2705
  """
2530
2706
  Prepare environment variables necessary at runtime for the job.
2531
2707
 
@@ -2541,9 +2717,9 @@ class CWLJob(CWLNamedJob):
2541
2717
  required_env_vars = {}
2542
2718
  # iterate over EnvVarRequirement env vars, if any
2543
2719
  for k, v in self.required_env_vars(cwljob):
2544
- required_env_vars[
2545
- k
2546
- ] = v # will tell cwltool which env vars to take from the environment
2720
+ required_env_vars[k] = (
2721
+ v # will tell cwltool which env vars to take from the environment
2722
+ )
2547
2723
  os.environ[k] = v
2548
2724
  # needs to actually be populated in the environment as well or
2549
2725
  # they're not used
@@ -2553,7 +2729,7 @@ class CWLJob(CWLNamedJob):
2553
2729
  # env var with the same name is found
2554
2730
  for req in self.cwltool.requirements:
2555
2731
  if req["class"] == "EnvVarRequirement":
2556
- envDefs = cast(List[Dict[str, str]], req["envDef"])
2732
+ envDefs = cast(list[dict[str, str]], req["envDef"])
2557
2733
  for env_def in envDefs:
2558
2734
  env_name = env_def["envName"]
2559
2735
  if env_name in required_env_vars:
@@ -2585,7 +2761,7 @@ class CWLJob(CWLNamedJob):
2585
2761
  for inp_id in immobile_cwljob_dict.keys():
2586
2762
  found = False
2587
2763
  for field in cast(
2588
- List[Dict[str, str]], self.cwltool.inputs_record_schema["fields"]
2764
+ list[dict[str, str]], self.cwltool.inputs_record_schema["fields"]
2589
2765
  ):
2590
2766
  if field["name"] == inp_id:
2591
2767
  found = True
@@ -2600,8 +2776,8 @@ class CWLJob(CWLNamedJob):
2600
2776
  functools.partial(remove_empty_listings),
2601
2777
  )
2602
2778
 
2603
- index: Dict[str, str] = {}
2604
- existing: Dict[str, str] = {}
2779
+ index: dict[str, str] = {}
2780
+ existing: dict[str, str] = {}
2605
2781
 
2606
2782
  # Prepare the run instructions for cwltool
2607
2783
  runtime_context = self.runtime_context.copy()
@@ -2613,7 +2789,7 @@ class CWLJob(CWLNamedJob):
2613
2789
  # will come and grab this function for fetching files from the Toil
2614
2790
  # file store. pipe_threads is used for keeping track of separate
2615
2791
  # threads launched to stream files around.
2616
- pipe_threads: List[Tuple[Thread, int]] = []
2792
+ pipe_threads: list[tuple[Thread, int]] = []
2617
2793
  setattr(
2618
2794
  runtime_context,
2619
2795
  "toil_get_file",
@@ -2647,7 +2823,7 @@ class CWLJob(CWLNamedJob):
2647
2823
  # function and a path_mapper type or factory function.
2648
2824
 
2649
2825
  runtime_context.make_fs_access = cast(
2650
- Type[StdFsAccess],
2826
+ type[StdFsAccess],
2651
2827
  functools.partial(ToilFsAccess, file_store=file_store),
2652
2828
  )
2653
2829
 
@@ -2660,9 +2836,13 @@ class CWLJob(CWLNamedJob):
2660
2836
  # Collect standard output and standard error somewhere if they don't go to files.
2661
2837
  # We need to keep two FDs to these because cwltool will close what we give it.
2662
2838
  default_stdout = TemporaryFile()
2663
- runtime_context.default_stdout = os.fdopen(os.dup(default_stdout.fileno()), 'wb')
2839
+ runtime_context.default_stdout = os.fdopen(
2840
+ os.dup(default_stdout.fileno()), "wb"
2841
+ )
2664
2842
  default_stderr = TemporaryFile()
2665
- runtime_context.default_stderr = os.fdopen(os.dup(default_stderr.fileno()), 'wb')
2843
+ runtime_context.default_stderr = os.fdopen(
2844
+ os.dup(default_stderr.fileno()), "wb"
2845
+ )
2666
2846
 
2667
2847
  process_uuid = uuid.uuid4() # noqa F841
2668
2848
  started_at = datetime.datetime.now() # noqa F841
@@ -2693,17 +2873,27 @@ class CWLJob(CWLNamedJob):
2693
2873
  default_stdout.seek(0, os.SEEK_END)
2694
2874
  if default_stdout.tell() > 0:
2695
2875
  default_stdout.seek(0)
2696
- file_store.log_user_stream(self.description.unitName + '.stdout', default_stdout)
2876
+ file_store.log_user_stream(
2877
+ self.description.unitName + ".stdout", default_stdout
2878
+ )
2697
2879
  if status != "success":
2698
2880
  default_stdout.seek(0)
2699
- logger.error("Failed command standard output:\n%s", default_stdout.read().decode("utf-8", errors="replace"))
2881
+ logger.error(
2882
+ "Failed command standard output:\n%s",
2883
+ default_stdout.read().decode("utf-8", errors="replace"),
2884
+ )
2700
2885
  default_stderr.seek(0, os.SEEK_END)
2701
2886
  if default_stderr.tell():
2702
2887
  default_stderr.seek(0)
2703
- file_store.log_user_stream(self.description.unitName + '.stderr', default_stderr)
2888
+ file_store.log_user_stream(
2889
+ self.description.unitName + ".stderr", default_stderr
2890
+ )
2704
2891
  if status != "success":
2705
2892
  default_stderr.seek(0)
2706
- logger.error("Failed command standard error:\n%s", default_stderr.read().decode("utf-8", errors="replace"))
2893
+ logger.error(
2894
+ "Failed command standard error:\n%s",
2895
+ default_stderr.read().decode("utf-8", errors="replace"),
2896
+ )
2707
2897
 
2708
2898
  if status != "success":
2709
2899
  raise cwl_utils.errors.WorkflowException(status)
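The reformatted `os.fdopen(os.dup(...))` calls above are load-bearing, not cosmetic: cwltool closes the stream it is handed, so the job keeps a second file descriptor to the same `TemporaryFile` in order to read the captured output back for `log_user_stream`. The pattern in isolation:

```python
import os
from tempfile import TemporaryFile

# The callee closes the handle it is given, so hand it a duplicate FD
# and keep the original for reading the captured output back.
capture = TemporaryFile()
handed_out = os.fdopen(os.dup(capture.fileno()), "wb")
handed_out.write(b"tool output\n")
handed_out.close()          # what cwltool does; "capture" stays usable

capture.seek(0)
assert capture.read() == b"tool output\n"
```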
@@ -2716,12 +2906,18 @@ class CWLJob(CWLNamedJob):
2716
2906
  fs_access = runtime_context.make_fs_access(runtime_context.basedir)
2717
2907
 
2718
2908
  # And a file importer that can go from a file:// URI to a Toil FileID
2719
- file_import_function = functools.partial(writeGlobalFileWrapper, file_store)
2909
+ def file_import_function(url: str, log_level: int = logging.DEBUG) -> FileID:
2910
+ logger.log(log_level, "Loading %s...", url)
2911
+ return writeGlobalFileWrapper(file_store, url)
2912
+
2913
+ file_upload_function = functools.partial(
2914
+ extract_and_convert_file_to_toil_uri, file_import_function
2915
+ )
2720
2916
 
2721
2917
  # Upload all the Files and set their and the Directories' locations, if
2722
2918
  # needed.
2723
- import_files(
2724
- file_import_function,
2919
+ visit_files(
2920
+ file_upload_function,
2725
2921
  fs_access,
2726
2922
  index,
2727
2923
  existing,
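This hunk replaces the old `import_files` call with `visit_files` plus a composed `file_upload_function`: a URL-to-`FileID` importer is partially applied into a per-`File` converter, and the visitor applies that converter to every `File` node in the CWL object. A hedged, self-contained sketch of the composition (the walker and importer here are simplified stand-ins, not the real Toil API):

```python
import functools

# Simplified stand-ins for the importer/converter/visitor composition.
def import_url(url: str) -> str:
    return f"toilfile:{abs(hash(url)) % 10_000}"   # fake FileID

def convert_file(importer, file_node: dict) -> None:
    file_node["location"] = importer(file_node["location"])

converter = functools.partial(convert_file, import_url)

def walk_files(op, node) -> None:
    # Apply op to every {"class": "File"} node, recursively.
    if isinstance(node, dict):
        if node.get("class") == "File":
            op(node)
        for value in node.values():
            walk_files(op, value)
    elif isinstance(node, list):
        for item in node:
            walk_files(op, item)

job = {"reads": {"class": "File", "location": "file:///data/r.fq"}}
walk_files(converter, job)
assert job["reads"]["location"].startswith("toilfile:")
```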
@@ -2751,6 +2947,74 @@ def get_container_engine(runtime_context: cwltool.context.RuntimeContext) -> str
2751
2947
  return "docker"
2752
2948
 
2753
2949
 
2950
+ def makeRootJob(
2951
+ tool: Process,
2952
+ jobobj: CWLObjectType,
2953
+ runtime_context: cwltool.context.RuntimeContext,
2954
+ initialized_job_order: CWLObjectType,
2955
+ options: Namespace,
2956
+ toil: Toil,
2957
+ ) -> CWLNamedJob:
2958
+ """
2959
+ Create the Toil root Job object for the CWL tool. This is the same as makeJob(), except that it also handles the import logic.
2960
+
2961
+ Actually creates what might be a subgraph of two jobs, the second of which may be the follow-on of the first.
2962
+ Unlike makeJob(), only the single root job is returned.
2963
+
2964
+ :return: the root job of the constructed job (sub)graph
2965
+ """
2966
+ if options.run_imports_on_workers:
2967
+ filenames = extract_workflow_inputs(options, initialized_job_order, tool)
2968
+ metadata = get_file_sizes(
2969
+ filenames, toil._jobStore, include_remote_files=options.reference_inputs
2970
+ )
2971
+
2972
+ # Mapping of files to metadata for files that will be imported on the worker
2973
+ # This will consist of files that we were able to get a file size for
2974
+ worker_metadata: dict[str, FileMetadata] = dict()
2975
+ # Mapping of files to metadata for files that will be imported on the leader
2976
+ # This will consist of files that we were not able to get a file size for
2977
+ leader_metadata = dict()
2978
+ for filename, file_data in metadata.items():
2979
+ if file_data.size is None:
2980
+ leader_metadata[filename] = file_data
2981
+ else:
2982
+ worker_metadata[filename] = file_data
2983
+
2984
+ # import the files for the leader first
2985
+ path_to_fileid = WorkerImportJob.import_files(
2986
+ list(leader_metadata.keys()), toil._jobStore
2987
+ )
2988
+
2989
+ # then install the imported files before importing the other files
2990
+ # this way the control flow can fall from the leader to workers
2991
+ tool, initialized_job_order = CWLInstallImportsJob.fill_in_files(
2992
+ initialized_job_order,
2993
+ tool,
2994
+ path_to_fileid,
2995
+ options.basedir,
2996
+ options.reference_inputs,
2997
+ options.bypass_file_store,
2998
+ )
2999
+
3000
+ import_job = CWLImportWrapper(
3001
+ initialized_job_order, tool, runtime_context, worker_metadata, options
3002
+ )
3003
+ return import_job
3004
+ else:
3005
+ import_workflow_inputs(
3006
+ toil._jobStore,
3007
+ options,
3008
+ initialized_job_order=initialized_job_order,
3009
+ tool=tool,
3010
+ )
3011
+ root_job, followOn = makeJob(
3012
+ tool, jobobj, runtime_context, None, None
3013
+ ) # toplevel, no name needed
3014
+ root_job.cwljob = initialized_job_order
3015
+ return root_job
3016
+
3017
+
2754
3018
  def makeJob(
2755
3019
  tool: Process,
2756
3020
  jobobj: CWLObjectType,
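The new `makeRootJob` above partitions inputs by whether a size could be determined: unsized files (streams, some remote URLs) are imported immediately on the leader, while sized files are deferred to worker import jobs whose disk requirements can be planned ahead of time. The partition step, condensed:

```python
# Condensed sketch of the leader/worker split in makeRootJob: files with
# a known size can be batched onto workers (their disk needs are
# plannable); files of unknown size are imported on the leader.
metadata = {
    "s3://bucket/a.bam": 2_000_000_000,  # size known -> worker import
    "https://example.org/stream": None,  # size unknown -> leader import
}

worker_files = {f: s for f, s in metadata.items() if s is not None}
leader_files = [f for f, s in metadata.items() if s is None]

assert leader_files == ["https://example.org/stream"]
assert list(worker_files) == ["s3://bucket/a.bam"]
```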
@@ -2758,13 +3022,16 @@ def makeJob(
2758
3022
  parent_name: Optional[str],
2759
3023
  conditional: Union[Conditional, None],
2760
3024
  ) -> Union[
2761
- Tuple["CWLWorkflow", ResolveIndirect],
2762
- Tuple[CWLJob, CWLJob],
2763
- Tuple[CWLJobWrapper, CWLJobWrapper],
3025
+ tuple["CWLWorkflow", ResolveIndirect],
3026
+ tuple[CWLJob, CWLJob],
3027
+ tuple[CWLJobWrapper, CWLJobWrapper],
2764
3028
  ]:
2765
3029
  """
2766
3030
  Create the correct Toil Job object for the CWL tool.
2767
3031
 
3032
+ Actually creates what might be a subgraph of two jobs, the second of which may be the follow-on of the first.
3033
+ If only one job is created, it is returned twice.
3034
+
2768
3035
  Types: workflow, job, or job wrapper for dynamic resource requirements.
2769
3036
 
2770
3037
  :return: "wfjob, followOn" if the input tool is a workflow, and "job, job" otherwise
@@ -2844,16 +3111,16 @@ class CWLScatter(Job):
2844
3111
  def flat_crossproduct_scatter(
2845
3112
  self,
2846
3113
  joborder: CWLObjectType,
2847
- scatter_keys: List[str],
2848
- outputs: List[Promised[CWLObjectType]],
3114
+ scatter_keys: list[str],
3115
+ outputs: list[Promised[CWLObjectType]],
2849
3116
  postScatterEval: Callable[[CWLObjectType], CWLObjectType],
2850
3117
  ) -> None:
2851
3118
  """Cartesian product of the inputs, then flattened."""
2852
3119
  scatter_key = shortname(scatter_keys[0])
2853
- for n in range(0, len(cast(List[CWLObjectType], joborder[scatter_key]))):
3120
+ for n in range(0, len(cast(list[CWLObjectType], joborder[scatter_key]))):
2854
3121
  updated_joborder = copy.copy(joborder)
2855
3122
  updated_joborder[scatter_key] = cast(
2856
- List[CWLObjectType], joborder[scatter_key]
3123
+ list[CWLObjectType], joborder[scatter_key]
2857
3124
  )[n]
2858
3125
  if len(scatter_keys) == 1:
2859
3126
  updated_joborder = postScatterEval(updated_joborder)
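`flat_crossproduct_scatter` fixes one scatter key per recursion level and emits a job per fully-specialized job order, so results land in a single flat list (unlike `nested_crossproduct_scatter` below, which preserves the nesting). An equivalent standalone sketch over plain dicts:

```python
# Standalone sketch of flat_crossproduct_scatter: fix one scatter key
# per recursion level; at the innermost level, emit one flattened entry
# per fully-specialized job order.
def flat_scatter(joborder: dict, keys: list[str], out: list[dict]) -> None:
    key = keys[0]
    for value in joborder[key]:
        specialized = {**joborder, key: value}
        if len(keys) == 1:
            out.append(specialized)       # leaf: one concrete job order
        else:
            flat_scatter(specialized, keys[1:], out)

results: list[dict] = []
flat_scatter({"x": [1, 2], "y": ["a", "b"]}, ["x", "y"], results)
assert len(results) == 4                  # 2 x 2, flattened
assert results[0] == {"x": 1, "y": "a"}
```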
@@ -2874,16 +3141,16 @@ class CWLScatter(Job):
2874
3141
  def nested_crossproduct_scatter(
2875
3142
  self,
2876
3143
  joborder: CWLObjectType,
2877
- scatter_keys: List[str],
3144
+ scatter_keys: list[str],
2878
3145
  postScatterEval: Callable[[CWLObjectType], CWLObjectType],
2879
- ) -> List[Promised[CWLObjectType]]:
3146
+ ) -> list[Promised[CWLObjectType]]:
2880
3147
  """Cartesian product of the inputs."""
2881
3148
  scatter_key = shortname(scatter_keys[0])
2882
- outputs: List[Promised[CWLObjectType]] = []
2883
- for n in range(0, len(cast(List[CWLObjectType], joborder[scatter_key]))):
3149
+ outputs: list[Promised[CWLObjectType]] = []
3150
+ for n in range(0, len(cast(list[CWLObjectType], joborder[scatter_key]))):
2884
3151
  updated_joborder = copy.copy(joborder)
2885
3152
  updated_joborder[scatter_key] = cast(
2886
- List[CWLObjectType], joborder[scatter_key]
3153
+ list[CWLObjectType], joborder[scatter_key]
2887
3154
  )[n]
2888
3155
  if len(scatter_keys) == 1:
2889
3156
  updated_joborder = postScatterEval(updated_joborder)
@@ -2904,7 +3171,7 @@ class CWLScatter(Job):
2904
3171
  )
2905
3172
  return outputs
2906
3173
 
2907
- def run(self, file_store: AbstractFileStore) -> List[Promised[CWLObjectType]]:
3174
+ def run(self, file_store: AbstractFileStore) -> list[Promised[CWLObjectType]]:
2908
3175
  """Generate the follow on scatter jobs."""
2909
3176
  cwljob = resolve_dict_w_promises(self.cwljob, file_store)
2910
3177
 
@@ -2916,7 +3183,7 @@ class CWLScatter(Job):
2916
3183
  scatterMethod = self.step.tool.get("scatterMethod", None)
2917
3184
  if len(scatter) == 1:
2918
3185
  scatterMethod = "dotproduct"
2919
- outputs: List[Promised[CWLObjectType]] = []
3186
+ outputs: list[Promised[CWLObjectType]] = []
2920
3187
 
2921
3188
  valueFrom = {
2922
3189
  shortname(i["id"]): i["valueFrom"]
@@ -2948,11 +3215,11 @@ class CWLScatter(Job):
2948
3215
 
2949
3216
  if scatterMethod == "dotproduct":
2950
3217
  for i in range(
2951
- 0, len(cast(List[CWLObjectType], cwljob[shortname(scatter[0])]))
3218
+ 0, len(cast(list[CWLObjectType], cwljob[shortname(scatter[0])]))
2952
3219
  ):
2953
3220
  copyjob = copy.copy(cwljob)
2954
3221
  for sc in [shortname(x) for x in scatter]:
2955
- copyjob[sc] = cast(List[CWLObjectType], cwljob[sc])[i]
3222
+ copyjob[sc] = cast(list[CWLObjectType], cwljob[sc])[i]
2956
3223
  copyjob = postScatterEval(copyjob)
2957
3224
  subjob, follow_on = makeJob(
2958
3225
  tool=self.step.embedded_tool,
@@ -2991,7 +3258,7 @@ class CWLGather(Job):
2991
3258
  def __init__(
2992
3259
  self,
2993
3260
  step: cwltool.workflow.WorkflowStep,
2994
- outputs: Promised[Union[CWLObjectType, List[CWLObjectType]]],
3261
+ outputs: Promised[Union[CWLObjectType, list[CWLObjectType]]],
2995
3262
  ):
2996
3263
  """Collect our context for later gathering."""
2997
3264
  super().__init__(cores=1, memory="1GiB", disk="1MiB", local=True)
@@ -3000,24 +3267,24 @@ class CWLGather(Job):
3000
3267
 
3001
3268
  @staticmethod
3002
3269
  def extract(
3003
- obj: Union[CWLObjectType, List[CWLObjectType]], k: str
3004
- ) -> Union[CWLOutputType, List[CWLObjectType]]:
3270
+ obj: Union[CWLObjectType, list[CWLObjectType]], k: str
3271
+ ) -> Union[CWLOutputType, list[CWLObjectType]]:
3005
3272
  """
3006
3273
  Extract the given key from the obj.
3007
3274
 
3008
3275
  If the object is a list, extract it from all members of the list.
3009
3276
  """
3010
3277
  if isinstance(obj, Mapping):
3011
- return cast(Union[CWLOutputType, List[CWLObjectType]], obj.get(k))
3278
+ return cast(Union[CWLOutputType, list[CWLObjectType]], obj.get(k))
3012
3279
  elif isinstance(obj, MutableSequence):
3013
- cp: List[CWLObjectType] = []
3280
+ cp: list[CWLObjectType] = []
3014
3281
  for item in obj:
3015
3282
  cp.append(cast(CWLObjectType, CWLGather.extract(item, k)))
3016
3283
  return cp
3017
3284
  else:
3018
- return cast(List[CWLObjectType], [])
3285
+ return cast(list[CWLObjectType], [])
3019
3286
 
3020
- def run(self, file_store: AbstractFileStore) -> Dict[str, Any]:
3287
+ def run(self, file_store: AbstractFileStore) -> dict[str, Any]:
3021
3288
  """Gather all the outputs of the scatter."""
3022
3289
  outobj = {}
3023
3290
 
@@ -3028,8 +3295,8 @@ class CWLGather(Job):
3028
3295
  return shortname(n)
3029
3296
 
3030
3297
  # TODO: MyPy can't understand that this is the type we should get by unwrapping the promise
3031
- outputs: Union[CWLObjectType, List[CWLObjectType]] = cast(
3032
- Union[CWLObjectType, List[CWLObjectType]], unwrap(self.outputs)
3298
+ outputs: Union[CWLObjectType, list[CWLObjectType]] = cast(
3299
+ Union[CWLObjectType, list[CWLObjectType]], unwrap(self.outputs)
3033
3300
  )
3034
3301
  for k in [sn(i) for i in self.step.tool["out"]]:
3035
3302
  outobj[k] = self.extract(outputs, k)
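`CWLGather.extract` gives the gather step a single code path for both shapes it can receive: one output object, or a list of them from a scatter. In miniature:

```python
# Miniature of CWLGather.extract: a key lookup that maps over lists so
# scattered outputs gather with the same code path as single outputs.
def extract(obj, k):
    if isinstance(obj, dict):
        return obj.get(k)
    if isinstance(obj, list):
        return [extract(item, k) for item in obj]
    return []

assert extract({"out": 1}, "out") == 1
assert extract([{"out": 1}, {"out": 2}], "out") == [1, 2]
```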
@@ -3071,7 +3338,11 @@ ProcessType = TypeVar(
3071
3338
 
3072
3339
 
3073
3340
  def remove_pickle_problems(obj: ProcessType) -> ProcessType:
3074
- """Doc_loader does not pickle correctly, causing Toil errors, remove from objects."""
3341
+ """
3342
+ Doc_loader does not pickle correctly, causing Toil errors; remove it from objects.
3343
+
3344
+ See github issue: https://github.com/mypyc/mypyc/issues/804
3345
+ """
3075
3346
  if hasattr(obj, "doc_loader"):
3076
3347
  obj.doc_loader = None
3077
3348
  if isinstance(obj, cwltool.workflow.WorkflowStep):
@@ -3103,12 +3374,11 @@ class CWLWorkflow(CWLNamedJob):
3103
3374
  self.cwlwf = cwlwf
3104
3375
  self.cwljob = cwljob
3105
3376
  self.runtime_context = runtime_context
3106
- self.cwlwf = remove_pickle_problems(self.cwlwf)
3107
3377
  self.conditional = conditional or Conditional()
3108
3378
 
3109
3379
  def run(
3110
3380
  self, file_store: AbstractFileStore
3111
- ) -> Union[UnresolvedDict, Dict[str, SkipNull]]:
3381
+ ) -> Union[UnresolvedDict, dict[str, SkipNull]]:
3112
3382
  """
3113
3383
  Convert a CWL Workflow graph into a Toil job graph.
3114
3384
 
@@ -3129,7 +3399,7 @@ class CWLWorkflow(CWLNamedJob):
3129
3399
  # that may be used as a "source" for a step input workflow output
3130
3400
  # parameter
3131
3401
  # to: the job that will produce that value.
3132
- promises: Dict[str, Job] = {}
3402
+ promises: dict[str, Job] = {}
3133
3403
 
3134
3404
  parent_name = shortname(self.cwlwf.tool["id"])
3135
3405
 
@@ -3158,7 +3428,7 @@ class CWLWorkflow(CWLNamedJob):
3158
3428
  stepinputs_fufilled = False
3159
3429
  if stepinputs_fufilled:
3160
3430
  logger.debug("Ready to make job for workflow step %s", step_id)
3161
- jobobj: Dict[
3431
+ jobobj: dict[
3162
3432
  str, Union[ResolveSource, DefaultWithSource, StepValueFrom]
3163
3433
  ] = {}
3164
3434
 
@@ -3292,30 +3562,348 @@ class CWLWorkflow(CWLNamedJob):
3292
3562
  return UnresolvedDict(outobj)
3293
3563
 
3294
3564
 
3565
+ class CWLInstallImportsJob(Job):
3566
+ def __init__(
3567
+ self,
3568
+ initialized_job_order: Promised[CWLObjectType],
3569
+ tool: Promised[Process],
3570
+ basedir: str,
3571
+ skip_remote: bool,
3572
+ bypass_file_store: bool,
3573
+ import_data: Promised[dict[str, FileID]],
3574
+ **kwargs: Any,
3575
+ ) -> None:
3576
+ """
3577
+ Job that takes the entire CWL object and a mapping of filenames to the imported URIs,
3578
+ and converts all file locations to those URIs.
3579
+
3580
+ This class is only used when runImportsOnWorkers is enabled.
3581
+ """
3582
+ super().__init__(local=True, **kwargs)
3583
+ self.initialized_job_order = initialized_job_order
3584
+ self.tool = tool
3585
+ self.basedir = basedir
3586
+ self.skip_remote = skip_remote
3587
+ self.bypass_file_store = bypass_file_store
3588
+ self.import_data = import_data
3589
+
3590
+ @staticmethod
3591
+ def fill_in_files(
3592
+ initialized_job_order: CWLObjectType,
3593
+ tool: Process,
3594
+ candidate_to_fileid: dict[str, FileID],
3595
+ basedir: str,
3596
+ skip_remote: bool,
3597
+ bypass_file_store: bool,
3598
+ ) -> tuple[Process, CWLObjectType]:
3599
+ """
3600
+ Given a mapping of filenames to Toil file IDs, replace the filenames with their file IDs throughout the CWL object.
3601
+ """
3602
+ def fill_in_file(filename: str) -> FileID:
3603
+ """
3604
+ Return the file name's associated Toil file ID
3605
+ """
3606
+ return candidate_to_fileid[filename]
3607
+
3608
+ file_convert_function = functools.partial(
3609
+ extract_and_convert_file_to_toil_uri, fill_in_file
3610
+ )
3611
+ fs_access = ToilFsAccess(basedir)
3612
+ fileindex: dict[str, str] = {}
3613
+ existing: dict[str, str] = {}
3614
+ visit_files(
3615
+ file_convert_function,
3616
+ fs_access,
3617
+ fileindex,
3618
+ existing,
3619
+ initialized_job_order,
3620
+ mark_broken=True,
3621
+ skip_remote=skip_remote,
3622
+ bypass_file_store=bypass_file_store,
3623
+ )
3624
+ visitSteps(
3625
+ tool,
3626
+ functools.partial(
3627
+ visit_files,
3628
+ file_convert_function,
3629
+ fs_access,
3630
+ fileindex,
3631
+ existing,
3632
+ mark_broken=True,
3633
+ skip_remote=skip_remote,
3634
+ bypass_file_store=bypass_file_store,
3635
+ ),
3636
+ )
3637
+
3638
+ # We always expect to have processed all files that exist
3639
+ for param_name, param_value in initialized_job_order.items():
3640
+ # Loop through all the parameters for the workflow overall.
3641
+ # Drop any files that aren't either imported (for when we use
3642
+ # the file store) or available on disk (for when we don't).
3643
+ # This will properly make them cause an error later if they
3644
+ # were required.
3645
+ rm_unprocessed_secondary_files(param_value)
3646
+ return tool, initialized_job_order
3647
+
3648
+ def run(self, file_store: AbstractFileStore) -> Tuple[Process, CWLObjectType]:
3649
+ """
3650
+ Convert the filenames in the workflow inputs into URIs.
3651
+ :return: Promise of the transformed workflow: a tuple of the process and the job order.
3652
+ """
3653
+ candidate_to_fileid: dict[str, FileID] = unwrap(self.import_data)
3654
+
3655
+ initialized_job_order = unwrap(self.initialized_job_order)
3656
+ tool = unwrap(self.tool)
3657
+ return CWLInstallImportsJob.fill_in_files(
3658
+ initialized_job_order,
3659
+ tool,
3660
+ candidate_to_fileid,
3661
+ self.basedir,
3662
+ self.skip_remote,
3663
+ self.bypass_file_store,
3664
+ )
3665
+
3666
+
3667
+ class CWLImportWrapper(CWLNamedJob):
3668
+ """
3669
+ Job to organize importing files on workers instead of the leader. Responsible for extracting filenames and metadata,
3670
+ calling ImportsJob, applying the imports to the job objects, and scheduling the workflow start job.
3671
+
3672
+ This class is only used when runImportsOnWorkers is enabled.
3673
+ """
3674
+
3675
+ def __init__(
3676
+ self,
3677
+ initialized_job_order: CWLObjectType,
3678
+ tool: Process,
3679
+ runtime_context: cwltool.context.RuntimeContext,
3680
+ file_to_data: dict[str, FileMetadata],
3681
+ options: Namespace,
3682
+ ):
3683
+ super().__init__(local=False, disk=options.import_workers_threshold)
3684
+ self.initialized_job_order = initialized_job_order
3685
+ self.tool = tool
3686
+ self.options = options
3687
+ self.runtime_context = runtime_context
3688
+ self.file_to_data = file_to_data
3689
+
3690
+ def run(self, file_store: AbstractFileStore) -> Any:
3691
+ imports_job = ImportsJob(
3692
+ self.file_to_data,
3693
+ self.options.import_workers_threshold,
3694
+ self.options.import_workers_disk,
3695
+ )
3696
+ self.addChild(imports_job)
3697
+ install_imports_job = CWLInstallImportsJob(
3698
+ initialized_job_order=self.initialized_job_order,
3699
+ tool=self.tool,
3700
+ basedir=self.options.basedir,
3701
+ skip_remote=self.options.reference_inputs,
3702
+ bypass_file_store=self.options.bypass_file_store,
3703
+ import_data=imports_job.rv(0),
3704
+ )
3705
+ self.addChild(install_imports_job)
3706
+ imports_job.addFollowOn(install_imports_job)
3707
+
3708
+ start_job = CWLStartJob(
3709
+ install_imports_job.rv(0),
3710
+ install_imports_job.rv(1),
3711
+ runtime_context=self.runtime_context,
3712
+ )
3713
+ self.addChild(start_job)
3714
+ install_imports_job.addFollowOn(start_job)
3715
+
3716
+ return start_job.rv()
3717
+
3718
+
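`CWLImportWrapper.run` above chains its three stages with Toil promises: the imports job's `.rv(0)` feeds `CWLInstallImportsJob`, whose `.rv(0)`/`.rv(1)` feed `CWLStartJob` (defined just below), with `addFollowOn` pinning the ordering. A hedged sketch of that promise-chaining pattern using placeholder jobs (the job bodies here are hypothetical, not Toil's):

```python
from toil.job import Job

# Hedged sketch of the promise chaining used in CWLImportWrapper.run:
# each stage consumes the previous stage's .rv(), and addFollowOn pins
# the execution order.
class Produce(Job):
    def run(self, file_store):
        return {"a.txt": "toilfile:1234"}   # stands in for import results

class Consume(Job):
    def __init__(self, mapping):
        super().__init__()
        self.mapping = mapping              # a Promise until run time
    def run(self, file_store):
        return sorted(self.mapping)         # Promise resolved by here

def plan(parent: Job) -> None:
    produce = Produce()
    consume = Consume(produce.rv())         # pass the promise, not a value
    parent.addChild(produce)
    parent.addChild(consume)
    produce.addFollowOn(consume)            # run order: produce, consume
```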
3719
+ class CWLStartJob(CWLNamedJob):
3720
+ """
3721
+ Job responsible for starting the CWL workflow.
3722
+
3723
+ Takes in the workflow/tool and inputs after all files are imported
3724
+ and creates jobs to run that workflow.
3725
+ """
3726
+
3727
+ def __init__(
3728
+ self,
3729
+ tool: Promised[Process],
3730
+ initialized_job_order: Promised[CWLObjectType],
3731
+ runtime_context: cwltool.context.RuntimeContext,
3732
+ **kwargs: Any,
3733
+ ) -> None:
3734
+ super().__init__(**kwargs)
3735
+ self.tool = tool
3736
+ self.initialized_job_order = initialized_job_order
3737
+ self.runtime_context = runtime_context
3738
+
3739
+ def run(self, file_store: AbstractFileStore) -> Any:
3740
+ initialized_job_order = unwrap(self.initialized_job_order)
3741
+ tool = unwrap(self.tool)
3742
+ cwljob, _ = makeJob(
3743
+ tool, initialized_job_order, self.runtime_context, None, None
3744
+ ) # toplevel, no name needed
3745
+ cwljob.cwljob = initialized_job_order
3746
+ self.addChild(cwljob)
3747
+ return cwljob.rv()
3748
+
3749
+
3750
+ def extract_workflow_inputs(
3751
+ options: Namespace, initialized_job_order: CWLObjectType, tool: Process
3752
+ ) -> list[str]:
3753
+ """
3754
+ Collect all the workflow input files to import later.
3755
+ :param options: namespace
3756
+ :param initialized_job_order: cwl object
3757
+ :param tool: tool object
3758
+ :return: list of the workflow input filenames
3759
+ """
3760
+ fileindex: dict[str, str] = {}
3761
+ existing: dict[str, str] = {}
3762
+
3763
+ # Extract out all the input files' filenames
3764
+ logger.info("Collecting input files...")
3765
+ fs_access = ToilFsAccess(options.basedir)
3766
+ filenames = visit_files(
3767
+ extract_file_uri_once,
3768
+ fs_access,
3769
+ fileindex,
3770
+ existing,
3771
+ initialized_job_order,
3772
+ mark_broken=True,
3773
+ skip_remote=options.reference_inputs,
3774
+ bypass_file_store=options.bypass_file_store,
3775
+ )
3776
+ # Extract filenames of all the files associated with tools (binaries, etc.).
3777
+ logger.info("Collecting tool-associated files...")
3778
+ tool_filenames = visitSteps(
3779
+ tool,
3780
+ functools.partial(
3781
+ visit_files,
3782
+ extract_file_uri_once,
3783
+ fs_access,
3784
+ fileindex,
3785
+ existing,
3786
+ mark_broken=True,
3787
+ skip_remote=options.reference_inputs,
3788
+ bypass_file_store=options.bypass_file_store,
3789
+ ),
3790
+ )
3791
+ filenames.extend(tool_filenames)
3792
+ return [file for file in filenames if file is not None]
3793
+
3794
+
3795
+ def import_workflow_inputs(
3796
+ jobstore: AbstractJobStore,
3797
+ options: Namespace,
3798
+ initialized_job_order: CWLObjectType,
3799
+ tool: Process,
3800
+ log_level: int = logging.DEBUG,
3801
+ ) -> None:
3802
+ """
3803
+ Import all workflow inputs on the leader.
3804
+
3805
+ Run when not importing on workers.
3806
+ :param jobstore: Toil jobstore
3807
+ :param options: Namespace
3808
+ :param initialized_job_order: CWL object
3809
+ :param tool: CWL tool
3810
+ :param log_level: log level
3811
+ :return:
3812
+ """
3813
+ fileindex: dict[str, str] = {}
3814
+ existing: dict[str, str] = {}
3815
+
3816
+ # Define something we can call to import a file and get its file
3817
+ # ID.
3818
+ def file_import_function(url: str) -> FileID:
3819
+ logger.log(log_level, "Loading %s...", url)
3820
+ return jobstore.import_file(url, symlink=True)
3821
+
3822
+ import_function = functools.partial(
3823
+ extract_and_convert_file_to_toil_uri, file_import_function
3824
+ )
3825
+ # Import all the input files, some of which may be missing optional
3826
+ # files.
3827
+ logger.info("Importing input files...")
3828
+ fs_access = ToilFsAccess(options.basedir)
3829
+ visit_files(
3830
+ import_function,
3831
+ fs_access,
3832
+ fileindex,
3833
+ existing,
3834
+ initialized_job_order,
3835
+ mark_broken=True,
3836
+ skip_remote=options.reference_inputs,
3837
+ bypass_file_store=options.bypass_file_store,
3838
+ )
3839
+
3840
+ # Make another function for importing tool files. This one doesn't allow
3841
+ # symlinking, since the tools might be coming from storage not accessible
3842
+ # to all nodes.
3843
+ tool_import_function = functools.partial(
3844
+ extract_and_convert_file_to_toil_uri,
3845
+ cast(
3846
+ Callable[[str], FileID],
3847
+ functools.partial(jobstore.import_file, symlink=False),
3848
+ ),
3849
+ )
3850
+
3851
+ # Import all the files associated with tools (binaries, etc.).
3852
+ # Not sure why you would have an optional secondary file here, but
3853
+ # the spec probably needs us to support them.
3854
+ logger.info("Importing tool-associated files...")
3855
+ visitSteps(
3856
+ tool,
3857
+ functools.partial(
3858
+ visit_files,
3859
+ tool_import_function,
3860
+ fs_access,
3861
+ fileindex,
3862
+ existing,
3863
+ mark_broken=True,
3864
+ skip_remote=options.reference_inputs,
3865
+ bypass_file_store=options.bypass_file_store,
3866
+ ),
3867
+ )
3868
+
3869
+ # We always expect to have processed all files that exist
3870
+ for param_name, param_value in initialized_job_order.items():
3871
+ # Loop through all the parameters for the workflow overall.
3872
+ # Drop any files that aren't either imported (for when we use
3873
+ # the file store) or available on disk (for when we don't).
3874
+ # This will properly make them cause an error later if they
3875
+ # were required.
3876
+ rm_unprocessed_secondary_files(param_value)
3877
+
3878
+
3879
+ T = TypeVar("T")
3880
+
3881
+
3295
3882
  def visitSteps(
3296
3883
  cmdline_tool: Process,
3297
- op: Callable[[CommentedMap], None],
3298
- ) -> None:
3884
+ op: Callable[[CommentedMap], list[T]],
3885
+ ) -> list[T]:
3299
3886
  """
3300
3887
  Iterate over a CWL Process object, running the op on each tool description
3301
3888
  CWL object.
3302
3889
  """
3303
3890
  if isinstance(cmdline_tool, cwltool.workflow.Workflow):
3304
3891
  # For workflows we need to dispatch on steps
3892
+ ret = []
3305
3893
  for step in cmdline_tool.steps:
3306
3894
  # Handle the step's tool
3307
- op(step.tool)
3895
+ ret.extend(op(step.tool))
3308
3896
  # Recures on the embedded tool; maybe it's a workflow.
3309
- visitSteps(step.embedded_tool, op)
3897
+ recurse_ret = visitSteps(step.embedded_tool, op)
3898
+ ret.extend(recurse_ret)
3899
+ return ret
3310
3900
  elif isinstance(cmdline_tool, cwltool.process.Process):
3311
3901
  # All CWL Process objects (including CommandLineTool) will have tools
3312
3902
  # if they bothered to run the Process __init__.
3313
- op(cmdline_tool.tool)
3314
- else:
3315
- raise RuntimeError(
3316
- f"Unsupported type encountered in workflow "
3317
- f"traversal: {type(cmdline_tool)}"
3318
- )
3903
+ return op(cmdline_tool.tool)
3904
+ raise RuntimeError(
3905
+ f"Unsupported type encountered in workflow " f"traversal: {type(cmdline_tool)}"
3906
+ )
3319
3907
 
3320
3908
 
3321
3909
  def rm_unprocessed_secondary_files(job_params: Any) -> None:
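The rewritten `visitSteps` above is now generic over the callback's return type: `op` returns a `list[T]` per tool description and the traversal concatenates the results, which is how `extract_workflow_inputs` gathers filenames from arbitrarily nested workflows. A compact model of the contract (the `steps`/`tool` structure here only loosely mirrors cwltool's):

```python
# Compact model of the new generic visitSteps contract: the op returns
# a list per tool dict and the traversal concatenates the results.
def visit(node: dict, op) -> list:
    collected = list(op(node))
    for step in node.get("steps", []):
        collected.extend(visit(step, op))
    return collected

wf = {"id": "wf", "steps": [{"id": "s1"}, {"id": "s2", "steps": [{"id": "s3"}]}]}
assert visit(wf, lambda tool: [tool["id"]]) == ["wf", "s1", "s2", "s3"]
```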
@@ -3328,7 +3916,7 @@ def rm_unprocessed_secondary_files(job_params: Any) -> None:
3328
3916
 
3329
3917
  def filtered_secondary_files(
3330
3918
  unfiltered_secondary_files: CWLObjectType,
3331
- ) -> List[CWLObjectType]:
3919
+ ) -> list[CWLObjectType]:
3332
3920
  """
3333
3921
  Remove unprocessed secondary files.
3334
3922
 
@@ -3349,28 +3937,33 @@ def filtered_secondary_files(
3349
3937
  intermediate_secondary_files = []
3350
3938
  final_secondary_files = []
3351
3939
  # remove secondary files still containing interpolated strings
3352
- for sf in cast(List[CWLObjectType], unfiltered_secondary_files["secondaryFiles"]):
3940
+ for sf in cast(list[CWLObjectType], unfiltered_secondary_files["secondaryFiles"]):
3353
3941
  sf_bn = cast(str, sf.get("basename", ""))
3354
3942
  sf_loc = cast(str, sf.get("location", ""))
3355
3943
  if ("$(" not in sf_bn) and ("${" not in sf_bn):
3356
3944
  if ("$(" not in sf_loc) and ("${" not in sf_loc):
3357
3945
  intermediate_secondary_files.append(sf)
3358
3946
  else:
3359
- logger.debug("Secondary file %s is dropped because it has an uninterpolated location", sf)
3947
+ logger.debug(
3948
+ "Secondary file %s is dropped because it has an uninterpolated location",
3949
+ sf,
3950
+ )
3360
3951
  else:
3361
- logger.debug("Secondary file %s is dropped because it has an uninterpolated basename", sf)
3952
+ logger.debug(
3953
+ "Secondary file %s is dropped because it has an uninterpolated basename",
3954
+ sf,
3955
+ )
3362
3956
  # remove secondary files that are not present in the filestore or pointing
3363
3957
  # to existent things on disk
3364
3958
  for sf in intermediate_secondary_files:
3365
3959
  sf_loc = cast(str, sf.get("location", ""))
3366
- if (
3367
- sf_loc != MISSING_FILE
3368
- or sf.get("class", "") == "Directory"
3369
- ):
3960
+ if sf_loc != MISSING_FILE or sf.get("class", "") == "Directory":
3370
3961
  # Pass imported files, and all Directories
3371
3962
  final_secondary_files.append(sf)
3372
3963
  else:
3373
- logger.debug("Secondary file %s is dropped because it is known to be missing", sf)
3964
+ logger.debug(
3965
+ "Secondary file %s is dropped because it is known to be missing", sf
3966
+ )
3374
3967
  return final_secondary_files
3375
3968
 
3376
3969
 
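`filtered_secondary_files` applies two passes: first it drops entries whose basename or location still contain an uninterpolated CWL expression, then it drops entries known to be missing from the file store. The expression check in isolation:

```python
# The uninterpolated-expression check from filtered_secondary_files in
# isolation: any surviving "$(" or "${" means the secondary-file pattern
# never resolved against a real input.
def is_interpolated(text: str) -> bool:
    return "$(" not in text and "${" not in text

assert is_interpolated("reads.bam.bai")
assert not is_interpolated("$(inputs.reads.basename).bai")
```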
@@ -3475,8 +4068,6 @@ def determine_load_listing(
3475
4068
  class NoAvailableJobStoreException(Exception):
3476
4069
  """Indicates that no job store name is available."""
3477
4070
 
3478
- pass
3479
-
3480
4071
 
3481
4072
  def generate_default_job_store(
3482
4073
  batch_system_name: Optional[str],
@@ -3544,37 +4135,64 @@ def generate_default_job_store(
3544
4135
 
3545
4136
  usage_message = "\n\n" + textwrap.dedent(
3546
4137
  """
3547
- * All positional arguments [cwl, yml_or_json] must always be specified last for toil-cwl-runner.
3548
- Note: If you're trying to specify a jobstore, please use --jobStore.
3549
-
3550
- Usage: toil-cwl-runner [options] example.cwl example-job.yaml
3551
- Example: toil-cwl-runner \\
3552
- --jobStore aws:us-west-2:jobstore \\
3553
- --realTimeLogging \\
3554
- --logInfo \\
3555
- example.cwl \\
3556
- example-job.yaml
3557
- """[
4138
+ NOTE: If you're trying to specify a jobstore, you must use --jobStore, not a positional argument.
4139
+
4140
+ Usage: toil-cwl-runner [options] <workflow> [<input file>] [workflow options]
4141
+
4142
+ Example: toil-cwl-runner \\
4143
+ --jobStore aws:us-west-2:jobstore \\
4144
+ --realTimeLogging \\
4145
+ --logInfo \\
4146
+ example.cwl \\
4147
+ example-job.yaml \\
4148
+ --wf_input="hello world"
4149
+ """[
3558
4150
  1:
3559
4151
  ]
3560
4152
  )
3561
4153
 
3562
- def get_options(args: List[str]) -> Namespace:
4154
+
4155
+ def get_options(args: list[str]) -> Namespace:
3563
4156
  """
3564
4157
  Parse given args and properly add non-Toil arguments into the cwljob of the Namespace.
3565
4158
  :param args: List of args from command line
3566
4159
  :return: options namespace
3567
4160
  """
3568
- parser = ArgParser()
4161
+ # We can't allow abbreviations in case the workflow defines an option that
4162
+ # is a prefix of a Toil option.
4163
+ parser = ArgParser(
4164
+ allow_abbrev=False,
4165
+ usage="%(prog)s [options] WORKFLOW [INFILE] [WF_OPTIONS...]",
4166
+ description=textwrap.dedent(
4167
+ """
4168
+ positional arguments:
4169
+
4170
+ WORKFLOW CWL file to run.
4171
+
4172
+ INFILE YAML or JSON file of workflow inputs.
4173
+
4174
+ WF_OPTIONS Additional inputs to the workflow as command-line
4175
+ flags. If the CWL workflow takes an input, the name of the
4176
+ input can be used as an option. For example:
4177
+
4178
+ %(prog)s workflow.cwl --file1 file
4179
+
4180
+ If an input has the same name as a Toil option, pass
4181
+ '--' before it.
4182
+ """
4183
+ ),
4184
+ formatter_class=RawDescriptionHelpFormatter,
4185
+ )
4186
+
3569
4187
  addOptions(parser, jobstore_as_flag=True, cwl=True)
3570
4188
  options: Namespace
3571
- options, cwl_options = parser.parse_known_args(args)
3572
- options.cwljob.extend(cwl_options)
4189
+ options, extra = parser.parse_known_args(args)
4190
+ options.cwljob = extra
3573
4191
 
3574
4192
  return options
3575
4193
 
3576
4194
 
3577
- def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
4195
+ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
3578
4196
  """Run the main loop for toil-cwl-runner."""
3579
4197
  # Remove cwltool logger's stream handler so it uses Toil's
3580
4198
  cwllogger.removeHandler(defaultStreamHandler)
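The `get_options` rework earlier in this hunk changes CLI behavior, not just style: `allow_abbrev=False` stops argparse from swallowing a workflow input flag that happens to be a prefix of a Toil option, and `parse_known_args` forwards everything unrecognized to the workflow as `options.cwljob`. A minimal reproduction of the pattern:

```python
import argparse

# Minimal reproduction of the get_options pattern: refuse abbreviations
# so workflow flags that prefix a Toil option survive, and forward all
# unrecognized arguments to the workflow.
parser = argparse.ArgumentParser(allow_abbrev=False)
parser.add_argument("--jobStore")

options, extra = parser.parse_known_args(
    ["--jobStore", "./store", "--job", "input.json"]
)
assert options.jobStore == "./store"
assert extra == ["--job", "input.json"]   # "--job" is NOT treated as an
                                          # abbreviation of --jobStore
```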
@@ -3586,25 +4204,21 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3586
4204
 
3587
4205
  # Do cwltool setup
3588
4206
  cwltool.main.setup_schema(args=options, custom_schema_callback=None)
3589
- tmpdir_prefix = options.tmpdir_prefix = options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
3590
-
3591
- # We need a workdir for the CWL runtime contexts.
3592
- if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX:
3593
- # if tmpdir_prefix is not the default value, move
3594
- # workdir and the default job store under it
3595
- workdir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3596
- else:
3597
- # Use a directory in the default tmpdir
3598
- workdir = mkdtemp()
3599
- # Make sure workdir doesn't exist so it can be a job store
3600
- os.rmdir(workdir)
4207
+ tmpdir_prefix = options.tmpdir_prefix = (
4208
+ options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
4209
+ )
4210
+ tmp_outdir_prefix = options.tmp_outdir_prefix or tmpdir_prefix
4211
+ workdir = options.workDir or tmp_outdir_prefix
3601
4212
 
3602
4213
  if options.jobStore is None:
4214
+ jobstore = cwltool.utils.create_tmp_dir(tmp_outdir_prefix)
4215
+ # Make sure directory doesn't exist so it can be a job store
4216
+ os.rmdir(jobstore)
3603
4217
  # Pick a default job store specifier appropriate to our choice of batch
3604
4218
  # system and provisioner and installed modules, given this available
3605
4219
  # local directory name. Fail if no good default can be used.
3606
4220
  options.jobStore = generate_default_job_store(
3607
- options.batchSystem, options.provisioner, workdir
4221
+ options.batchSystem, options.provisioner, jobstore
3608
4222
  )
3609
4223
 
3610
4224
  options.doc_cache = True
@@ -3612,17 +4226,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3612
4226
  options.do_validate = True
3613
4227
  options.pack = False
3614
4228
  options.print_subgraph = False
3615
- if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.workDir is None:
3616
- # We need to override workDir because by default Toil will pick
3617
- # somewhere under the system temp directory if unset, ignoring
3618
- # --tmpdir-prefix.
3619
- #
3620
- # If set, workDir needs to exist, so we directly use the prefix
3621
- options.workDir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3622
- if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.coordination_dir is None:
3623
- # override coordination_dir as default Toil will pick somewhere else
3624
- # ignoring --tmpdir_prefix
3625
- options.coordination_dir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3626
4229
 
3627
4230
  if options.batchSystem == "kubernetes":
3628
4231
  # Containers under Kubernetes can only run in Singularity
@@ -3640,12 +4243,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3640
4243
  logger.debug(f"Final job store {options.jobStore} and workDir {options.workDir}")
3641
4244
 
3642
4245
  outdir = os.path.abspath(options.outdir or os.getcwd())
3643
- tmp_outdir_prefix = os.path.abspath(
3644
- options.tmp_outdir_prefix or DEFAULT_TMPDIR_PREFIX
3645
- )
3646
-
3647
- fileindex: Dict[str, str] = {}
3648
- existing: Dict[str, str] = {}
3649
4246
  conf_file = getattr(options, "beta_dependency_resolvers_configuration", None)
3650
4247
  use_conda_dependencies = getattr(options, "beta_conda_dependencies", None)
3651
4248
  job_script_provider = None
@@ -3660,6 +4257,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3660
4257
  )
3661
4258
  runtime_context.workdir = workdir # type: ignore[attr-defined]
3662
4259
  runtime_context.outdir = outdir
4260
+ setattr(runtime_context, "cwl_default_ram", options.cwl_default_ram)
3663
4261
  runtime_context.move_outputs = "leave"
3664
4262
  runtime_context.rm_tmpdir = False
3665
4263
  runtime_context.streaming_allowed = not options.disable_streaming
@@ -3694,225 +4292,204 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3694
4292
  runtime_context.research_obj = research_obj
3695
4293
 
3696
4294
  try:
3697
- with Toil(options) as toil:
3698
- if options.restart:
3699
- outobj = toil.restart()
3700
- else:
3701
- loading_context.hints = [
3702
- {
3703
- "class": "ResourceRequirement",
3704
- "coresMin": toil.config.defaultCores,
3705
- "ramMin": toil.config.defaultMemory / (2**20),
3706
- "outdirMin": toil.config.defaultDisk / (2**20),
3707
- "tmpdirMin": 0,
3708
- }
3709
- ]
3710
- loading_context.construct_tool_object = toil_make_tool
3711
- loading_context.strict = not options.not_strict
3712
- options.workflow = options.cwltool
3713
- options.job_order = options.cwljob
3714
4295
 
3715
- try:
3716
- uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
3717
- options.cwltool,
3718
- loading_context.resolver,
3719
- loading_context.fetcher_constructor,
3720
- )
3721
- except ValidationException:
3722
- print(
3723
- "\nYou may be getting this error because your arguments are incorrect or out of order."
3724
- + usage_message,
3725
- file=sys.stderr,
3726
- )
3727
- raise
4296
+ if not options.restart:
4297
+ # Make a version of the config based on the initial options, for
4298
+ # setting up CWL option stuff
4299
+ expected_config = Config()
4300
+ expected_config.setOptions(options)
3728
4301
 
3729
- options.tool_help = None
3730
- options.debug = options.logLevel == "DEBUG"
3731
- job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
3732
- options,
3733
- sys.stdin,
4302
+ # Before showing the options to any cwltool stuff that wants to
4303
+ # load the workflow, transform options.cwltool, where our
4304
+ # argument for what to run is, to handle Dockstore workflows.
4305
+ options.cwltool = resolve_workflow(options.cwltool)
4306
+
4307
+ # TODO: why are we doing this? Does this get applied to all
4308
+ # tools as a default or something?
4309
+ loading_context.hints = [
4310
+ {
4311
+ "class": "ResourceRequirement",
4312
+ "coresMin": expected_config.defaultCores,
4313
+ # Don't include any RAM requirement because we want to
4314
+ # know when tools don't manually ask for RAM.
4315
+ "outdirMin": expected_config.defaultDisk / (2**20),
4316
+ "tmpdirMin": 0,
4317
+ }
4318
+ ]
4319
+ loading_context.construct_tool_object = toil_make_tool
4320
+ loading_context.strict = not options.not_strict
4321
+ options.workflow = options.cwltool
4322
+ options.job_order = options.cwljob
4323
+
4324
+ try:
4325
+ uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
4326
+ options.cwltool,
4327
+ loading_context.resolver,
3734
4328
  loading_context.fetcher_constructor,
3735
- loading_context.overrides_list,
3736
- tool_file_uri,
3737
4329
  )
3738
- if options.overrides:
3739
- loading_context.overrides_list.extend(
3740
- cwltool.load_tool.load_overrides(
3741
- schema_salad.ref_resolver.file_uri(
3742
- os.path.abspath(options.overrides)
3743
- ),
3744
- tool_file_uri,
3745
- )
3746
- )
3747
-
3748
- loading_context, workflowobj, uri = cwltool.load_tool.fetch_document(
3749
- uri, loading_context
4330
+ except ValidationException:
4331
+ print(
4332
+ "\nYou may be getting this error because your arguments are incorrect or out of order."
4333
+ + usage_message,
4334
+ file=sys.stderr,
3750
4335
  )
3751
- loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
3752
- loading_context, workflowobj, uri
4336
+ raise
4337
+
4338
+ # Attempt to prepull the containers
4339
+ if not options.no_prepull and not options.no_container:
4340
+ try_prepull(uri, runtime_context, expected_config.batchSystem)
4341
+
4342
+ options.tool_help = None
4343
+ options.debug = options.logLevel == "DEBUG"
4344
+ job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
4345
+ options,
4346
+ sys.stdin,
4347
+ loading_context.fetcher_constructor,
4348
+ loading_context.overrides_list,
4349
+ tool_file_uri,
4350
+ )
4351
+ if options.overrides:
4352
+ loading_context.overrides_list.extend(
4353
+ cwltool.load_tool.load_overrides(
4354
+ schema_salad.ref_resolver.file_uri(
4355
+ os.path.abspath(options.overrides)
4356
+ ),
4357
+ tool_file_uri,
4358
+ )
3753
4359
  )
3754
- if not loading_context.loader:
3755
- raise RuntimeError("cwltool loader is not set.")
3756
- processobj, metadata = loading_context.loader.resolve_ref(uri)
3757
- processobj = cast(Union[CommentedMap, CommentedSeq], processobj)
3758
4360
 
3759
- document_loader = loading_context.loader
4361
+ loading_context, workflowobj, uri = cwltool.load_tool.fetch_document(
4362
+ uri, loading_context
4363
+ )
4364
+ loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
4365
+ loading_context, workflowobj, uri
4366
+ )
4367
+ if not loading_context.loader:
4368
+ raise RuntimeError("cwltool loader is not set.")
4369
+ processobj, metadata = loading_context.loader.resolve_ref(uri)
4370
+ processobj = cast(Union[CommentedMap, CommentedSeq], processobj)
3760
4371
 
3761
- if options.provenance and runtime_context.research_obj:
3762
- cwltool.cwlprov.writablebagfile.packed_workflow(
3763
- runtime_context.research_obj,
3764
- cwltool.main.print_pack(loading_context, uri),
3765
- )
4372
+ document_loader = loading_context.loader
3766
4373
 
3767
- try:
3768
- tool = cwltool.load_tool.make_tool(uri, loading_context)
3769
- scan_for_unsupported_requirements(
3770
- tool, bypass_file_store=options.bypass_file_store
3771
- )
3772
- except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
3773
- logging.error(err)
3774
- return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3775
- runtime_context.secret_store = SecretStore()
4374
+ if options.provenance and runtime_context.research_obj:
4375
+ cwltool.cwlprov.writablebagfile.packed_workflow(
4376
+ runtime_context.research_obj,
4377
+ cwltool.main.print_pack(loading_context, uri),
4378
+ )
3776
4379
 
3777
- try:
3778
- # Get the "order" for the execution of the root job. CWLTool
3779
- # doesn't document this much, but this is an "order" in the
3780
- # sense of a "specification" for running a single job. It
3781
- # describes the inputs to the workflow.
3782
- initialized_job_order = cwltool.main.init_job_order(
3783
- job_order_object,
3784
- options,
3785
- tool,
3786
- jobloader,
3787
- sys.stdout,
3788
- make_fs_access=runtime_context.make_fs_access,
3789
- input_basedir=options.basedir,
3790
- secret_store=runtime_context.secret_store,
3791
- input_required=True,
4380
+ try:
4381
+ tool = cwltool.load_tool.make_tool(uri, loading_context)
4382
+ scan_for_unsupported_requirements(
4383
+ tool, bypass_file_store=options.bypass_file_store
4384
+ )
4385
+ except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
4386
+ logging.error(err)
4387
+ return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
4388
+ runtime_context.secret_store = SecretStore()
4389
+
4390
+ try:
4391
+ # Get the "order" for the execution of the root job. CWLTool
4392
+ # doesn't document this much, but this is an "order" in the
4393
+ # sense of a "specification" for running a single job. It
4394
+ # describes the inputs to the workflow.
4395
+ initialized_job_order = cwltool.main.init_job_order(
4396
+ job_order_object,
4397
+ options,
4398
+ tool,
4399
+ jobloader,
4400
+ sys.stdout,
4401
+ make_fs_access=runtime_context.make_fs_access,
4402
+ input_basedir=options.basedir,
4403
+ secret_store=runtime_context.secret_store,
4404
+ input_required=True,
4405
+ )
4406
+ except SystemExit as err:
4407
+ if err.code == 2: # raised by argparse's parse_args() function
4408
+ print(
4409
+ "\nIf both a CWL file and an input object (YAML/JSON) file were "
4410
+ "provided, the problem may be the argument order."
4411
+ + usage_message,
4412
+ file=sys.stderr,
3792
4413
  )
3793
- except SystemExit as err:
3794
- if err.code == 2: # raised by argparse's parse_args() function
3795
- print(
3796
- "\nIf both a CWL file and an input object (YAML/JSON) file were "
3797
- "provided, this may be the argument order." + usage_message,
3798
- file=sys.stderr,
3799
- )
3800
- raise
4414
+ raise
3801
4415
 
3802
- # Leave the defaults un-filled in the top-level order. The tool or
3803
- # workflow will fill them when it runs
3804
-
3805
- for inp in tool.tool["inputs"]:
3806
- if (
3807
- shortname(inp["id"]) in initialized_job_order
3808
- and inp["type"] == "File"
3809
- ):
3810
- cast(CWLObjectType, initialized_job_order[shortname(inp["id"])])[
3811
- "streamable"
3812
- ] = inp.get("streamable", False)
3813
- # TODO also for nested types that contain streamable Files
3814
-
3815
- runtime_context.use_container = not options.no_container
3816
- runtime_context.tmp_outdir_prefix = os.path.realpath(tmp_outdir_prefix)
3817
- runtime_context.job_script_provider = job_script_provider
3818
- runtime_context.force_docker_pull = options.force_docker_pull
3819
- runtime_context.no_match_user = options.no_match_user
3820
- runtime_context.no_read_only = options.no_read_only
3821
- runtime_context.basedir = options.basedir
3822
- if not options.bypass_file_store:
3823
- # If we're using the file store we need to start moving output
3824
- # files now.
3825
- runtime_context.move_outputs = "move"
3826
-
3827
- # We instantiate an early builder object here to populate indirect
3828
- # secondaryFile references using cwltool's library because we need
3829
- # to resolve them before toil imports them into the filestore.
3830
- # A second builder will be built in the job's run method when toil
3831
- # actually starts the cwl job.
3832
- # Note that this accesses input files for tools, so the
3833
- # ToilFsAccess needs to be set up if we want to be able to use
3834
- # URLs.
3835
- builder = tool._init_job(initialized_job_order, runtime_context)
4416
+ # Leave the defaults un-filled in the top-level order. The tool or
4417
+ # workflow will fill them when it runs
3836
4418
 
4419
+ for inp in tool.tool["inputs"]:
4420
+ if (
4421
+ shortname(inp["id"]) in initialized_job_order
4422
+ and inp["type"] == "File"
4423
+ ):
4424
+ cast(CWLObjectType, initialized_job_order[shortname(inp["id"])])[
4425
+ "streamable"
4426
+ ] = inp.get("streamable", False)
4427
+ # TODO also for nested types that contain streamable Files
4428
+
4429
+ runtime_context.use_container = not options.no_container
4430
+ runtime_context.tmp_outdir_prefix = os.path.realpath(tmp_outdir_prefix)
4431
+ runtime_context.job_script_provider = job_script_provider
4432
+ runtime_context.force_docker_pull = options.force_docker_pull
4433
+ runtime_context.no_match_user = options.no_match_user
4434
+ runtime_context.no_read_only = options.no_read_only
4435
+ runtime_context.basedir = options.basedir
4436
+ if not options.bypass_file_store:
4437
+ # If we're using the file store we need to start moving output
4438
+ # files now.
4439
+ runtime_context.move_outputs = "move"
4440
+
4441
+ # We instantiate an early builder object here to populate indirect
4442
+ # secondaryFile references using cwltool's library because we need
4443
+ # to resolve them before toil imports them into the filestore.
4444
+ # A second builder will be built in the job's run method when toil
4445
+ # actually starts the cwl job.
4446
+ # Note that this accesses input files for tools, so the
4447
+ # ToilFsAccess needs to be set up if we want to be able to use
4448
+ # URLs.
4449
+ builder = tool._init_job(initialized_job_order, runtime_context)
4450
+ if not isinstance(tool, cwltool.workflow.Workflow):
3837
4451
  # make sure this doesn't add listing items; if shallow_listing is
3838
4452
  # selected, it will discover dirs one deep and then again later on
3839
- # (probably when the cwltool builder gets ahold of the job in the
3840
- # CWL job's run()), producing 2+ deep listings instead of only 1.
4453
+ # (when the cwltool builder gets constructed from the job in the
4454
+ # CommandLineTool's job() method,
4455
+ # see https://github.com/common-workflow-language/cwltool/blob/9cda157cb4380e9d30dec29f0452c56d0c10d064/cwltool/command_line_tool.py#L951),
4456
+ # producing 2+ deep listings instead of only 1.
4457
+ # ExpressionTool also uses a builder, see https://github.com/common-workflow-language/cwltool/blob/9cda157cb4380e9d30dec29f0452c56d0c10d064/cwltool/command_line_tool.py#L207
4458
+ # Workflows don't need this because they don't go through CommandLineTool or ExpressionTool
3841
4459
  builder.loadListing = "no_listing"
3842
4460
 
3843
- builder.bind_input(
3844
- tool.inputs_record_schema,
3845
- initialized_job_order,
3846
- discover_secondaryFiles=True,
3847
- )
4461
+ # make sure this doesn't add listing items; if shallow_listing is
4462
+ # selected, it will discover dirs one deep and then again later on
4463
+ # (probably when the cwltool builder gets ahold of the job in the
4464
+ # CWL job's run()), producing 2+ deep listings instead of only 1.
4465
+ builder.loadListing = "no_listing"
3848
4466
 
3849
- # Define something we can call to import a file and get its file
3850
- # ID.
3851
- # We cast this because import_file is overloaded depending on if we
3852
- # pass a shared file name or not, and we know the way we call it we
3853
- # always get a FileID out.
3854
- file_import_function = cast(
3855
- Callable[[str], FileID],
3856
- functools.partial(toil.import_file, symlink=True),
3857
- )
4467
+ builder.bind_input(
4468
+ tool.inputs_record_schema,
4469
+ initialized_job_order,
4470
+ discover_secondaryFiles=True,
4471
+ )
3858
4472
 
3859
- # Import all the input files, some of which may be missing optional
3860
- # files.
3861
- logger.info("Importing input files...")
3862
- fs_access = ToilFsAccess(options.basedir)
3863
- import_files(
3864
- file_import_function,
3865
- fs_access,
3866
- fileindex,
3867
- existing,
3868
- initialized_job_order,
3869
- mark_broken=True,
3870
- skip_remote=options.reference_inputs,
3871
- bypass_file_store=options.bypass_file_store,
3872
- log_level=logging.INFO,
3873
- )
3874
- # Import all the files associated with tools (binaries, etc.).
3875
- # Not sure why you would have an optional secondary file here, but
3876
- # the spec probably needs us to support them.
3877
- logger.info("Importing tool-associated files...")
3878
- visitSteps(
3879
- tool,
3880
- functools.partial(
3881
- import_files,
3882
- file_import_function,
3883
- fs_access,
3884
- fileindex,
3885
- existing,
3886
- mark_broken=True,
3887
- skip_remote=options.reference_inputs,
3888
- bypass_file_store=options.bypass_file_store,
3889
- log_level=logging.INFO,
3890
- ),
3891
- )
4473
+ logger.info("Creating root job")
4474
+ logger.debug("Root tool: %s", tool)
4475
+ tool = remove_pickle_problems(tool)
3892
4476
 
3893
- # We always expect to have processed all files that exist
3894
- for param_name, param_value in initialized_job_order.items():
3895
- # Loop through all the parameters for the workflow overall.
3896
- # Drop any files that aren't either imported (for when we use
3897
- # the file store) or available on disk (for when we don't).
3898
- # This will properly make them cause an error later if they
3899
- # were required.
3900
- rm_unprocessed_secondary_files(param_value)
3901
-
3902
- logger.info("Creating root job")
3903
- logger.debug("Root tool: %s", tool)
4477
+ with Toil(options) as toil:
4478
+ if options.restart:
4479
+ outobj = toil.restart()
4480
+ else:
3904
4481
  try:
3905
- wf1, _ = makeJob(
4482
+ wf1 = makeRootJob(
3906
4483
  tool=tool,
3907
4484
  jobobj={},
3908
4485
  runtime_context=runtime_context,
3909
- parent_name=None, # toplevel, no name needed
3910
- conditional=None,
4486
+ initialized_job_order=initialized_job_order,
4487
+ options=options,
4488
+ toil=toil,
3911
4489
  )
3912
4490
  except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
3913
4491
  logging.error(err)
3914
4492
  return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3915
- wf1.cwljob = initialized_job_order
3916
4493
  logger.info("Starting workflow")
3917
4494
  outobj = toil.start(wf1)
3918
4495
 
@@ -3929,7 +4506,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3929
4506
  outobj,
3930
4507
  outdir,
3931
4508
  destBucket=options.destBucket,
3932
- log_level=logging.INFO
4509
+ log_level=logging.INFO,
3933
4510
  )
3934
4511
  logger.info("Stored workflow outputs")
3935
4512
 
@@ -3992,8 +4569,13 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3992
4569
  else:
3993
4570
  logging.error(err)
3994
4571
  return 1
3995
- except (InsufficientSystemResources, LocatorException, InvalidImportExportUrlException, UnimplementedURLException,
3996
- JobTooBigError) as err:
4572
+ except (
4573
+ InsufficientSystemResources,
4574
+ LocatorException,
4575
+ InvalidImportExportUrlException,
4576
+ UnimplementedURLException,
4577
+ JobTooBigError,
4578
+ ) as err:
3997
4579
  logging.error(err)
3998
4580
  return 1
3999
4581