toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Implemented support for Common Workflow Language (CWL) for Toil."""
2
+
2
3
  # Copyright (C) 2015 Curoverse, Inc
3
4
  # Copyright (C) 2015-2021 Regents of the University of California
4
5
  # Copyright (C) 2019-2020 Seven Bridges
@@ -29,31 +30,29 @@ import logging
29
30
  import os
30
31
  import pprint
31
32
  import shutil
32
- import socket
33
33
  import stat
34
34
  import sys
35
35
  import textwrap
36
36
  import uuid
37
- from tempfile import NamedTemporaryFile, gettempdir
37
+ from collections.abc import Iterator, Mapping, MutableMapping, MutableSequence
38
+ from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
38
39
  from threading import Thread
39
40
  from typing import (
40
41
  IO,
41
42
  Any,
42
43
  Callable,
43
- Dict,
44
44
  Iterator,
45
- List,
46
45
  Mapping,
47
46
  MutableMapping,
48
47
  MutableSequence,
49
48
  Optional,
50
49
  TextIO,
51
50
  Tuple,
52
- Type,
53
51
  TypeVar,
54
52
  Union,
55
53
  cast,
56
- Sequence,
54
+ Literal,
55
+ Protocol,
57
56
  )
58
57
  from urllib.parse import quote, unquote, urlparse, urlsplit
59
58
 
@@ -68,7 +67,10 @@ import cwltool.load_tool
68
67
  import cwltool.main
69
68
  import cwltool.resolver
70
69
  import schema_salad.ref_resolver
71
- from configargparse import ArgParser, SUPPRESS, Namespace
70
+
71
+ # This is also in configargparse but MyPy doesn't know it
72
+ from argparse import RawDescriptionHelpFormatter
73
+ from configargparse import ArgParser, Namespace
72
74
  from cwltool.loghandler import _logger as cwllogger
73
75
  from cwltool.loghandler import defaultStreamHandler
74
76
  from cwltool.mpi import MpiConfig
@@ -82,6 +84,7 @@ from cwltool.process import (
82
84
  shortname,
83
85
  )
84
86
  from cwltool.secrets import SecretStore
87
+ from cwltool.singularity import SingularityCommandLineJob
85
88
  from cwltool.software_requirements import (
86
89
  DependenciesConfiguration,
87
90
  get_container_from_software_requirements,
@@ -103,11 +106,14 @@ from schema_salad.avro.schema import Names
103
106
  from schema_salad.exceptions import ValidationException
104
107
  from schema_salad.ref_resolver import file_uri, uri_file_path
105
108
  from schema_salad.sourceline import SourceLine
106
- from typing_extensions import Literal
107
109
 
110
+ from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
108
111
  from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
109
- from toil.common import Toil, addOptions
112
+ from toil.common import Config, Toil, addOptions
110
113
  from toil.cwl import check_cwltool_version
114
+ from toil.lib.integration import resolve_workflow
115
+ from toil.lib.misc import call_command
116
+ from toil.provisioners.clusterScaler import JobTooBigError
111
117
 
112
118
  check_cwltool_version()
113
119
  from toil.cwl.utils import (
@@ -120,12 +126,28 @@ from toil.cwl.utils import (
120
126
  from toil.exceptions import FailedJobsException
121
127
  from toil.fileStores import FileID
122
128
  from toil.fileStores.abstractFileStore import AbstractFileStore
123
- from toil.job import AcceleratorRequirement, Job, Promise, Promised, unwrap
124
- from toil.jobStores.abstractJobStore import AbstractJobStore, NoSuchFileException
129
+ from toil.job import (
130
+ AcceleratorRequirement,
131
+ Job,
132
+ Promise,
133
+ Promised,
134
+ unwrap,
135
+ ImportsJob,
136
+ get_file_sizes,
137
+ FileMetadata,
138
+ WorkerImportJob,
139
+ )
140
+ from toil.jobStores.abstractJobStore import (
141
+ AbstractJobStore,
142
+ NoSuchFileException,
143
+ InvalidImportExportUrlException,
144
+ LocatorException,
145
+ )
146
+ from toil.lib.exceptions import UnimplementedURLException
125
147
  from toil.jobStores.fileJobStore import FileJobStore
126
148
  from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
127
149
  from toil.lib.io import mkdtemp
128
- from toil.lib.threading import ExceptionalThread
150
+ from toil.lib.threading import ExceptionalThread, global_mutex
129
151
  from toil.statsAndLogging import DEFAULT_LOGLEVEL
130
152
 
131
153
  logger = logging.getLogger(__name__)
@@ -157,7 +179,7 @@ def cwltoil_was_removed() -> None:
157
179
  # output object to the correct key of the input object.
158
180
 
159
181
 
160
- class UnresolvedDict(Dict[Any, Any]):
182
+ class UnresolvedDict(dict[Any, Any]):
161
183
  """Tag to indicate a dict contains promises that must be resolved."""
162
184
 
163
185
 
@@ -192,7 +214,7 @@ def filter_skip_null(name: str, value: Any) -> Any:
192
214
  return value
193
215
 
194
216
 
195
- def _filter_skip_null(value: Any, err_flag: List[bool]) -> Any:
217
+ def _filter_skip_null(value: Any, err_flag: list[bool]) -> Any:
196
218
  """
197
219
  Private implementation for recursively filtering out SkipNull objects from 'value'.
198
220
 
@@ -241,18 +263,50 @@ def ensure_no_collisions(
241
263
  seen_names.add(wanted_name)
242
264
 
243
265
 
266
+ def try_prepull(
267
+ cwl_tool_uri: str, runtime_context: cwltool.context.RuntimeContext, batchsystem: str
268
+ ) -> None:
269
+ """
270
+ Try to prepull all containers in a CWL workflow with Singularity or Docker.
271
+ This will not prepull the default container specified on the command line.
272
+ :param cwl_tool_uri: CWL workflow URL. Fragments are accepted as well
273
+ :param runtime_context: runtime context of cwltool
274
+ :param batchsystem: type of Toil batchsystem
275
+ :return:
276
+ """
277
+ if runtime_context.singularity:
278
+ if "CWL_SINGULARITY_CACHE" in os.environ:
279
+ logger.info("Prepulling the workflow's containers with Singularity...")
280
+ call_command(
281
+ [
282
+ "cwl-docker-extract",
283
+ "--singularity",
284
+ "--dir",
285
+ os.environ["CWL_SINGULARITY_CACHE"],
286
+ cwl_tool_uri,
287
+ ]
288
+ )
289
+ elif not runtime_context.user_space_docker_cmd and not runtime_context.podman:
290
+ # For udocker and podman prefetching is unimplemented
291
+ # This is docker
292
+ if batchsystem == "single_machine":
293
+ # Only on single machine will the docker daemon be accessible by all workers and the leader
294
+ logger.info("Prepulling the workflow's containers with Docker...")
295
+ call_command(["cwl-docker-extract", cwl_tool_uri])
296
+
297
+
244
298
  class Conditional:
245
299
  """
246
300
  Object holding conditional expression until we are ready to evaluate it.
247
301
 
248
- Evaluation occurs at the moment the encloses step is ready to run.
302
+ Evaluation occurs before the enclosing step's inputs are type-checked.
249
303
  """
250
304
 
251
305
  def __init__(
252
306
  self,
253
307
  expression: Optional[str] = None,
254
- outputs: Union[Dict[str, CWLOutputType], None] = None,
255
- requirements: Optional[List[CWLObjectType]] = None,
308
+ outputs: Union[dict[str, CWLOutputType], None] = None,
309
+ requirements: Optional[list[CWLObjectType]] = None,
256
310
  container_engine: str = "docker",
257
311
  ):
258
312
  """
@@ -297,7 +351,7 @@ class Conditional:
297
351
  "'%s' evaluated to a non-boolean value" % self.expression
298
352
  )
299
353
 
300
- def skipped_outputs(self) -> Dict[str, SkipNull]:
354
+ def skipped_outputs(self) -> dict[str, SkipNull]:
301
355
  """Generate a dict of SkipNull objects corresponding to the output structure."""
302
356
  outobj = {}
303
357
 
@@ -317,14 +371,14 @@ class Conditional:
317
371
  class ResolveSource:
318
372
  """Apply linkMerge and pickValue operators to values coming into a port."""
319
373
 
320
- promise_tuples: Union[List[Tuple[str, Promise]], Tuple[str, Promise]]
374
+ promise_tuples: Union[list[tuple[str, Promise]], tuple[str, Promise]]
321
375
 
322
376
  def __init__(
323
377
  self,
324
378
  name: str,
325
- input: Dict[str, CWLObjectType],
379
+ input: dict[str, CWLObjectType],
326
380
  source_key: str,
327
- promises: Dict[str, Job],
381
+ promises: dict[str, Job],
328
382
  ):
329
383
  """
330
384
  Construct a container object.
@@ -383,7 +437,7 @@ class ResolveSource:
383
437
  )
384
438
  else:
385
439
  name, rv = self.promise_tuples
386
- result = cast(Dict[str, Any], rv).get(name)
440
+ result = cast(dict[str, Any], rv).get(name)
387
441
 
388
442
  result = self.pick_value(result)
389
443
  result = filter_skip_null(self.name, result)
@@ -391,7 +445,7 @@ class ResolveSource:
391
445
 
392
446
  def link_merge(
393
447
  self, values: CWLObjectType
394
- ) -> Union[List[CWLOutputType], CWLOutputType]:
448
+ ) -> Union[list[CWLOutputType], CWLOutputType]:
395
449
  """
396
450
  Apply linkMerge operator to `values` object.
397
451
 
@@ -404,7 +458,7 @@ class ResolveSource:
404
458
  return values
405
459
 
406
460
  elif link_merge_type == "merge_flattened":
407
- result: List[CWLOutputType] = []
461
+ result: list[CWLOutputType] = []
408
462
  for v in values:
409
463
  if isinstance(v, MutableSequence):
410
464
  result.extend(v)
@@ -417,7 +471,7 @@ class ResolveSource:
417
471
  f"Unsupported linkMerge '{link_merge_type}' on {self.name}."
418
472
  )
419
473
 
420
- def pick_value(self, values: Union[List[Union[str, SkipNull]], Any]) -> Any:
474
+ def pick_value(self, values: Union[list[Union[str, SkipNull]], Any]) -> Any:
421
475
  """
422
476
  Apply pickValue operator to `values` object.
423
477
 
@@ -485,7 +539,7 @@ class StepValueFrom:
485
539
  """
486
540
 
487
541
  def __init__(
488
- self, expr: str, source: Any, req: List[CWLObjectType], container_engine: str
542
+ self, expr: str, source: Any, req: list[CWLObjectType], container_engine: str
489
543
  ):
490
544
  """
491
545
  Instantiate an object to carry all know about this valueFrom expression.
@@ -617,7 +671,7 @@ class JustAValue:
617
671
 
618
672
  def resolve_dict_w_promises(
619
673
  dict_w_promises: Union[
620
- UnresolvedDict, CWLObjectType, Dict[str, Union[str, StepValueFrom]]
674
+ UnresolvedDict, CWLObjectType, dict[str, Union[str, StepValueFrom]]
621
675
  ],
622
676
  file_store: Optional[AbstractFileStore] = None,
623
677
  ) -> CWLObjectType:
@@ -672,7 +726,7 @@ class ToilPathMapper(PathMapper):
672
726
 
673
727
  def __init__(
674
728
  self,
675
- referenced_files: List[CWLObjectType],
729
+ referenced_files: list[CWLObjectType],
676
730
  basedir: str,
677
731
  stagedir: str,
678
732
  separateDirs: bool = True,
@@ -787,19 +841,44 @@ class ToilPathMapper(PathMapper):
787
841
  # TODO: why would we do that?
788
842
  stagedir = cast(Optional[str], obj.get("dirname")) or stagedir
789
843
 
790
- # Decide where to put the file or directory, as an absolute path.
791
- tgt = os.path.join(
792
- stagedir,
793
- cast(str, obj["basename"]),
794
- )
844
+ if obj["class"] not in ("File", "Directory"):
845
+ # We only handle files and directories; only they have locations.
846
+ return
847
+
848
+ location = cast(str, obj["location"])
849
+ if location in self:
850
+ # If we've already mapped this, map it consistently.
851
+ tgt = self._pathmap[location].target
852
+ logger.debug(
853
+ "ToilPathMapper re-using target %s for path %s",
854
+ tgt,
855
+ location,
856
+ )
857
+ else:
858
+ # Decide where to put the file or directory, as an absolute path.
859
+ tgt = os.path.join(
860
+ stagedir,
861
+ cast(str, obj["basename"]),
862
+ )
863
+ if self.reversemap(tgt) is not None:
864
+ # If the target already exists in the pathmap, but we haven't yet
865
+ # mapped this, it means we have a conflict.
866
+ i = 2
867
+ new_tgt = f"{tgt}_{i}"
868
+ while self.reversemap(new_tgt) is not None:
869
+ i += 1
870
+ new_tgt = f"{tgt}_{i}"
871
+ logger.debug(
872
+ "ToilPathMapper resolving mapping conflict: %s is now %s",
873
+ tgt,
874
+ new_tgt,
875
+ )
876
+ tgt = new_tgt
795
877
 
796
878
  if obj["class"] == "Directory":
797
879
  # Whether or not we've already mapped this path, we need to map all
798
880
  # children recursively.
799
881
 
800
- # Grab its location
801
- location = cast(str, obj["location"])
802
-
803
882
  logger.debug("ToilPathMapper visiting directory %s", location)
804
883
 
805
884
  # We want to check the directory to make sure it is not
@@ -885,7 +964,7 @@ class ToilPathMapper(PathMapper):
885
964
 
886
965
  # Keep recursing
887
966
  self.visitlisting(
888
- cast(List[CWLObjectType], obj.get("listing", [])),
967
+ cast(list[CWLObjectType], obj.get("listing", [])),
889
968
  tgt,
890
969
  basedir,
891
970
  copy=copy,
@@ -893,23 +972,21 @@ class ToilPathMapper(PathMapper):
893
972
  )
894
973
 
895
974
  elif obj["class"] == "File":
896
- path = cast(str, obj["location"])
897
-
898
- logger.debug("ToilPathMapper visiting file %s", path)
975
+ logger.debug("ToilPathMapper visiting file %s", location)
899
976
 
900
- if path in self._pathmap:
977
+ if location in self._pathmap:
901
978
  # Don't map the same file twice
902
979
  logger.debug(
903
980
  "ToilPathMapper stopping recursion because we have already "
904
981
  "mapped file: %s",
905
- path,
982
+ location,
906
983
  )
907
984
  return
908
985
 
909
- ab = abspath(path, basedir)
910
- if "contents" in obj and path.startswith("_:"):
986
+ ab = abspath(location, basedir)
987
+ if "contents" in obj and location.startswith("_:"):
911
988
  # We are supposed to create this file
912
- self._pathmap[path] = MapperEnt(
989
+ self._pathmap[location] = MapperEnt(
913
990
  cast(str, obj["contents"]),
914
991
  tgt,
915
992
  "CreateWritableFile" if copy else "CreateFile",
@@ -927,14 +1004,16 @@ class ToilPathMapper(PathMapper):
927
1004
  # URI for a local file it downloaded.
928
1005
  if self.get_file:
929
1006
  deref = self.get_file(
930
- path, obj.get("streamable", False), self.streaming_allowed
1007
+ location,
1008
+ obj.get("streamable", False),
1009
+ self.streaming_allowed,
931
1010
  )
932
1011
  else:
933
1012
  deref = ab
934
1013
  if deref.startswith("file:"):
935
1014
  deref = schema_salad.ref_resolver.uri_file_path(deref)
936
1015
  if urlsplit(deref).scheme in ["http", "https"]:
937
- deref = downloadHttpFile(path)
1016
+ deref = downloadHttpFile(location)
938
1017
  elif urlsplit(deref).scheme != "toilfile":
939
1018
  # Dereference symbolic links
940
1019
  st = os.lstat(deref)
@@ -952,42 +1031,18 @@ class ToilPathMapper(PathMapper):
952
1031
  # reference, we just pass that along.
953
1032
 
954
1033
  """Link or copy files to their targets. Create them as needed."""
955
- targets: Dict[str, str] = {}
956
- for _, value in self._pathmap.items():
957
- # If the target already exists in the pathmap, it means we have a conflict. But we didn't change tgt to reflect new name.
958
- if value.target == tgt: # Conflict detected in the pathmap
959
- i = 2
960
- new_tgt = f"{tgt}_{i}"
961
- while new_tgt in targets:
962
- i += 1
963
- new_tgt = f"{tgt}_{i}"
964
- targets[new_tgt] = new_tgt
965
-
966
- for _, value_conflict in targets.items():
967
- logger.debug(
968
- "ToilPathMapper adding file mapping for conflict %s -> %s",
969
- deref,
970
- value_conflict,
971
- )
972
- self._pathmap[path] = MapperEnt(
973
- deref,
974
- value_conflict,
975
- "WritableFile" if copy else "File",
976
- staged,
977
- )
978
- # No conflicts detected so we can write out the original name.
979
- if not targets:
980
- logger.debug(
981
- "ToilPathMapper adding file mapping %s -> %s", deref, tgt
982
- )
983
1034
 
984
- self._pathmap[path] = MapperEnt(
985
- deref, tgt, "WritableFile" if copy else "File", staged
986
- )
1035
+ logger.debug(
1036
+ "ToilPathMapper adding file mapping %s -> %s", deref, tgt
1037
+ )
1038
+
1039
+ self._pathmap[location] = MapperEnt(
1040
+ deref, tgt, "WritableFile" if copy else "File", staged
1041
+ )
987
1042
 
988
1043
  # Handle all secondary files that need to be next to this one.
989
1044
  self.visitlisting(
990
- cast(List[CWLObjectType], obj.get("secondaryFiles", [])),
1045
+ cast(list[CWLObjectType], obj.get("secondaryFiles", [])),
991
1046
  stagedir,
992
1047
  basedir,
993
1048
  copy=copy,
@@ -1013,15 +1068,59 @@ class ToilSingleJobExecutor(cwltool.executors.SingleJobExecutor):
1013
1068
  ) -> None:
1014
1069
  """run_jobs from SingleJobExecutor, but not in a top level runtime context."""
1015
1070
  runtime_context.toplevel = False
1071
+ if isinstance(
1072
+ process, cwltool.command_line_tool.CommandLineTool
1073
+ ) and isinstance(
1074
+ process.make_job_runner(runtime_context), SingularityCommandLineJob
1075
+ ):
1076
+ # Set defaults for singularity cache environment variables, similar to what we do in wdltoil
1077
+ # Use the same place as the default singularity cache directory
1078
+ singularity_cache = os.path.join(os.path.expanduser("~"), ".singularity")
1079
+ os.environ["SINGULARITY_CACHEDIR"] = os.environ.get(
1080
+ "SINGULARITY_CACHEDIR", singularity_cache
1081
+ )
1082
+
1083
+ # If singularity is detected, prepull the image to ensure locking
1084
+ (docker_req, docker_is_req) = process.get_requirement(
1085
+ feature="DockerRequirement"
1086
+ )
1087
+ with global_mutex(
1088
+ os.environ["SINGULARITY_CACHEDIR"], "toil_singularity_cache_mutex"
1089
+ ):
1090
+ SingularityCommandLineJob.get_image(
1091
+ dockerRequirement=cast(dict[str, str], docker_req),
1092
+ pull_image=runtime_context.pull_image,
1093
+ force_pull=runtime_context.force_docker_pull,
1094
+ tmp_outdir_prefix=runtime_context.tmp_outdir_prefix,
1095
+ )
1096
+
1016
1097
  return super().run_jobs(process, job_order_object, logger, runtime_context)
1017
1098
 
1018
1099
 
1019
1100
  class ToilTool:
1020
1101
  """Mixin to hook Toil into a cwltool tool type."""
1021
1102
 
1103
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
1104
+ """
1105
+ Init hook to set up member variables.
1106
+ """
1107
+ super().__init__(*args, **kwargs)
1108
+ # Reserve a spot for the Toil job that ends up executing this tool.
1109
+ self._toil_job: Optional[Job] = None
1110
+ # Remember path mappers we have used so we can interrogate them later to find out what the job mapped.
1111
+ self._path_mappers: list[cwltool.pathmapper.PathMapper] = []
1112
+
1113
+ def connect_toil_job(self, job: Job) -> None:
1114
+ """
1115
+ Attach the Toil tool to the Toil job that is executing it. This allows
1116
+ it to use the Toil job to stop at certain points if debugging flags are
1117
+ set.
1118
+ """
1119
+ self._toil_job = job
1120
+
1022
1121
  def make_path_mapper(
1023
1122
  self,
1024
- reffiles: List[Any],
1123
+ reffiles: list[Any],
1025
1124
  stagedir: str,
1026
1125
  runtimeContext: cwltool.context.RuntimeContext,
1027
1126
  separateDirs: bool,
@@ -1029,12 +1128,12 @@ class ToilTool:
1029
1128
  """Create the appropriate PathMapper for the situation."""
1030
1129
  if getattr(runtimeContext, "bypass_file_store", False):
1031
1130
  # We only need to understand cwltool's supported URIs
1032
- return PathMapper(
1131
+ mapper = PathMapper(
1033
1132
  reffiles, runtimeContext.basedir, stagedir, separateDirs=separateDirs
1034
1133
  )
1035
1134
  else:
1036
1135
  # We need to be able to read from Toil-provided URIs
1037
- return ToilPathMapper(
1136
+ mapper = ToilPathMapper(
1038
1137
  reffiles,
1039
1138
  runtimeContext.basedir,
1040
1139
  stagedir,
@@ -1043,6 +1142,10 @@ class ToilTool:
1043
1142
  streaming_allowed=runtimeContext.streaming_allowed,
1044
1143
  )
1045
1144
 
1145
+ # Remember the path mappers
1146
+ self._path_mappers.append(mapper)
1147
+ return mapper
1148
+
1046
1149
  def __str__(self) -> str:
1047
1150
  """Return string representation of this tool type."""
1048
1151
  return f'{self.__class__.__name__}({repr(getattr(self, "tool", {}).get("id", "???"))})'
@@ -1059,17 +1162,36 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
1059
1162
  name conflicts at the top level of the work directory.
1060
1163
  """
1061
1164
 
1165
+ # Set up the initial work dir with all its files
1062
1166
  super()._initialworkdir(j, builder)
1063
1167
 
1064
1168
  # The initial work dir listing is now in j.generatefiles["listing"]
1065
- # Also j.generatrfiles is a CWL Directory.
1169
+ # Also j.generatefiles is a CWL Directory.
1066
1170
  # So check the initial working directory.
1067
- logger.info("Initial work dir: %s", j.generatefiles)
1171
+ logger.debug("Initial work dir: %s", j.generatefiles)
1068
1172
  ensure_no_collisions(
1069
1173
  j.generatefiles,
1070
1174
  "the job's working directory as specified by the InitialWorkDirRequirement",
1071
1175
  )
1072
1176
 
1177
+ if self._toil_job is not None:
1178
+ # Make a table of all the places we mapped files to when downloading the inputs.
1179
+
1180
+ # We want to hint which host paths and container (if any) paths correspond
1181
+ host_and_job_paths: list[tuple[str, str]] = []
1182
+
1183
+ for pm in self._path_mappers:
1184
+ for _, mapper_entry in pm.items_exclude_children():
1185
+ # We know that mapper_entry.target as seen by the task is
1186
+ # mapper_entry.resolved on the host.
1187
+ host_and_job_paths.append(
1188
+ (mapper_entry.resolved, mapper_entry.target)
1189
+ )
1190
+
1191
+ # Notice that we have downloaded our inputs. Explain which files
1192
+ # those are here and what the task will expect to call them.
1193
+ self._toil_job.files_downloaded_hook(host_and_job_paths)
1194
+
1073
1195
 
1074
1196
  class ToilExpressionTool(ToilTool, cwltool.command_line_tool.ExpressionTool):
1075
1197
  """Subclass the cwltool expression tool to provide the custom ToilPathMapper."""
@@ -1092,7 +1214,11 @@ def toil_make_tool(
1092
1214
  return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)
1093
1215
 
1094
1216
 
1095
- DirectoryContents = Dict[str, Union[str, "DirectoryContents"]]
1217
+ # When a file we want to have is missing, we can give it this sentinal location
1218
+ # URI instead of raising an error right away, in case it is optional.
1219
+ MISSING_FILE = "missing://"
1220
+
1221
+ DirectoryContents = dict[str, Union[str, "DirectoryContents"]]
1096
1222
 
1097
1223
 
1098
1224
  def check_directory_dict_invariants(contents: DirectoryContents) -> None:
@@ -1114,7 +1240,7 @@ def check_directory_dict_invariants(contents: DirectoryContents) -> None:
1114
1240
 
1115
1241
  def decode_directory(
1116
1242
  dir_path: str,
1117
- ) -> Tuple[DirectoryContents, Optional[str], str]:
1243
+ ) -> tuple[DirectoryContents, Optional[str], str]:
1118
1244
  """
1119
1245
  Decode a directory from a "toildir:" path to a directory (or a file in it).
1120
1246
 
@@ -1189,7 +1315,7 @@ class ToilFsAccess(StdFsAccess):
1189
1315
  # they know what will happen.
1190
1316
  # Also maps files and directories from external URLs to downloaded
1191
1317
  # locations.
1192
- self.dir_to_download: Dict[str, str] = {}
1318
+ self.dir_to_download: dict[str, str] = {}
1193
1319
 
1194
1320
  super().__init__(basedir)
1195
1321
 
@@ -1312,14 +1438,16 @@ class ToilFsAccess(StdFsAccess):
1312
1438
  destination = super()._abs(destination)
1313
1439
  return destination
1314
1440
 
1315
- def glob(self, pattern: str) -> List[str]:
1441
+ def glob(self, pattern: str) -> list[str]:
1316
1442
  parse = urlparse(pattern)
1317
1443
  if parse.scheme == "file":
1318
1444
  pattern = os.path.abspath(unquote(parse.path))
1319
1445
  elif parse.scheme == "":
1320
1446
  pattern = os.path.abspath(pattern)
1321
1447
  else:
1322
- raise RuntimeError(f"Cannot efficiently support globbing on {parse.scheme} URIs")
1448
+ raise RuntimeError(
1449
+ f"Cannot efficiently support globbing on {parse.scheme} URIs"
1450
+ )
1323
1451
 
1324
1452
  # Actually do the glob
1325
1453
  return [schema_salad.ref_resolver.file_uri(f) for f in glob.glob(pattern)]
@@ -1356,12 +1484,12 @@ class ToilFsAccess(StdFsAccess):
1356
1484
  else:
1357
1485
  # This should be supported by a job store.
1358
1486
  byte_stream = AbstractJobStore.open_url(fn)
1359
- if 'b' in mode:
1487
+ if "b" in mode:
1360
1488
  # Pass stream along in binary
1361
1489
  return byte_stream
1362
1490
  else:
1363
1491
  # Wrap it in a text decoder
1364
- return io.TextIOWrapper(byte_stream, encoding='utf-8')
1492
+ return io.TextIOWrapper(byte_stream, encoding="utf-8")
1365
1493
 
1366
1494
  def exists(self, path: str) -> bool:
1367
1495
  """Test for file existence."""
@@ -1468,7 +1596,7 @@ class ToilFsAccess(StdFsAccess):
1468
1596
  logger.debug("AbstractJobStore said: %s", status)
1469
1597
  return status
1470
1598
 
1471
- def listdir(self, fn: str) -> List[str]:
1599
+ def listdir(self, fn: str) -> list[str]:
1472
1600
  # This needs to return full URLs for everything in the directory.
1473
1601
  # URLs are not allowed to end in '/', even for subdirectories.
1474
1602
  logger.debug("ToilFsAccess listing %s", fn)
@@ -1489,7 +1617,9 @@ class ToilFsAccess(StdFsAccess):
1489
1617
  if got is None:
1490
1618
  raise RuntimeError(f"Cannot list nonexistent directory: {fn}")
1491
1619
  if isinstance(got, str):
1492
- raise RuntimeError(f"Cannot list file or dubdirectory of a file: {fn}")
1620
+ raise RuntimeError(
1621
+ f"Cannot list file or dubdirectory of a file: {fn}"
1622
+ )
1493
1623
  here = got
1494
1624
  # List all the things in here and make full URIs to them
1495
1625
  return [os.path.join(fn, k) for k in here.keys()]
@@ -1499,7 +1629,7 @@ class ToilFsAccess(StdFsAccess):
1499
1629
  for entry in AbstractJobStore.list_url(fn)
1500
1630
  ]
1501
1631
 
1502
- def join(self, path, *paths): # type: (str, *str) -> str
1632
+ def join(self, path: str, *paths: str) -> str:
1503
1633
  # This falls back on os.path.join
1504
1634
  return super().join(path, *paths)
1505
1635
 
@@ -1512,12 +1642,12 @@ class ToilFsAccess(StdFsAccess):
1512
1642
 
1513
1643
  def toil_get_file(
1514
1644
  file_store: AbstractFileStore,
1515
- index: Dict[str, str],
1516
- existing: Dict[str, str],
1645
+ index: dict[str, str],
1646
+ existing: dict[str, str],
1517
1647
  uri: str,
1518
1648
  streamable: bool = False,
1519
1649
  streaming_allowed: bool = True,
1520
- pipe_threads: Optional[List[Tuple[Thread, int]]] = None,
1650
+ pipe_threads: Optional[list[tuple[Thread, int]]] = None,
1521
1651
  ) -> str:
1522
1652
  """
1523
1653
  Set up the given file or directory from the Toil jobstore at a file URI
@@ -1618,9 +1748,7 @@ def toil_get_file(
1618
1748
  and streamable
1619
1749
  and not isinstance(file_store.jobStore, FileJobStore)
1620
1750
  ):
1621
- logger.debug(
1622
- "Streaming file %s", uri
1623
- )
1751
+ logger.debug("Streaming file %s", uri)
1624
1752
  src_path = file_store.getLocalTempFileName()
1625
1753
  os.mkfifo(src_path)
1626
1754
  th = ExceptionalThread(
@@ -1642,34 +1770,35 @@ def toil_get_file(
1642
1770
  if uri.startswith("toilfile:"):
1643
1771
  # Download from the file store
1644
1772
  file_store_id = FileID.unpack(uri[len("toilfile:") :])
1645
- src_path = file_store.readGlobalFile(
1646
- file_store_id, symlink=True
1647
- )
1773
+ src_path = file_store.readGlobalFile(file_store_id, symlink=True)
1648
1774
  else:
1649
1775
  # Download from the URI via the job store.
1650
1776
 
1651
1777
  # Figure out where it goes.
1652
1778
  src_path = file_store.getLocalTempFileName()
1653
1779
  # Open that path exclusively to make sure we created it
1654
- with open(src_path, 'xb') as fh:
1780
+ with open(src_path, "xb") as fh:
1655
1781
  # Download into the file
1656
- size, executable = AbstractJobStore.read_from_url(uri, fh)
1657
- if executable:
1658
- # Set the execute bit in the file's permissions
1659
- os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
1782
+ size, executable = AbstractJobStore.read_from_url(uri, fh)
1783
+ if executable:
1784
+ # Set the execute bit in the file's permissions
1785
+ os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
1660
1786
 
1661
1787
  index[src_path] = uri
1662
1788
  existing[uri] = src_path
1663
1789
  return schema_salad.ref_resolver.file_uri(src_path)
1664
1790
 
1665
- def write_file(
1666
- writeFunc: Callable[[str], FileID],
1667
- index: Dict[str, str],
1668
- existing: Dict[str, str],
1791
+
1792
+ def convert_file_uri_to_toil_uri(
1793
+ applyFunc: Callable[[str], FileID],
1794
+ index: dict[str, str],
1795
+ existing: dict[str, str],
1669
1796
  file_uri: str,
1670
1797
  ) -> str:
1671
1798
  """
1672
- Write a file into the Toil jobstore.
1799
+ Given a file URI, convert it to a toil file URI. Uses applyFunc to handle the conversion.
1800
+
1801
+ Runs once on every unique file URI.
1673
1802
 
1674
1803
  'existing' is a set of files retrieved as inputs from toil_get_file. This
1675
1804
  ensures they are mapped back as the same name if passed through.
@@ -1686,12 +1815,8 @@ def write_file(
1686
1815
  else:
1687
1816
  file_uri = existing.get(file_uri, file_uri)
1688
1817
  if file_uri not in index:
1689
- if not urlparse(file_uri).scheme:
1690
- rp = os.path.realpath(file_uri)
1691
- else:
1692
- rp = file_uri
1693
1818
  try:
1694
- index[file_uri] = "toilfile:" + writeFunc(rp).pack()
1819
+ index[file_uri] = "toilfile:" + applyFunc(file_uri).pack()
1695
1820
  existing[index[file_uri]] = file_uri
1696
1821
  except Exception as e:
1697
1822
  logger.error("Got exception '%s' while copying '%s'", e, file_uri)
@@ -1710,17 +1835,93 @@ def path_to_loc(obj: CWLObjectType) -> None:
1710
1835
  del obj["path"]
1711
1836
 
1712
1837
 
1713
- def import_files(
1714
- import_function: Callable[[str], FileID],
1838
+ def extract_file_uri_once(
1839
+ fileindex: dict[str, str],
1840
+ existing: dict[str, str],
1841
+ file_metadata: CWLObjectType,
1842
+ mark_broken: bool = False,
1843
+ skip_remote: bool = False,
1844
+ ) -> Optional[str]:
1845
+ """
1846
+ Extract the filename from a CWL file record.
1847
+
1848
+ This function matches the predefined function signature in visit_files, which ensures
1849
+ that this function is called on all files inside a CWL object.
1850
+
1851
+ Ensures no duplicate files are returned according to fileindex. If a file has not been resolved already (and had file:// prepended)
1852
+ then resolve symlinks.
1853
+ :param fileindex: Forward mapping of filename
1854
+ :param existing: Reverse mapping of filename. This function does not use this
1855
+ :param file_metadata: CWL file record
1856
+ :param mark_broken: Whether files should be marked as missing
1857
+ :param skip_remote: Whether to skip remote files
1858
+ :return:
1859
+ """
1860
+ location = cast(str, file_metadata["location"])
1861
+ if (
1862
+ location.startswith("toilfile:")
1863
+ or location.startswith("toildir:")
1864
+ or location.startswith("_:")
1865
+ ):
1866
+ return None
1867
+ if location in fileindex:
1868
+ file_metadata["location"] = fileindex[location]
1869
+ return None
1870
+ if not location and file_metadata["path"]:
1871
+ file_metadata["location"] = location = schema_salad.ref_resolver.file_uri(
1872
+ cast(str, file_metadata["path"])
1873
+ )
1874
+ if location.startswith("file://") and not os.path.isfile(
1875
+ schema_salad.ref_resolver.uri_file_path(location)
1876
+ ):
1877
+ if mark_broken:
1878
+ logger.debug("File %s is missing", file_metadata)
1879
+ file_metadata["location"] = location = MISSING_FILE
1880
+ else:
1881
+ raise cwl_utils.errors.WorkflowException(
1882
+ "File is missing: %s" % file_metadata
1883
+ )
1884
+ if location.startswith("file://") or not skip_remote:
1885
+ # This is a local file or a remote file
1886
+ if location not in fileindex:
1887
+ # These dictionaries are meant to keep track of what we're going to import
1888
+ # In the actual import, this is used as a bidirectional mapping from unvirtualized to virtualized
1889
+ # For this case, keep track of the files to prevent returning duplicate files
1890
+ # see convert_file_uri_to_toil_uri
1891
+
1892
+ # If there is not a scheme, this file has not been resolved yet or is a URL.
1893
+ if not urlparse(location).scheme:
1894
+ rp = os.path.realpath(location)
1895
+ else:
1896
+ rp = location
1897
+ return rp
1898
+ return None
1899
+
1900
+
1901
+ V = TypeVar("V", covariant=True)
1902
+
1903
+
1904
+ class VisitFunc(Protocol[V]):
1905
+ def __call__(
1906
+ self,
1907
+ fileindex: dict[str, str],
1908
+ existing: dict[str, str],
1909
+ file_metadata: CWLObjectType,
1910
+ mark_broken: bool,
1911
+ skip_remote: bool,
1912
+ ) -> V: ...
1913
+
1914
+
1915
+ def visit_files(
1916
+ func: VisitFunc[V],
1715
1917
  fs_access: StdFsAccess,
1716
- fileindex: Dict[str, str],
1717
- existing: Dict[str, str],
1918
+ fileindex: dict[str, str],
1919
+ existing: dict[str, str],
1718
1920
  cwl_object: Optional[CWLObjectType],
1719
- skip_broken: bool = False,
1921
+ mark_broken: bool = False,
1720
1922
  skip_remote: bool = False,
1721
1923
  bypass_file_store: bool = False,
1722
- log_level: int = logging.DEBUG
1723
- ) -> None:
1924
+ ) -> list[V]:
1724
1925
  """
1725
1926
  Prepare all files and directories.
1726
1927
 
@@ -1735,10 +1936,10 @@ def import_files(
1735
1936
  Preserves any listing fields.
1736
1937
 
1737
1938
  If a file cannot be found (like if it is an optional secondary file that
1738
- doesn't exist), fails, unless skip_broken is set, in which case it leaves
1739
- the location it was supposed to have been at.
1939
+ doesn't exist), fails, unless mark_broken is set, in which case it applies
1940
+ a sentinel location.
1740
1941
 
1741
- Also does some miscelaneous normalization.
1942
+ Also does some miscellaneous normalization.
1742
1943
 
1743
1944
  :param import_function: The function used to upload a URI and get a
1744
1945
  Toil FileID for it.
@@ -1754,8 +1955,9 @@ def import_files(
1754
1955
 
1755
1956
  :param cwl_object: CWL tool (or workflow order) we are importing files for
1756
1957
 
1757
- :param skip_broken: If True, when files can't be imported because they e.g.
1758
- don't exist, leave their locations alone rather than failing with an error.
1958
+ :param mark_broken: If True, when files can't be imported because they e.g.
1959
+ don't exist, set their locations to MISSING_FILE rather than failing
1960
+ with an error.
1759
1961
 
1760
1962
  :param skip_remote: If True, leave remote URIs in place instead of importing
1761
1963
  files.
@@ -1765,18 +1967,12 @@ def import_files(
1765
1967
 
1766
1968
  :param log_level: Log imported files at the given level.
1767
1969
  """
1970
+ func_return: list[Any] = list()
1768
1971
  tool_id = cwl_object.get("id", str(cwl_object)) if cwl_object else ""
1769
1972
 
1770
1973
  logger.debug("Importing files for %s", tool_id)
1771
1974
  logger.debug("Importing files in %s", cwl_object)
1772
1975
 
1773
- def import_and_log(url: str) -> FileID:
1774
- """
1775
- Upload a file and log that we are doing so.
1776
- """
1777
- logger.log(log_level, "Loading %s...", url)
1778
- return import_function(url)
1779
-
1780
1976
  # We need to upload all files to the Toil filestore, and encode structure
1781
1977
  # recursively into all Directories' locations. But we cannot safely alter
1782
1978
  # the listing fields of Directory objects, because the handling required by
@@ -1794,13 +1990,13 @@ def import_files(
1794
1990
  if bypass_file_store:
1795
1991
  # Don't go on to actually import files or encode contents for
1796
1992
  # directories.
1797
- return
1993
+ return func_return
1798
1994
 
1799
1995
  # Otherwise we actually want to put the things in the file store.
1800
1996
 
1801
1997
  def visit_file_or_directory_down(
1802
1998
  rec: CWLObjectType,
1803
- ) -> Optional[List[CWLObjectType]]:
1999
+ ) -> Optional[list[CWLObjectType]]:
1804
2000
  """
1805
2001
  Visit each CWL File or Directory on the way down.
1806
2002
 
@@ -1827,7 +2023,7 @@ def import_files(
1827
2023
  ensure_no_collisions(cast(DirectoryType, rec))
1828
2024
 
1829
2025
  # Pull out the old listing, if any
1830
- old_listing = cast(Optional[List[CWLObjectType]], rec.get("listing", None))
2026
+ old_listing = cast(Optional[list[CWLObjectType]], rec.get("listing", None))
1831
2027
 
1832
2028
  if not cast(str, rec["location"]).startswith("_:"):
1833
2029
  # This is a thing we can list and not just a literal, so we
@@ -1849,8 +2045,8 @@ def import_files(
1849
2045
 
1850
2046
  def visit_file_or_directory_up(
1851
2047
  rec: CWLObjectType,
1852
- down_result: Optional[List[CWLObjectType]],
1853
- child_results: List[DirectoryContents],
2048
+ down_result: Optional[list[CWLObjectType]],
2049
+ child_results: list[DirectoryContents],
1854
2050
  ) -> DirectoryContents:
1855
2051
  """
1856
2052
  For a CWL File or Directory, make sure it is uploaded and it has a
@@ -1872,10 +2068,15 @@ def import_files(
1872
2068
  # This is a CWL File
1873
2069
 
1874
2070
  result: DirectoryContents = {}
1875
-
1876
- # Upload the file itself, which will adjust its location.
1877
- upload_file(
1878
- import_and_log, fileindex, existing, rec, skip_broken=skip_broken, skip_remote=skip_remote
2071
+ # Run a function on the file and store the return
2072
+ func_return.append(
2073
+ func(
2074
+ fileindex,
2075
+ existing,
2076
+ rec,
2077
+ mark_broken=mark_broken,
2078
+ skip_remote=skip_remote,
2079
+ )
1879
2080
  )
1880
2081
 
1881
2082
  # Make a record for this file under its name
@@ -1904,7 +2105,7 @@ def import_files(
1904
2105
  contents.update(child_result)
1905
2106
 
1906
2107
  # Upload the directory itself, which will adjust its location.
1907
- upload_directory(rec, contents, skip_broken=skip_broken)
2108
+ upload_directory(rec, contents, mark_broken=mark_broken)
1908
2109
 
1909
2110
  # Show those contents as being under our name in our parent.
1910
2111
  return {cast(str, rec["basename"]): contents}
@@ -1919,12 +2120,13 @@ def import_files(
1919
2120
  visit_file_or_directory_down,
1920
2121
  visit_file_or_directory_up,
1921
2122
  )
2123
+ return func_return
1922
2124
 
1923
2125
 
1924
2126
  def upload_directory(
1925
2127
  directory_metadata: CWLObjectType,
1926
2128
  directory_contents: DirectoryContents,
1927
- skip_broken: bool = False,
2129
+ mark_broken: bool = False,
1928
2130
  ) -> None:
1929
2131
  """
1930
2132
  Upload a Directory object.
@@ -1936,6 +2138,9 @@ def upload_directory(
1936
2138
  Makes sure the directory actually exists, and rewrites its location to be
1937
2139
  something we can use on another machine.
1938
2140
 
2141
+ If mark_broken is set, ignores missing directories and replaces them with
2142
+ directories containing the given (possibly empty) contents.
2143
+
1939
2144
  We can't rely on the directory's listing as visible to the next tool as a
1940
2145
  complete recursive description of the files we will need to present to the
1941
2146
  tool, since some tools require it to be cleared or single-level but still
@@ -1956,8 +2161,8 @@ def upload_directory(
1956
2161
  if location.startswith("file://") and not os.path.isdir(
1957
2162
  schema_salad.ref_resolver.uri_file_path(location)
1958
2163
  ):
1959
- if skip_broken:
1960
- return
2164
+ if mark_broken:
2165
+ logger.debug("Directory %s is missing as a whole", directory_metadata)
1961
2166
  else:
1962
2167
  raise cwl_utils.errors.WorkflowException(
1963
2168
  "Directory is missing: %s" % directory_metadata["location"]
@@ -1974,48 +2179,34 @@ def upload_directory(
1974
2179
  directory_metadata["location"] = encode_directory(directory_contents)
1975
2180
 
1976
2181
 
1977
- def upload_file(
1978
- uploadfunc: Callable[[str], FileID],
1979
- fileindex: Dict[str, str],
1980
- existing: Dict[str, str],
2182
+ def extract_and_convert_file_to_toil_uri(
2183
+ convertfunc: Callable[[str], FileID],
2184
+ fileindex: dict[str, str],
2185
+ existing: dict[str, str],
1981
2186
  file_metadata: CWLObjectType,
1982
- skip_broken: bool = False,
1983
- skip_remote: bool = False
2187
+ mark_broken: bool = False,
2188
+ skip_remote: bool = False,
1984
2189
  ) -> None:
1985
2190
  """
1986
- Update a file object so that the file will be accessible from another machine.
2191
+ Extract the file URI out of a file object and convert it to a Toil URI.
1987
2192
 
1988
- Uploads local files to the Toil file store, and sets their location to a
1989
- reference to the toil file store.
1990
-
1991
- Unless skip_remote is set, downloads remote files into the file store and
1992
- sets their locations to references into the file store as well.
2193
+ Runs convertfunc on the file URI to handle conversion.
2194
+
2195
+ Is used to handle importing files into the jobstore.
2196
+
2197
+ If a file doesn't exist, fails with an error, unless mark_broken is set, in
2198
+ which case the missing file is given a special sentinel location.
2199
+
2200
+ Unless skip_remote is set, also run on remote files and sets their locations
2201
+ to toil URIs as well.
1993
2202
  """
1994
- location = cast(str, file_metadata["location"])
1995
- if (
1996
- location.startswith("toilfile:")
1997
- or location.startswith("toildir:")
1998
- or location.startswith("_:")
1999
- ):
2000
- return
2001
- if location in fileindex:
2002
- file_metadata["location"] = fileindex[location]
2003
- return
2004
- if not location and file_metadata["path"]:
2005
- file_metadata["location"] = location = schema_salad.ref_resolver.file_uri(
2006
- cast(str, file_metadata["path"])
2203
+ location = extract_file_uri_once(
2204
+ fileindex, existing, file_metadata, mark_broken, skip_remote
2205
+ )
2206
+ if location is not None:
2207
+ file_metadata["location"] = convert_file_uri_to_toil_uri(
2208
+ convertfunc, fileindex, existing, location
2007
2209
  )
2008
- if location.startswith("file://") and not os.path.isfile(
2009
- schema_salad.ref_resolver.uri_file_path(location)
2010
- ):
2011
- if skip_broken:
2012
- return
2013
- else:
2014
- raise cwl_utils.errors.WorkflowException("File is missing: %s" % location)
2015
-
2016
- if location.startswith("file://") or not skip_remote:
2017
- # This is a local file, or we also need to download and re-upload remote files
2018
- file_metadata["location"] = write_file(uploadfunc, fileindex, existing, location)
2019
2210
 
2020
2211
  logger.debug("Sending file at: %s", file_metadata["location"])
2021
2212
 
@@ -2028,7 +2219,7 @@ def writeGlobalFileWrapper(file_store: AbstractFileStore, fileuri: str) -> FileI
2028
2219
 
2029
2220
  def remove_empty_listings(rec: CWLObjectType) -> None:
2030
2221
  if rec.get("class") != "Directory":
2031
- finddirs = [] # type: List[CWLObjectType]
2222
+ finddirs: list[CWLObjectType] = []
2032
2223
  visit_class(rec, ("Directory",), finddirs.append)
2033
2224
  for f in finddirs:
2034
2225
  remove_empty_listings(f)
@@ -2048,7 +2239,7 @@ class CWLNamedJob(Job):
2048
2239
  cores: Union[float, None] = 1,
2049
2240
  memory: Union[int, str, None] = "1GiB",
2050
2241
  disk: Union[int, str, None] = "1MiB",
2051
- accelerators: Optional[List[AcceleratorRequirement]] = None,
2242
+ accelerators: Optional[list[AcceleratorRequirement]] = None,
2052
2243
  preemptible: Optional[bool] = None,
2053
2244
  tool_id: Optional[str] = None,
2054
2245
  parent_name: Optional[str] = None,
@@ -2123,10 +2314,10 @@ class ResolveIndirect(CWLNamedJob):
2123
2314
 
2124
2315
  def toilStageFiles(
2125
2316
  toil: Toil,
2126
- cwljob: Union[CWLObjectType, List[CWLObjectType]],
2317
+ cwljob: Union[CWLObjectType, list[CWLObjectType]],
2127
2318
  outdir: str,
2128
2319
  destBucket: Union[str, None] = None,
2129
- log_level: int = logging.DEBUG
2320
+ log_level: int = logging.DEBUG,
2130
2321
  ) -> None:
2131
2322
  """
2132
2323
  Copy input files out of the global file store and update location and path.
@@ -2134,11 +2325,11 @@ def toilStageFiles(
2134
2325
  :param destBucket: If set, export to this base URL instead of to the local
2135
2326
  filesystem.
2136
2327
 
2137
- :param log_level: Log each file transfered at the given level.
2328
+ :param log_level: Log each file transferred at the given level.
2138
2329
  """
2139
2330
 
2140
2331
  def _collectDirEntries(
2141
- obj: Union[CWLObjectType, List[CWLObjectType]]
2332
+ obj: Union[CWLObjectType, list[CWLObjectType]]
2142
2333
  ) -> Iterator[CWLObjectType]:
2143
2334
  if isinstance(obj, dict):
2144
2335
  if obj.get("class") in ("File", "Directory"):
@@ -2220,13 +2411,17 @@ def toilStageFiles(
2220
2411
  # TODO: Use direct S3 to S3 copy on exports as well
2221
2412
  file_id_or_contents = (
2222
2413
  "toilfile:"
2223
- + toil.import_file(file_id_or_contents, symlink=False).pack()
2414
+ + toil.import_file(
2415
+ file_id_or_contents, symlink=False
2416
+ ).pack()
2224
2417
  )
2225
2418
 
2226
2419
  if file_id_or_contents.startswith("toilfile:"):
2227
2420
  # This is something we can export
2228
2421
  # TODO: Do we need to urlencode the parts before sending them to S3?
2229
- dest_url = "/".join(s.strip("/") for s in [destBucket, baseName])
2422
+ dest_url = "/".join(
2423
+ s.strip("/") for s in [destBucket, baseName]
2424
+ )
2230
2425
  logger.log(log_level, "Saving %s...", dest_url)
2231
2426
  toil.export_file(
2232
2427
  FileID.unpack(file_id_or_contents[len("toilfile:") :]),
@@ -2248,7 +2443,12 @@ def toilStageFiles(
2248
2443
  # Probably staging and bypassing file store. Just copy.
2249
2444
  logger.log(log_level, "Saving %s...", dest_url)
2250
2445
  os.makedirs(os.path.dirname(p.target), exist_ok=True)
2251
- shutil.copyfile(p.resolved, p.target)
2446
+ try:
2447
+ shutil.copyfile(p.resolved, p.target)
2448
+ except shutil.SameFileError:
2449
+ # If outdir isn't set and we're passing through an input file/directory as the output,
2450
+ # the file doesn't need to be copied because it is already there
2451
+ pass
2252
2452
  else:
2253
2453
  uri = p.resolved
2254
2454
  if not uri.startswith("toilfile:"):
@@ -2321,26 +2521,31 @@ class CWLJobWrapper(CWLNamedJob):
2321
2521
  subjob_name="_wrapper",
2322
2522
  local=True,
2323
2523
  )
2324
- self.cwltool = remove_pickle_problems(tool)
2524
+ self.cwltool = tool
2325
2525
  self.cwljob = cwljob
2326
2526
  self.runtime_context = runtime_context
2327
- self.conditional = conditional
2527
+ self.conditional = conditional or Conditional()
2328
2528
  self.parent_name = parent_name
2329
2529
 
2330
2530
  def run(self, file_store: AbstractFileStore) -> Any:
2331
2531
  """Create a child job with the correct resource requirements set."""
2332
2532
  cwljob = resolve_dict_w_promises(self.cwljob, file_store)
2533
+
2534
+ # Check conditional to license full evaluation of job inputs.
2535
+ if self.conditional.is_false(cwljob):
2536
+ return self.conditional.skipped_outputs()
2537
+
2333
2538
  fill_in_defaults(
2334
2539
  self.cwltool.tool["inputs"],
2335
2540
  cwljob,
2336
2541
  self.runtime_context.make_fs_access(self.runtime_context.basedir or ""),
2337
2542
  )
2543
+ # Don't forward the conditional. We checked it already.
2338
2544
  realjob = CWLJob(
2339
2545
  tool=self.cwltool,
2340
2546
  cwljob=cwljob,
2341
2547
  runtime_context=self.runtime_context,
2342
2548
  parent_name=self.parent_name,
2343
- conditional=self.conditional,
2344
2549
  )
2345
2550
  self.addChild(realjob)
2346
2551
  return realjob.rv()
@@ -2358,7 +2563,7 @@ class CWLJob(CWLNamedJob):
2358
2563
  conditional: Union[Conditional, None] = None,
2359
2564
  ):
2360
2565
  """Store the context for later execution."""
2361
- self.cwltool = remove_pickle_problems(tool)
2566
+ self.cwltool = tool
2362
2567
  self.conditional = conditional or Conditional()
2363
2568
 
2364
2569
  if runtime_context.builder:
@@ -2375,7 +2580,7 @@ class CWLJob(CWLNamedJob):
2375
2580
  resources={},
2376
2581
  mutation_manager=runtime_context.mutation_manager,
2377
2582
  formatgraph=tool.formatgraph,
2378
- make_fs_access=cast(Type[StdFsAccess], runtime_context.make_fs_access),
2583
+ make_fs_access=cast(type[StdFsAccess], runtime_context.make_fs_access),
2379
2584
  fs_access=runtime_context.make_fs_access(""),
2380
2585
  job_script_provider=runtime_context.job_script_provider,
2381
2586
  timeout=runtime_context.eval_timeout,
@@ -2392,7 +2597,21 @@ class CWLJob(CWLNamedJob):
2392
2597
 
2393
2598
  req = tool.evalResources(self.builder, runtime_context)
2394
2599
 
2395
- accelerators: Optional[List[AcceleratorRequirement]] = None
2600
+ tool_own_resources = tool.get_requirement("ResourceRequirement")[0] or {}
2601
+ if "ramMin" in tool_own_resources or "ramMax" in tool_own_resources:
2602
+ # The tool is actually asking for memory.
2603
+ memory = int(req["ram"] * (2**20))
2604
+ else:
2605
+ # The tool is getting a default ram allocation.
2606
+ if getattr(runtime_context, "cwl_default_ram"):
2607
+ # We will respect the CWL spec and apply the default cwltool
2608
+ # computed, which might be different than Toil's default.
2609
+ memory = int(req["ram"] * (2**20))
2610
+ else:
2611
+ # We use a None requirement and the Toil default applies.
2612
+ memory = None
2613
+
2614
+ accelerators: Optional[list[AcceleratorRequirement]] = None
2396
2615
  if req.get("cudaDeviceCount", 0) > 0:
2397
2616
  # There's a CUDARequirement, which cwltool processed for us
2398
2617
  # TODO: How is cwltool deciding what value to use between min and max?
@@ -2456,7 +2675,7 @@ class CWLJob(CWLNamedJob):
2456
2675
 
2457
2676
  super().__init__(
2458
2677
  cores=req["cores"],
2459
- memory=int(req["ram"] * (2**20)),
2678
+ memory=memory,
2460
2679
  disk=int(total_disk),
2461
2680
  accelerators=accelerators,
2462
2681
  preemptible=preemptible,
@@ -2470,7 +2689,7 @@ class CWLJob(CWLNamedJob):
2470
2689
  self.step_inputs = self.cwltool.tool["inputs"]
2471
2690
  self.workdir: str = runtime_context.workdir # type: ignore[attr-defined]
2472
2691
 
2473
- def required_env_vars(self, cwljob: Any) -> Iterator[Tuple[str, str]]:
2692
+ def required_env_vars(self, cwljob: Any) -> Iterator[tuple[str, str]]:
2474
2693
  """Yield environment variables from EnvVarRequirement."""
2475
2694
  if isinstance(cwljob, dict):
2476
2695
  if cwljob.get("class") == "EnvVarRequirement":
@@ -2482,7 +2701,7 @@ class CWLJob(CWLNamedJob):
2482
2701
  for env_var in cwljob:
2483
2702
  yield from self.required_env_vars(env_var)
2484
2703
 
2485
- def populate_env_vars(self, cwljob: CWLObjectType) -> Dict[str, str]:
2704
+ def populate_env_vars(self, cwljob: CWLObjectType) -> dict[str, str]:
2486
2705
  """
2487
2706
  Prepare environment variables necessary at runtime for the job.
2488
2707
 
@@ -2498,9 +2717,9 @@ class CWLJob(CWLNamedJob):
2498
2717
  required_env_vars = {}
2499
2718
  # iterate over EnvVarRequirement env vars, if any
2500
2719
  for k, v in self.required_env_vars(cwljob):
2501
- required_env_vars[
2502
- k
2503
- ] = v # will tell cwltool which env vars to take from the environment
2720
+ required_env_vars[k] = (
2721
+ v # will tell cwltool which env vars to take from the environment
2722
+ )
2504
2723
  os.environ[k] = v
2505
2724
  # needs to actually be populated in the environment as well or
2506
2725
  # they're not used
@@ -2510,7 +2729,7 @@ class CWLJob(CWLNamedJob):
2510
2729
  # env var with the same name is found
2511
2730
  for req in self.cwltool.requirements:
2512
2731
  if req["class"] == "EnvVarRequirement":
2513
- envDefs = cast(List[Dict[str, str]], req["envDef"])
2732
+ envDefs = cast(list[dict[str, str]], req["envDef"])
2514
2733
  for env_def in envDefs:
2515
2734
  env_name = env_def["envName"]
2516
2735
  if env_name in required_env_vars:
@@ -2542,7 +2761,7 @@ class CWLJob(CWLNamedJob):
2542
2761
  for inp_id in immobile_cwljob_dict.keys():
2543
2762
  found = False
2544
2763
  for field in cast(
2545
- List[Dict[str, str]], self.cwltool.inputs_record_schema["fields"]
2764
+ list[dict[str, str]], self.cwltool.inputs_record_schema["fields"]
2546
2765
  ):
2547
2766
  if field["name"] == inp_id:
2548
2767
  found = True
@@ -2557,8 +2776,8 @@ class CWLJob(CWLNamedJob):
2557
2776
  functools.partial(remove_empty_listings),
2558
2777
  )
2559
2778
 
2560
- index: Dict[str, str] = {}
2561
- existing: Dict[str, str] = {}
2779
+ index: dict[str, str] = {}
2780
+ existing: dict[str, str] = {}
2562
2781
 
2563
2782
  # Prepare the run instructions for cwltool
2564
2783
  runtime_context = self.runtime_context.copy()
@@ -2570,7 +2789,7 @@ class CWLJob(CWLNamedJob):
2570
2789
  # will come and grab this function for fetching files from the Toil
2571
2790
  # file store. pipe_threads is used for keeping track of separate
2572
2791
  # threads launched to stream files around.
2573
- pipe_threads: List[Tuple[Thread, int]] = []
2792
+ pipe_threads: list[tuple[Thread, int]] = []
2574
2793
  setattr(
2575
2794
  runtime_context,
2576
2795
  "toil_get_file",
@@ -2604,7 +2823,7 @@ class CWLJob(CWLNamedJob):
2604
2823
  # function and a path_mapper type or factory function.
2605
2824
 
2606
2825
  runtime_context.make_fs_access = cast(
2607
- Type[StdFsAccess],
2826
+ type[StdFsAccess],
2608
2827
  functools.partial(ToilFsAccess, file_store=file_store),
2609
2828
  )
2610
2829
 
@@ -2614,6 +2833,17 @@ class CWLJob(CWLNamedJob):
2614
2833
  streaming_allowed=runtime_context.streaming_allowed,
2615
2834
  )
2616
2835
 
2836
+ # Collect standard output and standard error somewhere if they don't go to files.
2837
+ # We need to keep two FDs to these because cwltool will close what we give it.
2838
+ default_stdout = TemporaryFile()
2839
+ runtime_context.default_stdout = os.fdopen(
2840
+ os.dup(default_stdout.fileno()), "wb"
2841
+ )
2842
+ default_stderr = TemporaryFile()
2843
+ runtime_context.default_stderr = os.fdopen(
2844
+ os.dup(default_stderr.fileno()), "wb"
2845
+ )
2846
+
2617
2847
  process_uuid = uuid.uuid4() # noqa F841
2618
2848
  started_at = datetime.datetime.now() # noqa F841
2619
2849
 
@@ -2622,13 +2852,49 @@ class CWLJob(CWLNamedJob):
2622
2852
  logger.debug("Running tool %s with order: %s", self.cwltool, self.cwljob)
2623
2853
 
2624
2854
  runtime_context.name = self.description.unitName
2625
- output, status = ToilSingleJobExecutor().execute(
2626
- process=self.cwltool,
2627
- job_order_object=cwljob,
2628
- runtime_context=runtime_context,
2629
- logger=cwllogger,
2630
- )
2631
- ended_at = datetime.datetime.now() # noqa F841
2855
+
2856
+ if isinstance(self.cwltool, ToilTool):
2857
+ # Connect the CWL tool to us so it can call into the Toil job when
2858
+ # it reaches points where we might need to debug it.
2859
+ self.cwltool.connect_toil_job(self)
2860
+
2861
+ status = "did_not_run"
2862
+ try:
2863
+ output, status = ToilSingleJobExecutor().execute(
2864
+ process=self.cwltool,
2865
+ job_order_object=cwljob,
2866
+ runtime_context=runtime_context,
2867
+ logger=cwllogger,
2868
+ )
2869
+ finally:
2870
+ ended_at = datetime.datetime.now() # noqa F841
2871
+
2872
+ # Log any output/error data
2873
+ default_stdout.seek(0, os.SEEK_END)
2874
+ if default_stdout.tell() > 0:
2875
+ default_stdout.seek(0)
2876
+ file_store.log_user_stream(
2877
+ self.description.unitName + ".stdout", default_stdout
2878
+ )
2879
+ if status != "success":
2880
+ default_stdout.seek(0)
2881
+ logger.error(
2882
+ "Failed command standard output:\n%s",
2883
+ default_stdout.read().decode("utf-8", errors="replace"),
2884
+ )
2885
+ default_stderr.seek(0, os.SEEK_END)
2886
+ if default_stderr.tell():
2887
+ default_stderr.seek(0)
2888
+ file_store.log_user_stream(
2889
+ self.description.unitName + ".stderr", default_stderr
2890
+ )
2891
+ if status != "success":
2892
+ default_stderr.seek(0)
2893
+ logger.error(
2894
+ "Failed command standard error:\n%s",
2895
+ default_stderr.read().decode("utf-8", errors="replace"),
2896
+ )
2897
+
2632
2898
  if status != "success":
2633
2899
  raise cwl_utils.errors.WorkflowException(status)
2634
2900
 
@@ -2640,12 +2906,18 @@ class CWLJob(CWLNamedJob):
2640
2906
  fs_access = runtime_context.make_fs_access(runtime_context.basedir)
2641
2907
 
2642
2908
  # And a file importer that can go from a file:// URI to a Toil FileID
2643
- file_import_function = functools.partial(writeGlobalFileWrapper, file_store)
2909
+ def file_import_function(url: str, log_level: int = logging.DEBUG) -> FileID:
2910
+ logger.log(log_level, "Loading %s...", url)
2911
+ return writeGlobalFileWrapper(file_store, url)
2912
+
2913
+ file_upload_function = functools.partial(
2914
+ extract_and_convert_file_to_toil_uri, file_import_function
2915
+ )
2644
2916
 
2645
2917
  # Upload all the Files and set their and the Directories' locations, if
2646
2918
  # needed.
2647
- import_files(
2648
- file_import_function,
2919
+ visit_files(
2920
+ file_upload_function,
2649
2921
  fs_access,
2650
2922
  index,
2651
2923
  existing,
@@ -2675,6 +2947,74 @@ def get_container_engine(runtime_context: cwltool.context.RuntimeContext) -> str
2675
2947
  return "docker"
2676
2948
 
2677
2949
 
2950
def makeRootJob(
    tool: Process,
    jobobj: CWLObjectType,
    runtime_context: cwltool.context.RuntimeContext,
    initialized_job_order: CWLObjectType,
    options: Namespace,
    toil: Toil,
) -> CWLNamedJob:
    """
    Create the Toil root Job object for the CWL tool.

    Does the same job construction as makeJob(), but also takes care of
    importing the workflow's input files, either on the leader or on workers.

    :param tool: CWL Process to run.
    :param jobobj: Job order handed to makeJob(); only used when imports run
        on the leader.
    :param runtime_context: cwltool runtime context for the run.
    :param initialized_job_order: Job order whose files are to be imported.
    :param options: Parsed command-line options.
    :param toil: Toil context whose job store receives the imports.
    :return: A single root job. When ``options.run_imports_on_workers`` is
        set this is a CWLImportWrapper; otherwise it is the first job returned
        by makeJob(), with its ``cwljob`` set to ``initialized_job_order``.
    """
    if options.run_imports_on_workers:
        filenames = extract_workflow_inputs(options, initialized_job_order, tool)
        metadata = get_file_sizes(
            filenames, toil._jobStore, include_remote_files=options.reference_inputs
        )

        # Mapping of files to metadata for files that will be imported on the worker
        # This will consist of files that we were able to get a file size for
        worker_metadata: dict[str, FileMetadata] = {}
        # Files that will be imported on the leader
        # This will consist of files that we were not able to get a file size for
        leader_filenames: list[str] = []
        for filename, file_data in metadata.items():
            if file_data.size is None:
                leader_filenames.append(filename)
            else:
                worker_metadata[filename] = file_data

        # import the files for the leader first
        path_to_fileid = WorkerImportJob.import_files(
            leader_filenames, toil._jobStore
        )

        # then install the imported files before importing the other files
        # this way the control flow can fall from the leader to workers
        # NOTE(review): fill_in_files looks every visited file up in the
        # mapping with a plain dict index, and path_to_fileid only holds the
        # leader-imported files here -- confirm that files deferred to the
        # workers cannot raise KeyError at this point.
        tool, initialized_job_order = CWLInstallImportsJob.fill_in_files(
            initialized_job_order,
            tool,
            path_to_fileid,
            options.basedir,
            options.reference_inputs,
            options.bypass_file_store,
        )

        return CWLImportWrapper(
            initialized_job_order, tool, runtime_context, worker_metadata, options
        )

    import_workflow_inputs(
        toil._jobStore,
        options,
        initialized_job_order=initialized_job_order,
        tool=tool,
    )
    # toplevel, no name needed; the follow-on half of the pair is not used here
    root_job, _ = makeJob(tool, jobobj, runtime_context, None, None)
    root_job.cwljob = initialized_job_order
    return root_job
3016
+
3017
+
2678
3018
  def makeJob(
2679
3019
  tool: Process,
2680
3020
  jobobj: CWLObjectType,
@@ -2682,13 +3022,16 @@ def makeJob(
2682
3022
  parent_name: Optional[str],
2683
3023
  conditional: Union[Conditional, None],
2684
3024
  ) -> Union[
2685
- Tuple["CWLWorkflow", ResolveIndirect],
2686
- Tuple[CWLJob, CWLJob],
2687
- Tuple[CWLJobWrapper, CWLJobWrapper],
3025
+ tuple["CWLWorkflow", ResolveIndirect],
3026
+ tuple[CWLJob, CWLJob],
3027
+ tuple[CWLJobWrapper, CWLJobWrapper],
2688
3028
  ]:
2689
3029
  """
2690
3030
  Create the correct Toil Job object for the CWL tool.
2691
3031
 
3032
+ Actually creates what might be a subgraph of two jobs. The second of which may be the follow on of the first.
3033
+ If only one job is created, it is returned twice.
3034
+
2692
3035
  Types: workflow, job, or job wrapper for dynamic resource requirements.
2693
3036
 
2694
3037
  :return: "wfjob, followOn" if the input tool is a workflow, and "job, job" otherwise
@@ -2768,16 +3111,16 @@ class CWLScatter(Job):
2768
3111
  def flat_crossproduct_scatter(
2769
3112
  self,
2770
3113
  joborder: CWLObjectType,
2771
- scatter_keys: List[str],
2772
- outputs: List[Promised[CWLObjectType]],
3114
+ scatter_keys: list[str],
3115
+ outputs: list[Promised[CWLObjectType]],
2773
3116
  postScatterEval: Callable[[CWLObjectType], CWLObjectType],
2774
3117
  ) -> None:
2775
3118
  """Cartesian product of the inputs, then flattened."""
2776
3119
  scatter_key = shortname(scatter_keys[0])
2777
- for n in range(0, len(cast(List[CWLObjectType], joborder[scatter_key]))):
3120
+ for n in range(0, len(cast(list[CWLObjectType], joborder[scatter_key]))):
2778
3121
  updated_joborder = copy.copy(joborder)
2779
3122
  updated_joborder[scatter_key] = cast(
2780
- List[CWLObjectType], joborder[scatter_key]
3123
+ list[CWLObjectType], joborder[scatter_key]
2781
3124
  )[n]
2782
3125
  if len(scatter_keys) == 1:
2783
3126
  updated_joborder = postScatterEval(updated_joborder)
@@ -2798,16 +3141,16 @@ class CWLScatter(Job):
2798
3141
  def nested_crossproduct_scatter(
2799
3142
  self,
2800
3143
  joborder: CWLObjectType,
2801
- scatter_keys: List[str],
3144
+ scatter_keys: list[str],
2802
3145
  postScatterEval: Callable[[CWLObjectType], CWLObjectType],
2803
- ) -> List[Promised[CWLObjectType]]:
3146
+ ) -> list[Promised[CWLObjectType]]:
2804
3147
  """Cartesian product of the inputs."""
2805
3148
  scatter_key = shortname(scatter_keys[0])
2806
- outputs: List[Promised[CWLObjectType]] = []
2807
- for n in range(0, len(cast(List[CWLObjectType], joborder[scatter_key]))):
3149
+ outputs: list[Promised[CWLObjectType]] = []
3150
+ for n in range(0, len(cast(list[CWLObjectType], joborder[scatter_key]))):
2808
3151
  updated_joborder = copy.copy(joborder)
2809
3152
  updated_joborder[scatter_key] = cast(
2810
- List[CWLObjectType], joborder[scatter_key]
3153
+ list[CWLObjectType], joborder[scatter_key]
2811
3154
  )[n]
2812
3155
  if len(scatter_keys) == 1:
2813
3156
  updated_joborder = postScatterEval(updated_joborder)
@@ -2828,7 +3171,7 @@ class CWLScatter(Job):
2828
3171
  )
2829
3172
  return outputs
2830
3173
 
2831
- def run(self, file_store: AbstractFileStore) -> List[Promised[CWLObjectType]]:
3174
+ def run(self, file_store: AbstractFileStore) -> list[Promised[CWLObjectType]]:
2832
3175
  """Generate the follow on scatter jobs."""
2833
3176
  cwljob = resolve_dict_w_promises(self.cwljob, file_store)
2834
3177
 
@@ -2840,7 +3183,7 @@ class CWLScatter(Job):
2840
3183
  scatterMethod = self.step.tool.get("scatterMethod", None)
2841
3184
  if len(scatter) == 1:
2842
3185
  scatterMethod = "dotproduct"
2843
- outputs: List[Promised[CWLObjectType]] = []
3186
+ outputs: list[Promised[CWLObjectType]] = []
2844
3187
 
2845
3188
  valueFrom = {
2846
3189
  shortname(i["id"]): i["valueFrom"]
@@ -2872,11 +3215,11 @@ class CWLScatter(Job):
2872
3215
 
2873
3216
  if scatterMethod == "dotproduct":
2874
3217
  for i in range(
2875
- 0, len(cast(List[CWLObjectType], cwljob[shortname(scatter[0])]))
3218
+ 0, len(cast(list[CWLObjectType], cwljob[shortname(scatter[0])]))
2876
3219
  ):
2877
3220
  copyjob = copy.copy(cwljob)
2878
3221
  for sc in [shortname(x) for x in scatter]:
2879
- copyjob[sc] = cast(List[CWLObjectType], cwljob[sc])[i]
3222
+ copyjob[sc] = cast(list[CWLObjectType], cwljob[sc])[i]
2880
3223
  copyjob = postScatterEval(copyjob)
2881
3224
  subjob, follow_on = makeJob(
2882
3225
  tool=self.step.embedded_tool,
@@ -2915,7 +3258,7 @@ class CWLGather(Job):
2915
3258
  def __init__(
2916
3259
  self,
2917
3260
  step: cwltool.workflow.WorkflowStep,
2918
- outputs: Promised[Union[CWLObjectType, List[CWLObjectType]]],
3261
+ outputs: Promised[Union[CWLObjectType, list[CWLObjectType]]],
2919
3262
  ):
2920
3263
  """Collect our context for later gathering."""
2921
3264
  super().__init__(cores=1, memory="1GiB", disk="1MiB", local=True)
@@ -2924,24 +3267,24 @@ class CWLGather(Job):
2924
3267
 
2925
3268
  @staticmethod
2926
3269
  def extract(
2927
- obj: Union[CWLObjectType, List[CWLObjectType]], k: str
2928
- ) -> Union[CWLOutputType, List[CWLObjectType]]:
3270
+ obj: Union[CWLObjectType, list[CWLObjectType]], k: str
3271
+ ) -> Union[CWLOutputType, list[CWLObjectType]]:
2929
3272
  """
2930
3273
  Extract the given key from the obj.
2931
3274
 
2932
3275
  If the object is a list, extract it from all members of the list.
2933
3276
  """
2934
3277
  if isinstance(obj, Mapping):
2935
- return cast(Union[CWLOutputType, List[CWLObjectType]], obj.get(k))
3278
+ return cast(Union[CWLOutputType, list[CWLObjectType]], obj.get(k))
2936
3279
  elif isinstance(obj, MutableSequence):
2937
- cp: List[CWLObjectType] = []
3280
+ cp: list[CWLObjectType] = []
2938
3281
  for item in obj:
2939
3282
  cp.append(cast(CWLObjectType, CWLGather.extract(item, k)))
2940
3283
  return cp
2941
3284
  else:
2942
- return cast(List[CWLObjectType], [])
3285
+ return cast(list[CWLObjectType], [])
2943
3286
 
2944
- def run(self, file_store: AbstractFileStore) -> Dict[str, Any]:
3287
+ def run(self, file_store: AbstractFileStore) -> dict[str, Any]:
2945
3288
  """Gather all the outputs of the scatter."""
2946
3289
  outobj = {}
2947
3290
 
@@ -2952,8 +3295,8 @@ class CWLGather(Job):
2952
3295
  return shortname(n)
2953
3296
 
2954
3297
  # TODO: MyPy can't understand that this is the type we should get by unwrapping the promise
2955
- outputs: Union[CWLObjectType, List[CWLObjectType]] = cast(
2956
- Union[CWLObjectType, List[CWLObjectType]], unwrap(self.outputs)
3298
+ outputs: Union[CWLObjectType, list[CWLObjectType]] = cast(
3299
+ Union[CWLObjectType, list[CWLObjectType]], unwrap(self.outputs)
2957
3300
  )
2958
3301
  for k in [sn(i) for i in self.step.tool["out"]]:
2959
3302
  outobj[k] = self.extract(outputs, k)
@@ -2995,7 +3338,11 @@ ProcessType = TypeVar(
2995
3338
 
2996
3339
 
2997
3340
  def remove_pickle_problems(obj: ProcessType) -> ProcessType:
2998
- """Doc_loader does not pickle correctly, causing Toil errors, remove from objects."""
3341
+ """
3342
+ Doc_loader does not pickle correctly, causing Toil errors, remove from objects.
3343
+
3344
+ See github issue: https://github.com/mypyc/mypyc/issues/804
3345
+ """
2999
3346
  if hasattr(obj, "doc_loader"):
3000
3347
  obj.doc_loader = None
3001
3348
  if isinstance(obj, cwltool.workflow.WorkflowStep):
@@ -3027,12 +3374,11 @@ class CWLWorkflow(CWLNamedJob):
3027
3374
  self.cwlwf = cwlwf
3028
3375
  self.cwljob = cwljob
3029
3376
  self.runtime_context = runtime_context
3030
- self.cwlwf = remove_pickle_problems(self.cwlwf)
3031
3377
  self.conditional = conditional or Conditional()
3032
3378
 
3033
3379
  def run(
3034
3380
  self, file_store: AbstractFileStore
3035
- ) -> Union[UnresolvedDict, Dict[str, SkipNull]]:
3381
+ ) -> Union[UnresolvedDict, dict[str, SkipNull]]:
3036
3382
  """
3037
3383
  Convert a CWL Workflow graph into a Toil job graph.
3038
3384
 
@@ -3053,7 +3399,7 @@ class CWLWorkflow(CWLNamedJob):
3053
3399
  # that may be used as a "source" for a step input workflow output
3054
3400
  # parameter
3055
3401
  # to: the job that will produce that value.
3056
- promises: Dict[str, Job] = {}
3402
+ promises: dict[str, Job] = {}
3057
3403
 
3058
3404
  parent_name = shortname(self.cwlwf.tool["id"])
3059
3405
 
@@ -3082,7 +3428,7 @@ class CWLWorkflow(CWLNamedJob):
3082
3428
  stepinputs_fufilled = False
3083
3429
  if stepinputs_fufilled:
3084
3430
  logger.debug("Ready to make job for workflow step %s", step_id)
3085
- jobobj: Dict[
3431
+ jobobj: dict[
3086
3432
  str, Union[ResolveSource, DefaultWithSource, StepValueFrom]
3087
3433
  ] = {}
3088
3434
 
@@ -3216,30 +3562,348 @@ class CWLWorkflow(CWLNamedJob):
3216
3562
  return UnresolvedDict(outobj)
3217
3563
 
3218
3564
 
3565
class CWLInstallImportsJob(Job):
    """
    Job to take the entire CWL object and a mapping of filenames to the
    imported Toil file IDs, and convert all file locations to URIs.

    This class is only used when runImportsOnWorkers is enabled.
    """

    def __init__(
        self,
        initialized_job_order: Promised[CWLObjectType],
        tool: Promised[Process],
        basedir: str,
        skip_remote: bool,
        bypass_file_store: bool,
        import_data: Promised[dict[str, FileID]],
        **kwargs: Any,
    ) -> None:
        """
        Store what is needed to fill in the file locations later.

        :param initialized_job_order: Job order (or a promise for it).
        :param tool: CWL Process (or a promise for it).
        :param basedir: Base directory handed to ToilFsAccess.
        :param skip_remote: Forwarded to visit_files as ``skip_remote``.
        :param bypass_file_store: Forwarded to visit_files as
            ``bypass_file_store``.
        :param import_data: Mapping of filename to Toil file ID (or a promise
            for it).
        :param kwargs: Extra keyword arguments for the Job constructor.
        """
        super().__init__(local=True, **kwargs)
        self.initialized_job_order = initialized_job_order
        self.tool = tool
        self.basedir = basedir
        self.skip_remote = skip_remote
        self.bypass_file_store = bypass_file_store
        self.import_data = import_data

    @staticmethod
    def fill_in_files(
        initialized_job_order: CWLObjectType,
        tool: Process,
        candidate_to_fileid: dict[str, FileID],
        basedir: str,
        skip_remote: bool,
        bypass_file_store: bool,
    ) -> tuple[Process, CWLObjectType]:
        """
        Given a mapping of filenames to Toil file IDs, replace the filename with the file IDs throughout the CWL object.

        :return: The tool and the job order that were passed in, in that
            order.
        """

        def fill_in_file(filename: str) -> FileID:
            """
            Return the file name's associated Toil file ID.

            Raises KeyError when the file name is not in the mapping.
            """
            return candidate_to_fileid[filename]

        file_convert_function = functools.partial(
            extract_and_convert_file_to_toil_uri, fill_in_file
        )
        fs_access = ToilFsAccess(basedir)
        # The same two dicts are shared by the job-order pass and the tool pass
        fileindex: dict[str, str] = {}
        existing: dict[str, str] = {}
        visit_files(
            file_convert_function,
            fs_access,
            fileindex,
            existing,
            initialized_job_order,
            mark_broken=True,
            skip_remote=skip_remote,
            bypass_file_store=bypass_file_store,
        )
        visitSteps(
            tool,
            functools.partial(
                visit_files,
                file_convert_function,
                fs_access,
                fileindex,
                existing,
                mark_broken=True,
                skip_remote=skip_remote,
                bypass_file_store=bypass_file_store,
            ),
        )

        # We always expect to have processed all files that exist
        for param_value in initialized_job_order.values():
            # Loop through all the parameters for the workflow overall.
            # Drop any files that aren't either imported (for when we use
            # the file store) or available on disk (for when we don't).
            # This will properly make them cause an error later if they
            # were required.
            rm_unprocessed_secondary_files(param_value)
        return tool, initialized_job_order

    def run(self, file_store: AbstractFileStore) -> tuple[Process, CWLObjectType]:
        """
        Convert the filenames in the workflow inputs into the URIs.

        :return: Transformed workflow inputs as a tuple of the process
            (index 0) and the job order (index 1).
        """
        candidate_to_fileid: dict[str, FileID] = unwrap(self.import_data)

        initialized_job_order = unwrap(self.initialized_job_order)
        tool = unwrap(self.tool)
        return CWLInstallImportsJob.fill_in_files(
            initialized_job_order,
            tool,
            candidate_to_fileid,
            self.basedir,
            self.skip_remote,
            self.bypass_file_store,
        )
3665
+
3666
+
3667
class CWLImportWrapper(CWLNamedJob):
    """
    Root job that arranges for input files to be imported on workers rather
    than on the leader.

    When run it chains three jobs: an ImportsJob for the files described by
    ``file_to_data``, a CWLInstallImportsJob that applies the resulting file
    IDs to the tool and job order, and a CWLStartJob that starts the workflow.

    This class is only used when runImportsOnWorkers is enabled.
    """

    def __init__(
        self,
        initialized_job_order: CWLObjectType,
        tool: Process,
        runtime_context: cwltool.context.RuntimeContext,
        file_to_data: dict[str, FileMetadata],
        options: Namespace,
    ):
        # NOTE(review): the disk requirement is taken from
        # import_workers_threshold -- confirm that is the intended option.
        super().__init__(local=False, disk=options.import_workers_threshold)
        self.file_to_data = file_to_data
        self.runtime_context = runtime_context
        self.options = options
        self.tool = tool
        self.initialized_job_order = initialized_job_order

    def run(self, file_store: AbstractFileStore) -> Any:
        """
        Schedule the import, install and start jobs.

        :return: Promise for the return value of the CWLStartJob.
        """
        opts = self.options

        # Stage 1: do the actual imports.
        fetch_stage = ImportsJob(
            self.file_to_data,
            opts.import_workers_threshold,
            opts.import_workers_disk,
        )
        self.addChild(fetch_stage)

        # Stage 2: apply the imported file IDs to the job order and tool.
        install_stage = CWLInstallImportsJob(
            initialized_job_order=self.initialized_job_order,
            tool=self.tool,
            basedir=opts.basedir,
            skip_remote=opts.reference_inputs,
            bypass_file_store=opts.bypass_file_store,
            import_data=fetch_stage.rv(0),
        )
        self.addChild(install_stage)
        fetch_stage.addFollowOn(install_stage)

        # Stage 3: start the workflow from the installed tool (index 0) and
        # job order (index 1).
        launch_stage = CWLStartJob(
            install_stage.rv(0),
            install_stage.rv(1),
            runtime_context=self.runtime_context,
        )
        self.addChild(launch_stage)
        install_stage.addFollowOn(launch_stage)

        return launch_stage.rv()
3717
+
3718
+
3719
class CWLStartJob(CWLNamedJob):
    """
    Job that starts the CWL workflow once its inputs are ready.

    It receives the workflow/tool and the job order after all files have been
    imported, builds the job for them with makeJob(), and adds it as a child.
    """

    def __init__(
        self,
        tool: Promised[Process],
        initialized_job_order: Promised[CWLObjectType],
        runtime_context: cwltool.context.RuntimeContext,
        **kwargs: Any,
    ) -> None:
        """
        Remember the (possibly promised) tool and job order to start later.

        :param tool: CWL Process, or a promise for it.
        :param initialized_job_order: Job order, or a promise for it.
        :param runtime_context: cwltool runtime context passed to makeJob().
        :param kwargs: Extra keyword arguments for the parent constructor.
        """
        super().__init__(**kwargs)
        self.runtime_context = runtime_context
        self.initialized_job_order = initialized_job_order
        self.tool = tool

    def run(self, file_store: AbstractFileStore) -> Any:
        """
        Build the workflow's job and schedule it as a child of this one.

        :return: Promise for the return value of the created job.
        """
        job_order = unwrap(self.initialized_job_order)
        process = unwrap(self.tool)
        # toplevel, no name needed
        workflow_job, _ = makeJob(process, job_order, self.runtime_context, None, None)
        workflow_job.cwljob = job_order
        self.addChild(workflow_job)
        return workflow_job.rv()
3748
+
3749
+
3750
def extract_workflow_inputs(
    options: Namespace, initialized_job_order: CWLObjectType, tool: Process
) -> list[str]:
    """
    Collect all the workflow input files to import later.

    Visits the files of the job order and then those of every step of the
    tool, without importing anything.

    :param options: namespace
    :param initialized_job_order: cwl object
    :param tool: tool object
    :return: the collected file names, with None entries removed
    """
    fileindex: dict[str, str] = {}
    existing: dict[str, str] = {}

    # Extract out all the input files' filenames
    logger.info("Collecting input files...")
    fs_access = ToilFsAccess(options.basedir)
    # One visitor, used for both the job order and each tool description.
    collect_uris = functools.partial(
        visit_files,
        extract_file_uri_once,
        fs_access,
        fileindex,
        existing,
        mark_broken=True,
        skip_remote=options.reference_inputs,
        bypass_file_store=options.bypass_file_store,
    )
    filenames = collect_uris(initialized_job_order)

    # Extract filenames of all the files associated with tools (binaries, etc.).
    logger.info("Collecting tool-associated files...")
    filenames.extend(visitSteps(tool, collect_uris))

    return [name for name in filenames if name is not None]
3793
+
3794
+
3795
def import_workflow_inputs(
    jobstore: AbstractJobStore,
    options: Namespace,
    initialized_job_order: CWLObjectType,
    tool: Process,
    log_level: int = logging.DEBUG,
) -> None:
    """
    Import all workflow inputs on the leader.

    Run when not importing on workers. Files in the job order are imported
    with symlinking allowed; files associated with the tool are imported
    without symlinking.

    :param jobstore: Toil jobstore
    :param options: Namespace
    :param initialized_job_order: CWL object
    :param tool: CWL tool
    :param log_level: log level used for the per-file "Loading" message
    :return: None
    """
    # The same two dicts are shared by the input pass and the tool pass
    fileindex: dict[str, str] = {}
    existing: dict[str, str] = {}

    # Define something we can call to import a file and get its file
    # ID.
    def file_import_function(url: str) -> FileID:
        logger.log(log_level, "Loading %s...", url)
        return jobstore.import_file(url, symlink=True)

    import_function = functools.partial(
        extract_and_convert_file_to_toil_uri, file_import_function
    )
    # Import all the input files, some of which may be missing optional
    # files.
    logger.info("Importing input files...")
    fs_access = ToilFsAccess(options.basedir)
    visit_files(
        import_function,
        fs_access,
        fileindex,
        existing,
        initialized_job_order,
        mark_broken=True,
        skip_remote=options.reference_inputs,
        bypass_file_store=options.bypass_file_store,
    )

    # Make another function for importing tool files. This one doesn't allow
    # symlinking, since the tools might be coming from storage not accessible
    # to all nodes.
    tool_import_function = functools.partial(
        extract_and_convert_file_to_toil_uri,
        cast(
            Callable[[str], FileID],
            functools.partial(jobstore.import_file, symlink=False),
        ),
    )

    # Import all the files associated with tools (binaries, etc.).
    # Not sure why you would have an optional secondary file here, but
    # the spec probably needs us to support them.
    logger.info("Importing tool-associated files...")
    visitSteps(
        tool,
        functools.partial(
            visit_files,
            tool_import_function,
            fs_access,
            fileindex,
            existing,
            mark_broken=True,
            skip_remote=options.reference_inputs,
            bypass_file_store=options.bypass_file_store,
        ),
    )

    # We always expect to have processed all files that exist
    for param_value in initialized_job_order.values():
        # Loop through all the parameters for the workflow overall.
        # Drop any files that aren't either imported (for when we use
        # the file store) or available on disk (for when we don't).
        # This will properly make them cause an error later if they
        # were required.
        rm_unprocessed_secondary_files(param_value)
3877
+
3878
+
3879
+ T = TypeVar("T")
3880
+
3881
+
3219
3882
def visitSteps(
    cmdline_tool: Process,
    op: Callable[[CommentedMap], list[T]],
) -> list[T]:
    """
    Walk a CWL Process object and apply ``op`` to each tool description
    CWL object found.

    For a workflow, ``op`` is applied to every step's tool and then the walk
    recurses into the step's embedded tool. For any other Process, ``op`` is
    applied to its tool directly.

    :return: for a workflow, the concatenation of everything ``op`` returned;
        otherwise exactly what ``op`` returned.
    :raises RuntimeError: if ``cmdline_tool`` is not a CWL Process.
    """
    if isinstance(cmdline_tool, cwltool.workflow.Workflow):
        # For workflows we need to dispatch on steps
        collected = []
        for step in cmdline_tool.steps:
            # Handle the step's tool
            collected.extend(op(step.tool))
            # Recurse on the embedded tool; maybe it's a workflow.
            collected.extend(visitSteps(step.embedded_tool, op))
        return collected

    if isinstance(cmdline_tool, cwltool.process.Process):
        # All CWL Process objects (including CommandLineTool) will have tools
        # if they bothered to run the Process __init__.
        return op(cmdline_tool.tool)

    raise RuntimeError(
        f"Unsupported type encountered in workflow traversal: {type(cmdline_tool)}"
    )
3243
3907
 
3244
3908
 
3245
3909
  def rm_unprocessed_secondary_files(job_params: Any) -> None:
@@ -3252,7 +3916,7 @@ def rm_unprocessed_secondary_files(job_params: Any) -> None:
3252
3916
 
3253
3917
  def filtered_secondary_files(
3254
3918
  unfiltered_secondary_files: CWLObjectType,
3255
- ) -> List[CWLObjectType]:
3919
+ ) -> list[CWLObjectType]:
3256
3920
  """
3257
3921
  Remove unprocessed secondary files.
3258
3922
 
@@ -3263,9 +3927,8 @@ def filtered_secondary_files(
3263
3927
  but add the resolved fields to the list of unresolved fields so we remove
3264
3928
  them here after the fact.
3265
3929
 
3266
- We keep secondary files using the 'toildir:', or '_:' protocols, or using
3267
- the 'file:' protocol and indicating files or directories that actually
3268
- exist. The 'required' logic seems to be handled deeper in
3930
+ We keep secondary files with anything other than MISSING_FILE as their
3931
+ location. The 'required' logic seems to be handled deeper in
3269
3932
  cwltool.builder.Builder(), and correctly determines which files should be
3270
3933
  imported. Therefore we remove the files here and if this file is SUPPOSED
3271
3934
  to exist, it will still give the appropriate file does not exist error, but
@@ -3274,30 +3937,33 @@ def filtered_secondary_files(
3274
3937
  intermediate_secondary_files = []
3275
3938
  final_secondary_files = []
3276
3939
  # remove secondary files still containing interpolated strings
3277
- for sf in cast(List[CWLObjectType], unfiltered_secondary_files["secondaryFiles"]):
3940
+ for sf in cast(list[CWLObjectType], unfiltered_secondary_files["secondaryFiles"]):
3278
3941
  sf_bn = cast(str, sf.get("basename", ""))
3279
3942
  sf_loc = cast(str, sf.get("location", ""))
3280
3943
  if ("$(" not in sf_bn) and ("${" not in sf_bn):
3281
3944
  if ("$(" not in sf_loc) and ("${" not in sf_loc):
3282
3945
  intermediate_secondary_files.append(sf)
3946
+ else:
3947
+ logger.debug(
3948
+ "Secondary file %s is dropped because it has an uninterpolated location",
3949
+ sf,
3950
+ )
3951
+ else:
3952
+ logger.debug(
3953
+ "Secondary file %s is dropped because it has an uninterpolated basename",
3954
+ sf,
3955
+ )
3283
3956
  # remove secondary files that are not present in the filestore or pointing
3284
3957
  # to existant things on disk
3285
3958
  for sf in intermediate_secondary_files:
3286
3959
  sf_loc = cast(str, sf.get("location", ""))
3287
- if (
3288
- sf_loc.startswith("toilfile:")
3289
- or sf_loc.startswith("toildir:")
3290
- or sf_loc.startswith("_:")
3291
- or sf.get("class", "") == "Directory"
3292
- ):
3960
+ if sf_loc != MISSING_FILE or sf.get("class", "") == "Directory":
3293
3961
  # Pass imported files, and all Directories
3294
3962
  final_secondary_files.append(sf)
3295
- elif sf_loc.startswith("file:") and os.path.exists(
3296
- schema_salad.ref_resolver.uri_file_path(sf_loc)
3297
- ):
3298
- # Pass things that exist on disk (which we presumably declined to
3299
- # import because we aren't using the file store)
3300
- final_secondary_files.append(sf)
3963
+ else:
3964
+ logger.debug(
3965
+ "Secondary file %s is dropped because it is known to be missing", sf
3966
+ )
3301
3967
  return final_secondary_files
3302
3968
 
3303
3969
 
@@ -3352,12 +4018,12 @@ def determine_load_listing(
3352
4018
 
3353
4019
  1. no_listing: DIRECTORY_NAME.listing will be undefined.
3354
4020
  e.g.
3355
-
4021
+
3356
4022
  inputs.DIRECTORY_NAME.listing == unspecified
3357
4023
 
3358
4024
  2. shallow_listing: DIRECTORY_NAME.listing will return a list one level
3359
4025
  deep of DIRECTORY_NAME's contents.
3360
- e.g.
4026
+ e.g.
3361
4027
 
3362
4028
  inputs.DIRECTORY_NAME.listing == [items in directory]
3363
4029
  inputs.DIRECTORY_NAME.listing[0].listing == undefined
@@ -3402,8 +4068,6 @@ def determine_load_listing(
3402
4068
  class NoAvailableJobStoreException(Exception):
3403
4069
  """Indicates that no job store name is available."""
3404
4070
 
3405
- pass
3406
-
3407
4071
 
3408
4072
  def generate_default_job_store(
3409
4073
  batch_system_name: Optional[str],
@@ -3471,37 +4135,64 @@ def generate_default_job_store(
3471
4135
 
3472
4136
  usage_message = "\n\n" + textwrap.dedent(
3473
4137
  """
3474
- * All positional arguments [cwl, yml_or_json] must always be specified last for toil-cwl-runner.
3475
- Note: If you're trying to specify a jobstore, please use --jobStore.
3476
-
3477
- Usage: toil-cwl-runner [options] example.cwl example-job.yaml
3478
- Example: toil-cwl-runner \\
3479
- --jobStore aws:us-west-2:jobstore \\
3480
- --realTimeLogging \\
3481
- --logInfo \\
3482
- example.cwl \\
3483
- example-job.yaml
3484
- """[
4138
+ NOTE: If you're trying to specify a jobstore, you must use --jobStore, not a positional argument.
4139
+
4140
+ Usage: toil-cwl-runner [options] <workflow> [<input file>] [workflow options]
4141
+
4142
+ Example: toil-cwl-runner \\
4143
+ --jobStore aws:us-west-2:jobstore \\
4144
+ --realTimeLogging \\
4145
+ --logInfo \\
4146
+ example.cwl \\
4147
+ example-job.yaml \\
4148
+ --wf_input="hello world"
4149
+ """[
3485
4150
  1:
3486
4151
  ]
3487
4152
  )
3488
4153
 
3489
- def get_options(args: List[str]) -> Namespace:
4154
+
4155
+ def get_options(args: list[str]) -> Namespace:
3490
4156
  """
3491
4157
  Parse given args and properly add non-Toil arguments into the cwljob of the Namespace.
3492
4158
  :param args: List of args from command line
3493
4159
  :return: options namespace
3494
4160
  """
3495
- parser = ArgParser()
4161
+ # We can't allow abbreviations in case the workflow defines an option that
4162
+ # is a prefix of a Toil option.
4163
+ parser = ArgParser(
4164
+ allow_abbrev=False,
4165
+ usage="%(prog)s [options] WORKFLOW [INFILE] [WF_OPTIONS...]",
4166
+ description=textwrap.dedent(
4167
+ """
4168
+ positional arguments:
4169
+
4170
+ WORKFLOW CWL file to run.
4171
+
4172
+ INFILE YAML or JSON file of workflow inputs.
4173
+
4174
+ WF_OPTIONS Additional inputs to the workflow as command-line
4175
+ flags. If CWL workflow takes an input, the name of the
4176
+ input can be used as an option. For example:
4177
+
4178
+ %(prog)s workflow.cwl --file1 file
4179
+
4180
+ If an input has the same name as a Toil option, pass
4181
+ '--' before it.
4182
+ """
4183
+ ),
4184
+ formatter_class=RawDescriptionHelpFormatter,
4185
+ )
4186
+
3496
4187
  addOptions(parser, jobstore_as_flag=True, cwl=True)
3497
4188
  options: Namespace
3498
- options, cwl_options = parser.parse_known_args(args)
3499
- options.cwljob.extend(cwl_options)
4189
+ options, extra = parser.parse_known_args(args)
4190
+ options.cwljob = extra
3500
4191
 
3501
4192
  return options
3502
4193
 
3503
4194
 
3504
- def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
4195
+ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
3505
4196
  """Run the main loop for toil-cwl-runner."""
3506
4197
  # Remove cwltool logger's stream handler so it uses Toil's
3507
4198
  cwllogger.removeHandler(defaultStreamHandler)
@@ -3513,25 +4204,21 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3513
4204
 
3514
4205
  # Do cwltool setup
3515
4206
  cwltool.main.setup_schema(args=options, custom_schema_callback=None)
3516
- tmpdir_prefix = options.tmpdir_prefix = options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
3517
-
3518
- # We need a workdir for the CWL runtime contexts.
3519
- if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX:
3520
- # if tmpdir_prefix is not the default value, move
3521
- # workdir and the default job store under it
3522
- workdir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3523
- else:
3524
- # Use a directory in the default tmpdir
3525
- workdir = mkdtemp()
3526
- # Make sure workdir doesn't exist so it can be a job store
3527
- os.rmdir(workdir)
4207
+ tmpdir_prefix = options.tmpdir_prefix = (
4208
+ options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
4209
+ )
4210
+ tmp_outdir_prefix = options.tmp_outdir_prefix or tmpdir_prefix
4211
+ workdir = options.workDir or tmp_outdir_prefix
3528
4212
 
3529
4213
  if options.jobStore is None:
4214
+ jobstore = cwltool.utils.create_tmp_dir(tmp_outdir_prefix)
4215
+ # Make sure directory doesn't exist so it can be a job store
4216
+ os.rmdir(jobstore)
3530
4217
  # Pick a default job store specifier appropriate to our choice of batch
3531
4218
  # system and provisioner and installed modules, given this available
3532
4219
  # local directory name. Fail if no good default can be used.
3533
4220
  options.jobStore = generate_default_job_store(
3534
- options.batchSystem, options.provisioner, workdir
4221
+ options.batchSystem, options.provisioner, jobstore
3535
4222
  )
3536
4223
 
3537
4224
  options.doc_cache = True
@@ -3539,13 +4226,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3539
4226
  options.do_validate = True
3540
4227
  options.pack = False
3541
4228
  options.print_subgraph = False
3542
- if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.workDir is None:
3543
- # We need to override workDir because by default Toil will pick
3544
- # somewhere under the system temp directory if unset, ignoring
3545
- # --tmpdir-prefix.
3546
- #
3547
- # If set, workDir needs to exist, so we directly use the prefix
3548
- options.workDir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3549
4229
 
3550
4230
  if options.batchSystem == "kubernetes":
3551
4231
  # Containers under Kubernetes can only run in Singularity
@@ -3563,12 +4243,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3563
4243
  logger.debug(f"Final job store {options.jobStore} and workDir {options.workDir}")
3564
4244
 
3565
4245
  outdir = os.path.abspath(options.outdir or os.getcwd())
3566
- tmp_outdir_prefix = os.path.abspath(
3567
- options.tmp_outdir_prefix or DEFAULT_TMPDIR_PREFIX
3568
- )
3569
-
3570
- fileindex: Dict[str, str] = {}
3571
- existing: Dict[str, str] = {}
3572
4246
  conf_file = getattr(options, "beta_dependency_resolvers_configuration", None)
3573
4247
  use_conda_dependencies = getattr(options, "beta_conda_dependencies", None)
3574
4248
  job_script_provider = None
@@ -3576,7 +4250,6 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3576
4250
  dependencies_configuration = DependenciesConfiguration(options)
3577
4251
  job_script_provider = dependencies_configuration
3578
4252
 
3579
- options.default_container = None
3580
4253
  runtime_context = cwltool.context.RuntimeContext(vars(options))
3581
4254
  runtime_context.toplevel = True # enable discovery of secondaryFiles
3582
4255
  runtime_context.find_default_container = functools.partial(
@@ -3584,6 +4257,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3584
4257
  )
3585
4258
  runtime_context.workdir = workdir # type: ignore[attr-defined]
3586
4259
  runtime_context.outdir = outdir
4260
+ setattr(runtime_context, "cwl_default_ram", options.cwl_default_ram)
3587
4261
  runtime_context.move_outputs = "leave"
3588
4262
  runtime_context.rm_tmpdir = False
3589
4263
  runtime_context.streaming_allowed = not options.disable_streaming
@@ -3617,27 +4291,28 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3617
4291
  )
3618
4292
  runtime_context.research_obj = research_obj
3619
4293
 
3620
- with Toil(options) as toil:
3621
- if options.restart:
3622
- try:
3623
- outobj = toil.restart()
3624
- except FailedJobsException as err:
3625
- if err.exit_code == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
3626
- # We figured out that we can't support this workflow.
3627
- logging.error(err)
3628
- logging.error(
3629
- "Your workflow uses a CWL requirement that Toil does not support!"
3630
- )
3631
- return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3632
- else:
3633
- raise
3634
- else:
4294
+ try:
4295
+
4296
+ if not options.restart:
4297
+ # Make a version of the config based on the initial options, for
4298
+ # setting up CWL option stuff
4299
+ expected_config = Config()
4300
+ expected_config.setOptions(options)
4301
+
4302
+ # Before showing the options to any cwltool stuff that wants to
4303
+ # load the workflow, transform options.cwltool, where our
4304
+ # argument for what to run is, to handle Dockstore workflows.
4305
+ options.cwltool = resolve_workflow(options.cwltool)
4306
+
4307
+ # TODO: why are we doing this? Does this get applied to all
4308
+ # tools as a default or something?
3635
4309
  loading_context.hints = [
3636
4310
  {
3637
4311
  "class": "ResourceRequirement",
3638
- "coresMin": toil.config.defaultCores,
3639
- "ramMin": toil.config.defaultMemory / (2**20),
3640
- "outdirMin": toil.config.defaultDisk / (2**20),
4312
+ "coresMin": expected_config.defaultCores,
4313
+ # Don't include any RAM requirement because we want to
4314
+ # know when tools don't manually ask for RAM.
4315
+ "outdirMin": expected_config.defaultDisk / (2**20),
3641
4316
  "tmpdirMin": 0,
3642
4317
  }
3643
4318
  ]
@@ -3660,6 +4335,10 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3660
4335
  )
3661
4336
  raise
3662
4337
 
4338
+ # Attempt to prepull the containers
4339
+ if not options.no_prepull and not options.no_container:
4340
+ try_prepull(uri, runtime_context, expected_config.batchSystem)
4341
+
3663
4342
  options.tool_help = None
3664
4343
  options.debug = options.logLevel == "DEBUG"
3665
4344
  job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
@@ -3724,11 +4403,12 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3724
4403
  secret_store=runtime_context.secret_store,
3725
4404
  input_required=True,
3726
4405
  )
3727
- except SystemExit as e:
3728
- if e.code == 2: # raised by argparse's parse_args() function
4406
+ except SystemExit as err:
4407
+ if err.code == 2: # raised by argparse's parse_args() function
3729
4408
  print(
3730
4409
  "\nIf both a CWL file and an input object (YAML/JSON) file were "
3731
- "provided, this may be the argument order." + usage_message,
4410
+ "provided, the problem may be the argument order."
4411
+ + usage_message,
3732
4412
  file=sys.stderr,
3733
4413
  )
3734
4414
  raise
@@ -3767,6 +4447,16 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3767
4447
  # ToilFsAccess needs to be set up if we want to be able to use
3768
4448
  # URLs.
3769
4449
  builder = tool._init_job(initialized_job_order, runtime_context)
4450
+ if not isinstance(tool, cwltool.workflow.Workflow):
4451
+ # make sure this doesn't add listing items; if shallow_listing is
4452
+ # selected, it will discover dirs one deep and then again later on
4453
+ # (when the cwltool builder gets constructed from the job in the
4454
+ # CommandLineTool's job() method,
4455
+ # see https://github.com/common-workflow-language/cwltool/blob/9cda157cb4380e9d30dec29f0452c56d0c10d064/cwltool/command_line_tool.py#L951),
4456
+ # producing 2+ deep listings instead of only 1.
4457
+ # ExpressionTool also uses a builder, see https://github.com/common-workflow-language/cwltool/blob/9cda157cb4380e9d30dec29f0452c56d0c10d064/cwltool/command_line_tool.py#L207
4458
+ # Workflows don't need this because they don't go through CommandLineTool or ExpressionTool
4459
+ builder.loadListing = "no_listing"
3770
4460
 
3771
4461
  # make sure this doesn't add listing items; if shallow_listing is
3772
4462
  # selected, it will discover dirs one deep and then again later on
@@ -3780,151 +4470,114 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3780
4470
  discover_secondaryFiles=True,
3781
4471
  )
3782
4472
 
3783
- # Define something we can call to import a file and get its file
3784
- # ID.
3785
- # We cast this because import_file is overloaded depending on if we
3786
- # pass a shared file name or not, and we know the way we call it we
3787
- # always get a FileID out.
3788
- file_import_function = cast(
3789
- Callable[[str], FileID],
3790
- functools.partial(toil.import_file, symlink=True),
3791
- )
3792
-
3793
- # Import all the input files, some of which may be missing optional
3794
- # files.
3795
- logger.info("Importing input files...")
3796
- fs_access = ToilFsAccess(options.basedir)
3797
- import_files(
3798
- file_import_function,
3799
- fs_access,
3800
- fileindex,
3801
- existing,
3802
- initialized_job_order,
3803
- skip_broken=True,
3804
- skip_remote=options.reference_inputs,
3805
- bypass_file_store=options.bypass_file_store,
3806
- log_level=logging.INFO,
3807
- )
3808
- # Import all the files associated with tools (binaries, etc.).
3809
- # Not sure why you would have an optional secondary file here, but
3810
- # the spec probably needs us to support them.
3811
- logger.info("Importing tool-associated files...")
3812
- visitSteps(
3813
- tool,
3814
- functools.partial(
3815
- import_files,
3816
- file_import_function,
3817
- fs_access,
3818
- fileindex,
3819
- existing,
3820
- skip_broken=True,
3821
- skip_remote=options.reference_inputs,
3822
- bypass_file_store=options.bypass_file_store,
3823
- log_level=logging.INFO,
3824
- ),
3825
- )
3826
-
3827
- # We always expect to have processed all files that exist
3828
- for param_name, param_value in initialized_job_order.items():
3829
- # Loop through all the parameters for the workflow overall.
3830
- # Drop any files that aren't either imported (for when we use
3831
- # the file store) or available on disk (for when we don't).
3832
- # This will properly make them cause an error later if they
3833
- # were required.
3834
- rm_unprocessed_secondary_files(param_value)
3835
-
3836
4473
  logger.info("Creating root job")
3837
4474
  logger.debug("Root tool: %s", tool)
3838
- try:
3839
- wf1, _ = makeJob(
3840
- tool=tool,
3841
- jobobj={},
3842
- runtime_context=runtime_context,
3843
- parent_name=None, # toplevel, no name needed
3844
- conditional=None,
3845
- )
3846
- except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
3847
- logging.error(err)
3848
- return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3849
- wf1.cwljob = initialized_job_order
3850
- logger.info("Starting workflow")
3851
- try:
3852
- outobj = toil.start(wf1)
3853
- except FailedJobsException as err:
3854
- if err.exit_code == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
3855
- # We figured out that we can't support this workflow.
3856
- logging.error(err)
3857
- logging.error(
3858
- "Your workflow uses a CWL requirement that Toil does not support!"
4475
+ tool = remove_pickle_problems(tool)
4476
+
4477
+ with Toil(options) as toil:
4478
+ if options.restart:
4479
+ outobj = toil.restart()
4480
+ else:
4481
+ try:
4482
+ wf1 = makeRootJob(
4483
+ tool=tool,
4484
+ jobobj={},
4485
+ runtime_context=runtime_context,
4486
+ initialized_job_order=initialized_job_order,
4487
+ options=options,
4488
+ toil=toil,
3859
4489
  )
4490
+ except CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION as err:
4491
+ logging.error(err)
3860
4492
  return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3861
- else:
3862
- raise
3863
-
3864
- # Now the workflow has completed. We need to make sure the outputs (and
3865
- # inputs) end up where the user wants them to be.
3866
- logger.info("Collecting workflow outputs...")
3867
- outobj = resolve_dict_w_promises(outobj)
3868
-
3869
- # Stage files. Specify destination bucket if specified in CLI
3870
- # options. If destination bucket not passed in,
3871
- # options.destBucket's value will be None.
3872
- toilStageFiles(
3873
- toil,
3874
- outobj,
3875
- outdir,
3876
- destBucket=options.destBucket,
3877
- log_level=logging.INFO
3878
- )
3879
- logger.info("Stored workflow outputs")
4493
+ logger.info("Starting workflow")
4494
+ outobj = toil.start(wf1)
3880
4495
 
3881
- if runtime_context.research_obj is not None:
3882
- cwltool.cwlprov.writablebagfile.create_job(
3883
- runtime_context.research_obj, outobj, True
3884
- )
4496
+ # Now the workflow has completed. We need to make sure the outputs (and
4497
+ # inputs) end up where the user wants them to be.
4498
+ logger.info("Collecting workflow outputs...")
4499
+ outobj = resolve_dict_w_promises(outobj)
3885
4500
 
3886
- def remove_at_id(doc: Any) -> None:
3887
- if isinstance(doc, MutableMapping):
3888
- for key in list(doc.keys()):
3889
- if key == "@id":
3890
- del doc[key]
3891
- else:
3892
- value = doc[key]
3893
- if isinstance(value, MutableMapping):
3894
- remove_at_id(value)
3895
- if isinstance(value, MutableSequence):
3896
- for entry in value:
3897
- if isinstance(value, MutableMapping):
3898
- remove_at_id(entry)
3899
-
3900
- remove_at_id(outobj)
3901
- visit_class(
4501
+ # Stage files. Specify destination bucket if specified in CLI
4502
+ # options. If destination bucket not passed in,
4503
+ # options.destBucket's value will be None.
4504
+ toilStageFiles(
4505
+ toil,
3902
4506
  outobj,
3903
- ("File",),
3904
- functools.partial(add_sizes, runtime_context.make_fs_access("")),
3905
- )
3906
- if not document_loader:
3907
- raise RuntimeError("cwltool loader is not set.")
3908
- prov_dependencies = cwltool.main.prov_deps(
3909
- workflowobj, document_loader, uri
3910
- )
3911
- runtime_context.research_obj.generate_snapshot(prov_dependencies)
3912
- cwltool.cwlprov.writablebagfile.close_ro(
3913
- runtime_context.research_obj, options.provenance
4507
+ outdir,
4508
+ destBucket=options.destBucket,
4509
+ log_level=logging.INFO,
3914
4510
  )
4511
+ logger.info("Stored workflow outputs")
3915
4512
 
3916
- if not options.destBucket and options.compute_checksum:
3917
- logger.info("Computing output file checksums...")
3918
- visit_class(
3919
- outobj,
3920
- ("File",),
3921
- functools.partial(compute_checksums, StdFsAccess("")),
3922
- )
4513
+ if runtime_context.research_obj is not None:
4514
+ cwltool.cwlprov.writablebagfile.create_job(
4515
+ runtime_context.research_obj, outobj, True
4516
+ )
4517
+
4518
+ def remove_at_id(doc: Any) -> None:
4519
+ if isinstance(doc, MutableMapping):
4520
+ for key in list(doc.keys()):
4521
+ if key == "@id":
4522
+ del doc[key]
4523
+ else:
4524
+ value = doc[key]
4525
+ if isinstance(value, MutableMapping):
4526
+ remove_at_id(value)
4527
+ if isinstance(value, MutableSequence):
4528
+ for entry in value:
4529
+ if isinstance(value, MutableMapping):
4530
+ remove_at_id(entry)
4531
+
4532
+ remove_at_id(outobj)
4533
+ visit_class(
4534
+ outobj,
4535
+ ("File",),
4536
+ functools.partial(add_sizes, runtime_context.make_fs_access("")),
4537
+ )
4538
+ if not document_loader:
4539
+ raise RuntimeError("cwltool loader is not set.")
4540
+ prov_dependencies = cwltool.main.prov_deps(
4541
+ workflowobj, document_loader, uri
4542
+ )
4543
+ runtime_context.research_obj.generate_snapshot(prov_dependencies)
4544
+ cwltool.cwlprov.writablebagfile.close_ro(
4545
+ runtime_context.research_obj, options.provenance
4546
+ )
3923
4547
 
3924
- visit_class(outobj, ("File",), MutationManager().unset_generation)
3925
- stdout.write(json.dumps(outobj, indent=4, default=str))
3926
- stdout.write("\n")
3927
- logger.info("CWL run complete!")
4548
+ if not options.destBucket and options.compute_checksum:
4549
+ logger.info("Computing output file checksums...")
4550
+ visit_class(
4551
+ outobj,
4552
+ ("File",),
4553
+ functools.partial(compute_checksums, StdFsAccess("")),
4554
+ )
4555
+
4556
+ visit_class(outobj, ("File",), MutationManager().unset_generation)
4557
+ stdout.write(json.dumps(outobj, indent=4, default=str))
4558
+ stdout.write("\n")
4559
+ logger.info("CWL run complete!")
4560
+ # Don't expose tracebacks to the user for exceptions that may be expected
4561
+ except FailedJobsException as err:
4562
+ if err.exit_code == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
4563
+ # We figured out that we can't support this workflow.
4564
+ logging.error(err)
4565
+ logging.error(
4566
+ "Your workflow uses a CWL requirement that Toil does not support!"
4567
+ )
4568
+ return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
4569
+ else:
4570
+ logging.error(err)
4571
+ return 1
4572
+ except (
4573
+ InsufficientSystemResources,
4574
+ LocatorException,
4575
+ InvalidImportExportUrlException,
4576
+ UnimplementedURLException,
4577
+ JobTooBigError,
4578
+ ) as err:
4579
+ logging.error(err)
4580
+ return 1
3928
4581
 
3929
4582
  return 0
3930
4583