toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164):
  1. toil/__init__.py +18 -13
  2. toil/batchSystems/abstractBatchSystem.py +39 -13
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
  4. toil/batchSystems/awsBatch.py +14 -14
  5. toil/batchSystems/cleanup_support.py +7 -3
  6. toil/batchSystems/contained_executor.py +3 -3
  7. toil/batchSystems/htcondor.py +0 -1
  8. toil/batchSystems/kubernetes.py +34 -31
  9. toil/batchSystems/local_support.py +3 -1
  10. toil/batchSystems/lsf.py +7 -7
  11. toil/batchSystems/mesos/batchSystem.py +7 -7
  12. toil/batchSystems/options.py +32 -83
  13. toil/batchSystems/registry.py +104 -23
  14. toil/batchSystems/singleMachine.py +16 -13
  15. toil/batchSystems/slurm.py +87 -16
  16. toil/batchSystems/torque.py +0 -1
  17. toil/bus.py +44 -8
  18. toil/common.py +544 -753
  19. toil/cwl/__init__.py +28 -32
  20. toil/cwl/cwltoil.py +595 -574
  21. toil/cwl/utils.py +55 -10
  22. toil/exceptions.py +1 -1
  23. toil/fileStores/__init__.py +2 -2
  24. toil/fileStores/abstractFileStore.py +88 -14
  25. toil/fileStores/cachingFileStore.py +610 -549
  26. toil/fileStores/nonCachingFileStore.py +46 -22
  27. toil/job.py +182 -101
  28. toil/jobStores/abstractJobStore.py +161 -95
  29. toil/jobStores/aws/jobStore.py +23 -9
  30. toil/jobStores/aws/utils.py +6 -6
  31. toil/jobStores/fileJobStore.py +116 -18
  32. toil/jobStores/googleJobStore.py +16 -7
  33. toil/jobStores/utils.py +5 -6
  34. toil/leader.py +87 -56
  35. toil/lib/accelerators.py +10 -5
  36. toil/lib/aws/__init__.py +3 -14
  37. toil/lib/aws/ami.py +22 -9
  38. toil/lib/aws/iam.py +21 -13
  39. toil/lib/aws/session.py +2 -16
  40. toil/lib/aws/utils.py +4 -5
  41. toil/lib/compatibility.py +1 -1
  42. toil/lib/conversions.py +26 -3
  43. toil/lib/docker.py +22 -23
  44. toil/lib/ec2.py +10 -6
  45. toil/lib/ec2nodes.py +106 -100
  46. toil/lib/encryption/_nacl.py +2 -1
  47. toil/lib/generatedEC2Lists.py +325 -18
  48. toil/lib/io.py +49 -2
  49. toil/lib/misc.py +1 -1
  50. toil/lib/resources.py +9 -2
  51. toil/lib/threading.py +101 -38
  52. toil/options/common.py +736 -0
  53. toil/options/cwl.py +336 -0
  54. toil/options/wdl.py +37 -0
  55. toil/provisioners/abstractProvisioner.py +9 -4
  56. toil/provisioners/aws/__init__.py +3 -6
  57. toil/provisioners/aws/awsProvisioner.py +6 -0
  58. toil/provisioners/clusterScaler.py +3 -2
  59. toil/provisioners/gceProvisioner.py +2 -2
  60. toil/realtimeLogger.py +2 -1
  61. toil/resource.py +24 -18
  62. toil/server/app.py +2 -3
  63. toil/server/cli/wes_cwl_runner.py +4 -4
  64. toil/server/utils.py +1 -1
  65. toil/server/wes/abstract_backend.py +3 -2
  66. toil/server/wes/amazon_wes_utils.py +5 -4
  67. toil/server/wes/tasks.py +2 -3
  68. toil/server/wes/toil_backend.py +2 -10
  69. toil/server/wsgi_app.py +2 -0
  70. toil/serviceManager.py +12 -10
  71. toil/statsAndLogging.py +41 -9
  72. toil/test/__init__.py +29 -54
  73. toil/test/batchSystems/batchSystemTest.py +11 -111
  74. toil/test/batchSystems/test_slurm.py +24 -8
  75. toil/test/cactus/__init__.py +0 -0
  76. toil/test/cactus/test_cactus_integration.py +58 -0
  77. toil/test/cwl/cwlTest.py +438 -223
  78. toil/test/cwl/glob_dir.cwl +15 -0
  79. toil/test/cwl/preemptible.cwl +21 -0
  80. toil/test/cwl/preemptible_expression.cwl +28 -0
  81. toil/test/cwl/revsort.cwl +1 -1
  82. toil/test/cwl/revsort2.cwl +1 -1
  83. toil/test/docs/scriptsTest.py +2 -3
  84. toil/test/jobStores/jobStoreTest.py +34 -21
  85. toil/test/lib/aws/test_iam.py +4 -14
  86. toil/test/lib/aws/test_utils.py +0 -3
  87. toil/test/lib/dockerTest.py +4 -4
  88. toil/test/lib/test_ec2.py +12 -17
  89. toil/test/mesos/helloWorld.py +4 -5
  90. toil/test/mesos/stress.py +1 -1
  91. toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
  92. toil/test/options/options.py +37 -0
  93. toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
  94. toil/test/provisioners/clusterScalerTest.py +6 -4
  95. toil/test/provisioners/clusterTest.py +23 -11
  96. toil/test/provisioners/gceProvisionerTest.py +0 -6
  97. toil/test/provisioners/restartScript.py +3 -2
  98. toil/test/server/serverTest.py +1 -1
  99. toil/test/sort/restart_sort.py +2 -1
  100. toil/test/sort/sort.py +2 -1
  101. toil/test/sort/sortTest.py +2 -13
  102. toil/test/src/autoDeploymentTest.py +45 -45
  103. toil/test/src/busTest.py +5 -5
  104. toil/test/src/checkpointTest.py +2 -2
  105. toil/test/src/deferredFunctionTest.py +1 -1
  106. toil/test/src/fileStoreTest.py +32 -16
  107. toil/test/src/helloWorldTest.py +1 -1
  108. toil/test/src/importExportFileTest.py +1 -1
  109. toil/test/src/jobDescriptionTest.py +2 -1
  110. toil/test/src/jobServiceTest.py +1 -1
  111. toil/test/src/jobTest.py +18 -18
  112. toil/test/src/miscTests.py +5 -3
  113. toil/test/src/promisedRequirementTest.py +3 -3
  114. toil/test/src/realtimeLoggerTest.py +1 -1
  115. toil/test/src/resourceTest.py +2 -2
  116. toil/test/src/restartDAGTest.py +1 -1
  117. toil/test/src/resumabilityTest.py +36 -2
  118. toil/test/src/retainTempDirTest.py +1 -1
  119. toil/test/src/systemTest.py +2 -2
  120. toil/test/src/toilContextManagerTest.py +2 -2
  121. toil/test/src/userDefinedJobArgTypeTest.py +1 -1
  122. toil/test/utils/toilDebugTest.py +98 -32
  123. toil/test/utils/toilKillTest.py +2 -2
  124. toil/test/utils/utilsTest.py +23 -3
  125. toil/test/wdl/wdltoil_test.py +223 -45
  126. toil/toilState.py +7 -6
  127. toil/utils/toilClean.py +1 -1
  128. toil/utils/toilConfig.py +36 -0
  129. toil/utils/toilDebugFile.py +60 -33
  130. toil/utils/toilDebugJob.py +39 -12
  131. toil/utils/toilDestroyCluster.py +1 -1
  132. toil/utils/toilKill.py +1 -1
  133. toil/utils/toilLaunchCluster.py +13 -2
  134. toil/utils/toilMain.py +3 -2
  135. toil/utils/toilRsyncCluster.py +1 -1
  136. toil/utils/toilSshCluster.py +1 -1
  137. toil/utils/toilStats.py +445 -305
  138. toil/utils/toilStatus.py +2 -5
  139. toil/version.py +10 -10
  140. toil/wdl/utils.py +2 -122
  141. toil/wdl/wdltoil.py +1257 -492
  142. toil/worker.py +55 -46
  143. toil-6.1.0.dist-info/METADATA +124 -0
  144. toil-6.1.0.dist-info/RECORD +241 -0
  145. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
  146. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
  147. toil/batchSystems/parasol.py +0 -379
  148. toil/batchSystems/tes.py +0 -459
  149. toil/test/batchSystems/parasolTestSupport.py +0 -117
  150. toil/test/wdl/builtinTest.py +0 -506
  151. toil/test/wdl/toilwdlTest.py +0 -522
  152. toil/wdl/toilwdl.py +0 -141
  153. toil/wdl/versions/dev.py +0 -107
  154. toil/wdl/versions/draft2.py +0 -980
  155. toil/wdl/versions/v1.py +0 -794
  156. toil/wdl/wdl_analysis.py +0 -116
  157. toil/wdl/wdl_functions.py +0 -997
  158. toil/wdl/wdl_synthesis.py +0 -1011
  159. toil/wdl/wdl_types.py +0 -243
  160. toil-5.12.0.dist-info/METADATA +0 -118
  161. toil-5.12.0.dist-info/RECORD +0 -244
  162. /toil/{wdl/versions → options}/__init__.py +0 -0
  163. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
  164. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py CHANGED
@@ -17,43 +17,43 @@
17
17
 
18
18
  # For an overview of how this all works, see discussion in
19
19
  # docs/architecture.rst
20
- import argparse
21
20
  import base64
22
21
  import copy
23
22
  import datetime
24
23
  import errno
25
24
  import functools
25
+ import glob
26
+ import io
26
27
  import json
27
28
  import logging
28
29
  import os
30
+ import pprint
29
31
  import shutil
30
32
  import socket
31
33
  import stat
32
34
  import sys
33
- import tempfile
34
35
  import textwrap
35
- import urllib
36
36
  import uuid
37
+ from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
37
38
  from threading import Thread
38
- from typing import (
39
- IO,
40
- Any,
41
- Callable,
42
- Dict,
43
- Iterator,
44
- List,
45
- Mapping,
46
- MutableMapping,
47
- MutableSequence,
48
- Optional,
49
- TextIO,
50
- Tuple,
51
- Type,
52
- TypeVar,
53
- Union,
54
- cast,
55
- )
56
- from urllib.parse import ParseResult, quote, unquote, urlparse, urlsplit
39
+ from typing import (IO,
40
+ Any,
41
+ Callable,
42
+ Dict,
43
+ Iterator,
44
+ List,
45
+ Mapping,
46
+ MutableMapping,
47
+ MutableSequence,
48
+ Optional,
49
+ Sequence,
50
+ TextIO,
51
+ Tuple,
52
+ Type,
53
+ TypeVar,
54
+ Union,
55
+ cast)
56
+ from urllib.parse import quote, unquote, urlparse, urlsplit
57
57
 
58
58
  import cwl_utils.errors
59
59
  import cwl_utils.expression
@@ -66,35 +66,30 @@ import cwltool.load_tool
66
66
  import cwltool.main
67
67
  import cwltool.resolver
68
68
  import schema_salad.ref_resolver
69
+ from configargparse import SUPPRESS, ArgParser, Namespace
69
70
  from cwltool.loghandler import _logger as cwllogger
70
71
  from cwltool.loghandler import defaultStreamHandler
71
72
  from cwltool.mpi import MpiConfig
72
73
  from cwltool.mutation import MutationManager
73
74
  from cwltool.pathmapper import MapperEnt, PathMapper
74
- from cwltool.process import (
75
- Process,
76
- add_sizes,
77
- compute_checksums,
78
- fill_in_defaults,
79
- shortname,
80
- )
75
+ from cwltool.process import (Process,
76
+ add_sizes,
77
+ compute_checksums,
78
+ fill_in_defaults,
79
+ shortname)
81
80
  from cwltool.secrets import SecretStore
82
- from cwltool.software_requirements import (
83
- DependenciesConfiguration,
84
- get_container_from_software_requirements,
85
- )
81
+ from cwltool.software_requirements import (DependenciesConfiguration,
82
+ get_container_from_software_requirements)
86
83
  from cwltool.stdfsaccess import StdFsAccess, abspath
87
- from cwltool.utils import (
88
- CWLObjectType,
89
- CWLOutputType,
90
- DirectoryType,
91
- adjustDirObjs,
92
- aslist,
93
- downloadHttpFile,
94
- get_listing,
95
- normalizeFilesDirs,
96
- visit_class,
97
- )
84
+ from cwltool.utils import (CWLObjectType,
85
+ CWLOutputType,
86
+ DirectoryType,
87
+ adjustDirObjs,
88
+ aslist,
89
+ downloadHttpFile,
90
+ get_listing,
91
+ normalizeFilesDirs,
92
+ visit_class)
98
93
  from ruamel.yaml.comments import CommentedMap, CommentedSeq
99
94
  from schema_salad.avro.schema import Names
100
95
  from schema_salad.exceptions import ValidationException
@@ -103,28 +98,31 @@ from schema_salad.sourceline import SourceLine
103
98
  from typing_extensions import Literal
104
99
 
105
100
  from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
106
- from toil.common import Config, Toil, addOptions
107
- from toil.cwl.utils import (
108
- CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
109
- CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
110
- download_structure,
111
- visit_cwl_class_and_reduce,
112
- )
101
+ from toil.common import Toil, addOptions
102
+ from toil.cwl import check_cwltool_version
103
+
104
+ check_cwltool_version()
105
+ from toil.cwl.utils import (CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
106
+ CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
107
+ download_structure,
108
+ get_from_structure,
109
+ visit_cwl_class_and_reduce)
113
110
  from toil.exceptions import FailedJobsException
114
111
  from toil.fileStores import FileID
115
112
  from toil.fileStores.abstractFileStore import AbstractFileStore
116
113
  from toil.job import AcceleratorRequirement, Job, Promise, Promised, unwrap
117
- from toil.jobStores.abstractJobStore import AbstractJobStore, NoSuchFileException
114
+ from toil.jobStores.abstractJobStore import (AbstractJobStore,
115
+ NoSuchFileException)
118
116
  from toil.jobStores.fileJobStore import FileJobStore
119
117
  from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
118
+ from toil.lib.io import mkdtemp
120
119
  from toil.lib.threading import ExceptionalThread
121
120
  from toil.statsAndLogging import DEFAULT_LOGLEVEL
122
- from toil.version import baseVersion
123
121
 
124
122
  logger = logging.getLogger(__name__)
125
123
 
126
124
  # Find the default temporary directory
127
- DEFAULT_TMPDIR = tempfile.gettempdir()
125
+ DEFAULT_TMPDIR = gettempdir()
128
126
  # And compose a CWL-style default prefix inside it.
129
127
  # We used to not put this inside anything and we would drop loads of temp
130
128
  # directories in the current directory and leave them there.
@@ -349,16 +347,24 @@ class ResolveSource:
349
347
 
350
348
  def __repr__(self) -> str:
351
349
  """Allow for debug printing."""
352
- try:
353
- return "ResolveSource(" + repr(self.resolve()) + ")"
354
- except Exception:
355
- return (
356
- f"ResolveSource({self.name}, {self.input}, {self.source_key}, "
357
- f"{self.promise_tuples})"
358
- )
350
+
351
+ parts = [f"source key {self.source_key}"]
352
+
353
+ if "pickValue" in self.input:
354
+ parts.append(f"pick value {self.input['pickValue']} from")
355
+
356
+ if isinstance(self.promise_tuples, list):
357
+ names = [n for n, _ in self.promise_tuples]
358
+ parts.append(f"names {names} in promises")
359
+ else:
360
+ name, _ = self.promise_tuples
361
+ parts.append(f"name {name} in promise")
362
+
363
+ return f"ResolveSource({', '.join(parts)})"
359
364
 
360
365
  def resolve(self) -> Any:
361
366
  """First apply linkMerge then pickValue if either present."""
367
+
362
368
  result: Optional[Any] = None
363
369
  if isinstance(self.promise_tuples, list):
364
370
  result = self.link_merge(
@@ -382,6 +388,7 @@ class ResolveSource:
382
388
 
383
389
  :param values: result of step
384
390
  """
391
+
385
392
  link_merge_type = self.input.get("linkMerge", "merge_nested")
386
393
 
387
394
  if link_merge_type == "merge_nested":
@@ -409,6 +416,7 @@ class ResolveSource:
409
416
  without modification.
410
417
  :return:
411
418
  """
419
+
412
420
  pick_value_type = cast(str, self.input.get("pickValue"))
413
421
 
414
422
  if pick_value_type is None:
@@ -425,6 +433,11 @@ class ResolveSource:
425
433
 
426
434
  if pick_value_type == "first_non_null":
427
435
  if len(result) < 1:
436
+ logger.error(
437
+ "Could not find non-null entry for %s:\n%s",
438
+ self.name,
439
+ pprint.pformat(self.promise_tuples),
440
+ )
428
441
  raise cwl_utils.errors.WorkflowException(
429
442
  "%s: first_non_null operator found no non-null values" % self.name
430
443
  )
@@ -479,6 +492,11 @@ class StepValueFrom:
479
492
  self.req = req
480
493
  self.container_engine = container_engine
481
494
 
495
+ def __repr__(self) -> str:
496
+ """Allow for debug printing."""
497
+
498
+ return f"StepValueFrom({self.expr}, {self.source}, {self.req}, {self.container_engine})"
499
+
482
500
  def eval_prep(
483
501
  self, step_inputs: CWLObjectType, file_store: AbstractFileStore
484
502
  ) -> None:
@@ -551,6 +569,11 @@ class DefaultWithSource:
551
569
  self.default = default
552
570
  self.source = source
553
571
 
572
+ def __repr__(self) -> str:
573
+ """Allow for debug printing."""
574
+
575
+ return f"DefaultWithSource({self.default}, {self.source})"
576
+
554
577
  def resolve(self) -> Any:
555
578
  """
556
579
  Determine the final input value when the time is right.
@@ -573,6 +596,11 @@ class JustAValue:
573
596
  """Store the value."""
574
597
  self.val = val
575
598
 
599
+ def __repr__(self) -> str:
600
+ """Allow for debug printing."""
601
+
602
+ return f"JustAValue({self.val})"
603
+
576
604
  def resolve(self) -> Any:
577
605
  """Return the value."""
578
606
  return self.val
@@ -654,6 +682,8 @@ class ToilPathMapper(PathMapper):
654
682
  streaming on, and returns a file: URI to where the file or
655
683
  directory has been downloaded to. Meant to be a partially-bound
656
684
  version of toil_get_file().
685
+ :param referenced_files: List of CWL File and Directory objects, which can have their locations set as both
686
+ virtualized and absolute local paths
657
687
  """
658
688
  self.get_file = get_file
659
689
  self.stage_listing = stage_listing
@@ -675,28 +705,29 @@ class ToilPathMapper(PathMapper):
675
705
  This is called on each File or Directory CWL object. The Files and
676
706
  Directories all have "location" fields. For the Files, these are from
677
707
  upload_file(), and for the Directories, these are from
678
- upload_directory(), with their children being assigned
679
- locations based on listing the Directories using ToilFsAccess.
708
+ upload_directory() or cwltool internally. With upload_directory(), they and their children will be assigned
709
+ locations based on listing the Directories using ToilFsAccess. With cwltool, locations will be set as absolute
710
+ paths.
680
711
 
681
712
  :param obj: The CWL File or Directory to process
682
713
 
683
714
  :param stagedir: The base path for target paths to be generated under,
684
- except when a File or Directory has an overriding parent directory in
685
- dirname
715
+ except when a File or Directory has an overriding parent directory in
716
+ dirname
686
717
 
687
718
  :param basedir: The directory from which relative paths should be
688
- resolved; used as the base directory for the StdFsAccess that generated
689
- the listing being processed.
719
+ resolved; used as the base directory for the StdFsAccess that generated
720
+ the listing being processed.
690
721
 
691
722
  :param copy: If set, use writable types for Files and Directories.
692
723
 
693
724
  :param staged: Starts as True at the top of the recursion. Set to False
694
- when entering a directory that we can actually download, so we don't
695
- stage files and subdirectories separately from the directory as a
696
- whole. Controls the staged flag on generated mappings, and therefore
697
- whether files and directories are actually placed at their mapped-to
698
- target locations. If stage_listing is True, we will leave this True
699
- throughout and stage everything.
725
+ when entering a directory that we can actually download, so we don't
726
+ stage files and subdirectories separately from the directory as a
727
+ whole. Controls the staged flag on generated mappings, and therefore
728
+ whether files and directories are actually placed at their mapped-to
729
+ target locations. If stage_listing is True, we will leave this True
730
+ throughout and stage everything.
700
731
 
701
732
  Produces one MapperEnt for every unique location for a File or
702
733
  Directory. These MapperEnt objects are instructions to cwltool's
@@ -807,6 +838,14 @@ class ToilPathMapper(PathMapper):
807
838
  # We can't really make the directory. Maybe we are
808
839
  # exporting from the leader and it doesn't matter.
809
840
  resolved = location
841
+ elif location.startswith("/"):
842
+ # Test if path is an absolute local path
843
+ # Does not check if the path is relative
844
+ # While Toil encodes paths into a URL with ToilPathMapper,
845
+ # something called internally in cwltool may return an absolute path
846
+ # ex: if cwltool calls itself internally in command_line_tool.py,
847
+ # it collects outputs with collect_output, and revmap_file will use its own internal pathmapper
848
+ resolved = location
810
849
  else:
811
850
  raise RuntimeError("Unsupported location: " + location)
812
851
 
@@ -883,7 +922,6 @@ class ToilPathMapper(PathMapper):
883
922
  )
884
923
  else:
885
924
  deref = ab
886
-
887
925
  if deref.startswith("file:"):
888
926
  deref = schema_salad.ref_resolver.uri_file_path(deref)
889
927
  if urlsplit(deref).scheme in ["http", "https"]:
@@ -1027,8 +1065,6 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
1027
1065
  class ToilExpressionTool(ToilTool, cwltool.command_line_tool.ExpressionTool):
1028
1066
  """Subclass the cwltool expression tool to provide the custom ToilPathMapper."""
1029
1067
 
1030
- pass
1031
-
1032
1068
 
1033
1069
  def toil_make_tool(
1034
1070
  toolpath_object: CommentedMap,
@@ -1047,10 +1083,7 @@ def toil_make_tool(
1047
1083
  return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)
1048
1084
 
1049
1085
 
1050
- # This should really be Dict[str, Union[str, "DirectoryContents"]], but we
1051
- # can't say that until https://github.com/python/mypy/issues/731 is fixed
1052
- # because it's recursive.
1053
- DirectoryContents = Dict[str, Union[str, Dict[str, Any]]]
1086
+ DirectoryContents = Dict[str, Union[str, "DirectoryContents"]]
1054
1087
 
1055
1088
 
1056
1089
  def check_directory_dict_invariants(contents: DirectoryContents) -> None:
@@ -1080,9 +1113,8 @@ def decode_directory(
1080
1113
  None), and the deduplication key string that uniquely identifies the
1081
1114
  directory.
1082
1115
  """
1083
- assert dir_path.startswith(
1084
- "toildir:"
1085
- ), f"Cannot decode non-directory path: {dir_path}"
1116
+ if not dir_path.startswith("toildir:"):
1117
+ raise RuntimeError(f"Cannot decode non-directory path: {dir_path}")
1086
1118
 
1087
1119
  # We will decode the directory and then look inside it
1088
1120
 
@@ -1203,7 +1235,8 @@ class ToilFsAccess(StdFsAccess):
1203
1235
 
1204
1236
  logger.debug("ToilFsAccess downloading %s to %s", cache_key, temp_dir)
1205
1237
 
1206
- # Save it all into this new temp directory
1238
+ # Save it all into this new temp directory.
1239
+ # Guaranteed to fill it with real files and not symlinks.
1207
1240
  download_structure(self.file_store, {}, {}, contents, temp_dir)
1208
1241
 
1209
1242
  # Make sure we use the same temp directory if we go traversing
@@ -1233,7 +1266,7 @@ class ToilFsAccess(StdFsAccess):
1233
1266
  logger.debug(
1234
1267
  "ToilFsAccess fetching directory %s from a JobStore", path
1235
1268
  )
1236
- dest_dir = tempfile.mkdtemp()
1269
+ dest_dir = mkdtemp()
1237
1270
 
1238
1271
  # Recursively fetch all the files in the directory.
1239
1272
  def download_to(url: str, dest: str) -> None:
@@ -1256,7 +1289,7 @@ class ToilFsAccess(StdFsAccess):
1256
1289
  logger.debug("ToilFsAccess fetching file %s from a JobStore", path)
1257
1290
  # Try to grab it with a jobstore implementation, and save it
1258
1291
  # somewhere arbitrary.
1259
- dest_file = tempfile.NamedTemporaryFile(delete=False)
1292
+ dest_file = NamedTemporaryFile(delete=False)
1260
1293
  AbstractJobStore.read_from_url(path, dest_file)
1261
1294
  dest_file.close()
1262
1295
  self.dir_to_download[path] = dest_file.name
@@ -1271,72 +1304,160 @@ class ToilFsAccess(StdFsAccess):
1271
1304
  return destination
1272
1305
 
1273
1306
  def glob(self, pattern: str) -> List[str]:
1274
- # We know this falls back on _abs
1275
- return super().glob(pattern)
1307
+ parse = urlparse(pattern)
1308
+ if parse.scheme == "file":
1309
+ pattern = os.path.abspath(unquote(parse.path))
1310
+ elif parse.scheme == "":
1311
+ pattern = os.path.abspath(pattern)
1312
+ else:
1313
+ raise RuntimeError(f"Cannot efficiently support globbing on {parse.scheme} URIs")
1314
+
1315
+ # Actually do the glob
1316
+ return [schema_salad.ref_resolver.file_uri(f) for f in glob.glob(pattern)]
1276
1317
 
1277
1318
  def open(self, fn: str, mode: str) -> IO[Any]:
1278
- # TODO: Also implement JobStore-supported URLs through JobStore methods.
1279
- # We know this falls back on _abs
1280
- return super().open(fn, mode)
1319
+ if "w" in mode or "x" in mode or "+" in mode or "a" in mode:
1320
+ raise RuntimeError(f"Mode {mode} for opening {fn} involves writing")
1321
+
1322
+ parse = urlparse(fn)
1323
+ if parse.scheme in ["", "file"]:
1324
+ # Handle local files
1325
+ return open(self._abs(fn), mode)
1326
+ elif parse.scheme == "toildir":
1327
+ contents, subpath, cache_key = decode_directory(fn)
1328
+ if cache_key in self.dir_to_download:
1329
+ # This is already available locally, so fall back on the local copy
1330
+ return open(self._abs(fn), mode)
1331
+ else:
1332
+ # We need to get the URI out of the virtual directory
1333
+ if subpath is None:
1334
+ raise RuntimeError(f"{fn} is a toildir directory")
1335
+ uri = get_from_structure(contents, subpath)
1336
+ if not isinstance(uri, str):
1337
+ raise RuntimeError(f"{fn} does not point to a file")
1338
+ # Recurse on that URI
1339
+ return self.open(uri, mode)
1340
+ elif parse.scheme == "toilfile":
1341
+ if self.file_store is None:
1342
+ raise RuntimeError("URL requires a file store: " + fn)
1343
+ # Streaming access to Toil file store files requires being inside a
1344
+ # context manager, which we can't require. So we need to download
1345
+ # the file.
1346
+ return open(self._abs(fn), mode)
1347
+ else:
1348
+ # This should be supported by a job store.
1349
+ byte_stream = AbstractJobStore.open_url(fn)
1350
+ if 'b' in mode:
1351
+ # Pass stream along in binary
1352
+ return byte_stream
1353
+ else:
1354
+ # Wrap it in a text decoder
1355
+ return io.TextIOWrapper(byte_stream, encoding='utf-8')
1281
1356
 
1282
1357
  def exists(self, path: str) -> bool:
1283
1358
  """Test for file existence."""
1284
- # toil's _abs() throws errors when files are not found and cwltool's _abs() does not
1285
- try:
1286
- # TODO: Also implement JobStore-supported URLs through JobStore methods.
1287
- return os.path.exists(self._abs(path))
1288
- except NoSuchFileException:
1289
- return False
1359
+ parse = urlparse(path)
1360
+ if parse.scheme in ["", "file"]:
1361
+ # Handle local files
1362
+ # toil's _abs() throws errors when files are not found and cwltool's _abs() does not
1363
+ try:
1364
+ return os.path.exists(self._abs(path))
1365
+ except NoSuchFileException:
1366
+ return False
1367
+ elif parse.scheme == "toildir":
1368
+ contents, subpath, cache_key = decode_directory(path)
1369
+ if subpath is None:
1370
+ # The toildir directory itself exists
1371
+ return True
1372
+ uri = get_from_structure(contents, subpath)
1373
+ if uri is None:
1374
+ # It's not in the virtual directory, so it doesn't exist
1375
+ return False
1376
+ if isinstance(uri, dict):
1377
+ # Actually it's a subdirectory, so it exists.
1378
+ return True
1379
+ # We recurse and poll the URI directly to make sure it really exists
1380
+ return self.exists(uri)
1381
+ elif parse.scheme == "toilfile":
1382
+ # TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
1383
+ return True
1384
+ else:
1385
+ # This should be supported by a job store.
1386
+ return AbstractJobStore.url_exists(path)
1290
1387
 
1291
1388
  def size(self, path: str) -> int:
1292
- # This should avoid _abs for things actually in the file store, to
1293
- # prevent multiple downloads as in
1294
- # https://github.com/DataBiosphere/toil/issues/3665
1295
- if path.startswith("toilfile:"):
1296
- if self.file_store is None:
1297
- raise RuntimeError("URL requires a file store: " + path)
1298
- return self.file_store.getGlobalFileSize(
1299
- FileID.unpack(path[len("toilfile:") :])
1300
- )
1301
- elif path.startswith("toildir:"):
1389
+ parse = urlparse(path)
1390
+ if parse.scheme in ["", "file"]:
1391
+ return os.stat(self._abs(path)).st_size
1392
+ elif parse.scheme == "toildir":
1302
1393
  # Decode its contents, the path inside it to the file (if any), and
1303
1394
  # the key to use for caching the directory.
1304
- here, subpath, cache_key = decode_directory(path)
1395
+ contents, subpath, cache_key = decode_directory(path)
1305
1396
 
1306
1397
  # We can't get the size of just a directory.
1307
- assert subpath is not None, f"Attempted to check size of directory {path}"
1308
-
1309
- for part in subpath.split("/"):
1310
- # Follow the path inside the directory contents.
1311
- here = cast(DirectoryContents, here[part])
1398
+ if subpath is None:
1399
+ raise RuntimeError(f"Attempted to check size of directory {path}")
1312
1400
 
1313
- # We ought to end up with a toilfile: URI.
1314
- assert isinstance(here, str), f"Did not find a file at {path}"
1315
- assert here.startswith(
1316
- "toilfile:"
1317
- ), f"Did not find a filestore file at {path}"
1401
+ uri = get_from_structure(contents, subpath)
1318
1402
 
1319
- return self.size(here)
1403
+ # We ought to end up with a URI.
1404
+ if not isinstance(uri, str):
1405
+ raise RuntimeError(f"Did not find a file at {path}")
1406
+ return self.size(uri)
1407
+ elif parse.scheme == "toilfile":
1408
+ if self.file_store is None:
1409
+ raise RuntimeError("URL requires a file store: " + path)
1410
+ return self.file_store.getGlobalFileSize(
1411
+ FileID.unpack(path[len("toilfile:") :])
1412
+ )
1320
1413
  else:
1321
- # TODO: Also implement JobStore-supported URLs through JobStore methods.
1322
- # We know this falls back on _abs
1323
- return super().size(path)
1414
+ # This should be supported by a job store.
1415
+ size = AbstractJobStore.get_size(path)
1416
+ if size is None:
1417
+ # get_size can be unimplemented or unavailable
1418
+ raise RuntimeError(f"Could not get size of {path}")
1419
+ return size
1324
1420
 
1325
1421
  def isfile(self, fn: str) -> bool:
1326
1422
  parse = urlparse(fn)
1327
- if parse.scheme in ["toilfile", "toildir", "file", ""]:
1328
- # We know this falls back on _abs
1329
- return super().isfile(fn)
1423
+ if parse.scheme in ["file", ""]:
1424
+ return os.path.isfile(self._abs(fn))
1425
+ elif parse.scheme == "toilfile":
1426
+ # TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
1427
+ return True
1428
+ elif parse.scheme == "toildir":
1429
+ contents, subpath, cache_key = decode_directory(fn)
1430
+ if subpath is None:
1431
+ # This is the toildir directory itself
1432
+ return False
1433
+ found = get_from_structure(contents, subpath)
1434
+ # If we find a string, that's a file
1435
+ # TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
1436
+ return isinstance(found, str)
1330
1437
  else:
1331
- return not AbstractJobStore.get_is_directory(fn)
1438
+ return self.exists(fn) and not AbstractJobStore.get_is_directory(fn)
1332
1439
 
1333
1440
  def isdir(self, fn: str) -> bool:
1441
+ logger.debug("ToilFsAccess checking type of %s", fn)
1334
1442
  parse = urlparse(fn)
1335
- if parse.scheme in ["toilfile", "toildir", "file", ""]:
1336
- # We know this falls back on _abs
1337
- return super().isdir(fn)
1443
+ if parse.scheme in ["file", ""]:
1444
+ return os.path.isdir(self._abs(fn))
1445
+ elif parse.scheme == "toilfile":
1446
+ return False
1447
+ elif parse.scheme == "toildir":
1448
+ contents, subpath, cache_key = decode_directory(fn)
1449
+ if subpath is None:
1450
+ # This is the toildir directory itself.
1451
+ # TODO: We assume directories can't be deleted.
1452
+ return True
1453
+ found = get_from_structure(contents, subpath)
1454
+ # If we find a dict, that's a directory.
1455
+ # TODO: We assume directories can't be deleted.
1456
+ return isinstance(found, dict)
1338
1457
  else:
1339
- return AbstractJobStore.get_is_directory(fn)
1458
+ status = AbstractJobStore.get_is_directory(fn)
1459
+ logger.debug("AbstractJobStore said: %s", status)
1460
+ return status
1340
1461
 
1341
1462
  def listdir(self, fn: str) -> List[str]:
1342
1463
  # This needs to return full URLs for everything in the directory.
@@ -1344,12 +1465,25 @@ class ToilFsAccess(StdFsAccess):
1344
1465
  logger.debug("ToilFsAccess listing %s", fn)
1345
1466
 
1346
1467
  parse = urlparse(fn)
1347
- if parse.scheme in ["toilfile", "toildir", "file", ""]:
1348
- # Download the file or directory to a local path
1468
+ if parse.scheme in ["file", ""]:
1469
+ # Find the local path
1349
1470
  directory = self._abs(fn)
1350
-
1351
1471
  # Now list it (it is probably a directory)
1352
1472
  return [abspath(quote(entry), fn) for entry in os.listdir(directory)]
1473
+ elif parse.scheme == "toilfile":
1474
+ raise RuntimeError(f"Cannot list a file: {fn}")
1475
+ elif parse.scheme == "toildir":
1476
+ contents, subpath, cache_key = decode_directory(fn)
1477
+ here = contents
1478
+ if subpath is not None:
1479
+ got = get_from_structure(contents, subpath)
1480
+ if got is None:
1481
+ raise RuntimeError(f"Cannot list nonexistent directory: {fn}")
1482
+ if isinstance(got, str):
1483
+ raise RuntimeError(f"Cannot list file or dubdirectory of a file: {fn}")
1484
+ here = got
1485
+ # List all the things in here and make full URIs to them
1486
+ return [os.path.join(fn, k) for k in here.keys()]
1353
1487
  else:
1354
1488
  return [
1355
1489
  os.path.join(fn, entry.rstrip("/"))
@@ -1371,7 +1505,7 @@ def toil_get_file(
1371
1505
  file_store: AbstractFileStore,
1372
1506
  index: Dict[str, str],
1373
1507
  existing: Dict[str, str],
1374
- file_store_id: str,
1508
+ uri: str,
1375
1509
  streamable: bool = False,
1376
1510
  streaming_allowed: bool = True,
1377
1511
  pipe_threads: Optional[List[Tuple[Thread, int]]] = None,
@@ -1388,28 +1522,28 @@ def toil_get_file(
1388
1522
 
1389
1523
  :param index: Maps from downloaded file path back to input Toil URI.
1390
1524
 
1391
- :param existing: Maps from file_store_id URI to downloaded file path.
1525
+ :param existing: Maps from URI to downloaded file path.
1392
1526
 
1393
- :param file_store_id: The URI for the file to download.
1527
+ :param uri: The URI for the file to download.
1394
1528
 
1395
1529
  :param streamable: If the file has the 'streamable' flag set
1396
1530
 
1397
1531
  :param streaming_allowed: If streaming is allowed
1398
1532
 
1399
1533
  :param pipe_threads: List of threads responsible for streaming the data
1400
- and open file descriptors corresponding to those files. Caller is responsible
1401
- to close the file descriptors (to break the pipes) and join the threads
1534
+ and open file descriptors corresponding to those files. Caller is responsible
1535
+ to close the file descriptors (to break the pipes) and join the threads
1402
1536
  """
1403
1537
  pipe_threads_real = pipe_threads or []
1404
1538
  # We can't use urlparse here because we need to handle the '_:' scheme and
1405
1539
  # urlparse sees that as a path and not a URI scheme.
1406
- if file_store_id.startswith("toildir:"):
1540
+ if uri.startswith("toildir:"):
1407
1541
  # This is a file in a directory, or maybe a directory itself.
1408
1542
  # See ToilFsAccess and upload_directory.
1409
1543
  # We will go look for the actual file in the encoded directory
1410
1544
  # structure which will tell us where the toilfile: name for the file is.
1411
1545
 
1412
- parts = file_store_id[len("toildir:") :].split("/")
1546
+ parts = uri[len("toildir:") :].split("/")
1413
1547
  contents = json.loads(
1414
1548
  base64.urlsafe_b64decode(parts[0].encode("utf-8")).decode("utf-8")
1415
1549
  )
@@ -1429,21 +1563,41 @@ def toil_get_file(
1429
1563
  download_structure(file_store, index, existing, contents, dest_path)
1430
1564
  # Return where we put it, but as a file:// URI
1431
1565
  return schema_salad.ref_resolver.file_uri(dest_path)
1432
- elif file_store_id.startswith("toilfile:"):
1433
- # This is a plain file with no context.
1566
+ elif uri.startswith("_:"):
1567
+ # Someone is asking us for an empty temp directory.
1568
+ # We need to check this before the file path case because urlsplit()
1569
+ # will call this a path with no scheme.
1570
+ dest_path = file_store.getLocalTempDir()
1571
+ return schema_salad.ref_resolver.file_uri(dest_path)
1572
+ elif uri.startswith("file:") or urlsplit(uri).scheme == "":
1573
+ # There's a file: scheme or no scheme, and we know this isn't a _: URL.
1574
+
1575
+ # We need to support file: URIs and local paths, because we might be
1576
+ # involved in moving files around on the local disk when uploading
1577
+ # things after a job. We might want to catch cases where a leader
1578
+ # filesystem file URI leaks in here, but we can't, so we just rely on
1579
+ # the rest of the code to be correct.
1580
+ return uri
1581
+ else:
1582
+ # This is a toilfile: uri or other remote URI
1434
1583
  def write_to_pipe(
1435
- file_store: AbstractFileStore, pipe_name: str, file_store_id: FileID
1584
+ file_store: AbstractFileStore, pipe_name: str, uri: str
1436
1585
  ) -> None:
1437
1586
  try:
1438
1587
  with open(pipe_name, "wb") as pipe:
1439
- with file_store.jobStore.read_file_stream(file_store_id) as fi:
1440
- file_store.logAccess(file_store_id)
1441
- chunk_sz = 1024
1442
- while True:
1443
- data = fi.read(chunk_sz)
1444
- if not data:
1445
- break
1446
- pipe.write(data)
1588
+ if uri.startswith("toilfile:"):
1589
+ # Stream from the file store
1590
+ file_store_id = FileID.unpack(uri[len("toilfile:") :])
1591
+ with file_store.readGlobalFileStream(file_store_id) as fi:
1592
+ chunk_sz = 1024
1593
+ while True:
1594
+ data = fi.read(chunk_sz)
1595
+ if not data:
1596
+ break
1597
+ pipe.write(data)
1598
+ else:
1599
+ # Stream from some other URI
1600
+ AbstractJobStore.read_from_url(uri, pipe)
1447
1601
  except OSError as e:
1448
1602
  # The other side of the pipe may have been closed by the
1449
1603
  # reading thread, which is OK.
@@ -1456,7 +1610,7 @@ def toil_get_file(
1456
1610
  and not isinstance(file_store.jobStore, FileJobStore)
1457
1611
  ):
1458
1612
  logger.debug(
1459
- "Streaming file %s", FileID.unpack(file_store_id[len("toilfile:") :])
1613
+ "Streaming file %s", uri
1460
1614
  )
1461
1615
  src_path = file_store.getLocalTempFileName()
1462
1616
  os.mkfifo(src_path)
@@ -1465,42 +1619,39 @@ def toil_get_file(
1465
1619
  args=(
1466
1620
  file_store,
1467
1621
  src_path,
1468
- FileID.unpack(file_store_id[len("toilfile:") :]),
1622
+ uri,
1469
1623
  ),
1470
1624
  )
1471
1625
  th.start()
1472
1626
  pipe_threads_real.append((th, os.open(src_path, os.O_RDONLY)))
1473
1627
  else:
1474
- src_path = file_store.readGlobalFile(
1475
- FileID.unpack(file_store_id[len("toilfile:") :]), symlink=True
1476
- )
1477
-
1478
- # TODO: shouldn't we be using these as a cache?
1479
- index[src_path] = file_store_id
1480
- existing[file_store_id] = src_path
1628
+ # We need to do a real file
1629
+ if uri in existing:
1630
+ # Already did it
1631
+ src_path = existing[uri]
1632
+ else:
1633
+ if uri.startswith("toilfile:"):
1634
+ # Download from the file store
1635
+ file_store_id = FileID.unpack(uri[len("toilfile:") :])
1636
+ src_path = file_store.readGlobalFile(
1637
+ file_store_id, symlink=True
1638
+ )
1639
+ else:
1640
+ # Download from the URI via the job store.
1641
+
1642
+ # Figure out where it goes.
1643
+ src_path = file_store.getLocalTempFileName()
1644
+ # Open that path exclusively to make sure we created it
1645
+ with open(src_path, 'xb') as fh:
1646
+ # Download into the file
1647
+ size, executable = AbstractJobStore.read_from_url(uri, fh)
1648
+ if executable:
1649
+ # Set the execute bit in the file's permissions
1650
+ os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
1651
+
1652
+ index[src_path] = uri
1653
+ existing[uri] = src_path
1481
1654
  return schema_salad.ref_resolver.file_uri(src_path)
1482
- elif file_store_id.startswith("_:"):
1483
- # Someone is asking us for an empty temp directory.
1484
- # We need to check this before the file path case because urlsplit()
1485
- # will call this a path with no scheme.
1486
- dest_path = file_store.getLocalTempDir()
1487
- return schema_salad.ref_resolver.file_uri(dest_path)
1488
- elif file_store_id.startswith("file:") or urlsplit(file_store_id).scheme == "":
1489
- # There's a file: scheme or no scheme, and we know this isn't a _: URL.
1490
-
1491
- # We need to support file: URIs and local paths, because we might be
1492
- # involved in moving files around on the local disk when uploading
1493
- # things after a job. We might want to catch cases where a leader
1494
- # filesystem file URI leaks in here, but we can't, so we just rely on
1495
- # the rest of the code to be correct.
1496
- return file_store_id
1497
- else:
1498
- raise RuntimeError(
1499
- f"Cannot obtain file {file_store_id} while on host "
1500
- f"{socket.gethostname()}; all imports must happen on the "
1501
- f"leader!"
1502
- )
1503
-
1504
1655
 
1505
1656
  def write_file(
1506
1657
  writeFunc: Callable[[str], FileID],
@@ -1557,7 +1708,9 @@ def import_files(
1557
1708
  existing: Dict[str, str],
1558
1709
  cwl_object: Optional[CWLObjectType],
1559
1710
  skip_broken: bool = False,
1711
+ skip_remote: bool = False,
1560
1712
  bypass_file_store: bool = False,
1713
+ log_level: int = logging.DEBUG
1561
1714
  ) -> None:
1562
1715
  """
1563
1716
  Prepare all files and directories.
@@ -1579,28 +1732,41 @@ def import_files(
1579
1732
  Also does some miscellaneous normalization.
1580
1733
 
1581
1734
  :param import_function: The function used to upload a URI and get a
1582
- Toil FileID for it.
1735
+ Toil FileID for it.
1583
1736
 
1584
1737
  :param fs_access: the CWL FS access object we use to access the filesystem
1585
- to find files to import. Needs to support the URI schemes used.
1738
+ to find files to import. Needs to support the URI schemes used.
1586
1739
 
1587
1740
  :param fileindex: Forward map to fill in from file URI to Toil storage
1588
- location, used by write_file to deduplicate writes.
1741
+ location, used by write_file to deduplicate writes.
1589
1742
 
1590
1743
  :param existing: Reverse map to fill in from Toil storage location to file
1591
- URI. Not read from.
1744
+ URI. Not read from.
1592
1745
 
1593
1746
  :param cwl_object: CWL tool (or workflow order) we are importing files for
1594
1747
 
1595
1748
  :param skip_broken: If True, when files can't be imported because they e.g.
1596
- don't exist, leave their locations alone rather than failing with an error.
1749
+ don't exist, leave their locations alone rather than failing with an error.
1750
+
1751
+ :param skip_remote: If True, leave remote URIs in place instead of importing
1752
+ files.
1597
1753
 
1598
1754
  :param bypass_file_store: If True, leave file:// URIs in place instead of
1599
- importing files and directories.
1755
+ importing files and directories.
1756
+
1757
+ :param log_level: Log imported files at the given level.
1600
1758
  """
1601
1759
  tool_id = cwl_object.get("id", str(cwl_object)) if cwl_object else ""
1602
1760
 
1603
1761
  logger.debug("Importing files for %s", tool_id)
1762
+ logger.debug("Importing files in %s", cwl_object)
1763
+
1764
+ def import_and_log(url: str) -> FileID:
1765
+ """
1766
+ Upload a file and log that we are doing so.
1767
+ """
1768
+ logger.log(log_level, "Loading %s...", url)
1769
+ return import_function(url)
1604
1770
 
1605
1771
  # We need to upload all files to the Toil filestore, and encode structure
1606
1772
  # recursively into all Directories' locations. But we cannot safely alter
@@ -1700,7 +1866,7 @@ def import_files(
1700
1866
 
1701
1867
  # Upload the file itself, which will adjust its location.
1702
1868
  upload_file(
1703
- import_function, fileindex, existing, rec, skip_broken=skip_broken
1869
+ import_and_log, fileindex, existing, rec, skip_broken=skip_broken, skip_remote=skip_remote
1704
1870
  )
1705
1871
 
1706
1872
  # Make a record for this file under its name
@@ -1805,11 +1971,16 @@ def upload_file(
1805
1971
  existing: Dict[str, str],
1806
1972
  file_metadata: CWLObjectType,
1807
1973
  skip_broken: bool = False,
1974
+ skip_remote: bool = False
1808
1975
  ) -> None:
1809
1976
  """
1810
- Update a file object so that the location is a reference to the toil file store.
1977
+ Update a file object so that the file will be accessible from another machine.
1811
1978
 
1812
- Write the file object to the file store if necessary.
1979
+ Uploads local files to the Toil file store, and sets their location to a
1980
+ reference to the toil file store.
1981
+
1982
+ Unless skip_remote is set, downloads remote files into the file store and
1983
+ sets their locations to references into the file store as well.
1813
1984
  """
1814
1985
  location = cast(str, file_metadata["location"])
1815
1986
  if (
@@ -1832,7 +2003,10 @@ def upload_file(
1832
2003
  return
1833
2004
  else:
1834
2005
  raise cwl_utils.errors.WorkflowException("File is missing: %s" % location)
1835
- file_metadata["location"] = write_file(uploadfunc, fileindex, existing, location)
2006
+
2007
+ if location.startswith("file://") or not skip_remote:
2008
+ # This is a local file, or we also need to download and re-upload remote files
2009
+ file_metadata["location"] = write_file(uploadfunc, fileindex, existing, location)
1836
2010
 
1837
2011
  logger.debug("Sending file at: %s", file_metadata["location"])
1838
2012
 
@@ -1866,6 +2040,7 @@ class CWLNamedJob(Job):
1866
2040
  memory: Union[int, str, None] = "1GiB",
1867
2041
  disk: Union[int, str, None] = "1MiB",
1868
2042
  accelerators: Optional[List[AcceleratorRequirement]] = None,
2043
+ preemptible: Optional[bool] = None,
1869
2044
  tool_id: Optional[str] = None,
1870
2045
  parent_name: Optional[str] = None,
1871
2046
  subjob_name: Optional[str] = None,
@@ -1910,6 +2085,7 @@ class CWLNamedJob(Job):
1910
2085
  memory=memory,
1911
2086
  disk=disk,
1912
2087
  accelerators=accelerators,
2088
+ preemptible=preemptible,
1913
2089
  unitName=unit_name,
1914
2090
  displayName=display_name,
1915
2091
  local=local,
@@ -1941,12 +2117,15 @@ def toilStageFiles(
1941
2117
  cwljob: Union[CWLObjectType, List[CWLObjectType]],
1942
2118
  outdir: str,
1943
2119
  destBucket: Union[str, None] = None,
2120
+ log_level: int = logging.DEBUG
1944
2121
  ) -> None:
1945
2122
  """
1946
2123
  Copy input files out of the global file store and update location and path.
1947
2124
 
1948
2125
  :param destBucket: If set, export to this base URL instead of to the local
1949
2126
  filesystem.
2127
+
2128
+ :param log_level: Log each file transferred at the given level.
1950
2129
  """
1951
2130
 
1952
2131
  def _collectDirEntries(
@@ -1986,7 +2165,6 @@ def toilStageFiles(
1986
2165
  stage_listing=True,
1987
2166
  )
1988
2167
  for _, p in pm.items():
1989
- logger.debug("Staging output: %s", p)
1990
2168
  if p.staged:
1991
2169
  # We're supposed to copy/expose something.
1992
2170
  # Note that we have to handle writable versions of everything
@@ -2008,7 +2186,7 @@ def toilStageFiles(
2008
2186
  "CreateFile",
2009
2187
  "CreateWritableFile",
2010
2188
  ]: # TODO: CreateFile for buckets is not under testing
2011
- with tempfile.NamedTemporaryFile() as f:
2189
+ with NamedTemporaryFile() as f:
2012
2190
  # Make a file with the right contents
2013
2191
  f.write(file_id_or_contents.encode("utf-8"))
2014
2192
  f.close()
@@ -2027,39 +2205,63 @@ def toilStageFiles(
2027
2205
  # At the end we should get a direct toilfile: URI
2028
2206
  file_id_or_contents = cast(str, here)
2029
2207
 
2208
+ # This might be an e.g. S3 URI now
2209
+ if not file_id_or_contents.startswith("toilfile:"):
2210
+ # We need to import it so we can export it.
2211
+ # TODO: Use direct S3 to S3 copy on exports as well
2212
+ file_id_or_contents = (
2213
+ "toilfile:"
2214
+ + toil.import_file(file_id_or_contents, symlink=False).pack()
2215
+ )
2216
+
2030
2217
  if file_id_or_contents.startswith("toilfile:"):
2031
2218
  # This is something we can export
2032
- destUrl = "/".join(s.strip("/") for s in [destBucket, baseName])
2033
- toil.exportFile(
2219
+ # TODO: Do we need to urlencode the parts before sending them to S3?
2220
+ dest_url = "/".join(s.strip("/") for s in [destBucket, baseName])
2221
+ logger.log(log_level, "Saving %s...", dest_url)
2222
+ toil.export_file(
2034
2223
  FileID.unpack(file_id_or_contents[len("toilfile:") :]),
2035
- destUrl,
2224
+ dest_url,
2036
2225
  )
2037
2226
  # TODO: can a toildir: "file" get here?
2038
2227
  else:
2039
- # We are saving to the filesystem so we only really need exportFile for actual files.
2228
+ # We are saving to the filesystem.
2229
+ dest_url = "file://" + quote(p.target)
2230
+
2231
+ # We only really need export_file for actual files.
2040
2232
  if not os.path.exists(p.target) and p.type in [
2041
2233
  "Directory",
2042
2234
  "WritableDirectory",
2043
2235
  ]:
2044
2236
  os.makedirs(p.target)
2045
- if not os.path.exists(p.target) and p.type in ["File", "WritableFile"]:
2046
- if p.resolved.startswith("toilfile:"):
2047
- # We can actually export this
2048
- os.makedirs(os.path.dirname(p.target), exist_ok=True)
2049
- toil.exportFile(
2050
- FileID.unpack(p.resolved[len("toilfile:") :]),
2051
- "file://" + p.target,
2052
- )
2053
- elif p.resolved.startswith("/"):
2237
+ if p.type in ["File", "WritableFile"]:
2238
+ if p.resolved.startswith("/"):
2054
2239
  # Probably staging and bypassing file store. Just copy.
2240
+ logger.log(log_level, "Saving %s...", dest_url)
2055
2241
  os.makedirs(os.path.dirname(p.target), exist_ok=True)
2056
2242
  shutil.copyfile(p.resolved, p.target)
2057
- # TODO: can a toildir: "file" get here?
2058
- if not os.path.exists(p.target) and p.type in [
2243
+ else:
2244
+ uri = p.resolved
2245
+ if not uri.startswith("toilfile:"):
2246
+ # We need to import so we can export
2247
+ uri = (
2248
+ "toilfile:"
2249
+ + toil.import_file(uri, symlink=False).pack()
2250
+ )
2251
+
2252
+ # Actually export from the file store
2253
+ logger.log(log_level, "Saving %s...", dest_url)
2254
+ os.makedirs(os.path.dirname(p.target), exist_ok=True)
2255
+ toil.export_file(
2256
+ FileID.unpack(uri[len("toilfile:") :]),
2257
+ dest_url,
2258
+ )
2259
+ if p.type in [
2059
2260
  "CreateFile",
2060
2261
  "CreateWritableFile",
2061
2262
  ]:
2062
2263
  # We just need to make a file with particular contents
2264
+ logger.log(log_level, "Saving %s...", dest_url)
2063
2265
  os.makedirs(os.path.dirname(p.target), exist_ok=True)
2064
2266
  with open(p.target, "wb") as n:
2065
2267
  n.write(p.resolved.encode("utf-8"))
@@ -2078,6 +2280,7 @@ def toilStageFiles(
2078
2280
  # Make the location point to the place we put this thing on the
2079
2281
  # local filesystem.
2080
2282
  f["location"] = schema_salad.ref_resolver.file_uri(mapped_location.target)
2283
+ f["path"] = mapped_location.target
2081
2284
 
2082
2285
  if "contents" in f:
2083
2286
  del f["contents"]
@@ -2182,7 +2385,7 @@ class CWLJob(CWLNamedJob):
2182
2385
 
2183
2386
  accelerators: Optional[List[AcceleratorRequirement]] = None
2184
2387
  if req.get("cudaDeviceCount", 0) > 0:
2185
- # There's a CUDARequirement
2388
+ # There's a CUDARequirement, which cwltool processed for us
2186
2389
  # TODO: How is cwltool deciding what value to use between min and max?
2187
2390
  accelerators = [
2188
2391
  {
@@ -2192,14 +2395,62 @@ class CWLJob(CWLNamedJob):
2192
2395
  }
2193
2396
  ]
2194
2397
 
2398
+ # cwltool doesn't handle http://arvados.org/cwl#UsePreemptible as part
2399
+ # of its resource logic so we have to do it manually.
2400
+ #
2401
+ # Note that according to
2402
+ # https://github.com/arvados/arvados/blob/48a0d575e6de34bcda91c489e4aa98df291a8cca/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml#L345
2403
+ # this can only be a literal boolean! cwltool doesn't want to evaluate
2404
+ # expressions in the value for us like it does for CUDARequirement
2405
+ # which has a schema which allows for CWL expressions:
2406
+ # https://github.com/common-workflow-language/cwltool/blob/1573509eea2faa3cd1dc959224e52ff1d796d3eb/cwltool/extensions.yml#L221
2407
+ #
2408
+ # By default we have default preemptibility.
2409
+ preemptible: Optional[bool] = None
2410
+ preemptible_req, _ = tool.get_requirement(
2411
+ "http://arvados.org/cwl#UsePreemptible"
2412
+ )
2413
+ if preemptible_req:
2414
+ if "usePreemptible" not in preemptible_req:
2415
+ # If we have a requirement it has to have the value
2416
+ raise ValidationException(
2417
+ f"Unacceptable syntax for http://arvados.org/cwl#UsePreemptible: "
2418
+ f"expected key usePreemptible but got: {preemptible_req}"
2419
+ )
2420
+ parsed_value = preemptible_req["usePreemptible"]
2421
+ if isinstance(parsed_value, str) and (
2422
+ "$(" in parsed_value or "${" in parsed_value
2423
+ ):
2424
+ # Looks like they tried to use an expression
2425
+ raise ValidationException(
2426
+ f"Unacceptable value for usePreemptible in http://arvados.org/cwl#UsePreemptible: "
2427
+ f"expected true or false but got what appears to be an expression: {repr(parsed_value)}. "
2428
+ f"Note that expressions are not allowed here by Arvados's schema."
2429
+ )
2430
+ if not isinstance(parsed_value, bool):
2431
+ # If we have a value it has to be a bool flag
2432
+ raise ValidationException(
2433
+ f"Unacceptable value for usePreemptible in http://arvados.org/cwl#UsePreemptible: "
2434
+ f"expected true or false but got: {repr(parsed_value)}"
2435
+ )
2436
+ preemptible = parsed_value
2437
+
2438
+ # We always need space for the temporary files for the job
2439
+ total_disk = cast(int, req["tmpdirSize"]) * (2**20)
2440
+ if not getattr(runtime_context, "bypass_file_store", False):
2441
+ # If using the Toil file store, we also need space for the output
2442
+ # files, which may need to be stored locally and copied off the
2443
+ # node.
2444
+ total_disk += cast(int, req["outdirSize"]) * (2**20)
2445
+ # If not using the Toil file store, output files just go directly to
2446
+ # their final homes, so their space doesn't need to be accounted per-job.
2447
+
2195
2448
  super().__init__(
2196
2449
  cores=req["cores"],
2197
2450
  memory=int(req["ram"] * (2**20)),
2198
- disk=int(
2199
- (cast(int, req["tmpdirSize"]) * (2**20))
2200
- + (cast(int, req["outdirSize"]) * (2**20))
2201
- ),
2451
+ disk=int(total_disk),
2202
2452
  accelerators=accelerators,
2453
+ preemptible=preemptible,
2203
2454
  tool_id=self.cwltool.tool["id"],
2204
2455
  parent_name=parent_name,
2205
2456
  local=isinstance(tool, cwltool.command_line_tool.ExpressionTool),
@@ -2265,7 +2516,7 @@ class CWLJob(CWLNamedJob):
2265
2516
  cwllogger.removeHandler(defaultStreamHandler)
2266
2517
  cwllogger.setLevel(logger.getEffectiveLevel())
2267
2518
 
2268
- logger.debug("Loaded order: %s", self.cwljob)
2519
+ logger.debug("Loaded order:\n%s", self.cwljob)
2269
2520
 
2270
2521
  cwljob = resolve_dict_w_promises(self.cwljob, file_store)
2271
2522
 
@@ -2354,6 +2605,13 @@ class CWLJob(CWLNamedJob):
2354
2605
  streaming_allowed=runtime_context.streaming_allowed,
2355
2606
  )
2356
2607
 
2608
+ # Collect standard output and standard error somewhere if they don't go to files.
2609
+ # We need to keep two FDs to these because cwltool will close what we give it.
2610
+ default_stdout = TemporaryFile()
2611
+ runtime_context.default_stdout = os.fdopen(os.dup(default_stdout.fileno()), 'wb')
2612
+ default_stderr = TemporaryFile()
2613
+ runtime_context.default_stderr = os.fdopen(os.dup(default_stderr.fileno()), 'wb')
2614
+
2357
2615
  process_uuid = uuid.uuid4() # noqa F841
2358
2616
  started_at = datetime.datetime.now() # noqa F841
2359
2617
 
@@ -2362,13 +2620,34 @@ class CWLJob(CWLNamedJob):
2362
2620
  logger.debug("Running tool %s with order: %s", self.cwltool, self.cwljob)
2363
2621
 
2364
2622
  runtime_context.name = self.description.unitName
2365
- output, status = ToilSingleJobExecutor().execute(
2366
- process=self.cwltool,
2367
- job_order_object=cwljob,
2368
- runtime_context=runtime_context,
2369
- logger=cwllogger,
2370
- )
2371
- ended_at = datetime.datetime.now() # noqa F841
2623
+
2624
+ status = "did_not_run"
2625
+ try:
2626
+ output, status = ToilSingleJobExecutor().execute(
2627
+ process=self.cwltool,
2628
+ job_order_object=cwljob,
2629
+ runtime_context=runtime_context,
2630
+ logger=cwllogger,
2631
+ )
2632
+ finally:
2633
+ ended_at = datetime.datetime.now() # noqa F841
2634
+
2635
+ # Log any output/error data
2636
+ default_stdout.seek(0, os.SEEK_END)
2637
+ if default_stdout.tell() > 0:
2638
+ default_stdout.seek(0)
2639
+ file_store.log_user_stream(self.description.unitName + '.stdout', default_stdout)
2640
+ if status != "success":
2641
+ default_stdout.seek(0)
2642
+ logger.error("Failed command standard output:\n%s", default_stdout.read().decode("utf-8", errors="replace"))
2643
+ default_stderr.seek(0, os.SEEK_END)
2644
+ if default_stderr.tell():
2645
+ default_stderr.seek(0)
2646
+ file_store.log_user_stream(self.description.unitName + '.stderr', default_stderr)
2647
+ if status != "success":
2648
+ default_stderr.seek(0)
2649
+ logger.error("Failed command standard error:\n%s", default_stderr.read().decode("utf-8", errors="replace"))
2650
+
2372
2651
  if status != "success":
2373
2652
  raise cwl_utils.errors.WorkflowException(status)
2374
2653
 
@@ -2395,6 +2674,8 @@ class CWLJob(CWLNamedJob):
2395
2674
 
2396
2675
  logger.debug("Emitting output: %s", output)
2397
2676
 
2677
+ file_store.log_to_leader(f"CWL step complete: {runtime_context.name}")
2678
+
2398
2679
  # metadata[process_uuid] = {
2399
2680
  # 'started_at': started_at,
2400
2681
  # 'ended_at': ended_at,
@@ -2782,6 +3063,10 @@ class CWLWorkflow(CWLNamedJob):
2782
3063
  if self.conditional.is_false(cwljob):
2783
3064
  return self.conditional.skipped_outputs()
2784
3065
 
3066
+ # Apply default values set in the workflow
3067
+ fs_access = ToilFsAccess(self.runtime_context.basedir, file_store=file_store)
3068
+ fill_in_defaults(self.cwlwf.tool["inputs"], cwljob, fs_access)
3069
+
2785
3070
  # `promises` dict
2786
3071
  # from: each parameter (workflow input or step output)
2787
3072
  # that may be used as a "source" for a step input workflow output
@@ -2844,6 +3129,10 @@ class CWLWorkflow(CWLNamedJob):
2844
3129
  get_container_engine(self.runtime_context),
2845
3130
  )
2846
3131
 
3132
+ logger.debug(
3133
+ "Value will come from %s", jobobj.get(key, None)
3134
+ )
3135
+
2847
3136
  conditional = Conditional(
2848
3137
  expression=step.tool.get("when"),
2849
3138
  outputs=step.tool["out"],
@@ -3042,8 +3331,8 @@ def scan_for_unsupported_requirements(
3042
3331
  :param tool: The CWL tool to check for unsupported requirements.
3043
3332
 
3044
3333
  :param bypass_file_store: True if the Toil file store is not being used to
3045
- transport files between nodes, and raw origin node file:// URIs are exposed
3046
- to tools instead.
3334
+ transport files between nodes, and raw origin node file:// URIs are exposed
3335
+ to tools instead.
3047
3336
 
3048
3337
  """
3049
3338
 
@@ -3080,24 +3369,31 @@ def determine_load_listing(
3080
3369
  DIRECTORY_NAME is any variable name) set to one of the following three
3081
3370
  options:
3082
3371
 
3083
- no_listing: DIRECTORY_NAME.listing will be undefined.
3084
- e.g. inputs.DIRECTORY_NAME.listing == unspecified
3372
+ 1. no_listing: DIRECTORY_NAME.listing will be undefined.
3373
+ e.g.
3374
+
3375
+ inputs.DIRECTORY_NAME.listing == unspecified
3376
+
3377
+ 2. shallow_listing: DIRECTORY_NAME.listing will return a list one level
3378
+ deep of DIRECTORY_NAME's contents.
3379
+ e.g.
3380
+
3381
+ inputs.DIRECTORY_NAME.listing == [items in directory]
3382
+ inputs.DIRECTORY_NAME.listing[0].listing == undefined
3383
+ inputs.DIRECTORY_NAME.listing.length == # of items in directory
3085
3384
 
3086
- shallow_listing: DIRECTORY_NAME.listing will return a list one level
3087
- deep of DIRECTORY_NAME's contents.
3088
- e.g. inputs.DIRECTORY_NAME.listing == [items in directory]
3089
- inputs.DIRECTORY_NAME.listing[0].listing == undefined
3090
- inputs.DIRECTORY_NAME.listing.length == # of items in directory
3385
+ 3. deep_listing: DIRECTORY_NAME.listing will return a list of the entire
3386
+ contents of DIRECTORY_NAME.
3387
+ e.g.
3091
3388
 
3092
- deep_listing: DIRECTORY_NAME.listing will return a list of the entire
3093
- contents of DIRECTORY_NAME.
3094
- e.g. inputs.DIRECTORY_NAME.listing == [items in directory]
3095
- inputs.DIRECTORY_NAME.listing[0].listing == [items
3096
- in subdirectory if it exists and is the first item listed]
3097
- inputs.DIRECTORY_NAME.listing.length == # of items in directory
3389
+ inputs.DIRECTORY_NAME.listing == [items in directory]
3390
+ inputs.DIRECTORY_NAME.listing[0].listing == [items in subdirectory
3391
+ if it exists and is the first item listed]
3392
+ inputs.DIRECTORY_NAME.listing.length == # of items in directory
3098
3393
 
3099
- See: https://www.commonwl.org/v1.1/CommandLineTool.html#LoadListingRequirement
3100
- https://www.commonwl.org/v1.1/CommandLineTool.html#LoadListingEnum
3394
+ See
3395
+ https://www.commonwl.org/v1.1/CommandLineTool.html#LoadListingRequirement
3396
+ and https://www.commonwl.org/v1.1/CommandLineTool.html#LoadListingEnum
3101
3397
 
3102
3398
  DIRECTORY_NAME.listing should be determined first from loadListing.
3103
3399
  If that's not specified, from LoadListingRequirement.
@@ -3209,6 +3505,20 @@ usage_message = "\n\n" + textwrap.dedent(
3209
3505
  ]
3210
3506
  )
3211
3507
 
3508
+ def get_options(args: List[str]) -> Namespace:
3509
+ """
3510
+ Parse given args and properly add non-Toil arguments into the cwljob of the Namespace.
3511
+ :param args: List of args from command line
3512
+ :return: options namespace
3513
+ """
3514
+ parser = ArgParser()
3515
+ addOptions(parser, jobstore_as_flag=True, cwl=True)
3516
+ options: Namespace
3517
+ options, cwl_options = parser.parse_known_args(args)
3518
+ options.cwljob.extend(cwl_options)
3519
+
3520
+ return options
3521
+
3212
3522
 
3213
3523
  def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3214
3524
  """Run the main loop for toil-cwl-runner."""
@@ -3218,334 +3528,20 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3218
3528
  if args is None:
3219
3529
  args = sys.argv[1:]
3220
3530
 
3221
- config = Config()
3222
- config.disableChaining = True
3223
- config.cwl = True
3224
- parser = argparse.ArgumentParser()
3225
- addOptions(parser, config, jobstore_as_flag=True)
3226
- parser.add_argument("cwltool", type=str)
3227
- parser.add_argument("cwljob", nargs=argparse.REMAINDER)
3228
-
3229
- parser.add_argument("--not-strict", action="store_true")
3230
- parser.add_argument(
3231
- "--enable-dev",
3232
- action="store_true",
3233
- help="Enable loading and running development versions of CWL",
3234
- )
3235
- parser.add_argument(
3236
- "--enable-ext",
3237
- action="store_true",
3238
- help="Enable loading and running 'cwltool:' extensions to the CWL standards.",
3239
- default=False,
3240
- )
3241
- parser.add_argument("--quiet", dest="quiet", action="store_true", default=False)
3242
- parser.add_argument("--basedir", type=str) # TODO: Might be hard-coded?
3243
- parser.add_argument("--outdir", type=str, default=os.getcwd())
3244
- parser.add_argument("--version", action="version", version=baseVersion)
3245
- parser.add_argument(
3246
- "--log-dir",
3247
- type=str,
3248
- default="",
3249
- help="Log your tools stdout/stderr to this location outside of container",
3250
- )
3251
- dockergroup = parser.add_mutually_exclusive_group()
3252
- dockergroup.add_argument(
3253
- "--user-space-docker-cmd",
3254
- help="(Linux/OS X only) Specify a user space docker command (like "
3255
- "udocker or dx-docker) that will be used to call 'pull' and 'run'",
3256
- )
3257
- dockergroup.add_argument(
3258
- "--singularity",
3259
- action="store_true",
3260
- default=False,
3261
- help="Use Singularity runtime for running containers. "
3262
- "Requires Singularity v2.6.1+ and Linux with kernel version v3.18+ or "
3263
- "with overlayfs support backported.",
3264
- )
3265
- dockergroup.add_argument(
3266
- "--podman",
3267
- action="store_true",
3268
- default=False,
3269
- help="Use Podman runtime for running containers. ",
3270
- )
3271
- dockergroup.add_argument(
3272
- "--no-container",
3273
- action="store_true",
3274
- help="Do not execute jobs in a "
3275
- "Docker container, even when `DockerRequirement` "
3276
- "is specified under `hints`.",
3277
- )
3278
- dockergroup.add_argument(
3279
- "--leave-container",
3280
- action="store_false",
3281
- default=True,
3282
- help="Do not delete Docker container used by jobs after they exit",
3283
- dest="rm_container",
3284
- )
3285
- extra_dockergroup = parser.add_argument_group()
3286
- extra_dockergroup.add_argument(
3287
- "--custom-net",
3288
- help="Specify docker network name to pass to docker run command",
3289
- )
3290
- cidgroup = parser.add_argument_group(
3291
- "Options for recording the Docker container identifier into a file."
3292
- )
3293
- cidgroup.add_argument(
3294
- # Disabled as containerid is now saved by default
3295
- "--record-container-id",
3296
- action="store_true",
3297
- default=False,
3298
- help=argparse.SUPPRESS,
3299
- dest="record_container_id",
3300
- )
3301
-
3302
- cidgroup.add_argument(
3303
- "--cidfile-dir",
3304
- type=str,
3305
- help="Store the Docker container ID into a file in the specified directory.",
3306
- default=None,
3307
- dest="cidfile_dir",
3308
- )
3309
-
3310
- cidgroup.add_argument(
3311
- "--cidfile-prefix",
3312
- type=str,
3313
- help="Specify a prefix to the container ID filename. "
3314
- "Final file name will be followed by a timestamp. "
3315
- "The default is no prefix.",
3316
- default=None,
3317
- dest="cidfile_prefix",
3318
- )
3319
-
3320
- parser.add_argument(
3321
- "--preserve-environment",
3322
- type=str,
3323
- nargs="+",
3324
- help="Preserve specified environment variables when running"
3325
- " CommandLineTools",
3326
- metavar=("VAR1 VAR2"),
3327
- default=("PATH",),
3328
- dest="preserve_environment",
3329
- )
3330
- parser.add_argument(
3331
- "--preserve-entire-environment",
3332
- action="store_true",
3333
- help="Preserve all environment variable when running CommandLineTools.",
3334
- default=False,
3335
- dest="preserve_entire_environment",
3336
- )
3337
- parser.add_argument(
3338
- "--destBucket",
3339
- type=str,
3340
- help="Specify a cloud bucket endpoint for output files.",
3341
- )
3342
- parser.add_argument("--beta-dependency-resolvers-configuration", default=None)
3343
- parser.add_argument("--beta-dependencies-directory", default=None)
3344
- parser.add_argument("--beta-use-biocontainers", default=None, action="store_true")
3345
- parser.add_argument("--beta-conda-dependencies", default=None, action="store_true")
3346
- parser.add_argument(
3347
- "--tmpdir-prefix",
3348
- type=str,
3349
- help="Path prefix for temporary directories",
3350
- default=DEFAULT_TMPDIR_PREFIX,
3351
- )
3352
- parser.add_argument(
3353
- "--tmp-outdir-prefix",
3354
- type=str,
3355
- help="Path prefix for intermediate output directories",
3356
- default=DEFAULT_TMPDIR_PREFIX,
3357
- )
3358
- parser.add_argument(
3359
- "--force-docker-pull",
3360
- action="store_true",
3361
- default=False,
3362
- dest="force_docker_pull",
3363
- help="Pull latest docker image even if it is locally present",
3364
- )
3365
- parser.add_argument(
3366
- "--no-match-user",
3367
- action="store_true",
3368
- default=False,
3369
- help="Disable passing the current uid to `docker run --user`",
3370
- )
3371
- parser.add_argument(
3372
- "--no-read-only",
3373
- action="store_true",
3374
- default=False,
3375
- help="Do not set root directory in the container as read-only",
3376
- )
3377
- parser.add_argument(
3378
- "--strict-memory-limit",
3379
- action="store_true",
3380
- help="When running with "
3381
- "software containers and the Docker engine, pass either the "
3382
- "calculated memory allocation from ResourceRequirements or the "
3383
- "default of 1 gigabyte to Docker's --memory option.",
3384
- )
3385
- parser.add_argument(
3386
- "--strict-cpu-limit",
3387
- action="store_true",
3388
- help="When running with "
3389
- "software containers and the Docker engine, pass either the "
3390
- "calculated cpu allocation from ResourceRequirements or the "
3391
- "default of 1 core to Docker's --cpu option. "
3392
- "Requires docker version >= v1.13.",
3393
- )
3394
- parser.add_argument(
3395
- "--relax-path-checks",
3396
- action="store_true",
3397
- default=False,
3398
- help="Relax requirements on path names to permit "
3399
- "spaces and hash characters.",
3400
- dest="relax_path_checks",
3401
- )
3402
- parser.add_argument(
3403
- "--default-container",
3404
- help="Specify a default docker container that will be "
3405
- "used if the workflow fails to specify one.",
3406
- )
3407
- parser.add_argument(
3408
- "--disable-validate",
3409
- dest="do_validate",
3410
- action="store_false",
3411
- default=True,
3412
- help=argparse.SUPPRESS,
3413
- )
3414
- parser.add_argument(
3415
- "--fast-parser",
3416
- dest="fast_parser",
3417
- action="store_true",
3418
- default=False,
3419
- help=argparse.SUPPRESS,
3420
- )
3421
- checkgroup = parser.add_mutually_exclusive_group()
3422
- checkgroup.add_argument(
3423
- "--compute-checksum",
3424
- action="store_true",
3425
- default=True,
3426
- help="Compute checksum of contents while collecting outputs",
3427
- dest="compute_checksum",
3428
- )
3429
- checkgroup.add_argument(
3430
- "--no-compute-checksum",
3431
- action="store_false",
3432
- help="Do not compute checksum of contents while collecting outputs",
3433
- dest="compute_checksum",
3434
- )
3435
-
3436
- parser.add_argument(
3437
- "--eval-timeout",
3438
- help="Time to wait for a Javascript expression to evaluate before giving "
3439
- "an error, default 20s.",
3440
- type=float,
3441
- default=20,
3442
- )
3443
- parser.add_argument(
3444
- "--overrides",
3445
- type=str,
3446
- default=None,
3447
- help="Read process requirement overrides from file.",
3448
- )
3449
-
3450
- parser.add_argument(
3451
- "--mpi-config-file",
3452
- type=str,
3453
- default=None,
3454
- help="Platform specific configuration for MPI (parallel "
3455
- "launcher, its flag etc). See the cwltool README "
3456
- "section 'Running MPI-based tools' for details of the format: "
3457
- "https://github.com/common-workflow-language/cwltool#running-mpi-based-tools-that-need-to-be-launched",
3458
- )
3459
- parser.add_argument(
3460
- "--bypass-file-store",
3461
- action="store_true",
3462
- default=False,
3463
- help="Do not use Toil's file store and assume all "
3464
- "paths are accessible in place from all nodes.",
3465
- dest="bypass_file_store",
3466
- )
3467
- parser.add_argument(
3468
- "--disable-streaming",
3469
- action="store_true",
3470
- default=False,
3471
- help="Disable file streaming for files that have 'streamable' flag True",
3472
- dest="disable_streaming",
3473
- )
3474
-
3475
- provgroup = parser.add_argument_group(
3476
- "Options for recording provenance information of the execution"
3477
- )
3478
- provgroup.add_argument(
3479
- "--provenance",
3480
- help="Save provenance to specified folder as a "
3481
- "Research Object that captures and aggregates "
3482
- "workflow execution and data products.",
3483
- type=str,
3484
- )
3485
-
3486
- provgroup.add_argument(
3487
- "--enable-user-provenance",
3488
- default=False,
3489
- action="store_true",
3490
- help="Record user account info as part of provenance.",
3491
- dest="user_provenance",
3492
- )
3493
- provgroup.add_argument(
3494
- "--disable-user-provenance",
3495
- default=False,
3496
- action="store_false",
3497
- help="Do not record user account info in provenance.",
3498
- dest="user_provenance",
3499
- )
3500
- provgroup.add_argument(
3501
- "--enable-host-provenance",
3502
- default=False,
3503
- action="store_true",
3504
- help="Record host info as part of provenance.",
3505
- dest="host_provenance",
3506
- )
3507
- provgroup.add_argument(
3508
- "--disable-host-provenance",
3509
- default=False,
3510
- action="store_false",
3511
- help="Do not record host info in provenance.",
3512
- dest="host_provenance",
3513
- )
3514
- provgroup.add_argument(
3515
- "--orcid",
3516
- help="Record user ORCID identifier as part of "
3517
- "provenance, e.g. https://orcid.org/0000-0002-1825-0097 "
3518
- "or 0000-0002-1825-0097. Alternatively the environment variable "
3519
- "ORCID may be set.",
3520
- dest="orcid",
3521
- default=os.environ.get("ORCID", ""),
3522
- type=str,
3523
- )
3524
- provgroup.add_argument(
3525
- "--full-name",
3526
- help="Record full name of user as part of provenance, "
3527
- "e.g. Josiah Carberry. You may need to use shell quotes to preserve "
3528
- "spaces. Alternatively the environment variable CWL_FULL_NAME may "
3529
- "be set.",
3530
- dest="cwl_full_name",
3531
- default=os.environ.get("CWL_FULL_NAME", ""),
3532
- type=str,
3533
- )
3534
-
3535
- # Parse all the options once.
3536
- options = parser.parse_args(args)
3531
+ options = get_options(args)
3537
3532
 
3538
3533
  # Do cwltool setup
3539
3534
  cwltool.main.setup_schema(args=options, custom_schema_callback=None)
3535
+ tmpdir_prefix = options.tmpdir_prefix = options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
3540
3536
 
3541
3537
  # We need a workdir for the CWL runtime contexts.
3542
- if options.tmpdir_prefix != DEFAULT_TMPDIR_PREFIX:
3538
+ if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX:
3543
3539
  # if tmpdir_prefix is not the default value, move
3544
3540
  # workdir and the default job store under it
3545
- workdir = cwltool.utils.create_tmp_dir(options.tmpdir_prefix)
3541
+ workdir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3546
3542
  else:
3547
3543
  # Use a directory in the default tmpdir
3548
- workdir = tempfile.mkdtemp()
3544
+ workdir = mkdtemp()
3549
3545
  # Make sure workdir doesn't exist so it can be a job store
3550
3546
  os.rmdir(workdir)
3551
3547
 
@@ -3562,13 +3558,13 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3562
3558
  options.do_validate = True
3563
3559
  options.pack = False
3564
3560
  options.print_subgraph = False
3565
- if options.tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.workDir is None:
3561
+ if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.workDir is None:
3566
3562
  # We need to override workDir because by default Toil will pick
3567
3563
  # somewhere under the system temp directory if unset, ignoring
3568
3564
  # --tmpdir-prefix.
3569
3565
  #
3570
3566
  # If set, workDir needs to exist, so we directly use the prefix
3571
- options.workDir = cwltool.utils.create_tmp_dir(options.tmpdir_prefix)
3567
+ options.workDir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3572
3568
 
3573
3569
  if options.batchSystem == "kubernetes":
3574
3570
  # Containers under Kubernetes can only run in Singularity
@@ -3585,8 +3581,10 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3585
3581
 
3586
3582
  logger.debug(f"Final job store {options.jobStore} and workDir {options.workDir}")
3587
3583
 
3588
- outdir = os.path.abspath(options.outdir)
3589
- tmp_outdir_prefix = os.path.abspath(options.tmp_outdir_prefix)
3584
+ outdir = os.path.abspath(options.outdir or os.getcwd())
3585
+ tmp_outdir_prefix = os.path.abspath(
3586
+ options.tmp_outdir_prefix or DEFAULT_TMPDIR_PREFIX
3587
+ )
3590
3588
 
3591
3589
  fileindex: Dict[str, str] = {}
3592
3590
  existing: Dict[str, str] = {}
@@ -3597,13 +3595,13 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3597
3595
  dependencies_configuration = DependenciesConfiguration(options)
3598
3596
  job_script_provider = dependencies_configuration
3599
3597
 
3600
- options.default_container = None
3601
3598
  runtime_context = cwltool.context.RuntimeContext(vars(options))
3602
3599
  runtime_context.toplevel = True # enable discovery of secondaryFiles
3603
3600
  runtime_context.find_default_container = functools.partial(
3604
3601
  find_default_container, options
3605
3602
  )
3606
3603
  runtime_context.workdir = workdir # type: ignore[attr-defined]
3604
+ runtime_context.outdir = outdir
3607
3605
  runtime_context.move_outputs = "leave"
3608
3606
  runtime_context.rm_tmpdir = False
3609
3607
  runtime_context.streaming_allowed = not options.disable_streaming
@@ -3621,12 +3619,16 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3621
3619
  # Otherwise, if it takes a File with loadContents from a URL, we won't
3622
3620
  # be able to load the contents when we need to.
3623
3621
  runtime_context.make_fs_access = ToilFsAccess
3622
+ if options.reference_inputs and options.bypass_file_store:
3623
+ # We can't do both of these at the same time.
3624
+ logger.error("Cannot reference inputs when bypassing the file store")
3625
+ return 1
3624
3626
 
3625
3627
  loading_context = cwltool.main.setup_loadingContext(None, runtime_context, options)
3626
3628
 
3627
3629
  if options.provenance:
3628
3630
  research_obj = cwltool.cwlprov.ro.ResearchObject(
3629
- temp_prefix_ro=options.tmp_outdir_prefix,
3631
+ temp_prefix_ro=tmp_outdir_prefix,
3630
3632
  orcid=options.orcid,
3631
3633
  full_name=options.cwl_full_name,
3632
3634
  fsaccess=runtime_context.make_fs_access(""),
@@ -3701,7 +3703,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3701
3703
  loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
3702
3704
  loading_context, workflowobj, uri
3703
3705
  )
3704
- assert loading_context.loader
3706
+ if not loading_context.loader:
3707
+ raise RuntimeError("cwltool loader is not set.")
3705
3708
  processobj, metadata = loading_context.loader.resolve_ref(uri)
3706
3709
  processobj = cast(Union[CommentedMap, CommentedSeq], processobj)
3707
3710
 
@@ -3748,10 +3751,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3748
3751
  )
3749
3752
  raise
3750
3753
 
3751
- # We make a ToilFSAccess to access URLs with, but it has no
3752
- # FileStore so it can't do toildir: and toilfile:
3753
- fs_access = ToilFsAccess(options.basedir)
3754
- fill_in_defaults(tool.tool["inputs"], initialized_job_order, fs_access)
3754
+ # Leave the defaults un-filled in the top-level order. The tool or
3755
+ # workflow will fill them when it runs
3755
3756
 
3756
3757
  for inp in tool.tool["inputs"]:
3757
3758
  if (
@@ -3809,6 +3810,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3809
3810
 
3810
3811
  # Import all the input files, some of which may be missing optional
3811
3812
  # files.
3813
+ logger.info("Importing input files...")
3814
+ fs_access = ToilFsAccess(options.basedir)
3812
3815
  import_files(
3813
3816
  file_import_function,
3814
3817
  fs_access,
@@ -3816,11 +3819,14 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3816
3819
  existing,
3817
3820
  initialized_job_order,
3818
3821
  skip_broken=True,
3822
+ skip_remote=options.reference_inputs,
3819
3823
  bypass_file_store=options.bypass_file_store,
3824
+ log_level=logging.INFO,
3820
3825
  )
3821
3826
  # Import all the files associated with tools (binaries, etc.).
3822
3827
  # Not sure why you would have an optional secondary file here, but
3823
3828
  # the spec probably needs us to support them.
3829
+ logger.info("Importing tool-associated files...")
3824
3830
  visitSteps(
3825
3831
  tool,
3826
3832
  functools.partial(
@@ -3830,7 +3836,9 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3830
3836
  fileindex,
3831
3837
  existing,
3832
3838
  skip_broken=True,
3839
+ skip_remote=options.reference_inputs,
3833
3840
  bypass_file_store=options.bypass_file_store,
3841
+ log_level=logging.INFO,
3834
3842
  ),
3835
3843
  )
3836
3844
 
@@ -3843,7 +3851,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3843
3851
  # were required.
3844
3852
  rm_unprocessed_secondary_files(param_value)
3845
3853
 
3846
- logger.debug("tool %s", tool)
3854
+ logger.info("Creating root job")
3855
+ logger.debug("Root tool: %s", tool)
3847
3856
  try:
3848
3857
  wf1, _ = makeJob(
3849
3858
  tool=tool,
@@ -3856,6 +3865,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3856
3865
  logging.error(err)
3857
3866
  return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3858
3867
  wf1.cwljob = initialized_job_order
3868
+ logger.info("Starting workflow")
3859
3869
  try:
3860
3870
  outobj = toil.start(wf1)
3861
3871
  except FailedJobsException as err:
@@ -3871,13 +3881,20 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3871
3881
 
3872
3882
  # Now the workflow has completed. We need to make sure the outputs (and
3873
3883
  # inputs) end up where the user wants them to be.
3874
-
3884
+ logger.info("Collecting workflow outputs...")
3875
3885
  outobj = resolve_dict_w_promises(outobj)
3876
3886
 
3877
3887
  # Stage files. Specify destination bucket if specified in CLI
3878
3888
  # options. If destination bucket not passed in,
3879
3889
  # options.destBucket's value will be None.
3880
- toilStageFiles(toil, outobj, outdir, destBucket=options.destBucket)
3890
+ toilStageFiles(
3891
+ toil,
3892
+ outobj,
3893
+ outdir,
3894
+ destBucket=options.destBucket,
3895
+ log_level=logging.INFO
3896
+ )
3897
+ logger.info("Stored workflow outputs")
3881
3898
 
3882
3899
  if runtime_context.research_obj is not None:
3883
3900
  cwltool.cwlprov.writablebagfile.create_job(
@@ -3904,7 +3921,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3904
3921
  ("File",),
3905
3922
  functools.partial(add_sizes, runtime_context.make_fs_access("")),
3906
3923
  )
3907
- assert document_loader
3924
+ if not document_loader:
3925
+ raise RuntimeError("cwltool loader is not set.")
3908
3926
  prov_dependencies = cwltool.main.prov_deps(
3909
3927
  workflowobj, document_loader, uri
3910
3928
  )
@@ -3914,6 +3932,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3914
3932
  )
3915
3933
 
3916
3934
  if not options.destBucket and options.compute_checksum:
3935
+ logger.info("Computing output file checksums...")
3917
3936
  visit_class(
3918
3937
  outobj,
3919
3938
  ("File",),
@@ -3922,12 +3941,14 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3922
3941
 
3923
3942
  visit_class(outobj, ("File",), MutationManager().unset_generation)
3924
3943
  stdout.write(json.dumps(outobj, indent=4, default=str))
3944
+ stdout.write("\n")
3945
+ logger.info("CWL run complete!")
3925
3946
 
3926
3947
  return 0
3927
3948
 
3928
3949
 
3929
3950
  def find_default_container(
3930
- args: argparse.Namespace, builder: cwltool.builder.Builder
3951
+ args: Namespace, builder: cwltool.builder.Builder
3931
3952
  ) -> Optional[str]:
3932
3953
  """Find the default constructor by consulting a Toil.options object."""
3933
3954
  if args.default_container: