toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. toil/__init__.py +18 -13
  2. toil/batchSystems/abstractBatchSystem.py +21 -10
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
  4. toil/batchSystems/awsBatch.py +14 -14
  5. toil/batchSystems/contained_executor.py +3 -3
  6. toil/batchSystems/htcondor.py +0 -1
  7. toil/batchSystems/kubernetes.py +34 -31
  8. toil/batchSystems/local_support.py +3 -1
  9. toil/batchSystems/mesos/batchSystem.py +7 -7
  10. toil/batchSystems/options.py +32 -83
  11. toil/batchSystems/registry.py +104 -23
  12. toil/batchSystems/singleMachine.py +16 -13
  13. toil/batchSystems/slurm.py +3 -3
  14. toil/batchSystems/torque.py +0 -1
  15. toil/bus.py +6 -8
  16. toil/common.py +532 -743
  17. toil/cwl/__init__.py +28 -32
  18. toil/cwl/cwltoil.py +523 -520
  19. toil/cwl/utils.py +55 -10
  20. toil/fileStores/__init__.py +2 -2
  21. toil/fileStores/abstractFileStore.py +36 -11
  22. toil/fileStores/cachingFileStore.py +607 -530
  23. toil/fileStores/nonCachingFileStore.py +43 -10
  24. toil/job.py +140 -75
  25. toil/jobStores/abstractJobStore.py +147 -79
  26. toil/jobStores/aws/jobStore.py +23 -9
  27. toil/jobStores/aws/utils.py +1 -2
  28. toil/jobStores/fileJobStore.py +117 -19
  29. toil/jobStores/googleJobStore.py +16 -7
  30. toil/jobStores/utils.py +5 -6
  31. toil/leader.py +71 -43
  32. toil/lib/accelerators.py +10 -5
  33. toil/lib/aws/__init__.py +3 -14
  34. toil/lib/aws/ami.py +22 -9
  35. toil/lib/aws/iam.py +21 -13
  36. toil/lib/aws/session.py +2 -16
  37. toil/lib/aws/utils.py +4 -5
  38. toil/lib/compatibility.py +1 -1
  39. toil/lib/conversions.py +7 -3
  40. toil/lib/docker.py +22 -23
  41. toil/lib/ec2.py +10 -6
  42. toil/lib/ec2nodes.py +106 -100
  43. toil/lib/encryption/_nacl.py +2 -1
  44. toil/lib/generatedEC2Lists.py +325 -18
  45. toil/lib/io.py +21 -0
  46. toil/lib/misc.py +1 -1
  47. toil/lib/resources.py +1 -1
  48. toil/lib/threading.py +74 -26
  49. toil/options/common.py +738 -0
  50. toil/options/cwl.py +336 -0
  51. toil/options/wdl.py +32 -0
  52. toil/provisioners/abstractProvisioner.py +1 -4
  53. toil/provisioners/aws/__init__.py +3 -6
  54. toil/provisioners/aws/awsProvisioner.py +6 -0
  55. toil/provisioners/clusterScaler.py +3 -2
  56. toil/provisioners/gceProvisioner.py +2 -2
  57. toil/realtimeLogger.py +2 -1
  58. toil/resource.py +24 -18
  59. toil/server/app.py +2 -3
  60. toil/server/cli/wes_cwl_runner.py +4 -4
  61. toil/server/utils.py +1 -1
  62. toil/server/wes/abstract_backend.py +3 -2
  63. toil/server/wes/amazon_wes_utils.py +5 -4
  64. toil/server/wes/tasks.py +2 -3
  65. toil/server/wes/toil_backend.py +2 -10
  66. toil/server/wsgi_app.py +2 -0
  67. toil/serviceManager.py +12 -10
  68. toil/statsAndLogging.py +5 -1
  69. toil/test/__init__.py +29 -54
  70. toil/test/batchSystems/batchSystemTest.py +11 -111
  71. toil/test/batchSystems/test_slurm.py +3 -2
  72. toil/test/cwl/cwlTest.py +213 -90
  73. toil/test/cwl/glob_dir.cwl +15 -0
  74. toil/test/cwl/preemptible.cwl +21 -0
  75. toil/test/cwl/preemptible_expression.cwl +28 -0
  76. toil/test/cwl/revsort.cwl +1 -1
  77. toil/test/cwl/revsort2.cwl +1 -1
  78. toil/test/docs/scriptsTest.py +0 -1
  79. toil/test/jobStores/jobStoreTest.py +27 -16
  80. toil/test/lib/aws/test_iam.py +4 -14
  81. toil/test/lib/aws/test_utils.py +0 -3
  82. toil/test/lib/dockerTest.py +4 -4
  83. toil/test/lib/test_ec2.py +11 -16
  84. toil/test/mesos/helloWorld.py +4 -5
  85. toil/test/mesos/stress.py +1 -1
  86. toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
  87. toil/test/provisioners/clusterScalerTest.py +6 -4
  88. toil/test/provisioners/clusterTest.py +14 -3
  89. toil/test/provisioners/gceProvisionerTest.py +0 -6
  90. toil/test/provisioners/restartScript.py +3 -2
  91. toil/test/server/serverTest.py +1 -1
  92. toil/test/sort/restart_sort.py +2 -1
  93. toil/test/sort/sort.py +2 -1
  94. toil/test/sort/sortTest.py +2 -13
  95. toil/test/src/autoDeploymentTest.py +45 -45
  96. toil/test/src/busTest.py +5 -5
  97. toil/test/src/checkpointTest.py +2 -2
  98. toil/test/src/deferredFunctionTest.py +1 -1
  99. toil/test/src/fileStoreTest.py +32 -16
  100. toil/test/src/helloWorldTest.py +1 -1
  101. toil/test/src/importExportFileTest.py +1 -1
  102. toil/test/src/jobDescriptionTest.py +2 -1
  103. toil/test/src/jobServiceTest.py +1 -1
  104. toil/test/src/jobTest.py +18 -18
  105. toil/test/src/miscTests.py +5 -3
  106. toil/test/src/promisedRequirementTest.py +3 -3
  107. toil/test/src/realtimeLoggerTest.py +1 -1
  108. toil/test/src/resourceTest.py +2 -2
  109. toil/test/src/restartDAGTest.py +1 -1
  110. toil/test/src/resumabilityTest.py +36 -2
  111. toil/test/src/retainTempDirTest.py +1 -1
  112. toil/test/src/systemTest.py +2 -2
  113. toil/test/src/toilContextManagerTest.py +2 -2
  114. toil/test/src/userDefinedJobArgTypeTest.py +1 -1
  115. toil/test/utils/toilDebugTest.py +98 -32
  116. toil/test/utils/toilKillTest.py +2 -2
  117. toil/test/utils/utilsTest.py +20 -0
  118. toil/test/wdl/wdltoil_test.py +148 -45
  119. toil/toilState.py +7 -6
  120. toil/utils/toilClean.py +1 -1
  121. toil/utils/toilConfig.py +36 -0
  122. toil/utils/toilDebugFile.py +60 -33
  123. toil/utils/toilDebugJob.py +39 -12
  124. toil/utils/toilDestroyCluster.py +1 -1
  125. toil/utils/toilKill.py +1 -1
  126. toil/utils/toilLaunchCluster.py +13 -2
  127. toil/utils/toilMain.py +3 -2
  128. toil/utils/toilRsyncCluster.py +1 -1
  129. toil/utils/toilSshCluster.py +1 -1
  130. toil/utils/toilStats.py +240 -143
  131. toil/utils/toilStatus.py +1 -4
  132. toil/version.py +11 -11
  133. toil/wdl/utils.py +2 -122
  134. toil/wdl/wdltoil.py +999 -386
  135. toil/worker.py +25 -31
  136. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
  137. toil-6.1.0a1.dist-info/RECORD +237 -0
  138. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
  139. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
  140. toil/batchSystems/parasol.py +0 -379
  141. toil/batchSystems/tes.py +0 -459
  142. toil/test/batchSystems/parasolTestSupport.py +0 -117
  143. toil/test/wdl/builtinTest.py +0 -506
  144. toil/test/wdl/conftest.py +0 -23
  145. toil/test/wdl/toilwdlTest.py +0 -522
  146. toil/wdl/toilwdl.py +0 -141
  147. toil/wdl/versions/dev.py +0 -107
  148. toil/wdl/versions/draft2.py +0 -980
  149. toil/wdl/versions/v1.py +0 -794
  150. toil/wdl/wdl_analysis.py +0 -116
  151. toil/wdl/wdl_functions.py +0 -997
  152. toil/wdl/wdl_synthesis.py +0 -1011
  153. toil/wdl/wdl_types.py +0 -243
  154. toil-5.12.0.dist-info/RECORD +0 -244
  155. /toil/{wdl/versions → options}/__init__.py +0 -0
  156. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
  157. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
toil/cwl/cwltoil.py CHANGED
@@ -17,23 +17,24 @@
17
17
 
18
18
  # For an overview of how this all works, see discussion in
19
19
  # docs/architecture.rst
20
- import argparse
21
20
  import base64
22
21
  import copy
23
22
  import datetime
24
23
  import errno
25
24
  import functools
25
+ import glob
26
+ import io
26
27
  import json
27
28
  import logging
28
29
  import os
30
+ import pprint
29
31
  import shutil
30
32
  import socket
31
33
  import stat
32
34
  import sys
33
- import tempfile
34
35
  import textwrap
35
- import urllib
36
36
  import uuid
37
+ from tempfile import NamedTemporaryFile, gettempdir
37
38
  from threading import Thread
38
39
  from typing import (
39
40
  IO,
@@ -52,8 +53,9 @@ from typing import (
52
53
  TypeVar,
53
54
  Union,
54
55
  cast,
56
+ Sequence,
55
57
  )
56
- from urllib.parse import ParseResult, quote, unquote, urlparse, urlsplit
58
+ from urllib.parse import quote, unquote, urlparse, urlsplit
57
59
 
58
60
  import cwl_utils.errors
59
61
  import cwl_utils.expression
@@ -66,6 +68,7 @@ import cwltool.load_tool
66
68
  import cwltool.main
67
69
  import cwltool.resolver
68
70
  import schema_salad.ref_resolver
71
+ from configargparse import ArgParser, SUPPRESS, Namespace
69
72
  from cwltool.loghandler import _logger as cwllogger
70
73
  from cwltool.loghandler import defaultStreamHandler
71
74
  from cwltool.mpi import MpiConfig
@@ -103,11 +106,15 @@ from schema_salad.sourceline import SourceLine
103
106
  from typing_extensions import Literal
104
107
 
105
108
  from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
106
- from toil.common import Config, Toil, addOptions
109
+ from toil.common import Toil, addOptions
110
+ from toil.cwl import check_cwltool_version
111
+
112
+ check_cwltool_version()
107
113
  from toil.cwl.utils import (
108
114
  CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION,
109
115
  CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE,
110
116
  download_structure,
117
+ get_from_structure,
111
118
  visit_cwl_class_and_reduce,
112
119
  )
113
120
  from toil.exceptions import FailedJobsException
@@ -117,14 +124,14 @@ from toil.job import AcceleratorRequirement, Job, Promise, Promised, unwrap
117
124
  from toil.jobStores.abstractJobStore import AbstractJobStore, NoSuchFileException
118
125
  from toil.jobStores.fileJobStore import FileJobStore
119
126
  from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
127
+ from toil.lib.io import mkdtemp
120
128
  from toil.lib.threading import ExceptionalThread
121
129
  from toil.statsAndLogging import DEFAULT_LOGLEVEL
122
- from toil.version import baseVersion
123
130
 
124
131
  logger = logging.getLogger(__name__)
125
132
 
126
133
  # Find the default temporary directory
127
- DEFAULT_TMPDIR = tempfile.gettempdir()
134
+ DEFAULT_TMPDIR = gettempdir()
128
135
  # And compose a CWL-style default prefix inside it.
129
136
  # We used to not put this inside anything and we would drop loads of temp
130
137
  # directories in the current directory and leave them there.
@@ -349,16 +356,24 @@ class ResolveSource:
349
356
 
350
357
  def __repr__(self) -> str:
351
358
  """Allow for debug printing."""
352
- try:
353
- return "ResolveSource(" + repr(self.resolve()) + ")"
354
- except Exception:
355
- return (
356
- f"ResolveSource({self.name}, {self.input}, {self.source_key}, "
357
- f"{self.promise_tuples})"
358
- )
359
+
360
+ parts = [f"source key {self.source_key}"]
361
+
362
+ if "pickValue" in self.input:
363
+ parts.append(f"pick value {self.input['pickValue']} from")
364
+
365
+ if isinstance(self.promise_tuples, list):
366
+ names = [n for n, _ in self.promise_tuples]
367
+ parts.append(f"names {names} in promises")
368
+ else:
369
+ name, _ = self.promise_tuples
370
+ parts.append(f"name {name} in promise")
371
+
372
+ return f"ResolveSource({', '.join(parts)})"
359
373
 
360
374
  def resolve(self) -> Any:
361
375
  """First apply linkMerge then pickValue if either present."""
376
+
362
377
  result: Optional[Any] = None
363
378
  if isinstance(self.promise_tuples, list):
364
379
  result = self.link_merge(
@@ -382,6 +397,7 @@ class ResolveSource:
382
397
 
383
398
  :param values: result of step
384
399
  """
400
+
385
401
  link_merge_type = self.input.get("linkMerge", "merge_nested")
386
402
 
387
403
  if link_merge_type == "merge_nested":
@@ -409,6 +425,7 @@ class ResolveSource:
409
425
  without modification.
410
426
  :return:
411
427
  """
428
+
412
429
  pick_value_type = cast(str, self.input.get("pickValue"))
413
430
 
414
431
  if pick_value_type is None:
@@ -425,6 +442,11 @@ class ResolveSource:
425
442
 
426
443
  if pick_value_type == "first_non_null":
427
444
  if len(result) < 1:
445
+ logger.error(
446
+ "Could not find non-null entry for %s:\n%s",
447
+ self.name,
448
+ pprint.pformat(self.promise_tuples),
449
+ )
428
450
  raise cwl_utils.errors.WorkflowException(
429
451
  "%s: first_non_null operator found no non-null values" % self.name
430
452
  )
@@ -479,6 +501,11 @@ class StepValueFrom:
479
501
  self.req = req
480
502
  self.container_engine = container_engine
481
503
 
504
+ def __repr__(self) -> str:
505
+ """Allow for debug printing."""
506
+
507
+ return f"StepValueFrom({self.expr}, {self.source}, {self.req}, {self.container_engine})"
508
+
482
509
  def eval_prep(
483
510
  self, step_inputs: CWLObjectType, file_store: AbstractFileStore
484
511
  ) -> None:
@@ -551,6 +578,11 @@ class DefaultWithSource:
551
578
  self.default = default
552
579
  self.source = source
553
580
 
581
+ def __repr__(self) -> str:
582
+ """Allow for debug printing."""
583
+
584
+ return f"DefaultWithSource({self.default}, {self.source})"
585
+
554
586
  def resolve(self) -> Any:
555
587
  """
556
588
  Determine the final input value when the time is right.
@@ -573,6 +605,11 @@ class JustAValue:
573
605
  """Store the value."""
574
606
  self.val = val
575
607
 
608
+ def __repr__(self) -> str:
609
+ """Allow for debug printing."""
610
+
611
+ return f"JustAValue({self.val})"
612
+
576
613
  def resolve(self) -> Any:
577
614
  """Return the value."""
578
615
  return self.val
@@ -654,6 +691,8 @@ class ToilPathMapper(PathMapper):
654
691
  streaming on, and returns a file: URI to where the file or
655
692
  directory has been downloaded to. Meant to be a partially-bound
656
693
  version of toil_get_file().
694
+ :param referenced_files: List of CWL File and Directory objects, which can have their locations set as both
695
+ virtualized and absolute local paths
657
696
  """
658
697
  self.get_file = get_file
659
698
  self.stage_listing = stage_listing
@@ -675,28 +714,29 @@ class ToilPathMapper(PathMapper):
675
714
  This is called on each File or Directory CWL object. The Files and
676
715
  Directories all have "location" fields. For the Files, these are from
677
716
  upload_file(), and for the Directories, these are from
678
- upload_directory(), with their children being assigned
679
- locations based on listing the Directories using ToilFsAccess.
717
+ upload_directory() or cwltool internally. With upload_directory(), they and their children will be assigned
718
+ locations based on listing the Directories using ToilFsAccess. With cwltool, locations will be set as absolute
719
+ paths.
680
720
 
681
721
  :param obj: The CWL File or Directory to process
682
722
 
683
723
  :param stagedir: The base path for target paths to be generated under,
684
- except when a File or Directory has an overriding parent directory in
685
- dirname
724
+ except when a File or Directory has an overriding parent directory in
725
+ dirname
686
726
 
687
727
  :param basedir: The directory from which relative paths should be
688
- resolved; used as the base directory for the StdFsAccess that generated
689
- the listing being processed.
728
+ resolved; used as the base directory for the StdFsAccess that generated
729
+ the listing being processed.
690
730
 
691
731
  :param copy: If set, use writable types for Files and Directories.
692
732
 
693
733
  :param staged: Starts as True at the top of the recursion. Set to False
694
- when entering a directory that we can actually download, so we don't
695
- stage files and subdirectories separately from the directory as a
696
- whole. Controls the staged flag on generated mappings, and therefore
697
- whether files and directories are actually placed at their mapped-to
698
- target locations. If stage_listing is True, we will leave this True
699
- throughout and stage everything.
734
+ when entering a directory that we can actually download, so we don't
735
+ stage files and subdirectories separately from the directory as a
736
+ whole. Controls the staged flag on generated mappings, and therefore
737
+ whether files and directories are actually placed at their mapped-to
738
+ target locations. If stage_listing is True, we will leave this True
739
+ throughout and stage everything.
700
740
 
701
741
  Produces one MapperEnt for every unique location for a File or
702
742
  Directory. These MapperEnt objects are instructions to cwltool's
@@ -807,6 +847,14 @@ class ToilPathMapper(PathMapper):
807
847
  # We can't really make the directory. Maybe we are
808
848
  # exporting from the leader and it doesn't matter.
809
849
  resolved = location
850
+ elif location.startswith("/"):
851
+ # Test if path is an absolute local path
852
+ # Does not check if the path is relative
853
+ # While Toil encodes paths into a URL with ToilPathMapper,
854
+ # something called internally in cwltool may return an absolute path
855
+ # ex: if cwltool calls itself internally in command_line_tool.py,
856
+ # it collects outputs with collect_output, and revmap_file will use its own internal pathmapper
857
+ resolved = location
810
858
  else:
811
859
  raise RuntimeError("Unsupported location: " + location)
812
860
 
@@ -883,7 +931,6 @@ class ToilPathMapper(PathMapper):
883
931
  )
884
932
  else:
885
933
  deref = ab
886
-
887
934
  if deref.startswith("file:"):
888
935
  deref = schema_salad.ref_resolver.uri_file_path(deref)
889
936
  if urlsplit(deref).scheme in ["http", "https"]:
@@ -1027,8 +1074,6 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
1027
1074
  class ToilExpressionTool(ToilTool, cwltool.command_line_tool.ExpressionTool):
1028
1075
  """Subclass the cwltool expression tool to provide the custom ToilPathMapper."""
1029
1076
 
1030
- pass
1031
-
1032
1077
 
1033
1078
  def toil_make_tool(
1034
1079
  toolpath_object: CommentedMap,
@@ -1047,10 +1092,7 @@ def toil_make_tool(
1047
1092
  return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)
1048
1093
 
1049
1094
 
1050
- # This should really be Dict[str, Union[str, "DirectoryContents"]], but we
1051
- # can't say that until https://github.com/python/mypy/issues/731 is fixed
1052
- # because it's recursive.
1053
- DirectoryContents = Dict[str, Union[str, Dict[str, Any]]]
1095
+ DirectoryContents = Dict[str, Union[str, "DirectoryContents"]]
1054
1096
 
1055
1097
 
1056
1098
  def check_directory_dict_invariants(contents: DirectoryContents) -> None:
@@ -1080,9 +1122,8 @@ def decode_directory(
1080
1122
  None), and the deduplication key string that uniquely identifies the
1081
1123
  directory.
1082
1124
  """
1083
- assert dir_path.startswith(
1084
- "toildir:"
1085
- ), f"Cannot decode non-directory path: {dir_path}"
1125
+ if not dir_path.startswith("toildir:"):
1126
+ raise RuntimeError(f"Cannot decode non-directory path: {dir_path}")
1086
1127
 
1087
1128
  # We will decode the directory and then look inside it
1088
1129
 
@@ -1203,7 +1244,8 @@ class ToilFsAccess(StdFsAccess):
1203
1244
 
1204
1245
  logger.debug("ToilFsAccess downloading %s to %s", cache_key, temp_dir)
1205
1246
 
1206
- # Save it all into this new temp directory
1247
+ # Save it all into this new temp directory.
1248
+ # Guaranteed to fill it with real files and not symlinks.
1207
1249
  download_structure(self.file_store, {}, {}, contents, temp_dir)
1208
1250
 
1209
1251
  # Make sure we use the same temp directory if we go traversing
@@ -1233,7 +1275,7 @@ class ToilFsAccess(StdFsAccess):
1233
1275
  logger.debug(
1234
1276
  "ToilFsAccess fetching directory %s from a JobStore", path
1235
1277
  )
1236
- dest_dir = tempfile.mkdtemp()
1278
+ dest_dir = mkdtemp()
1237
1279
 
1238
1280
  # Recursively fetch all the files in the directory.
1239
1281
  def download_to(url: str, dest: str) -> None:
@@ -1256,7 +1298,7 @@ class ToilFsAccess(StdFsAccess):
1256
1298
  logger.debug("ToilFsAccess fetching file %s from a JobStore", path)
1257
1299
  # Try to grab it with a jobstore implementation, and save it
1258
1300
  # somewhere arbitrary.
1259
- dest_file = tempfile.NamedTemporaryFile(delete=False)
1301
+ dest_file = NamedTemporaryFile(delete=False)
1260
1302
  AbstractJobStore.read_from_url(path, dest_file)
1261
1303
  dest_file.close()
1262
1304
  self.dir_to_download[path] = dest_file.name
@@ -1271,72 +1313,160 @@ class ToilFsAccess(StdFsAccess):
1271
1313
  return destination
1272
1314
 
1273
1315
  def glob(self, pattern: str) -> List[str]:
1274
- # We know this falls back on _abs
1275
- return super().glob(pattern)
1316
+ parse = urlparse(pattern)
1317
+ if parse.scheme == "file":
1318
+ pattern = os.path.abspath(unquote(parse.path))
1319
+ elif parse.scheme == "":
1320
+ pattern = os.path.abspath(pattern)
1321
+ else:
1322
+ raise RuntimeError(f"Cannot efficiently support globbing on {parse.scheme} URIs")
1323
+
1324
+ # Actually do the glob
1325
+ return [schema_salad.ref_resolver.file_uri(f) for f in glob.glob(pattern)]
1276
1326
 
1277
1327
  def open(self, fn: str, mode: str) -> IO[Any]:
1278
- # TODO: Also implement JobStore-supported URLs through JobStore methods.
1279
- # We know this falls back on _abs
1280
- return super().open(fn, mode)
1328
+ if "w" in mode or "x" in mode or "+" in mode or "a" in mode:
1329
+ raise RuntimeError(f"Mode {mode} for opening {fn} involves writing")
1330
+
1331
+ parse = urlparse(fn)
1332
+ if parse.scheme in ["", "file"]:
1333
+ # Handle local files
1334
+ return open(self._abs(fn), mode)
1335
+ elif parse.scheme == "toildir":
1336
+ contents, subpath, cache_key = decode_directory(fn)
1337
+ if cache_key in self.dir_to_download:
1338
+ # This is already available locally, so fall back on the local copy
1339
+ return open(self._abs(fn), mode)
1340
+ else:
1341
+ # We need to get the URI out of the virtual directory
1342
+ if subpath is None:
1343
+ raise RuntimeError(f"{fn} is a toildir directory")
1344
+ uri = get_from_structure(contents, subpath)
1345
+ if not isinstance(uri, str):
1346
+ raise RuntimeError(f"{fn} does not point to a file")
1347
+ # Recurse on that URI
1348
+ return self.open(uri, mode)
1349
+ elif parse.scheme == "toilfile":
1350
+ if self.file_store is None:
1351
+ raise RuntimeError("URL requires a file store: " + fn)
1352
+ # Streaming access to Toil file store files requires being inside a
1353
+ # context manager, which we can't require. So we need to download
1354
+ # the file.
1355
+ return open(self._abs(fn), mode)
1356
+ else:
1357
+ # This should be supported by a job store.
1358
+ byte_stream = AbstractJobStore.open_url(fn)
1359
+ if 'b' in mode:
1360
+ # Pass stream along in binary
1361
+ return byte_stream
1362
+ else:
1363
+ # Wrap it in a text decoder
1364
+ return io.TextIOWrapper(byte_stream, encoding='utf-8')
1281
1365
 
1282
1366
  def exists(self, path: str) -> bool:
1283
1367
  """Test for file existence."""
1284
- # toil's _abs() throws errors when files are not found and cwltool's _abs() does not
1285
- try:
1286
- # TODO: Also implement JobStore-supported URLs through JobStore methods.
1287
- return os.path.exists(self._abs(path))
1288
- except NoSuchFileException:
1289
- return False
1368
+ parse = urlparse(path)
1369
+ if parse.scheme in ["", "file"]:
1370
+ # Handle local files
1371
+ # toil's _abs() throws errors when files are not found and cwltool's _abs() does not
1372
+ try:
1373
+ return os.path.exists(self._abs(path))
1374
+ except NoSuchFileException:
1375
+ return False
1376
+ elif parse.scheme == "toildir":
1377
+ contents, subpath, cache_key = decode_directory(path)
1378
+ if subpath is None:
1379
+ # The toildir directory itself exists
1380
+ return True
1381
+ uri = get_from_structure(contents, subpath)
1382
+ if uri is None:
1383
+ # It's not in the virtual directory, so it doesn't exist
1384
+ return False
1385
+ if isinstance(uri, dict):
1386
+ # Actually it's a subdirectory, so it exists.
1387
+ return True
1388
+ # We recurse and poll the URI directly to make sure it really exists
1389
+ return self.exists(uri)
1390
+ elif parse.scheme == "toilfile":
1391
+ # TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
1392
+ return True
1393
+ else:
1394
+ # This should be supported by a job store.
1395
+ return AbstractJobStore.url_exists(path)
1290
1396
 
1291
1397
  def size(self, path: str) -> int:
1292
- # This should avoid _abs for things actually in the file store, to
1293
- # prevent multiple downloads as in
1294
- # https://github.com/DataBiosphere/toil/issues/3665
1295
- if path.startswith("toilfile:"):
1296
- if self.file_store is None:
1297
- raise RuntimeError("URL requires a file store: " + path)
1298
- return self.file_store.getGlobalFileSize(
1299
- FileID.unpack(path[len("toilfile:") :])
1300
- )
1301
- elif path.startswith("toildir:"):
1398
+ parse = urlparse(path)
1399
+ if parse.scheme in ["", "file"]:
1400
+ return os.stat(self._abs(path)).st_size
1401
+ elif parse.scheme == "toildir":
1302
1402
  # Decode its contents, the path inside it to the file (if any), and
1303
1403
  # the key to use for caching the directory.
1304
- here, subpath, cache_key = decode_directory(path)
1404
+ contents, subpath, cache_key = decode_directory(path)
1305
1405
 
1306
1406
  # We can't get the size of just a directory.
1307
- assert subpath is not None, f"Attempted to check size of directory {path}"
1308
-
1309
- for part in subpath.split("/"):
1310
- # Follow the path inside the directory contents.
1311
- here = cast(DirectoryContents, here[part])
1407
+ if subpath is None:
1408
+ raise RuntimeError(f"Attempted to check size of directory {path}")
1312
1409
 
1313
- # We ought to end up with a toilfile: URI.
1314
- assert isinstance(here, str), f"Did not find a file at {path}"
1315
- assert here.startswith(
1316
- "toilfile:"
1317
- ), f"Did not find a filestore file at {path}"
1410
+ uri = get_from_structure(contents, subpath)
1318
1411
 
1319
- return self.size(here)
1412
+ # We ought to end up with a URI.
1413
+ if not isinstance(uri, str):
1414
+ raise RuntimeError(f"Did not find a file at {path}")
1415
+ return self.size(uri)
1416
+ elif parse.scheme == "toilfile":
1417
+ if self.file_store is None:
1418
+ raise RuntimeError("URL requires a file store: " + path)
1419
+ return self.file_store.getGlobalFileSize(
1420
+ FileID.unpack(path[len("toilfile:") :])
1421
+ )
1320
1422
  else:
1321
- # TODO: Also implement JobStore-supported URLs through JobStore methods.
1322
- # We know this falls back on _abs
1323
- return super().size(path)
1423
+ # This should be supported by a job store.
1424
+ size = AbstractJobStore.get_size(path)
1425
+ if size is None:
1426
+ # get_size can be unimplemented or unavailable
1427
+ raise RuntimeError(f"Could not get size of {path}")
1428
+ return size
1324
1429
 
1325
1430
  def isfile(self, fn: str) -> bool:
1326
1431
  parse = urlparse(fn)
1327
- if parse.scheme in ["toilfile", "toildir", "file", ""]:
1328
- # We know this falls back on _abs
1329
- return super().isfile(fn)
1432
+ if parse.scheme in ["file", ""]:
1433
+ return os.path.isfile(self._abs(fn))
1434
+ elif parse.scheme == "toilfile":
1435
+ # TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
1436
+ return True
1437
+ elif parse.scheme == "toildir":
1438
+ contents, subpath, cache_key = decode_directory(fn)
1439
+ if subpath is None:
1440
+ # This is the toildir directory itself
1441
+ return False
1442
+ found = get_from_structure(contents, subpath)
1443
+ # If we find a string, that's a file
1444
+ # TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
1445
+ return isinstance(found, str)
1330
1446
  else:
1331
- return not AbstractJobStore.get_is_directory(fn)
1447
+ return self.exists(fn) and not AbstractJobStore.get_is_directory(fn)
1332
1448
 
1333
1449
  def isdir(self, fn: str) -> bool:
1450
+ logger.debug("ToilFsAccess checking type of %s", fn)
1334
1451
  parse = urlparse(fn)
1335
- if parse.scheme in ["toilfile", "toildir", "file", ""]:
1336
- # We know this falls back on _abs
1337
- return super().isdir(fn)
1452
+ if parse.scheme in ["file", ""]:
1453
+ return os.path.isdir(self._abs(fn))
1454
+ elif parse.scheme == "toilfile":
1455
+ return False
1456
+ elif parse.scheme == "toildir":
1457
+ contents, subpath, cache_key = decode_directory(fn)
1458
+ if subpath is None:
1459
+ # This is the toildir directory itself.
1460
+ # TODO: We assume directories can't be deleted.
1461
+ return True
1462
+ found = get_from_structure(contents, subpath)
1463
+ # If we find a dict, that's a directory.
1464
+ # TODO: We assume directories can't be deleted.
1465
+ return isinstance(found, dict)
1338
1466
  else:
1339
- return AbstractJobStore.get_is_directory(fn)
1467
+ status = AbstractJobStore.get_is_directory(fn)
1468
+ logger.debug("AbstractJobStore said: %s", status)
1469
+ return status
1340
1470
 
1341
1471
  def listdir(self, fn: str) -> List[str]:
1342
1472
  # This needs to return full URLs for everything in the directory.
@@ -1344,12 +1474,25 @@ class ToilFsAccess(StdFsAccess):
1344
1474
  logger.debug("ToilFsAccess listing %s", fn)
1345
1475
 
1346
1476
  parse = urlparse(fn)
1347
- if parse.scheme in ["toilfile", "toildir", "file", ""]:
1348
- # Download the file or directory to a local path
1477
+ if parse.scheme in ["file", ""]:
1478
+ # Find the local path
1349
1479
  directory = self._abs(fn)
1350
-
1351
1480
  # Now list it (it is probably a directory)
1352
1481
  return [abspath(quote(entry), fn) for entry in os.listdir(directory)]
1482
+ elif parse.scheme == "toilfile":
1483
+ raise RuntimeError(f"Cannot list a file: {fn}")
1484
+ elif parse.scheme == "toildir":
1485
+ contents, subpath, cache_key = decode_directory(fn)
1486
+ here = contents
1487
+ if subpath is not None:
1488
+ got = get_from_structure(contents, subpath)
1489
+ if got is None:
1490
+ raise RuntimeError(f"Cannot list nonexistent directory: {fn}")
1491
+ if isinstance(got, str):
1492
+ raise RuntimeError(f"Cannot list file or subdirectory of a file: {fn}")
1493
+ here = got
1494
+ # List all the things in here and make full URIs to them
1495
+ return [os.path.join(fn, k) for k in here.keys()]
1353
1496
  else:
1354
1497
  return [
1355
1498
  os.path.join(fn, entry.rstrip("/"))
@@ -1371,7 +1514,7 @@ def toil_get_file(
1371
1514
  file_store: AbstractFileStore,
1372
1515
  index: Dict[str, str],
1373
1516
  existing: Dict[str, str],
1374
- file_store_id: str,
1517
+ uri: str,
1375
1518
  streamable: bool = False,
1376
1519
  streaming_allowed: bool = True,
1377
1520
  pipe_threads: Optional[List[Tuple[Thread, int]]] = None,
@@ -1388,28 +1531,28 @@ def toil_get_file(
1388
1531
 
1389
1532
  :param index: Maps from downloaded file path back to input Toil URI.
1390
1533
 
1391
- :param existing: Maps from file_store_id URI to downloaded file path.
1534
+ :param existing: Maps from URI to downloaded file path.
1392
1535
 
1393
- :param file_store_id: The URI for the file to download.
1536
+ :param uri: The URI for the file to download.
1394
1537
 
1395
1538
  :param streamable: If the file has the 'streamable' flag set
1396
1539
 
1397
1540
  :param streaming_allowed: If streaming is allowed
1398
1541
 
1399
1542
  :param pipe_threads: List of threads responsible for streaming the data
1400
- and open file descriptors corresponding to those files. Caller is responsible
1401
- to close the file descriptors (to break the pipes) and join the threads
1543
+ and open file descriptors corresponding to those files. Caller is responsible
1544
+ to close the file descriptors (to break the pipes) and join the threads
1402
1545
  """
1403
1546
  pipe_threads_real = pipe_threads or []
1404
1547
  # We can't use urlparse here because we need to handle the '_:' scheme and
1405
1548
  # urlparse sees that as a path and not a URI scheme.
1406
- if file_store_id.startswith("toildir:"):
1549
+ if uri.startswith("toildir:"):
1407
1550
  # This is a file in a directory, or maybe a directory itself.
1408
1551
  # See ToilFsAccess and upload_directory.
1409
1552
  # We will go look for the actual file in the encoded directory
1410
1553
  # structure which will tell us where the toilfile: name for the file is.
1411
1554
 
1412
- parts = file_store_id[len("toildir:") :].split("/")
1555
+ parts = uri[len("toildir:") :].split("/")
1413
1556
  contents = json.loads(
1414
1557
  base64.urlsafe_b64decode(parts[0].encode("utf-8")).decode("utf-8")
1415
1558
  )
@@ -1429,21 +1572,41 @@ def toil_get_file(
1429
1572
  download_structure(file_store, index, existing, contents, dest_path)
1430
1573
  # Return where we put it, but as a file:// URI
1431
1574
  return schema_salad.ref_resolver.file_uri(dest_path)
1432
- elif file_store_id.startswith("toilfile:"):
1433
- # This is a plain file with no context.
1575
+ elif uri.startswith("_:"):
1576
+ # Someone is asking us for an empty temp directory.
1577
+ # We need to check this before the file path case because urlsplit()
1578
+ # will call this a path with no scheme.
1579
+ dest_path = file_store.getLocalTempDir()
1580
+ return schema_salad.ref_resolver.file_uri(dest_path)
1581
+ elif uri.startswith("file:") or urlsplit(uri).scheme == "":
1582
+ # There's a file: scheme or no scheme, and we know this isn't a _: URL.
1583
+
1584
+ # We need to support file: URIs and local paths, because we might be
1585
+ # involved in moving files around on the local disk when uploading
1586
+ # things after a job. We might want to catch cases where a leader
1587
+ # filesystem file URI leaks in here, but we can't, so we just rely on
1588
+ # the rest of the code to be correct.
1589
+ return uri
1590
+ else:
1591
+ # This is a toilfile: uri or other remote URI
1434
1592
  def write_to_pipe(
1435
- file_store: AbstractFileStore, pipe_name: str, file_store_id: FileID
1593
+ file_store: AbstractFileStore, pipe_name: str, uri: str
1436
1594
  ) -> None:
1437
1595
  try:
1438
1596
  with open(pipe_name, "wb") as pipe:
1439
- with file_store.jobStore.read_file_stream(file_store_id) as fi:
1440
- file_store.logAccess(file_store_id)
1441
- chunk_sz = 1024
1442
- while True:
1443
- data = fi.read(chunk_sz)
1444
- if not data:
1445
- break
1446
- pipe.write(data)
1597
+ if uri.startswith("toilfile:"):
1598
+ # Stream from the file store
1599
+ file_store_id = FileID.unpack(uri[len("toilfile:") :])
1600
+ with file_store.readGlobalFileStream(file_store_id) as fi:
1601
+ chunk_sz = 1024
1602
+ while True:
1603
+ data = fi.read(chunk_sz)
1604
+ if not data:
1605
+ break
1606
+ pipe.write(data)
1607
+ else:
1608
+ # Stream from some other URI
1609
+ AbstractJobStore.read_from_url(uri, pipe)
1447
1610
  except OSError as e:
1448
1611
  # The other side of the pipe may have been closed by the
1449
1612
  # reading thread, which is OK.
@@ -1456,7 +1619,7 @@ def toil_get_file(
1456
1619
  and not isinstance(file_store.jobStore, FileJobStore)
1457
1620
  ):
1458
1621
  logger.debug(
1459
- "Streaming file %s", FileID.unpack(file_store_id[len("toilfile:") :])
1622
+ "Streaming file %s", uri
1460
1623
  )
1461
1624
  src_path = file_store.getLocalTempFileName()
1462
1625
  os.mkfifo(src_path)
@@ -1465,42 +1628,39 @@ def toil_get_file(
1465
1628
  args=(
1466
1629
  file_store,
1467
1630
  src_path,
1468
- FileID.unpack(file_store_id[len("toilfile:") :]),
1631
+ uri,
1469
1632
  ),
1470
1633
  )
1471
1634
  th.start()
1472
1635
  pipe_threads_real.append((th, os.open(src_path, os.O_RDONLY)))
1473
1636
  else:
1474
- src_path = file_store.readGlobalFile(
1475
- FileID.unpack(file_store_id[len("toilfile:") :]), symlink=True
1476
- )
1477
-
1478
- # TODO: shouldn't we be using these as a cache?
1479
- index[src_path] = file_store_id
1480
- existing[file_store_id] = src_path
1637
+ # We need to do a real file
1638
+ if uri in existing:
1639
+ # Already did it
1640
+ src_path = existing[uri]
1641
+ else:
1642
+ if uri.startswith("toilfile:"):
1643
+ # Download from the file store
1644
+ file_store_id = FileID.unpack(uri[len("toilfile:") :])
1645
+ src_path = file_store.readGlobalFile(
1646
+ file_store_id, symlink=True
1647
+ )
1648
+ else:
1649
+ # Download from the URI via the job store.
1650
+
1651
+ # Figure out where it goes.
1652
+ src_path = file_store.getLocalTempFileName()
1653
+ # Open that path exclusively to make sure we created it
1654
+ with open(src_path, 'xb') as fh:
1655
+ # Download into the file
1656
+ size, executable = AbstractJobStore.read_from_url(uri, fh)
1657
+ if executable:
1658
+ # Set the execute bit in the file's permissions
1659
+ os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
1660
+
1661
+ index[src_path] = uri
1662
+ existing[uri] = src_path
1481
1663
  return schema_salad.ref_resolver.file_uri(src_path)
1482
- elif file_store_id.startswith("_:"):
1483
- # Someone is asking us for an empty temp directory.
1484
- # We need to check this before the file path case because urlsplit()
1485
- # will call this a path with no scheme.
1486
- dest_path = file_store.getLocalTempDir()
1487
- return schema_salad.ref_resolver.file_uri(dest_path)
1488
- elif file_store_id.startswith("file:") or urlsplit(file_store_id).scheme == "":
1489
- # There's a file: scheme or no scheme, and we know this isn't a _: URL.
1490
-
1491
- # We need to support file: URIs and local paths, because we might be
1492
- # involved in moving files around on the local disk when uploading
1493
- # things after a job. We might want to catch cases where a leader
1494
- # filesystem file URI leaks in here, but we can't, so we just rely on
1495
- # the rest of the code to be correct.
1496
- return file_store_id
1497
- else:
1498
- raise RuntimeError(
1499
- f"Cannot obtain file {file_store_id} while on host "
1500
- f"{socket.gethostname()}; all imports must happen on the "
1501
- f"leader!"
1502
- )
1503
-
1504
1664
 
1505
1665
  def write_file(
1506
1666
  writeFunc: Callable[[str], FileID],
@@ -1557,7 +1717,9 @@ def import_files(
1557
1717
  existing: Dict[str, str],
1558
1718
  cwl_object: Optional[CWLObjectType],
1559
1719
  skip_broken: bool = False,
1720
+ skip_remote: bool = False,
1560
1721
  bypass_file_store: bool = False,
1722
+ log_level: int = logging.DEBUG
1561
1723
  ) -> None:
1562
1724
  """
1563
1725
  Prepare all files and directories.
@@ -1579,28 +1741,41 @@ def import_files(
1579
1741
  Also does some miscelaneous normalization.
1580
1742
 
1581
1743
  :param import_function: The function used to upload a URI and get a
1582
- Toil FileID for it.
1744
+ Toil FileID for it.
1583
1745
 
1584
1746
  :param fs_access: the CWL FS access object we use to access the filesystem
1585
- to find files to import. Needs to support the URI schemes used.
1747
+ to find files to import. Needs to support the URI schemes used.
1586
1748
 
1587
1749
  :param fileindex: Forward map to fill in from file URI to Toil storage
1588
- location, used by write_file to deduplicate writes.
1750
+ location, used by write_file to deduplicate writes.
1589
1751
 
1590
1752
  :param existing: Reverse map to fill in from Toil storage location to file
1591
- URI. Not read from.
1753
+ URI. Not read from.
1592
1754
 
1593
1755
  :param cwl_object: CWL tool (or workflow order) we are importing files for
1594
1756
 
1595
1757
  :param skip_broken: If True, when files can't be imported because they e.g.
1596
- don't exist, leave their locations alone rather than failing with an error.
1758
+ don't exist, leave their locations alone rather than failing with an error.
1759
+
1760
+ :param skp_remote: If True, leave remote URIs in place instead of importing
1761
+ files.
1597
1762
 
1598
1763
  :param bypass_file_store: If True, leave file:// URIs in place instead of
1599
- importing files and directories.
1764
+ importing files and directories.
1765
+
1766
+ :param log_level: Log imported files at the given level.
1600
1767
  """
1601
1768
  tool_id = cwl_object.get("id", str(cwl_object)) if cwl_object else ""
1602
1769
 
1603
1770
  logger.debug("Importing files for %s", tool_id)
1771
+ logger.debug("Importing files in %s", cwl_object)
1772
+
1773
+ def import_and_log(url: str) -> FileID:
1774
+ """
1775
+ Upload a file and log that we are doing so.
1776
+ """
1777
+ logger.log(log_level, "Loading %s...", url)
1778
+ return import_function(url)
1604
1779
 
1605
1780
  # We need to upload all files to the Toil filestore, and encode structure
1606
1781
  # recursively into all Directories' locations. But we cannot safely alter
@@ -1700,7 +1875,7 @@ def import_files(
1700
1875
 
1701
1876
  # Upload the file itself, which will adjust its location.
1702
1877
  upload_file(
1703
- import_function, fileindex, existing, rec, skip_broken=skip_broken
1878
+ import_and_log, fileindex, existing, rec, skip_broken=skip_broken, skip_remote=skip_remote
1704
1879
  )
1705
1880
 
1706
1881
  # Make a record for this file under its name
@@ -1805,11 +1980,16 @@ def upload_file(
1805
1980
  existing: Dict[str, str],
1806
1981
  file_metadata: CWLObjectType,
1807
1982
  skip_broken: bool = False,
1983
+ skip_remote: bool = False
1808
1984
  ) -> None:
1809
1985
  """
1810
- Update a file object so that the location is a reference to the toil file store.
1986
+ Update a file object so that the file will be accessible from another machine.
1811
1987
 
1812
- Write the file object to the file store if necessary.
1988
+ Uploads local files to the Toil file store, and sets their location to a
1989
+ reference to the toil file store.
1990
+
1991
+ Unless skip_remote is set, downloads remote files into the file store and
1992
+ sets their locations to references into the file store as well.
1813
1993
  """
1814
1994
  location = cast(str, file_metadata["location"])
1815
1995
  if (
@@ -1832,7 +2012,10 @@ def upload_file(
1832
2012
  return
1833
2013
  else:
1834
2014
  raise cwl_utils.errors.WorkflowException("File is missing: %s" % location)
1835
- file_metadata["location"] = write_file(uploadfunc, fileindex, existing, location)
2015
+
2016
+ if location.startswith("file://") or not skip_remote:
2017
+ # This is a local file, or we also need to download and re-upload remote files
2018
+ file_metadata["location"] = write_file(uploadfunc, fileindex, existing, location)
1836
2019
 
1837
2020
  logger.debug("Sending file at: %s", file_metadata["location"])
1838
2021
 
@@ -1866,6 +2049,7 @@ class CWLNamedJob(Job):
1866
2049
  memory: Union[int, str, None] = "1GiB",
1867
2050
  disk: Union[int, str, None] = "1MiB",
1868
2051
  accelerators: Optional[List[AcceleratorRequirement]] = None,
2052
+ preemptible: Optional[bool] = None,
1869
2053
  tool_id: Optional[str] = None,
1870
2054
  parent_name: Optional[str] = None,
1871
2055
  subjob_name: Optional[str] = None,
@@ -1910,6 +2094,7 @@ class CWLNamedJob(Job):
1910
2094
  memory=memory,
1911
2095
  disk=disk,
1912
2096
  accelerators=accelerators,
2097
+ preemptible=preemptible,
1913
2098
  unitName=unit_name,
1914
2099
  displayName=display_name,
1915
2100
  local=local,
@@ -1941,12 +2126,15 @@ def toilStageFiles(
1941
2126
  cwljob: Union[CWLObjectType, List[CWLObjectType]],
1942
2127
  outdir: str,
1943
2128
  destBucket: Union[str, None] = None,
2129
+ log_level: int = logging.DEBUG
1944
2130
  ) -> None:
1945
2131
  """
1946
2132
  Copy input files out of the global file store and update location and path.
1947
2133
 
1948
2134
  :param destBucket: If set, export to this base URL instead of to the local
1949
2135
  filesystem.
2136
+
2137
+ :param log_level: Log each file transfered at the given level.
1950
2138
  """
1951
2139
 
1952
2140
  def _collectDirEntries(
@@ -1986,7 +2174,6 @@ def toilStageFiles(
1986
2174
  stage_listing=True,
1987
2175
  )
1988
2176
  for _, p in pm.items():
1989
- logger.debug("Staging output: %s", p)
1990
2177
  if p.staged:
1991
2178
  # We're supposed to copy/expose something.
1992
2179
  # Note that we have to handle writable versions of everything
@@ -2008,7 +2195,7 @@ def toilStageFiles(
2008
2195
  "CreateFile",
2009
2196
  "CreateWritableFile",
2010
2197
  ]: # TODO: CreateFile for buckets is not under testing
2011
- with tempfile.NamedTemporaryFile() as f:
2198
+ with NamedTemporaryFile() as f:
2012
2199
  # Make a file with the right contents
2013
2200
  f.write(file_id_or_contents.encode("utf-8"))
2014
2201
  f.close()
@@ -2027,39 +2214,63 @@ def toilStageFiles(
2027
2214
  # At the end we should get a direct toilfile: URI
2028
2215
  file_id_or_contents = cast(str, here)
2029
2216
 
2217
+ # This might be an e.g. S3 URI now
2218
+ if not file_id_or_contents.startswith("toilfile:"):
2219
+ # We need to import it so we can export it.
2220
+ # TODO: Use direct S3 to S3 copy on exports as well
2221
+ file_id_or_contents = (
2222
+ "toilfile:"
2223
+ + toil.import_file(file_id_or_contents, symlink=False).pack()
2224
+ )
2225
+
2030
2226
  if file_id_or_contents.startswith("toilfile:"):
2031
2227
  # This is something we can export
2032
- destUrl = "/".join(s.strip("/") for s in [destBucket, baseName])
2033
- toil.exportFile(
2228
+ # TODO: Do we need to urlencode the parts before sending them to S3?
2229
+ dest_url = "/".join(s.strip("/") for s in [destBucket, baseName])
2230
+ logger.log(log_level, "Saving %s...", dest_url)
2231
+ toil.export_file(
2034
2232
  FileID.unpack(file_id_or_contents[len("toilfile:") :]),
2035
- destUrl,
2233
+ dest_url,
2036
2234
  )
2037
2235
  # TODO: can a toildir: "file" get here?
2038
2236
  else:
2039
- # We are saving to the filesystem so we only really need exportFile for actual files.
2237
+ # We are saving to the filesystem.
2238
+ dest_url = "file://" + quote(p.target)
2239
+
2240
+ # We only really need export_file for actual files.
2040
2241
  if not os.path.exists(p.target) and p.type in [
2041
2242
  "Directory",
2042
2243
  "WritableDirectory",
2043
2244
  ]:
2044
2245
  os.makedirs(p.target)
2045
- if not os.path.exists(p.target) and p.type in ["File", "WritableFile"]:
2046
- if p.resolved.startswith("toilfile:"):
2047
- # We can actually export this
2048
- os.makedirs(os.path.dirname(p.target), exist_ok=True)
2049
- toil.exportFile(
2050
- FileID.unpack(p.resolved[len("toilfile:") :]),
2051
- "file://" + p.target,
2052
- )
2053
- elif p.resolved.startswith("/"):
2246
+ if p.type in ["File", "WritableFile"]:
2247
+ if p.resolved.startswith("/"):
2054
2248
  # Probably staging and bypassing file store. Just copy.
2249
+ logger.log(log_level, "Saving %s...", dest_url)
2055
2250
  os.makedirs(os.path.dirname(p.target), exist_ok=True)
2056
2251
  shutil.copyfile(p.resolved, p.target)
2057
- # TODO: can a toildir: "file" get here?
2058
- if not os.path.exists(p.target) and p.type in [
2252
+ else:
2253
+ uri = p.resolved
2254
+ if not uri.startswith("toilfile:"):
2255
+ # We need to import so we can export
2256
+ uri = (
2257
+ "toilfile:"
2258
+ + toil.import_file(uri, symlink=False).pack()
2259
+ )
2260
+
2261
+ # Actually export from the file store
2262
+ logger.log(log_level, "Saving %s...", dest_url)
2263
+ os.makedirs(os.path.dirname(p.target), exist_ok=True)
2264
+ toil.export_file(
2265
+ FileID.unpack(uri[len("toilfile:") :]),
2266
+ dest_url,
2267
+ )
2268
+ if p.type in [
2059
2269
  "CreateFile",
2060
2270
  "CreateWritableFile",
2061
2271
  ]:
2062
2272
  # We just need to make a file with particular contents
2273
+ logger.log(log_level, "Saving %s...", dest_url)
2063
2274
  os.makedirs(os.path.dirname(p.target), exist_ok=True)
2064
2275
  with open(p.target, "wb") as n:
2065
2276
  n.write(p.resolved.encode("utf-8"))
@@ -2078,6 +2289,7 @@ def toilStageFiles(
2078
2289
  # Make the location point to the place we put this thing on the
2079
2290
  # local filesystem.
2080
2291
  f["location"] = schema_salad.ref_resolver.file_uri(mapped_location.target)
2292
+ f["path"] = mapped_location.target
2081
2293
 
2082
2294
  if "contents" in f:
2083
2295
  del f["contents"]
@@ -2182,7 +2394,7 @@ class CWLJob(CWLNamedJob):
2182
2394
 
2183
2395
  accelerators: Optional[List[AcceleratorRequirement]] = None
2184
2396
  if req.get("cudaDeviceCount", 0) > 0:
2185
- # There's a CUDARequirement
2397
+ # There's a CUDARequirement, which cwltool processed for us
2186
2398
  # TODO: How is cwltool deciding what value to use between min and max?
2187
2399
  accelerators = [
2188
2400
  {
@@ -2192,14 +2404,62 @@ class CWLJob(CWLNamedJob):
2192
2404
  }
2193
2405
  ]
2194
2406
 
2407
+ # cwltool doesn't handle http://arvados.org/cwl#UsePreemptible as part
2408
+ # of its resource logic so we have to do it manually.
2409
+ #
2410
+ # Note that according to
2411
+ # https://github.com/arvados/arvados/blob/48a0d575e6de34bcda91c489e4aa98df291a8cca/sdk/cwl/arvados_cwl/arv-cwl-schema-v1.1.yml#L345
2412
+ # this can only be a literal boolean! cwltool doesn't want to evaluate
2413
+ # expressions in the value for us like it does for CUDARequirement
2414
+ # which has a schema which allows for CWL expressions:
2415
+ # https://github.com/common-workflow-language/cwltool/blob/1573509eea2faa3cd1dc959224e52ff1d796d3eb/cwltool/extensions.yml#L221
2416
+ #
2417
+ # By default we have default preemptibility.
2418
+ preemptible: Optional[bool] = None
2419
+ preemptible_req, _ = tool.get_requirement(
2420
+ "http://arvados.org/cwl#UsePreemptible"
2421
+ )
2422
+ if preemptible_req:
2423
+ if "usePreemptible" not in preemptible_req:
2424
+ # If we have a requirement it has to have the value
2425
+ raise ValidationException(
2426
+ f"Unacceptable syntax for http://arvados.org/cwl#UsePreemptible: "
2427
+ f"expected key usePreemptible but got: {preemptible_req}"
2428
+ )
2429
+ parsed_value = preemptible_req["usePreemptible"]
2430
+ if isinstance(parsed_value, str) and (
2431
+ "$(" in parsed_value or "${" in parsed_value
2432
+ ):
2433
+ # Looks like they tried to use an expression
2434
+ raise ValidationException(
2435
+ f"Unacceptable value for usePreemptible in http://arvados.org/cwl#UsePreemptible: "
2436
+ f"expected true or false but got what appears to be an expression: {repr(parsed_value)}. "
2437
+ f"Note that expressions are not allowed here by Arvados's schema."
2438
+ )
2439
+ if not isinstance(parsed_value, bool):
2440
+ # If we have a value it has to be a bool flag
2441
+ raise ValidationException(
2442
+ f"Unacceptable value for usePreemptible in http://arvados.org/cwl#UsePreemptible: "
2443
+ f"expected true or false but got: {repr(parsed_value)}"
2444
+ )
2445
+ preemptible = parsed_value
2446
+
2447
+ # We always need space for the temporary files for the job
2448
+ total_disk = cast(int, req["tmpdirSize"]) * (2**20)
2449
+ if not getattr(runtime_context, "bypass_file_store", False):
2450
+ # If using the Toil file store, we also need space for the output
2451
+ # files, which may need to be stored locally and copied off the
2452
+ # node.
2453
+ total_disk += cast(int, req["outdirSize"]) * (2**20)
2454
+ # If not using the Toil file store, output files just go directly to
2455
+ # their final homes their space doesn't need to be accounted per-job.
2456
+
2195
2457
  super().__init__(
2196
2458
  cores=req["cores"],
2197
2459
  memory=int(req["ram"] * (2**20)),
2198
- disk=int(
2199
- (cast(int, req["tmpdirSize"]) * (2**20))
2200
- + (cast(int, req["outdirSize"]) * (2**20))
2201
- ),
2460
+ disk=int(total_disk),
2202
2461
  accelerators=accelerators,
2462
+ preemptible=preemptible,
2203
2463
  tool_id=self.cwltool.tool["id"],
2204
2464
  parent_name=parent_name,
2205
2465
  local=isinstance(tool, cwltool.command_line_tool.ExpressionTool),
@@ -2265,7 +2525,7 @@ class CWLJob(CWLNamedJob):
2265
2525
  cwllogger.removeHandler(defaultStreamHandler)
2266
2526
  cwllogger.setLevel(logger.getEffectiveLevel())
2267
2527
 
2268
- logger.debug("Loaded order: %s", self.cwljob)
2528
+ logger.debug("Loaded order:\n%s", self.cwljob)
2269
2529
 
2270
2530
  cwljob = resolve_dict_w_promises(self.cwljob, file_store)
2271
2531
 
@@ -2395,6 +2655,8 @@ class CWLJob(CWLNamedJob):
2395
2655
 
2396
2656
  logger.debug("Emitting output: %s", output)
2397
2657
 
2658
+ file_store.log_to_leader(f"CWL step complete: {runtime_context.name}")
2659
+
2398
2660
  # metadata[process_uuid] = {
2399
2661
  # 'started_at': started_at,
2400
2662
  # 'ended_at': ended_at,
@@ -2782,6 +3044,10 @@ class CWLWorkflow(CWLNamedJob):
2782
3044
  if self.conditional.is_false(cwljob):
2783
3045
  return self.conditional.skipped_outputs()
2784
3046
 
3047
+ # Apply default values set in the workflow
3048
+ fs_access = ToilFsAccess(self.runtime_context.basedir, file_store=file_store)
3049
+ fill_in_defaults(self.cwlwf.tool["inputs"], cwljob, fs_access)
3050
+
2785
3051
  # `promises` dict
2786
3052
  # from: each parameter (workflow input or step output)
2787
3053
  # that may be used as a "source" for a step input workflow output
@@ -2844,6 +3110,10 @@ class CWLWorkflow(CWLNamedJob):
2844
3110
  get_container_engine(self.runtime_context),
2845
3111
  )
2846
3112
 
3113
+ logger.debug(
3114
+ "Value will come from %s", jobobj.get(key, None)
3115
+ )
3116
+
2847
3117
  conditional = Conditional(
2848
3118
  expression=step.tool.get("when"),
2849
3119
  outputs=step.tool["out"],
@@ -3042,8 +3312,8 @@ def scan_for_unsupported_requirements(
3042
3312
  :param tool: The CWL tool to check for unsupported requirements.
3043
3313
 
3044
3314
  :param bypass_file_store: True if the Toil file store is not being used to
3045
- transport files between nodes, and raw origin node file:// URIs are exposed
3046
- to tools instead.
3315
+ transport files between nodes, and raw origin node file:// URIs are exposed
3316
+ to tools instead.
3047
3317
 
3048
3318
  """
3049
3319
 
@@ -3080,24 +3350,31 @@ def determine_load_listing(
3080
3350
  DIRECTORY_NAME is any variable name) set to one of the following three
3081
3351
  options:
3082
3352
 
3083
- no_listing: DIRECTORY_NAME.listing will be undefined.
3084
- e.g. inputs.DIRECTORY_NAME.listing == unspecified
3353
+ 1. no_listing: DIRECTORY_NAME.listing will be undefined.
3354
+ e.g.
3355
+
3356
+ inputs.DIRECTORY_NAME.listing == unspecified
3085
3357
 
3086
- shallow_listing: DIRECTORY_NAME.listing will return a list one level
3087
- deep of DIRECTORY_NAME's contents.
3088
- e.g. inputs.DIRECTORY_NAME.listing == [items in directory]
3089
- inputs.DIRECTORY_NAME.listing[0].listing == undefined
3090
- inputs.DIRECTORY_NAME.listing.length == # of items in directory
3358
+ 2. shallow_listing: DIRECTORY_NAME.listing will return a list one level
3359
+ deep of DIRECTORY_NAME's contents.
3360
+ e.g.
3091
3361
 
3092
- deep_listing: DIRECTORY_NAME.listing will return a list of the entire
3093
- contents of DIRECTORY_NAME.
3094
- e.g. inputs.DIRECTORY_NAME.listing == [items in directory]
3095
- inputs.DIRECTORY_NAME.listing[0].listing == [items
3096
- in subdirectory if it exists and is the first item listed]
3097
- inputs.DIRECTORY_NAME.listing.length == # of items in directory
3362
+ inputs.DIRECTORY_NAME.listing == [items in directory]
3363
+ inputs.DIRECTORY_NAME.listing[0].listing == undefined
3364
+ inputs.DIRECTORY_NAME.listing.length == # of items in directory
3098
3365
 
3099
- See: https://www.commonwl.org/v1.1/CommandLineTool.html#LoadListingRequirement
3100
- https://www.commonwl.org/v1.1/CommandLineTool.html#LoadListingEnum
3366
+ 3. deep_listing: DIRECTORY_NAME.listing will return a list of the entire
3367
+ contents of DIRECTORY_NAME.
3368
+ e.g.
3369
+
3370
+ inputs.DIRECTORY_NAME.listing == [items in directory]
3371
+ inputs.DIRECTORY_NAME.listing[0].listing == [items in subdirectory
3372
+ if it exists and is the first item listed]
3373
+ inputs.DIRECTORY_NAME.listing.length == # of items in directory
3374
+
3375
+ See
3376
+ https://www.commonwl.org/v1.1/CommandLineTool.html#LoadListingRequirement
3377
+ and https://www.commonwl.org/v1.1/CommandLineTool.html#LoadListingEnum
3101
3378
 
3102
3379
  DIRECTORY_NAME.listing should be determined first from loadListing.
3103
3380
  If that's not specified, from LoadListingRequirement.
@@ -3209,6 +3486,20 @@ usage_message = "\n\n" + textwrap.dedent(
3209
3486
  ]
3210
3487
  )
3211
3488
 
3489
+ def get_options(args: List[str]) -> Namespace:
3490
+ """
3491
+ Parse given args and properly add non-Toil arguments into the cwljob of the Namespace.
3492
+ :param args: List of args from command line
3493
+ :return: options namespace
3494
+ """
3495
+ parser = ArgParser()
3496
+ addOptions(parser, jobstore_as_flag=True, cwl=True)
3497
+ options: Namespace
3498
+ options, cwl_options = parser.parse_known_args(args)
3499
+ options.cwljob.extend(cwl_options)
3500
+
3501
+ return options
3502
+
3212
3503
 
3213
3504
  def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3214
3505
  """Run the main loop for toil-cwl-runner."""
@@ -3218,334 +3509,20 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3218
3509
  if args is None:
3219
3510
  args = sys.argv[1:]
3220
3511
 
3221
- config = Config()
3222
- config.disableChaining = True
3223
- config.cwl = True
3224
- parser = argparse.ArgumentParser()
3225
- addOptions(parser, config, jobstore_as_flag=True)
3226
- parser.add_argument("cwltool", type=str)
3227
- parser.add_argument("cwljob", nargs=argparse.REMAINDER)
3228
-
3229
- parser.add_argument("--not-strict", action="store_true")
3230
- parser.add_argument(
3231
- "--enable-dev",
3232
- action="store_true",
3233
- help="Enable loading and running development versions of CWL",
3234
- )
3235
- parser.add_argument(
3236
- "--enable-ext",
3237
- action="store_true",
3238
- help="Enable loading and running 'cwltool:' extensions to the CWL standards.",
3239
- default=False,
3240
- )
3241
- parser.add_argument("--quiet", dest="quiet", action="store_true", default=False)
3242
- parser.add_argument("--basedir", type=str) # TODO: Might be hard-coded?
3243
- parser.add_argument("--outdir", type=str, default=os.getcwd())
3244
- parser.add_argument("--version", action="version", version=baseVersion)
3245
- parser.add_argument(
3246
- "--log-dir",
3247
- type=str,
3248
- default="",
3249
- help="Log your tools stdout/stderr to this location outside of container",
3250
- )
3251
- dockergroup = parser.add_mutually_exclusive_group()
3252
- dockergroup.add_argument(
3253
- "--user-space-docker-cmd",
3254
- help="(Linux/OS X only) Specify a user space docker command (like "
3255
- "udocker or dx-docker) that will be used to call 'pull' and 'run'",
3256
- )
3257
- dockergroup.add_argument(
3258
- "--singularity",
3259
- action="store_true",
3260
- default=False,
3261
- help="Use Singularity runtime for running containers. "
3262
- "Requires Singularity v2.6.1+ and Linux with kernel version v3.18+ or "
3263
- "with overlayfs support backported.",
3264
- )
3265
- dockergroup.add_argument(
3266
- "--podman",
3267
- action="store_true",
3268
- default=False,
3269
- help="Use Podman runtime for running containers. ",
3270
- )
3271
- dockergroup.add_argument(
3272
- "--no-container",
3273
- action="store_true",
3274
- help="Do not execute jobs in a "
3275
- "Docker container, even when `DockerRequirement` "
3276
- "is specified under `hints`.",
3277
- )
3278
- dockergroup.add_argument(
3279
- "--leave-container",
3280
- action="store_false",
3281
- default=True,
3282
- help="Do not delete Docker container used by jobs after they exit",
3283
- dest="rm_container",
3284
- )
3285
- extra_dockergroup = parser.add_argument_group()
3286
- extra_dockergroup.add_argument(
3287
- "--custom-net",
3288
- help="Specify docker network name to pass to docker run command",
3289
- )
3290
- cidgroup = parser.add_argument_group(
3291
- "Options for recording the Docker container identifier into a file."
3292
- )
3293
- cidgroup.add_argument(
3294
- # Disabled as containerid is now saved by default
3295
- "--record-container-id",
3296
- action="store_true",
3297
- default=False,
3298
- help=argparse.SUPPRESS,
3299
- dest="record_container_id",
3300
- )
3301
-
3302
- cidgroup.add_argument(
3303
- "--cidfile-dir",
3304
- type=str,
3305
- help="Store the Docker container ID into a file in the specified directory.",
3306
- default=None,
3307
- dest="cidfile_dir",
3308
- )
3309
-
3310
- cidgroup.add_argument(
3311
- "--cidfile-prefix",
3312
- type=str,
3313
- help="Specify a prefix to the container ID filename. "
3314
- "Final file name will be followed by a timestamp. "
3315
- "The default is no prefix.",
3316
- default=None,
3317
- dest="cidfile_prefix",
3318
- )
3319
-
3320
- parser.add_argument(
3321
- "--preserve-environment",
3322
- type=str,
3323
- nargs="+",
3324
- help="Preserve specified environment variables when running"
3325
- " CommandLineTools",
3326
- metavar=("VAR1 VAR2"),
3327
- default=("PATH",),
3328
- dest="preserve_environment",
3329
- )
3330
- parser.add_argument(
3331
- "--preserve-entire-environment",
3332
- action="store_true",
3333
- help="Preserve all environment variable when running CommandLineTools.",
3334
- default=False,
3335
- dest="preserve_entire_environment",
3336
- )
3337
- parser.add_argument(
3338
- "--destBucket",
3339
- type=str,
3340
- help="Specify a cloud bucket endpoint for output files.",
3341
- )
3342
- parser.add_argument("--beta-dependency-resolvers-configuration", default=None)
3343
- parser.add_argument("--beta-dependencies-directory", default=None)
3344
- parser.add_argument("--beta-use-biocontainers", default=None, action="store_true")
3345
- parser.add_argument("--beta-conda-dependencies", default=None, action="store_true")
3346
- parser.add_argument(
3347
- "--tmpdir-prefix",
3348
- type=str,
3349
- help="Path prefix for temporary directories",
3350
- default=DEFAULT_TMPDIR_PREFIX,
3351
- )
3352
- parser.add_argument(
3353
- "--tmp-outdir-prefix",
3354
- type=str,
3355
- help="Path prefix for intermediate output directories",
3356
- default=DEFAULT_TMPDIR_PREFIX,
3357
- )
3358
- parser.add_argument(
3359
- "--force-docker-pull",
3360
- action="store_true",
3361
- default=False,
3362
- dest="force_docker_pull",
3363
- help="Pull latest docker image even if it is locally present",
3364
- )
3365
- parser.add_argument(
3366
- "--no-match-user",
3367
- action="store_true",
3368
- default=False,
3369
- help="Disable passing the current uid to `docker run --user`",
3370
- )
3371
- parser.add_argument(
3372
- "--no-read-only",
3373
- action="store_true",
3374
- default=False,
3375
- help="Do not set root directory in the container as read-only",
3376
- )
3377
- parser.add_argument(
3378
- "--strict-memory-limit",
3379
- action="store_true",
3380
- help="When running with "
3381
- "software containers and the Docker engine, pass either the "
3382
- "calculated memory allocation from ResourceRequirements or the "
3383
- "default of 1 gigabyte to Docker's --memory option.",
3384
- )
3385
- parser.add_argument(
3386
- "--strict-cpu-limit",
3387
- action="store_true",
3388
- help="When running with "
3389
- "software containers and the Docker engine, pass either the "
3390
- "calculated cpu allocation from ResourceRequirements or the "
3391
- "default of 1 core to Docker's --cpu option. "
3392
- "Requires docker version >= v1.13.",
3393
- )
3394
- parser.add_argument(
3395
- "--relax-path-checks",
3396
- action="store_true",
3397
- default=False,
3398
- help="Relax requirements on path names to permit "
3399
- "spaces and hash characters.",
3400
- dest="relax_path_checks",
3401
- )
3402
- parser.add_argument(
3403
- "--default-container",
3404
- help="Specify a default docker container that will be "
3405
- "used if the workflow fails to specify one.",
3406
- )
3407
- parser.add_argument(
3408
- "--disable-validate",
3409
- dest="do_validate",
3410
- action="store_false",
3411
- default=True,
3412
- help=argparse.SUPPRESS,
3413
- )
3414
- parser.add_argument(
3415
- "--fast-parser",
3416
- dest="fast_parser",
3417
- action="store_true",
3418
- default=False,
3419
- help=argparse.SUPPRESS,
3420
- )
3421
- checkgroup = parser.add_mutually_exclusive_group()
3422
- checkgroup.add_argument(
3423
- "--compute-checksum",
3424
- action="store_true",
3425
- default=True,
3426
- help="Compute checksum of contents while collecting outputs",
3427
- dest="compute_checksum",
3428
- )
3429
- checkgroup.add_argument(
3430
- "--no-compute-checksum",
3431
- action="store_false",
3432
- help="Do not compute checksum of contents while collecting outputs",
3433
- dest="compute_checksum",
3434
- )
3435
-
3436
- parser.add_argument(
3437
- "--eval-timeout",
3438
- help="Time to wait for a Javascript expression to evaluate before giving "
3439
- "an error, default 20s.",
3440
- type=float,
3441
- default=20,
3442
- )
3443
- parser.add_argument(
3444
- "--overrides",
3445
- type=str,
3446
- default=None,
3447
- help="Read process requirement overrides from file.",
3448
- )
3449
-
3450
- parser.add_argument(
3451
- "--mpi-config-file",
3452
- type=str,
3453
- default=None,
3454
- help="Platform specific configuration for MPI (parallel "
3455
- "launcher, its flag etc). See the cwltool README "
3456
- "section 'Running MPI-based tools' for details of the format: "
3457
- "https://github.com/common-workflow-language/cwltool#running-mpi-based-tools-that-need-to-be-launched",
3458
- )
3459
- parser.add_argument(
3460
- "--bypass-file-store",
3461
- action="store_true",
3462
- default=False,
3463
- help="Do not use Toil's file store and assume all "
3464
- "paths are accessible in place from all nodes.",
3465
- dest="bypass_file_store",
3466
- )
3467
- parser.add_argument(
3468
- "--disable-streaming",
3469
- action="store_true",
3470
- default=False,
3471
- help="Disable file streaming for files that have 'streamable' flag True",
3472
- dest="disable_streaming",
3473
- )
3474
-
3475
- provgroup = parser.add_argument_group(
3476
- "Options for recording provenance information of the execution"
3477
- )
3478
- provgroup.add_argument(
3479
- "--provenance",
3480
- help="Save provenance to specified folder as a "
3481
- "Research Object that captures and aggregates "
3482
- "workflow execution and data products.",
3483
- type=str,
3484
- )
3485
-
3486
- provgroup.add_argument(
3487
- "--enable-user-provenance",
3488
- default=False,
3489
- action="store_true",
3490
- help="Record user account info as part of provenance.",
3491
- dest="user_provenance",
3492
- )
3493
- provgroup.add_argument(
3494
- "--disable-user-provenance",
3495
- default=False,
3496
- action="store_false",
3497
- help="Do not record user account info in provenance.",
3498
- dest="user_provenance",
3499
- )
3500
- provgroup.add_argument(
3501
- "--enable-host-provenance",
3502
- default=False,
3503
- action="store_true",
3504
- help="Record host info as part of provenance.",
3505
- dest="host_provenance",
3506
- )
3507
- provgroup.add_argument(
3508
- "--disable-host-provenance",
3509
- default=False,
3510
- action="store_false",
3511
- help="Do not record host info in provenance.",
3512
- dest="host_provenance",
3513
- )
3514
- provgroup.add_argument(
3515
- "--orcid",
3516
- help="Record user ORCID identifier as part of "
3517
- "provenance, e.g. https://orcid.org/0000-0002-1825-0097 "
3518
- "or 0000-0002-1825-0097. Alternatively the environment variable "
3519
- "ORCID may be set.",
3520
- dest="orcid",
3521
- default=os.environ.get("ORCID", ""),
3522
- type=str,
3523
- )
3524
- provgroup.add_argument(
3525
- "--full-name",
3526
- help="Record full name of user as part of provenance, "
3527
- "e.g. Josiah Carberry. You may need to use shell quotes to preserve "
3528
- "spaces. Alternatively the environment variable CWL_FULL_NAME may "
3529
- "be set.",
3530
- dest="cwl_full_name",
3531
- default=os.environ.get("CWL_FULL_NAME", ""),
3532
- type=str,
3533
- )
3534
-
3535
- # Parse all the options once.
3536
- options = parser.parse_args(args)
3512
+ options = get_options(args)
3537
3513
 
3538
3514
  # Do cwltool setup
3539
3515
  cwltool.main.setup_schema(args=options, custom_schema_callback=None)
3516
+ tmpdir_prefix = options.tmpdir_prefix = options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
3540
3517
 
3541
3518
  # We need a workdir for the CWL runtime contexts.
3542
- if options.tmpdir_prefix != DEFAULT_TMPDIR_PREFIX:
3519
+ if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX:
3543
3520
  # if tmpdir_prefix is not the default value, move
3544
3521
  # workdir and the default job store under it
3545
- workdir = cwltool.utils.create_tmp_dir(options.tmpdir_prefix)
3522
+ workdir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3546
3523
  else:
3547
3524
  # Use a directory in the default tmpdir
3548
- workdir = tempfile.mkdtemp()
3525
+ workdir = mkdtemp()
3549
3526
  # Make sure workdir doesn't exist so it can be a job store
3550
3527
  os.rmdir(workdir)
3551
3528
 
@@ -3562,13 +3539,13 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3562
3539
  options.do_validate = True
3563
3540
  options.pack = False
3564
3541
  options.print_subgraph = False
3565
- if options.tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.workDir is None:
3542
+ if tmpdir_prefix != DEFAULT_TMPDIR_PREFIX and options.workDir is None:
3566
3543
  # We need to override workDir because by default Toil will pick
3567
3544
  # somewhere under the system temp directory if unset, ignoring
3568
3545
  # --tmpdir-prefix.
3569
3546
  #
3570
3547
  # If set, workDir needs to exist, so we directly use the prefix
3571
- options.workDir = cwltool.utils.create_tmp_dir(options.tmpdir_prefix)
3548
+ options.workDir = cwltool.utils.create_tmp_dir(tmpdir_prefix)
3572
3549
 
3573
3550
  if options.batchSystem == "kubernetes":
3574
3551
  # Containers under Kubernetes can only run in Singularity
@@ -3585,8 +3562,10 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3585
3562
 
3586
3563
  logger.debug(f"Final job store {options.jobStore} and workDir {options.workDir}")
3587
3564
 
3588
- outdir = os.path.abspath(options.outdir)
3589
- tmp_outdir_prefix = os.path.abspath(options.tmp_outdir_prefix)
3565
+ outdir = os.path.abspath(options.outdir or os.getcwd())
3566
+ tmp_outdir_prefix = os.path.abspath(
3567
+ options.tmp_outdir_prefix or DEFAULT_TMPDIR_PREFIX
3568
+ )
3590
3569
 
3591
3570
  fileindex: Dict[str, str] = {}
3592
3571
  existing: Dict[str, str] = {}
@@ -3604,6 +3583,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3604
3583
  find_default_container, options
3605
3584
  )
3606
3585
  runtime_context.workdir = workdir # type: ignore[attr-defined]
3586
+ runtime_context.outdir = outdir
3607
3587
  runtime_context.move_outputs = "leave"
3608
3588
  runtime_context.rm_tmpdir = False
3609
3589
  runtime_context.streaming_allowed = not options.disable_streaming
@@ -3621,12 +3601,16 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3621
3601
  # Otherwise, if it takes a File with loadContents from a URL, we won't
3622
3602
  # be able to load the contents when we need to.
3623
3603
  runtime_context.make_fs_access = ToilFsAccess
3604
+ if options.reference_inputs and options.bypass_file_store:
3605
+ # We can't do both of these at the same time.
3606
+ logger.error("Cannot reference inputs when bypassing the file store")
3607
+ return 1
3624
3608
 
3625
3609
  loading_context = cwltool.main.setup_loadingContext(None, runtime_context, options)
3626
3610
 
3627
3611
  if options.provenance:
3628
3612
  research_obj = cwltool.cwlprov.ro.ResearchObject(
3629
- temp_prefix_ro=options.tmp_outdir_prefix,
3613
+ temp_prefix_ro=tmp_outdir_prefix,
3630
3614
  orcid=options.orcid,
3631
3615
  full_name=options.cwl_full_name,
3632
3616
  fsaccess=runtime_context.make_fs_access(""),
@@ -3701,7 +3685,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3701
3685
  loading_context, uri = cwltool.load_tool.resolve_and_validate_document(
3702
3686
  loading_context, workflowobj, uri
3703
3687
  )
3704
- assert loading_context.loader
3688
+ if not loading_context.loader:
3689
+ raise RuntimeError("cwltool loader is not set.")
3705
3690
  processobj, metadata = loading_context.loader.resolve_ref(uri)
3706
3691
  processobj = cast(Union[CommentedMap, CommentedSeq], processobj)
3707
3692
 
@@ -3748,10 +3733,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3748
3733
  )
3749
3734
  raise
3750
3735
 
3751
- # We make a ToilFSAccess to access URLs with, but it has no
3752
- # FileStore so it can't do toildir: and toilfile:
3753
- fs_access = ToilFsAccess(options.basedir)
3754
- fill_in_defaults(tool.tool["inputs"], initialized_job_order, fs_access)
3736
+ # Leave the defaults un-filled in the top-level order. The tool or
3737
+ # workflow will fill them when it runs
3755
3738
 
3756
3739
  for inp in tool.tool["inputs"]:
3757
3740
  if (
@@ -3806,9 +3789,11 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3806
3789
  Callable[[str], FileID],
3807
3790
  functools.partial(toil.import_file, symlink=True),
3808
3791
  )
3809
-
3792
+
3810
3793
  # Import all the input files, some of which may be missing optional
3811
3794
  # files.
3795
+ logger.info("Importing input files...")
3796
+ fs_access = ToilFsAccess(options.basedir)
3812
3797
  import_files(
3813
3798
  file_import_function,
3814
3799
  fs_access,
@@ -3816,11 +3801,14 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3816
3801
  existing,
3817
3802
  initialized_job_order,
3818
3803
  skip_broken=True,
3804
+ skip_remote=options.reference_inputs,
3819
3805
  bypass_file_store=options.bypass_file_store,
3806
+ log_level=logging.INFO,
3820
3807
  )
3821
3808
  # Import all the files associated with tools (binaries, etc.).
3822
3809
  # Not sure why you would have an optional secondary file here, but
3823
3810
  # the spec probably needs us to support them.
3811
+ logger.info("Importing tool-associated files...")
3824
3812
  visitSteps(
3825
3813
  tool,
3826
3814
  functools.partial(
@@ -3830,7 +3818,9 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3830
3818
  fileindex,
3831
3819
  existing,
3832
3820
  skip_broken=True,
3821
+ skip_remote=options.reference_inputs,
3833
3822
  bypass_file_store=options.bypass_file_store,
3823
+ log_level=logging.INFO,
3834
3824
  ),
3835
3825
  )
3836
3826
 
@@ -3843,7 +3833,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3843
3833
  # were required.
3844
3834
  rm_unprocessed_secondary_files(param_value)
3845
3835
 
3846
- logger.debug("tool %s", tool)
3836
+ logger.info("Creating root job")
3837
+ logger.debug("Root tool: %s", tool)
3847
3838
  try:
3848
3839
  wf1, _ = makeJob(
3849
3840
  tool=tool,
@@ -3856,6 +3847,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3856
3847
  logging.error(err)
3857
3848
  return CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
3858
3849
  wf1.cwljob = initialized_job_order
3850
+ logger.info("Starting workflow")
3859
3851
  try:
3860
3852
  outobj = toil.start(wf1)
3861
3853
  except FailedJobsException as err:
@@ -3871,13 +3863,20 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3871
3863
 
3872
3864
  # Now the workflow has completed. We need to make sure the outputs (and
3873
3865
  # inputs) end up where the user wants them to be.
3874
-
3866
+ logger.info("Collecting workflow outputs...")
3875
3867
  outobj = resolve_dict_w_promises(outobj)
3876
3868
 
3877
3869
  # Stage files. Specify destination bucket if specified in CLI
3878
3870
  # options. If destination bucket not passed in,
3879
3871
  # options.destBucket's value will be None.
3880
- toilStageFiles(toil, outobj, outdir, destBucket=options.destBucket)
3872
+ toilStageFiles(
3873
+ toil,
3874
+ outobj,
3875
+ outdir,
3876
+ destBucket=options.destBucket,
3877
+ log_level=logging.INFO
3878
+ )
3879
+ logger.info("Stored workflow outputs")
3881
3880
 
3882
3881
  if runtime_context.research_obj is not None:
3883
3882
  cwltool.cwlprov.writablebagfile.create_job(
@@ -3904,7 +3903,8 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3904
3903
  ("File",),
3905
3904
  functools.partial(add_sizes, runtime_context.make_fs_access("")),
3906
3905
  )
3907
- assert document_loader
3906
+ if not document_loader:
3907
+ raise RuntimeError("cwltool loader is not set.")
3908
3908
  prov_dependencies = cwltool.main.prov_deps(
3909
3909
  workflowobj, document_loader, uri
3910
3910
  )
@@ -3914,6 +3914,7 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3914
3914
  )
3915
3915
 
3916
3916
  if not options.destBucket and options.compute_checksum:
3917
+ logger.info("Computing output file checksums...")
3917
3918
  visit_class(
3918
3919
  outobj,
3919
3920
  ("File",),
@@ -3922,12 +3923,14 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
3922
3923
 
3923
3924
  visit_class(outobj, ("File",), MutationManager().unset_generation)
3924
3925
  stdout.write(json.dumps(outobj, indent=4, default=str))
3926
+ stdout.write("\n")
3927
+ logger.info("CWL run complete!")
3925
3928
 
3926
3929
  return 0
3927
3930
 
3928
3931
 
3929
3932
  def find_default_container(
3930
- args: argparse.Namespace, builder: cwltool.builder.Builder
3933
+ args: Namespace, builder: cwltool.builder.Builder
3931
3934
  ) -> Optional[str]:
3932
3935
  """Find the default constructor by consulting a Toil.options object."""
3933
3936
  if args.default_container: