toil 9.1.1-py3-none-any.whl → 9.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. toil/__init__.py +5 -9
  2. toil/batchSystems/abstractBatchSystem.py +23 -22
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -12
  4. toil/batchSystems/awsBatch.py +8 -8
  5. toil/batchSystems/cleanup_support.py +4 -4
  6. toil/batchSystems/contained_executor.py +3 -3
  7. toil/batchSystems/gridengine.py +3 -4
  8. toil/batchSystems/htcondor.py +5 -5
  9. toil/batchSystems/kubernetes.py +65 -63
  10. toil/batchSystems/local_support.py +2 -3
  11. toil/batchSystems/lsf.py +6 -7
  12. toil/batchSystems/mesos/batchSystem.py +11 -7
  13. toil/batchSystems/mesos/test/__init__.py +1 -2
  14. toil/batchSystems/options.py +9 -10
  15. toil/batchSystems/registry.py +3 -7
  16. toil/batchSystems/singleMachine.py +8 -11
  17. toil/batchSystems/slurm.py +49 -38
  18. toil/batchSystems/torque.py +3 -4
  19. toil/bus.py +36 -34
  20. toil/common.py +129 -89
  21. toil/cwl/cwltoil.py +857 -729
  22. toil/cwl/utils.py +44 -35
  23. toil/fileStores/__init__.py +3 -1
  24. toil/fileStores/abstractFileStore.py +28 -30
  25. toil/fileStores/cachingFileStore.py +8 -8
  26. toil/fileStores/nonCachingFileStore.py +10 -21
  27. toil/job.py +159 -158
  28. toil/jobStores/abstractJobStore.py +68 -69
  29. toil/jobStores/aws/jobStore.py +249 -213
  30. toil/jobStores/aws/utils.py +13 -24
  31. toil/jobStores/fileJobStore.py +28 -22
  32. toil/jobStores/googleJobStore.py +21 -17
  33. toil/jobStores/utils.py +3 -7
  34. toil/leader.py +17 -22
  35. toil/lib/accelerators.py +6 -4
  36. toil/lib/aws/__init__.py +9 -10
  37. toil/lib/aws/ami.py +33 -19
  38. toil/lib/aws/iam.py +6 -6
  39. toil/lib/aws/s3.py +259 -157
  40. toil/lib/aws/session.py +76 -76
  41. toil/lib/aws/utils.py +51 -43
  42. toil/lib/checksum.py +19 -15
  43. toil/lib/compatibility.py +3 -2
  44. toil/lib/conversions.py +45 -18
  45. toil/lib/directory.py +29 -26
  46. toil/lib/docker.py +93 -99
  47. toil/lib/dockstore.py +77 -50
  48. toil/lib/ec2.py +39 -38
  49. toil/lib/ec2nodes.py +11 -4
  50. toil/lib/exceptions.py +8 -5
  51. toil/lib/ftp_utils.py +9 -14
  52. toil/lib/generatedEC2Lists.py +161 -20
  53. toil/lib/history.py +141 -97
  54. toil/lib/history_submission.py +163 -72
  55. toil/lib/io.py +27 -17
  56. toil/lib/memoize.py +2 -1
  57. toil/lib/misc.py +15 -11
  58. toil/lib/pipes.py +40 -25
  59. toil/lib/plugins.py +12 -8
  60. toil/lib/resources.py +1 -0
  61. toil/lib/retry.py +32 -38
  62. toil/lib/threading.py +12 -12
  63. toil/lib/throttle.py +1 -2
  64. toil/lib/trs.py +113 -51
  65. toil/lib/url.py +14 -23
  66. toil/lib/web.py +7 -2
  67. toil/options/common.py +18 -15
  68. toil/options/cwl.py +2 -2
  69. toil/options/runner.py +9 -5
  70. toil/options/wdl.py +1 -3
  71. toil/provisioners/__init__.py +9 -9
  72. toil/provisioners/abstractProvisioner.py +22 -20
  73. toil/provisioners/aws/__init__.py +20 -14
  74. toil/provisioners/aws/awsProvisioner.py +10 -8
  75. toil/provisioners/clusterScaler.py +19 -18
  76. toil/provisioners/gceProvisioner.py +2 -3
  77. toil/provisioners/node.py +11 -13
  78. toil/realtimeLogger.py +4 -4
  79. toil/resource.py +5 -5
  80. toil/server/app.py +2 -2
  81. toil/server/cli/wes_cwl_runner.py +11 -11
  82. toil/server/utils.py +18 -21
  83. toil/server/wes/abstract_backend.py +9 -8
  84. toil/server/wes/amazon_wes_utils.py +3 -3
  85. toil/server/wes/tasks.py +3 -5
  86. toil/server/wes/toil_backend.py +17 -21
  87. toil/server/wsgi_app.py +3 -3
  88. toil/serviceManager.py +3 -4
  89. toil/statsAndLogging.py +12 -13
  90. toil/test/__init__.py +33 -24
  91. toil/test/batchSystems/batchSystemTest.py +12 -11
  92. toil/test/batchSystems/batch_system_plugin_test.py +3 -5
  93. toil/test/batchSystems/test_slurm.py +38 -24
  94. toil/test/cwl/conftest.py +5 -6
  95. toil/test/cwl/cwlTest.py +194 -78
  96. toil/test/cwl/download_file_uri.json +6 -0
  97. toil/test/cwl/download_file_uri_no_hostname.json +6 -0
  98. toil/test/docs/scripts/tutorial_staging.py +1 -0
  99. toil/test/jobStores/jobStoreTest.py +9 -7
  100. toil/test/lib/aws/test_iam.py +1 -3
  101. toil/test/lib/aws/test_s3.py +1 -1
  102. toil/test/lib/dockerTest.py +9 -9
  103. toil/test/lib/test_ec2.py +12 -11
  104. toil/test/lib/test_history.py +4 -4
  105. toil/test/lib/test_trs.py +16 -14
  106. toil/test/lib/test_url.py +7 -6
  107. toil/test/lib/url_plugin_test.py +12 -18
  108. toil/test/provisioners/aws/awsProvisionerTest.py +10 -8
  109. toil/test/provisioners/clusterScalerTest.py +2 -5
  110. toil/test/provisioners/clusterTest.py +1 -3
  111. toil/test/server/serverTest.py +13 -4
  112. toil/test/sort/restart_sort.py +2 -6
  113. toil/test/sort/sort.py +3 -8
  114. toil/test/src/deferredFunctionTest.py +7 -7
  115. toil/test/src/environmentTest.py +1 -2
  116. toil/test/src/fileStoreTest.py +5 -5
  117. toil/test/src/importExportFileTest.py +5 -6
  118. toil/test/src/jobServiceTest.py +22 -14
  119. toil/test/src/jobTest.py +121 -25
  120. toil/test/src/miscTests.py +5 -7
  121. toil/test/src/promisedRequirementTest.py +8 -7
  122. toil/test/src/regularLogTest.py +2 -3
  123. toil/test/src/resourceTest.py +5 -8
  124. toil/test/src/restartDAGTest.py +5 -6
  125. toil/test/src/resumabilityTest.py +2 -2
  126. toil/test/src/retainTempDirTest.py +3 -3
  127. toil/test/src/systemTest.py +3 -3
  128. toil/test/src/threadingTest.py +1 -1
  129. toil/test/src/workerTest.py +1 -2
  130. toil/test/utils/toilDebugTest.py +6 -4
  131. toil/test/utils/toilKillTest.py +1 -1
  132. toil/test/utils/utilsTest.py +15 -14
  133. toil/test/wdl/wdltoil_test.py +247 -124
  134. toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
  135. toil/toilState.py +2 -3
  136. toil/utils/toilDebugFile.py +3 -8
  137. toil/utils/toilDebugJob.py +1 -2
  138. toil/utils/toilLaunchCluster.py +1 -2
  139. toil/utils/toilSshCluster.py +2 -0
  140. toil/utils/toilStats.py +19 -24
  141. toil/utils/toilStatus.py +11 -14
  142. toil/version.py +10 -10
  143. toil/wdl/wdltoil.py +313 -209
  144. toil/worker.py +18 -12
  145. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/METADATA +11 -14
  146. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/RECORD +150 -153
  147. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/WHEEL +1 -1
  148. toil/test/cwl/staging_cat.cwl +0 -27
  149. toil/test/cwl/staging_make_file.cwl +0 -25
  150. toil/test/cwl/staging_workflow.cwl +0 -43
  151. toil/test/cwl/zero_default.cwl +0 -61
  152. toil/test/utils/ABCWorkflowDebug/ABC.txt +0 -1
  153. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/entry_points.txt +0 -0
  154. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/licenses/LICENSE +0 -0
  155. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/top_level.txt +0 -0
toil/wdl/wdltoil.py CHANGED
@@ -33,35 +33,23 @@ import sys
  import tempfile
  import textwrap
  import uuid
- from collections.abc import Generator, Iterable, Iterator, Sequence
+ from collections.abc import Callable, Generator, Iterable, Iterator, Sequence
  from contextlib import ExitStack, contextmanager
  from graphlib import TopologicalSorter
  from tempfile import mkstemp
  from typing import (
+ IO,
  Any,
- Callable,
- Dict,
- Generator,
- Iterable,
- Iterator,
- List,
  Optional,
- Sequence,
- Tuple,
+ Protocol,
+ TypedDict,
+ TypeGuard,
  TypeVar,
  Union,
  cast,
- TypedDict,
- IO,
- Protocol,
  overload,
  )

- if sys.version_info < (3, 10):
- from typing_extensions import TypeGuard
- else:
- from typing import TypeGuard
-
  if sys.version_info < (3, 11):
  from typing_extensions import NotRequired
  else:
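
Note on the hunk above: the new version imports TypeGuard directly from typing (the Python < 3.10 fallback block is gone) but keeps a version gate for NotRequired, which only reached the standard typing module in Python 3.11. A minimal illustration of that gating pattern, not taken from the package, showing why an explicit sys.version_info check is easier for type checkers to follow than a try/except ImportError:

    # Illustrative sketch only: version-gated import of a typing backport.
    import sys

    if sys.version_info < (3, 11):
        # Older interpreters use the backport; both branches are visible to mypy.
        from typing_extensions import NotRequired
    else:
        from typing import NotRequired
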
@@ -73,11 +61,11 @@ from urllib.error import HTTPError
  from urllib.parse import quote, unquote, urljoin, urlsplit

  import WDL.Error
+ import WDL.Lint
  import WDL.runtime.config
  from configargparse import ArgParser, Namespace
  from WDL._util import byte_size_units, chmod_R_plus
- from WDL.CLI import print_error, outline
- import WDL.Lint
+ from WDL.CLI import outline, print_error
  from WDL.runtime.backend.docker_swarm import SwarmContainer
  from WDL.runtime.backend.singularity import SingularityContainer
  from WDL.runtime.error import DownloadFailed
@@ -91,45 +79,56 @@ from toil.fileStores import FileID
  from toil.fileStores.abstractFileStore import AbstractFileStore
  from toil.job import (
  AcceleratorRequirement,
+ FileMetadata,
+ ImportsJob,
  Job,
+ ParseableIndivisibleResource,
  Promise,
  Promised,
  TemporaryID,
+ get_file_sizes,
  parse_accelerator,
+ potential_absolute_uris,
  unwrap,
  unwrap_all,
- ParseableIndivisibleResource,
- ImportsJob,
- FileMetadata,
- potential_absolute_uris,
- get_file_sizes
  )
  from toil.jobStores.abstractJobStore import (
  AbstractJobStore,
  InvalidImportExportUrlException,
  LocatorException,
  )
- from toil.lib.exceptions import UnimplementedURLException
  from toil.lib.accelerators import get_individual_local_accelerators
  from toil.lib.conversions import VALID_PREFIXES, convert_units, human2bytes
  from toil.lib.directory import (
  DirectoryContents,
  decode_directory,
- encode_directory,
+ directory_contents_items,
  directory_item_exists,
+ directory_items,
+ encode_directory,
  get_directory_contents_item,
  get_directory_item,
- directory_items,
- directory_contents_items,
  )
- from toil.lib.trs import resolve_workflow
- from toil.lib.io import mkdtemp, is_any_url, is_file_url, TOIL_URI_SCHEME, is_standard_url, is_toil_url, is_toil_file_url, is_toil_dir_url, is_remote_url, is_directory_url
+ from toil.lib.exceptions import UnimplementedURLException
+ from toil.lib.io import (
+ TOIL_URI_SCHEME,
+ is_any_url,
+ is_directory_url,
+ is_file_url,
+ is_remote_url,
+ is_standard_url,
+ is_toil_dir_url,
+ is_toil_file_url,
+ is_toil_url,
+ mkdtemp,
+ )
  from toil.lib.memoize import memoize
  from toil.lib.misc import get_user_name
  from toil.lib.resources import ResourceMonitor
  from toil.lib.threading import global_mutex
- from toil.provisioners.clusterScaler import JobTooBigError
+ from toil.lib.trs import resolve_workflow
  from toil.lib.url import URLAccess
+ from toil.provisioners.clusterScaler import JobTooBigError

  logger = logging.getLogger(__name__)

@@ -141,6 +140,7 @@ WDLINode = Union[WDL.Value.File, WDL.Value.Directory]
  # Some functions take either a File or Directory and return the same type.
  AnyINode = TypeVar("AnyINode", bound=WDLINode)

+
  # TODO: Is there a way to get out of needing this? Or make this support N types?
  class INodeTransform(Protocol):
  """
@@ -152,12 +152,14 @@ class INodeTransform(Protocol):
  complicated type for functions that transform inodes to the same type of
  inodes.
  """
+
  @overload
- def __call__(self, __file: WDL.Value.File) -> WDL.Value.File | None:
- ...
+ def __call__(self, __file: WDL.Value.File) -> WDL.Value.File | None: ...
  @overload
- def __call__(self, __directory: WDL.Value.Directory) -> WDL.Value.Directory | None:
- ...
+ def __call__(
+ self, __directory: WDL.Value.Directory
+ ) -> WDL.Value.Directory | None: ...
+

  def is_inode(value: WDL.Value.Base) -> TypeGuard[WDLINode]:
  """
@@ -169,12 +171,14 @@ def is_inode(value: WDL.Value.Base) -> TypeGuard[WDLINode]:
  """
  return isinstance(value, WDL.Value.File) or isinstance(value, WDL.Value.Directory)

+
  # In regards to "toilfile:" URIs:
  # We define a URI scheme kind of like but not actually compatible with the one
  # we use for CWL. CWL brings along the file basename in its file and directory
  # types, but WDL inode types don't. So we need to make sure we stash that
  # somewhere in the URI.

+
  # We want to use hashlib.file_digest to avoid a 3-line hashing loop like
  # MiniWDL has. But it is only in 3.11+
  #
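
is_inode() above is annotated to return TypeGuard[WDLINode], so a type checker narrows its argument inside a guarded branch. A hypothetical caller (illustrative only, not part of the diff) that relies on that narrowing:

    # Illustrative sketch only: narrowing WDL.Value.Base to WDLINode via the TypeGuard.
    def first_virtualized_value(value: WDL.Value.Base) -> str | None:
        if is_inode(value):
            # Here mypy treats value as WDLINode, so inode-only helpers accept it.
            return get_inode_virtualized_value(value)
        return None
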
@@ -213,22 +217,12 @@ class FileDigester(Protocol):
  def __call__(self, __f: ReadableFileObj, __alg_name: str) -> hashlib._Hash: ...


- try:
- # Don't do a direct conditional import to the final name here because then
- # the polyfill needs *exactly* the signature of file_digest, and not just
- # one that can accept all calls we make in the file, or MyPy will complain.
- #
- # We need to tell MyPy we expect this import to fail, when typechecking on
- # pythons that don't have it. But we also need to tell it that it is fine
- # if it succeeds, for Pythons that do have it.
- #
- # TODO: Change to checking sys.version_info because MyPy understands that
- # better?
- from hashlib import file_digest as file_digest_impl # type: ignore[attr-defined,unused-ignore]
+ if sys.version_info >= (3, 11):
+ from hashlib import file_digest as file_digest_impl

  file_digest: FileDigester = file_digest_impl
- except ImportError:
- # Polyfill file_digest from 3.11+
+ else: # Polyfill file_digest from 3.11+
+
  def file_digest_fallback_impl(f: ReadableFileObj, alg_name: str) -> hashlib._Hash:
  BUFFER_SIZE = 1024 * 1024
  hasher = hashlib.new(alg_name)
@@ -240,22 +234,20 @@ except ImportError:

  file_digest = file_digest_fallback_impl

- # WDL options to pass into the WDL jobs and standard libraries
- # task_path: Dotted WDL name of the part of the workflow this library is working for.
- # namespace: namespace of the WDL that the current job is in
- # execution_dir: Directory to use as the working directory for workflow code.
- # container: The type of container to use when executing a WDL task. Carries through the value of the commandline --container option
- # all_call_outputs: whether a job should include all calls outputs
- WDLContext = TypedDict(
- "WDLContext",
- {
- "execution_dir": NotRequired[str],
- "container": NotRequired[str],
- "task_path": str,
- "namespace": str,
- "all_call_outputs": bool,
- },
- )
+
+ class WDLContext(TypedDict):
+ """WDL options to pass into the WDL jobs and standard libraries"""
+
+ execution_dir: NotRequired[str]
+ """Directory to use as the working directory for workflow code"""
+ container: NotRequired[str]
+ """The type of container to use when executing a WDL task. Carries through the value of the commandline --container option."""
+ task_path: str
+ """Dotted WDL name of the part of the workflow this library is working for"""
+ namespace: str
+ """Namespace of the WDL that the current job is in"""
+ all_call_outputs: bool
+ """Whether a job should include all calls outputs"""


  class InsufficientMountDiskSpace(Exception):
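
The hunk above rewrites WDLContext from the functional TypedDict(...) call into a class-based TypedDict so each key can carry a docstring; the required keys and the NotRequired ones are unchanged. A small usage sketch (illustrative only, the literal values are made up):

    # Illustrative sketch only: building and reading a WDLContext-style TypedDict.
    ctx: WDLContext = {
        "task_path": "main_workflow.hello",  # hypothetical dotted task path
        "namespace": "main_workflow",
        "all_call_outputs": False,
        # "execution_dir" and "container" are NotRequired and may be omitted.
    }
    container = ctx.get("container", "auto")  # "auto" is a made-up fallback
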
@@ -357,7 +349,9 @@ async def toil_read_source(
  # We track our own failures for debugging
  tried = []

- for candidate_uri in potential_absolute_uris(uri, path, importer=importer.pos.abspath if importer else None):
+ for candidate_uri in potential_absolute_uris(
+ uri, path, importer=importer.pos.abspath if importer else None
+ ):
  # For each place to try in order
  destination_buffer = io.BytesIO()
  logger.debug("Fetching %s", candidate_uri)
@@ -373,7 +367,13 @@
  # TODO: we need to assume in general that an error is just a
  # not-found, because the exceptions thrown by read_from_url()
  # implementations are not specified.
- logger.debug("Tried to fetch %s from %s but got %s: %s", uri, candidate_uri, type(e), e)
+ logger.debug(
+ "Tried to fetch %s from %s but got %s: %s",
+ uri,
+ candidate_uri,
+ type(e),
+ e,
+ )
  continue
  # If we get here, we got it probably.
  try:
@@ -699,7 +699,9 @@ def clone_metadata(old_inode: AnyINode, new_inode: AnyINode) -> None:
  setattr(new_inode, attribute, getattr(old_inode, attribute))


- def make_inode(example_inode: AnyINode, value: str, expr: Optional[WDL.Expr.Base]) -> AnyINode:
+ def make_inode(
+ example_inode: AnyINode, value: str, expr: WDL.Expr.Base | None
+ ) -> AnyINode:
  """
  Make a new File or Directory of the same type as the example with the given arguments.

@@ -709,6 +711,7 @@ def make_inode(example_inode: AnyINode, value: str, expr: Optional[WDL.Expr.Base

  return cast(AnyINode, type(example_inode)(value, expr))

+
  def set_inode_value(inode: AnyINode, new_value: str) -> AnyINode:
  """
  Return a copy of a WDL File/Directory with the value changed.
@@ -740,9 +743,7 @@ def get_inode_nonexistent(inode: WDLINode) -> bool:
  return cast(bool, getattr(inode, "nonexistent", False))


- def set_inode_virtualized_value(
- inode: AnyINode, virtualized_value: str
- ) -> AnyINode:
+ def set_inode_virtualized_value(inode: AnyINode, virtualized_value: str) -> AnyINode:
  """
  Return a copy of a WDL File/Directory with the virtualized_value attribute set.

@@ -754,14 +755,14 @@ def set_inode_virtualized_value(
  return new_inode


- def get_inode_virtualized_value(inode: WDLINode) -> Optional[str]:
+ def get_inode_virtualized_value(inode: WDLINode) -> str | None:
  """
  Get the virtualized storage location for a File/Directory.
  """
  return cast(Optional[str], getattr(inode, "virtualized_value", None))


- def get_shared_fs_path(inode: WDLINode) -> Optional[str]:
+ def get_shared_fs_path(inode: WDLINode) -> str | None:
  """
  If a File/Directory has a shared filesystem path, get that path.

@@ -814,7 +815,7 @@ def view_shared_fs_paths(


  def poll_execution_cache(
- node: Union[WDL.Tree.Workflow, WDL.Tree.Task], bindings: WDLBindings
+ node: WDL.Tree.Workflow | WDL.Tree.Task, bindings: WDLBindings
  ) -> tuple[WDLBindings | None, str]:
  """
  Return the cached result of calling this workflow or task, and its key.
@@ -832,7 +833,7 @@
  # TODO: Ship config from leader? It might not see the right environment.
  miniwdl_config = WDL.runtime.config.Loader(miniwdl_logger)
  miniwdl_cache = WDL.runtime.cache.new(miniwdl_config, miniwdl_logger)
- cached_result: Optional[WDLBindings] = miniwdl_cache.get(
+ cached_result: WDLBindings | None = miniwdl_cache.get(
  cache_key, transformed_bindings, node.effective_outputs
  )
  if cached_result is not None:
@@ -848,8 +849,8 @@ def fill_execution_cache(
  output_bindings: WDLBindings,
  file_store: AbstractFileStore,
  wdl_options: WDLContext,
- miniwdl_logger: Optional[logging.Logger] = None,
- miniwdl_config: Optional[WDL.runtime.config.Loader] = None,
+ miniwdl_logger: logging.Logger | None = None,
+ miniwdl_config: WDL.runtime.config.Loader | None = None,
  ) -> WDLBindings:
  """
  Cache the result of calling a workflow or task.
@@ -908,9 +909,7 @@
  if virtualized is None:
  # TODO: If we're passing things around by URL reference and
  # some of them are file: is this actually allowed?
- raise RuntimeError(
- f"{inode} caught escaping from task unvirtualized"
- )
+ raise RuntimeError(f"{inode} caught escaping from task unvirtualized")

  # We need to save this somewhere.
  # This needs to exist before we can export to it. And now we know
@@ -933,7 +932,9 @@

  return inode

- output_bindings = map_over_inodes_in_bindings(output_bindings, assign_shared_fs_path)
+ output_bindings = map_over_inodes_in_bindings(
+ output_bindings, assign_shared_fs_path
+ )

  # Save the bindings to the cache, representing all files with their shared filesystem paths.
  miniwdl_cache.put(cache_key, view_shared_fs_paths(output_bindings))
@@ -943,6 +944,7 @@
  # the cached files in their input digests.
  return output_bindings

+
  def choose_human_readable_directory(
  root_dir: str,
  source_task_path: str,
@@ -1042,9 +1044,7 @@ def evaluate_decls_to_bindings(
  each_decl, all_bindings, standard_library
  )
  else:
- output_value = evaluate_decl(
- each_decl, all_bindings, standard_library
- )
+ output_value = evaluate_decl(each_decl, all_bindings, standard_library)
  if drop_missing_files:
  dropped_output_value = map_over_typed_inodes_in_value(
  output_value, missing_inode_dropper(standard_library)
@@ -1142,6 +1142,7 @@ def extract_inode_values(environment: WDLBindings) -> list[str]:
  map_over_inodes_in_bindings(environment, add_value)
  return values

+
  def extract_inode_virtualized_values(environment: WDLBindings) -> list[str]:
  """
  Get a list of all File/Directory object virtualized values in the bindings.
@@ -1159,6 +1160,7 @@ def extract_inode_virtualized_values(environment: WDLBindings) -> list[str]:
  map_over_inodes_in_bindings(environment, add_value)
  return values

+
  def extract_toil_file_uris(environment: WDLBindings) -> Iterable[str]:
  """
  Get the toilfile: URIs in the given bindings.
@@ -1181,8 +1183,8 @@

  def virtualize_inodes_in_bindings(
  environment: WDLBindings,
- file_to_id: Dict[str, FileID],
- file_to_metadata: Dict[str, FileMetadata],
+ file_to_id: dict[str, FileID],
+ file_to_metadata: dict[str, FileMetadata],
  task_path: str,
  ) -> WDLBindings:
  """
@@ -1235,9 +1237,9 @@ def convert_remote_files(
  environment: WDLBindings,
  file_source: AbstractJobStore,
  task_path: str,
- search_paths: Optional[list[str]] = None,
+ search_paths: list[str] | None = None,
  import_remote_files: bool = True,
- execution_dir: Optional[str] = None,
+ execution_dir: str | None = None,
  ) -> WDLBindings:
  """
  Resolve relative-URI files in the given environment and import all files.
@@ -1529,7 +1531,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  file, self._virtualize_filename(file.value)
  )
  with open(
- self._devirtualize_filename(get_inode_virtualized_value(file)), "r"
+ self._devirtualize_filename(get_inode_virtualized_value(file))
  ) as infile:
  return parse(infile.read())

@@ -1607,9 +1609,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  # Mark the inode nonexistent.
  return set_inode_nonexistent(inode, True)

- logger.debug(
- "For %s got virtualized value %s", inode, virtualized_filename
- )
+ logger.debug("For %s got virtualized value %s", inode, virtualized_filename)
  marked_inode = set_inode_virtualized_value(inode, virtualized_filename)
  return marked_inode

@@ -1635,8 +1635,8 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  filename: str,
  dest_path: str,
  file_source: AbstractFileStore | Toil,
- export: Optional[bool] = None,
- symlink: Optional[bool] = None
+ export: bool | None = None,
+ symlink: bool | None = None,
  ) -> None:
  """
  Given a filename/URI, write it to the given dest_path.
@@ -1674,7 +1674,9 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  )
  if result != dest_path:
  # We definitely want this to be put where we asked.
- raise RuntimeError(f"Tried to read file to {dest_path} but it went to {result} instead")
+ raise RuntimeError(
+ f"Tried to read file to {dest_path} but it went to {result} instead"
+ )
  else:
  raise RuntimeError(f"Unsupported file source: {file_source}")
  else:
@@ -1741,7 +1743,11 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  )
  return result
  else:
- logger.debug("Virtualized filename %s is not any of the %s cached items", filename, len(virtualized_to_devirtualized))
+ logger.debug(
+ "Virtualized filename %s is not any of the %s cached items",
+ filename,
+ len(virtualized_to_devirtualized),
+ )

  if is_directory_url(filename):
  # This points to a directory, so handle it as a tree.
@@ -1754,12 +1760,20 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):

  if is_toil_dir_url(filename):
  # This is a Toil directory URL directory.
- base_dir_decoded, remaining_path, _, base_dir_source_uri, source_task = decode_directory(filename)
+ (
+ base_dir_decoded,
+ remaining_path,
+ _,
+ base_dir_source_uri,
+ source_task,
+ ) = decode_directory(filename)
  # We always set the directory URI and source task.
  assert base_dir_source_uri is not None
  assert source_task is not None

- contents = get_directory_contents_item(base_dir_decoded, remaining_path)
+ contents = get_directory_contents_item(
+ base_dir_decoded, remaining_path
+ )

  # This is a directory and we have its decoded structure.
  assert not isinstance(contents, str)
@@ -1767,12 +1781,19 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  # Work out where the root uploaded directory would go
  dir_basename = os.path.basename(urlsplit(base_dir_source_uri).path)
  parent_url = urljoin(base_dir_source_uri, ".")
- parent_path = os.path.join(choose_human_readable_directory(
- dest_dir, source_task, parent_url
- ), dir_basename)
+ parent_path = os.path.join(
+ choose_human_readable_directory(
+ dest_dir, source_task, parent_url
+ ),
+ dir_basename,
+ )

  # And where this particular subdirectory we're fetching goes
- dest_path = os.path.join(parent_path, remaining_path) if remaining_path is not None else parent_path
+ dest_path = (
+ os.path.join(parent_path, remaining_path)
+ if remaining_path is not None
+ else parent_path
+ )

  # contents is already a dict from basename to sub-dict or full URL.
  else:
@@ -1793,7 +1814,9 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  # Synthesize a contents dict
  contents = {}

- def list_recursively(url: str, contents_to_fill: DirectoryContents) -> None:
+ def list_recursively(
+ url: str, contents_to_fill: DirectoryContents
+ ) -> None:
  """
  Recursively list the given URL into the given dict.

@@ -1818,7 +1841,10 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  # Now we know we have filename (the directory), dest_path (the
  # desired local path), and contents (all the files and
  # subdirectories we need to materialize).
- logger.debug("Devirtualizing %s directly contained items, and their children", len(contents))
+ logger.debug(
+ "Devirtualizing %s directly contained items, and their children",
+ len(contents),
+ )

  for relative_path, item_value in directory_contents_items(contents):
  # Recursively visit the directory itself and its contents.
@@ -1834,22 +1860,39 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  item_devirtualized_path = os.path.join(dest_path, relative_path)
  if item_virtualized_path in virtualized_to_devirtualized:
  # This has been downloaded already
- assert virtualized_to_devirtualized[item_virtualized_path] == item_devirtualized_path, f"Devirtualized version of {item_virtualized_path} expected at {item_devirtualized_path} but is actually already at {virtualized_to_devirtualized[item_virtualized_path]}"
+ assert (
+ virtualized_to_devirtualized[item_virtualized_path]
+ == item_devirtualized_path
+ ), f"Devirtualized version of {item_virtualized_path} expected at {item_devirtualized_path} but is actually already at {virtualized_to_devirtualized[item_virtualized_path]}"
  # We don't do the back-check because we will have
  # entries with the directory URL *and* the base file ID
  # URL for files.
  assert os.path.exists(item_devirtualized_path)
- elif item_value is not None and item_value in virtualized_to_devirtualized:
+ elif (
+ item_value is not None
+ and item_value in virtualized_to_devirtualized
+ ):
  # The target file is already downloaded.
  # TODO: Are there circumstances where we're going to
  # need multiple copies, such as distinct base
  # directories that can't be nested?
- logger.debug("%s points to %s which is already cached", item_virtualized_path, item_value)
- assert virtualized_to_devirtualized[item_value] == item_devirtualized_path, f"Directory item {item_virtualized_path} points to file {item_value}, which was already devirtualized to {virtualized_to_devirtualized[item_value]}, but for the directory we need it to be at {item_devirtualized_path} instead!"
+ logger.debug(
+ "%s points to %s which is already cached",
+ item_virtualized_path,
+ item_value,
+ )
+ assert (
+ virtualized_to_devirtualized[item_value]
+ == item_devirtualized_path
+ ), f"Directory item {item_virtualized_path} points to file {item_value}, which was already devirtualized to {virtualized_to_devirtualized[item_value]}, but for the directory we need it to be at {item_devirtualized_path} instead!"
  assert os.path.exists(item_devirtualized_path)
  # Cache the file's devirtualized version also under the directory-based path.
- virtualized_to_devirtualized[item_virtualized_path] = virtualized_to_devirtualized[item_value]
- logger.debug("Cache now has %s items", len(virtualized_to_devirtualized))
+ virtualized_to_devirtualized[item_virtualized_path] = (
+ virtualized_to_devirtualized[item_value]
+ )
+ logger.debug(
+ "Cache now has %s items", len(virtualized_to_devirtualized)
+ )
  else:
  # We need to download this now and cache it.
  if item_value is None:
@@ -1860,12 +1903,22 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  os.makedirs(item_devirtualized_path, exist_ok=True)

  # Cache the directory
- logger.debug("Add %s to cache at %s", item_virtualized_path, item_devirtualized_path)
- virtualized_to_devirtualized[item_virtualized_path] = item_devirtualized_path
- devirtualized_to_virtualized[item_devirtualized_path] = item_virtualized_path
+ logger.debug(
+ "Add %s to cache at %s",
+ item_virtualized_path,
+ item_devirtualized_path,
+ )
+ virtualized_to_devirtualized[item_virtualized_path] = (
+ item_devirtualized_path
+ )
+ devirtualized_to_virtualized[item_devirtualized_path] = (
+ item_virtualized_path
+ )
  else:
  # Download files from their stored locations.
- assert not os.path.exists(item_devirtualized_path), f"Virtualized file {item_virtualized_path} pointing to {item_value} already exists at {item_devirtualized_path}, but is not in cache. Back-cache says: {devirtualized_to_virtualized.get(item_devirtualized_path)}"
+ assert not os.path.exists(
+ item_devirtualized_path
+ ), f"Virtualized file {item_virtualized_path} pointing to {item_value} already exists at {item_devirtualized_path}, but is not in cache. Back-cache says: {devirtualized_to_virtualized.get(item_devirtualized_path)}"

  # Download, not allowing a symlink.
  #
@@ -1883,21 +1936,38 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  item_devirtualized_path,
  file_source,
  export,
- symlink=False
+ symlink=False,
  )

- logger.debug("Add %s pointing to %s to cache at %s", item_virtualized_path, item_value, item_devirtualized_path)
+ logger.debug(
+ "Add %s pointing to %s to cache at %s",
+ item_virtualized_path,
+ item_value,
+ item_devirtualized_path,
+ )
  # Cache the file in its own right
- virtualized_to_devirtualized[item_value] = item_devirtualized_path
- devirtualized_to_virtualized[item_devirtualized_path] = item_value
+ virtualized_to_devirtualized[item_value] = (
+ item_devirtualized_path
+ )
+ devirtualized_to_virtualized[item_devirtualized_path] = (
+ item_value
+ )
  # And the directory entry as pointing to the file.
- virtualized_to_devirtualized[item_virtualized_path] = virtualized_to_devirtualized[item_value]
+ virtualized_to_devirtualized[item_virtualized_path] = (
+ virtualized_to_devirtualized[item_value]
+ )

- logger.debug("Cache now has %s items", len(virtualized_to_devirtualized))
+ logger.debug(
+ "Cache now has %s items", len(virtualized_to_devirtualized)
+ )

  # We should now have it in the cache.
- assert virtualized_to_devirtualized[filename] == dest_path, f"Cached devirtualized path for {filename} should be {dest_path} but is {virtualized_to_devirtualized[filename]} instead!"
- logger.debug("Devirtualized %s as local directory %s", filename, dest_path)
+ assert (
+ virtualized_to_devirtualized[filename] == dest_path
+ ), f"Cached devirtualized path for {filename} should be {dest_path} but is {virtualized_to_devirtualized[filename]} instead!"
+ logger.debug(
+ "Devirtualized %s as local directory %s", filename, dest_path
+ )
  # Return where we put it.
  return dest_path

@@ -1919,7 +1989,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  wdl_options,
  devirtualized_to_virtualized,
  virtualized_to_devirtualized,
- export
+ export,
  )
  # Otherwise, we have a direct URL to a file to get. Base case.

@@ -1952,12 +2022,16 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  # Download the file into it.
  cls._write_uri_to(filename, dest_path, file_source, export)

- logger.debug("Devirtualized %s as openable file %s", filename, dest_path)
+ logger.debug(
+ "Devirtualized %s as openable file %s", filename, dest_path
+ )

  # Store it in the cache
  virtualized_to_devirtualized[filename] = dest_path
  devirtualized_to_virtualized[dest_path] = filename
- logger.debug("Cache now has %s items", len(virtualized_to_devirtualized))
+ logger.debug(
+ "Cache now has %s items", len(virtualized_to_devirtualized)
+ )
  return dest_path
  else:
  # This is a local file or file URL
@@ -2013,7 +2087,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):

  if real_path.startswith(execution_prefix):
  # This is a task working firectory relative file
- return real_path[len(execution_prefix):]
+ return real_path[len(execution_prefix) :]

  if real_path.startswith(ltd_prefix):
  # This file is relative to the Toil working directory.
@@ -2023,11 +2097,10 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  #
  # We already inject _miniwdl_inputs in there, so just inject
  # another underscore-prefixed thing.
- return "_toil_job/" + real_path[len(ltd_prefix):]
+ return "_toil_job/" + real_path[len(ltd_prefix) :]
  return path


-
  @memoize
  def _virtualize_filename(self, filename: str) -> str:
  """
@@ -2117,7 +2190,9 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  # Satisfy mypy. This should never happen though as we don't
  # pass a shared file name (which is the only way import_file
  # returns None)
- raise RuntimeError("Failed to import URL %s into jobstore." % normalized_uri)
+ raise RuntimeError(
+ "Failed to import URL %s into jobstore." % normalized_uri
+ )
  file_basename = os.path.basename(urlsplit(normalized_uri).path)
  # Get the URL to the parent directory and use that.
  parent_dir = urljoin(normalized_uri, ".")
@@ -2144,9 +2219,7 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
  # This is a previously devirtualized thing so we can just use the
  # virtual version we remembered instead of reuploading it.
  result = self._devirtualized_to_virtualized[abs_filename]
- logger.debug(
- "Re-using virtualized WDL %s for %s", result, filename
- )
+ logger.debug("Re-using virtualized WDL %s for %s", result, filename)
  return result

  if not os.path.exists(abs_filename):
@@ -2181,7 +2254,7 @@ class ToilWDLStdLibWorkflow(ToilWDLStdLibBase):
  def __init__(self, *args: Any, **kwargs: Any) -> None:
  super().__init__(*args, **kwargs)

- self._miniwdl_cache: Optional[WDL.runtime.cache.CallCache] = None
+ self._miniwdl_cache: WDL.runtime.cache.CallCache | None = None

  def _virtualize_inode(
  self, inode: AnyINode, enforce_existence: bool = True
@@ -2192,10 +2265,7 @@ class ToilWDLStdLibWorkflow(ToilWDLStdLibBase):
  if (
  get_inode_virtualized_value(inode) is None
  and get_shared_fs_path(inode) is None
- and (
- not is_any_url(inode.value)
- or is_file_url(inode.value)
- )
+ and (not is_any_url(inode.value) or is_file_url(inode.value))
  ):
  # This is a never-virtualized inode that is a path or URI and
  # has no shared FS path associated with it. We just made it at
@@ -2219,7 +2289,7 @@ class ToilWDLStdLibWorkflow(ToilWDLStdLibBase):
  "Applied shared filesystem path %s to %s that appears to "
  "have been coerced from String at workflow scope.",
  cache_path,
- inode
+ inode,
  )

  # Do the virtualization
@@ -2690,7 +2760,9 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
  raise FileNotFoundError(filename)
  filename = here

- logger.debug("WDL task outputs stdlib thinks we really need to virtualize %s", filename)
+ logger.debug(
+ "WDL task outputs stdlib thinks we really need to virtualize %s", filename
+ )
  return super()._virtualize_filename(filename)


@@ -2755,6 +2827,7 @@ def evaluate_decl(
  log_bindings(logger.error, "Statement was evaluated in:", [environment])
  raise

+
  def evaluate_call_inputs(
  context: WDL.Error.SourceNode | WDL.Error.SourcePosition,
  expressions: dict[str, WDL.Expr.Base],
@@ -2800,8 +2873,7 @@ def evaluate_defaultable_decl(
  node.name in environment
  and not isinstance(environment[node.name], WDL.Value.Null)
  ) or (
- isinstance(environment.get(node.name), WDL.Value.Null)
- and node.type.optional
+ isinstance(environment.get(node.name), WDL.Value.Null) and node.type.optional
  ):
  logger.debug("Name %s is already defined, not using default", node.name)
  if not isinstance(environment[node.name].type, type(node.type)):
@@ -2819,7 +2891,6 @@
  return evaluate_decl(node, environment, stdlib)


-
  # TODO: make these stdlib methods???
  def devirtualize_inodes(
  environment: WDLBindings, stdlib: ToilWDLStdLibBase
@@ -2843,14 +2914,16 @@ def virtualize_inodes(
  logger.debug("Virtualizing files and directories")
  virtualize_func = cast(
  INodeTransform,
- partial(
- stdlib._virtualize_inode,
- enforce_existence=enforce_existence
- )
+ partial(stdlib._virtualize_inode, enforce_existence=enforce_existence),
  )
  return map_over_inodes_in_bindings(environment, virtualize_func)

- def delete_dead_files(internal_bindings: WDLBindings, live_bindings_list: list[WDLBindings], file_store: AbstractFileStore) -> None:
+
+ def delete_dead_files(
+ internal_bindings: WDLBindings,
+ live_bindings_list: list[WDLBindings],
+ file_store: AbstractFileStore,
+ ) -> None:
  """
  Delete any files that are in the given bindings but not in the live list.

@@ -2859,22 +2932,20 @@ def delete_dead_files(internal_bindings: WDLBindings, live_bindings_list: list[W
  """

  # Get all the files in the first bindings and not any of the others.
- unused_files = set(
- extract_toil_file_uris(internal_bindings)
- ).difference(
- *(
- extract_toil_file_uris(bindings)
- for bindings in live_bindings_list
- )
+ unused_files = set(extract_toil_file_uris(internal_bindings)).difference(
+ *(extract_toil_file_uris(bindings) for bindings in live_bindings_list)
  )

  for file_uri in unused_files:
  # Delete them
- assert is_toil_url(file_uri), f"Trying to clean up file {file_uri} not managed by Toil"
+ assert is_toil_url(
+ file_uri
+ ), f"Trying to clean up file {file_uri} not managed by Toil"
  logger.debug("Delete file %s that is not needed", file_uri)
  file_id, _, _, _ = unpack_toil_uri(file_uri)
  file_store.deleteGlobalFile(file_id)

+
  def all_parents(path: str) -> Iterable[str]:
  """
  Yield all parents of the given path, up to the filesystem root.
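
delete_dead_files() above now builds its unused set in one expression: the toilfile: URIs referenced by the internal bindings minus every URI still referenced by any of the live bindings, via set.difference(*iterables). The same arithmetic on placeholder strings (illustrative only, the URIs are made up):

    # Illustrative sketch only: live-set subtraction with set.difference(*iterables).
    internal = {"toilfile:a", "toilfile:b", "toilfile:c"}
    live_bindings_uris = [{"toilfile:a"}, {"toilfile:c"}]
    unused = internal.difference(*live_bindings_uris)
    assert unused == {"toilfile:b"}
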
@@ -2900,6 +2971,7 @@ def all_parents(path: str) -> Iterable[str]:
  here = os.path.dirname(here).rstrip("/")
  yield here + "/"

+
  def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:
  """
  Based off of WDL.runtime.task_container.add_paths from miniwdl
@@ -2929,7 +3001,14 @@ def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:
  #
  # TODO: I wish I had a BWT here but that seems fiddly.

- paths_with_slashes = (host_path + "/" if not host_path.endswith("/") and os.path.isdir(host_path) else host_path for host_path in host_paths)
+ paths_with_slashes = (
+ (
+ host_path + "/"
+ if not host_path.endswith("/") and os.path.isdir(host_path)
+ else host_path
+ )
+ for host_path in host_paths
+ )
  paths_by_length = list(sorted(paths_with_slashes, key=len))

  # This stores all the paths that need to be mounted, organized by top
@@ -2954,7 +3033,9 @@ def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:
  # We need to preserve sibling relationships among top items. So organize them by parents.
  top_items_by_parent = collections.defaultdict(list)
  for top_item in paths_by_top_item.keys():
- top_items_by_parent[os.path.dirname(top_item.rstrip("/")) + "/"].append(top_item)
+ top_items_by_parent[os.path.dirname(top_item.rstrip("/")) + "/"].append(
+ top_item
+ )

  logger.debug("Top items by parent: %s", top_items_by_parent)

@@ -2982,7 +3063,9 @@ def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:
  for host_path in paths_by_top_item[top_item]:
  # Figure out where relative to the parent's assigned path
  # in the container we should put this file/directory.
- container_path = os.path.join(parent_container_base, host_path[len(parent):])
+ container_path = os.path.join(
+ parent_container_base, host_path[len(parent) :]
+ )

  # Put it there.
  task_container.input_path_map[host_path] = container_path
@@ -2990,6 +3073,7 @@ def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None:

  logger.debug("Mount %s at %s", host_path, container_path)

+
  def drop_if_missing(
  inode: WDLINode, standard_library: ToilWDLStdLibBase
  ) -> WDLINode | None:
@@ -3006,12 +3090,9 @@
  if reference is not None and is_any_url(reference):
  try:
  if (
- is_toil_file_url(reference) or
- (
- is_toil_dir_url(reference) and
- directory_item_exists(reference)
- ) or
- URLAccess.url_exists(reference)
+ is_toil_file_url(reference)
+ or (is_toil_dir_url(reference) and directory_item_exists(reference))
+ or URLAccess.url_exists(reference)
  ):
  # We assume anything in the filestore actually exists.
  devirtualized_filename = standard_library._devirtualize_filename(
@@ -3037,9 +3118,7 @@
  raise
  else:
  # Get the absolute path, not resolving symlinks
- effective_path = os.path.abspath(
- os.path.join(work_dir, reference)
- )
+ effective_path = os.path.abspath(os.path.join(work_dir, reference))
  if os.path.islink(effective_path) or os.path.exists(effective_path):
  # This is a broken symlink or a working symlink or a file/directory.
  return inode
@@ -3052,6 +3131,7 @@
  )
  return None

+
  def missing_inode_dropper(standard_library: ToilWDLStdLibBase) -> INodeTransform:
  """
  Get a function to null out missing File/Directory values.
@@ -3063,13 +3143,10 @@ def missing_inode_dropper(standard_library: ToilWDLStdLibBase) -> INodeTransform
  # We need this to wrap partial() because MyPy can't really understand the
  # effects of partial() on making a function match a protocol.
  return cast(
- INodeTransform,
- partial(
- drop_if_missing,
- standard_library=standard_library
- )
+ INodeTransform, partial(drop_if_missing, standard_library=standard_library)
  )

+
  def drop_missing_files(
  environment: WDLBindings, standard_library: ToilWDLStdLibBase
  ) -> WDLBindings:
@@ -3080,7 +3157,9 @@
  Files must not be virtualized.
  """

- return map_over_inodes_in_bindings(environment, missing_inode_dropper(standard_library))
+ return map_over_inodes_in_bindings(
+ environment, missing_inode_dropper(standard_library)
+ )


  def get_paths_in_bindings(environment: WDLBindings) -> list[str]:
@@ -3139,12 +3218,14 @@
  binding.info,
  )

+
  def remove_expr_from_value(value: WDL.Value.Base) -> WDL.Value.Base:
  """
  Remove the expression from a WDL value
  :param value: Original WDL value
  :return: New WDL value without the expr field
  """
+
  # TODO: This is an extra copy that we could get rid of by dropping the immutability idea
  def predicate(value: WDL.Value.Base) -> WDL.Value.Base:
  # Do a shallow copy to preserve immutability
@@ -3159,8 +3240,10 @@ def remove_expr_from_value(value: WDL.Value.Base) -> WDL.Value.Base:
  else:
  new_value._expr = value.expr
  return new_value
+
  return map_over_typed_value(value, predicate)

+
  # TODO: We want to type this to say, for anything descended from a WDL type, we
  # return something descended from the same WDL type or a null. But I can't
  # quite do that with generics, since you could pass in some extended WDL value
@@ -3168,7 +3251,9 @@
  #
  # For now we assume that any types extending the WDL value types will implement
  # compatible constructors.
- def map_over_typed_value(value: WDL.Value.Base, transform: Callable[[WDL.Value.Base], WDL.Value.Base]) -> WDL.Value.Base:
+ def map_over_typed_value(
+ value: WDL.Value.Base, transform: Callable[[WDL.Value.Base], WDL.Value.Base]
+ ) -> WDL.Value.Base:
  """
  Apply a transform to a WDL value and all contained WDL values.
  :param value: WDL value to transform
@@ -3211,10 +3296,7 @@
  # This is a struct, so recurse on the values in the backing dict
  value = WDL.Value.Struct(
  cast(Union[WDL.Type.StructInstance, WDL.Type.Object], value.type),
- {
- k: map_over_typed_value(v, transform)
- for k, v in value.value.items()
- },
+ {k: map_over_typed_value(v, transform) for k, v in value.value.items()},
  value.expr,
  )
  # Run the predicate on the final value
@@ -3239,6 +3321,7 @@ def map_over_typed_inodes_in_value(
  actually be used, to allow for scans. So error checking needs to be part of
  the transform itself.
  """
+
  def predicate(value: WDL.Value.Base) -> WDL.Value.Base:
  if is_inode(value):
  # This is a File or Directory so we need to process it
@@ -3406,7 +3489,9 @@ class WDLBaseJob(Job):
  def remove_expr_from_bindings(self, bindings: WDLBindings) -> WDLBindings:
  # We have to throw out the expressions because they drag the entire WDL document into the WDL outputs
  # which causes duplicate pickling and linear growth in scatter memory usage
- return bindings.map(lambda b: WDL.Env.Binding(b.name, remove_expr_from_value(b.value), b.info))
+ return bindings.map(
+ lambda b: WDL.Env.Binding(b.name, remove_expr_from_value(b.value), b.info)
+ )

  def postprocess(self, bindings: WDLBindings) -> WDLBindings:
  """
@@ -3557,15 +3642,11 @@ class WDLTaskWrapperJob(WDLBaseJob):
  # Throw away anything input but not available outside the call or
  # output.
  delete_dead_files(
- bindings,
- [cached_bindings, self._enclosing_bindings],
- file_store
+ bindings, [cached_bindings, self._enclosing_bindings], file_store
  )

  # Postprocess and ship the output bindings.
- return self.postprocess(
- cached_bindings
- )
+ return self.postprocess(cached_bindings)

  if self._task.inputs:
  logger.debug("Evaluating task code")
@@ -3575,7 +3656,7 @@ class WDLTaskWrapperJob(WDLBaseJob):
  bindings,
  standard_library,
  include_previous=True,
- expressions_are_defaults=True
+ expressions_are_defaults=True,
  )
  if self._task.postinputs:
  # Evaluate all the postinput decls.
@@ -4139,8 +4220,10 @@ class WDLTaskJob(WDLBaseJob):
  "is not yet implemented in the MiniWDL Docker "
  "containerization implementation."
  )
- if runtime_bindings.has_binding("memory") and human2bytes(runtime_bindings.resolve("memory").value) < human2bytes("4MiB"):
- runtime_bindings.resolve("memory").value = "4MiB"
+ if runtime_bindings.has_binding("memory") and human2bytes(
+ runtime_bindings.resolve("memory").value
+ ) < human2bytes("4MiB"):
+ runtime_bindings.resolve("memory").value = "4MiB"
  else:
  raise RuntimeError(
  f"Could not find a working container engine to use; told to use {self._wdl_options.get('container')}"
@@ -4544,7 +4627,7 @@ class WDLTaskJob(WDLBaseJob):
  delete_dead_files(
  combine_bindings([bindings, runtime_bindings]),
  [output_bindings, self._enclosing_bindings],
- file_store
+ file_store,
  )
  # If File objects somehow made it to the runtime block they shouldn't
  # have been virtualized so don't bother with them.
@@ -4602,7 +4685,9 @@ class WDLWorkflowNodeJob(WDLBaseJob):
  value = evaluate_decl(self._node, incoming_bindings, standard_library)
  bindings = incoming_bindings.bind(self._node.name, value)
  # TODO: Only virtualize the new binding
- return self.postprocess(virtualize_inodes(bindings, standard_library, enforce_existence=False))
+ return self.postprocess(
+ virtualize_inodes(bindings, standard_library, enforce_existence=False)
+ )
  elif isinstance(self._node, WDL.Tree.Call):
  # This is a call of a task or workflow

@@ -4624,7 +4709,9 @@ class WDLWorkflowNodeJob(WDLBaseJob):
  inputs_mapping,
  )
  # Prepare call inputs to move to another node
- input_bindings = virtualize_inodes(input_bindings, standard_library, enforce_existence=False)
+ input_bindings = virtualize_inodes(
+ input_bindings, standard_library, enforce_existence=False
+ )

  # Bindings may also be added in from the enclosing workflow inputs
  # TODO: this is letting us also inject them from the workflow body.
@@ -4756,7 +4843,11 @@ class WDLWorkflowNodeListJob(WDLBaseJob):
  )

  # TODO: Only virtualize the new bindings created
- return self.postprocess(virtualize_inodes(current_bindings, standard_library, enforce_existence=False))
+ return self.postprocess(
+ virtualize_inodes(
+ current_bindings, standard_library, enforce_existence=False
+ )
+ )


  class WDLCombineBindingsJob(WDLBaseJob):
@@ -5611,7 +5702,9 @@ class WDLWorkflowJob(WDLSectionJob):
  [(p, p) for p in standard_library.get_local_paths()]
  )

- bindings = virtualize_inodes(bindings, standard_library, enforce_existence=False)
+ bindings = virtualize_inodes(
+ bindings, standard_library, enforce_existence=False
+ )
  # Make jobs to run all the parts of the workflow
  sink = self.create_subgraph(self._workflow.body, [], bindings)

@@ -5758,11 +5851,12 @@ class WDLOutputsJob(WDLBaseJob):
  delete_dead_files(
  unwrap(self._bindings),
  [output_bindings, self._enclosing_bindings],
- file_store
+ file_store,
  )

  return self.postprocess(output_bindings)

+
  class WDLStartJob(WDLSectionJob):
  """
  Job that evaluates an entire WDL workflow, and returns the workflow outputs
@@ -5830,7 +5924,7 @@ class WDLInstallImportsJob(Job):
  self,
  task_path: str,
  inputs: WDLBindings,
- import_data: Promised[Tuple[Dict[str, FileID], Dict[str, FileMetadata]]],
+ import_data: Promised[tuple[dict[str, FileID], dict[str, FileMetadata]]],
  **kwargs: Any,
  ) -> None:
  """
@@ -5851,7 +5945,9 @@
  """
  candidate_to_fileid = unwrap(self._import_data)[0]
  file_to_metadata = unwrap(self._import_data)[1]
- return virtualize_inodes_in_bindings(self._inputs, candidate_to_fileid, file_to_metadata, self._task_path)
+ return virtualize_inodes_in_bindings(
+ self._inputs, candidate_to_fileid, file_to_metadata, self._task_path
+ )


  class WDLImportWrapper(WDLSectionJob):
@@ -5864,7 +5960,7 @@ class WDLImportWrapper(WDLSectionJob):

  def __init__(
  self,
- target: Union[WDL.Tree.Workflow, WDL.Tree.Task],
+ target: WDL.Tree.Workflow | WDL.Tree.Task,
  inputs: WDLBindings,
  wdl_options: WDLContext,
  inputs_search_path: list[str],
@@ -5893,9 +5989,11 @@
  file_store.jobStore,
  self._inputs_search_path,
  include_remote_files=self._import_remote_files,
- execution_dir=self._wdl_options.get("execution_dir")
+ execution_dir=self._wdl_options.get("execution_dir"),
+ )
+ imports_job = ImportsJob(
+ file_to_metadata, self._import_workers_batchsize, self._import_workers_disk
  )
- imports_job = ImportsJob(file_to_metadata, self._import_workers_batchsize, self._import_workers_disk)
  self.addChild(imports_job)
  install_imports_job = WDLInstallImportsJob(
  self._target.name, self._inputs, imports_job.rv()
@@ -5928,7 +6026,7 @@ def make_root_job(
  inputs_search_path=inputs_search_path,
  import_remote_files=options.reference_inputs,
  import_workers_batchsize=options.import_workers_batchsize,
- import_workers_disk=options.import_workers_disk
+ import_workers_disk=options.import_workers_disk,
  )
  else:
  # Run WDL imports on leader
@@ -5968,7 +6066,7 @@ def main() -> None:
  raise RuntimeError(
  f"Workflow inputs cannot be specified with both the -i/--input/--inputs flag "
  f"and as a positional argument at the same time. Cannot use both "
- f"\"{input_sources[0]}\" and \"{input_sources[1]}\"."
+ f'"{input_sources[0]}" and "{input_sources[1]}".'
  )

  # Make sure we have an output directory (or URL prefix) and we don't need
@@ -5981,9 +6079,13 @@ def main() -> None:
  )

  try:
- wdl_uri, trs_spec = resolve_workflow(options.wdl_uri, supported_languages={"WDL"})
+ wdl_uri, trs_spec = resolve_workflow(
+ options.wdl_uri, supported_languages={"WDL"}
+ )

- with Toil(options, workflow_name=trs_spec or wdl_uri, trs_spec=trs_spec) as toil:
+ with Toil(
+ options, workflow_name=trs_spec or wdl_uri, trs_spec=trs_spec
+ ) as toil:
  # TODO: Move all the input parsing outside the Toil context
  # manager to avoid leaving a job store behind if the workflow
  # can't start.
@@ -5999,9 +6101,7 @@

  # Load the WDL document.
  document: WDL.Tree.Document = WDL.load(
- wdl_uri,
- read_source=toil_read_source,
- check_quant=options.quant_check
+ wdl_uri, read_source=toil_read_source, check_quant=options.quant_check
  )

  # See if we're going to run a workflow or a task
@@ -6057,12 +6157,16 @@
  ) # type: ignore[no-untyped-call]

  if getattr(WDL.Lint, "_shellcheck_available", None) is False:
- logger.info("Suggestion: install shellcheck (www.shellcheck.net) to check task commands")
+ logger.info(
+ "Suggestion: install shellcheck (www.shellcheck.net) to check task commands"
+ )

  if lint_warnings_counter[0]:
- logger.warning('Workflow lint warnings:\n%s', lint_warnings_io.getvalue().rstrip())
+ logger.warning(
+ "Workflow lint warnings:\n%s", lint_warnings_io.getvalue().rstrip()
+ )
  if options.strict:
- logger.critical(f'Workflow did not pass linting in strict mode')
+ logger.critical(f"Workflow did not pass linting in strict mode")
  # MiniWDL uses exit code 2 to indicate linting errors, so replicate that behavior
  sys.exit(2)