toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. toil/batchSystems/abstractBatchSystem.py +13 -5
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
  3. toil/batchSystems/kubernetes.py +13 -2
  4. toil/batchSystems/mesos/batchSystem.py +33 -2
  5. toil/batchSystems/registry.py +15 -118
  6. toil/batchSystems/slurm.py +191 -16
  7. toil/common.py +20 -1
  8. toil/cwl/cwltoil.py +97 -119
  9. toil/cwl/utils.py +103 -3
  10. toil/fileStores/__init__.py +1 -1
  11. toil/fileStores/abstractFileStore.py +5 -2
  12. toil/fileStores/cachingFileStore.py +1 -1
  13. toil/job.py +30 -14
  14. toil/jobStores/abstractJobStore.py +35 -255
  15. toil/jobStores/aws/jobStore.py +864 -1964
  16. toil/jobStores/aws/utils.py +24 -270
  17. toil/jobStores/fileJobStore.py +2 -1
  18. toil/jobStores/googleJobStore.py +32 -13
  19. toil/jobStores/utils.py +0 -327
  20. toil/leader.py +27 -22
  21. toil/lib/accelerators.py +1 -1
  22. toil/lib/aws/config.py +22 -0
  23. toil/lib/aws/s3.py +477 -9
  24. toil/lib/aws/utils.py +22 -33
  25. toil/lib/checksum.py +88 -0
  26. toil/lib/conversions.py +33 -31
  27. toil/lib/directory.py +217 -0
  28. toil/lib/ec2.py +97 -29
  29. toil/lib/exceptions.py +2 -1
  30. toil/lib/expando.py +2 -2
  31. toil/lib/generatedEC2Lists.py +138 -19
  32. toil/lib/io.py +33 -2
  33. toil/lib/memoize.py +21 -7
  34. toil/lib/misc.py +1 -1
  35. toil/lib/pipes.py +385 -0
  36. toil/lib/plugins.py +106 -0
  37. toil/lib/retry.py +1 -1
  38. toil/lib/threading.py +1 -1
  39. toil/lib/url.py +320 -0
  40. toil/lib/web.py +4 -5
  41. toil/options/cwl.py +13 -1
  42. toil/options/runner.py +17 -10
  43. toil/options/wdl.py +12 -1
  44. toil/provisioners/__init__.py +5 -2
  45. toil/provisioners/aws/__init__.py +43 -36
  46. toil/provisioners/aws/awsProvisioner.py +47 -15
  47. toil/provisioners/node.py +60 -12
  48. toil/resource.py +3 -13
  49. toil/server/app.py +12 -6
  50. toil/server/cli/wes_cwl_runner.py +2 -2
  51. toil/server/wes/abstract_backend.py +21 -43
  52. toil/server/wes/toil_backend.py +2 -2
  53. toil/test/__init__.py +16 -18
  54. toil/test/batchSystems/batchSystemTest.py +2 -9
  55. toil/test/batchSystems/batch_system_plugin_test.py +7 -0
  56. toil/test/batchSystems/test_slurm.py +103 -14
  57. toil/test/cwl/cwlTest.py +181 -8
  58. toil/test/cwl/staging_cat.cwl +27 -0
  59. toil/test/cwl/staging_make_file.cwl +25 -0
  60. toil/test/cwl/staging_workflow.cwl +43 -0
  61. toil/test/cwl/zero_default.cwl +61 -0
  62. toil/test/docs/scripts/tutorial_staging.py +17 -8
  63. toil/test/docs/scriptsTest.py +2 -1
  64. toil/test/jobStores/jobStoreTest.py +23 -133
  65. toil/test/lib/aws/test_iam.py +7 -7
  66. toil/test/lib/aws/test_s3.py +30 -33
  67. toil/test/lib/aws/test_utils.py +9 -9
  68. toil/test/lib/test_url.py +69 -0
  69. toil/test/lib/url_plugin_test.py +105 -0
  70. toil/test/provisioners/aws/awsProvisionerTest.py +60 -7
  71. toil/test/provisioners/clusterTest.py +15 -2
  72. toil/test/provisioners/gceProvisionerTest.py +1 -1
  73. toil/test/server/serverTest.py +78 -36
  74. toil/test/src/autoDeploymentTest.py +2 -3
  75. toil/test/src/fileStoreTest.py +89 -87
  76. toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
  77. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
  78. toil/test/utils/toilKillTest.py +35 -28
  79. toil/test/wdl/md5sum/md5sum-gs.json +1 -1
  80. toil/test/wdl/md5sum/md5sum.json +1 -1
  81. toil/test/wdl/testfiles/read_file.wdl +18 -0
  82. toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
  83. toil/test/wdl/wdltoil_test.py +171 -162
  84. toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
  85. toil/utils/toilDebugFile.py +6 -3
  86. toil/utils/toilSshCluster.py +23 -0
  87. toil/utils/toilStats.py +17 -2
  88. toil/utils/toilUpdateEC2Instances.py +1 -0
  89. toil/version.py +10 -10
  90. toil/wdl/wdltoil.py +1179 -825
  91. toil/worker.py +16 -8
  92. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/METADATA +32 -32
  93. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/RECORD +97 -85
  94. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/WHEEL +1 -1
  95. toil/lib/iterables.py +0 -112
  96. toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
  97. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
  98. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
  99. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
toil/common.py CHANGED
@@ -86,6 +86,7 @@ from toil.provisioners import add_provisioner_options, cluster_factory
86
86
  from toil.realtimeLogger import RealtimeLogger
87
87
  from toil.statsAndLogging import add_logging_options, set_logging_from_options
88
88
  from toil.version import dockerRegistry, dockerTag, version, baseVersion
89
+ from toil.lib.url import URLAccess
89
90
 
90
91
  if TYPE_CHECKING:
91
92
  from toil.batchSystems.abstractBatchSystem import AbstractBatchSystem
@@ -449,6 +450,11 @@ class Config:
449
450
 
450
451
  self.check_configuration_consistency()
451
452
 
453
+ # Check for deprecated Toil built-in autoscaling
454
+ # --provisioner is guaranteed to be set
455
+ if self.provisioner is not None and self.batchSystem == "mesos":
456
+ logger.warning("Toil built-in autoscaling with Mesos is deprecated as Mesos is no longer active. Please use Kubernetes-based autoscaling instead.")
457
+
452
458
  def check_configuration_consistency(self) -> None:
453
459
  """Old checks that cannot be fit into an action class for argparse"""
454
460
  if self.writeLogs and self.writeLogsGzip:
@@ -545,6 +551,19 @@ def generate_config(filepath: str) -> None:
545
551
  "enableCaching",
546
552
  "disableCaching",
547
553
  "version",
554
+ # Toil built-in autoscaling with mesos is deprecated as mesos has not been updated since Python 3.10
555
+ "provisioner",
556
+ "nodeTypes"
557
+ "minNodes",
558
+ "maxNodes",
559
+ "targetTime",
560
+ "betaInertia",
561
+ "scaleInterval",
562
+ "preemtibleCompensation",
563
+ "nodeStorage",
564
+ "nodeStorageOverrides",
565
+ "metrics",
566
+ "assumeZeroOverhead"
548
567
  )
549
568
 
550
569
  def create_config_dict_from_parser(parser: ArgumentParser) -> CommentedMap:
@@ -1397,7 +1416,7 @@ class Toil(ContextManager["Toil"]):
1397
1416
  self._batchSystem.setUserScript(userScriptResource)
1398
1417
 
1399
1418
  def url_exists(self, src_uri: str) -> bool:
1400
- return self._jobStore.url_exists(self.normalize_uri(src_uri))
1419
+ return URLAccess.url_exists(self.normalize_uri(src_uri))
1401
1420
 
1402
1421
  # Importing a file with a shared file name returns None, but without one it
1403
1422
  # returns a file ID. Explain this to MyPy.
toil/cwl/cwltoil.py CHANGED
@@ -34,7 +34,6 @@ import stat
34
34
  import sys
35
35
  import textwrap
36
36
  import uuid
37
- from collections.abc import Iterator, Mapping, MutableMapping, MutableSequence
38
37
  from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
39
38
  from threading import Thread
40
39
  from typing import (
@@ -111,6 +110,11 @@ from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
111
110
  from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
112
111
  from toil.common import Config, Toil, addOptions
113
112
  from toil.cwl import check_cwltool_version
113
+ from toil.lib.directory import (
114
+ DirectoryContents,
115
+ decode_directory,
116
+ encode_directory,
117
+ )
114
118
  from toil.lib.trs import resolve_workflow
115
119
  from toil.lib.misc import call_command
116
120
  from toil.provisioners.clusterScaler import JobTooBigError
@@ -122,6 +126,7 @@ from toil.cwl.utils import (
122
126
  download_structure,
123
127
  get_from_structure,
124
128
  visit_cwl_class_and_reduce,
129
+ remove_redundant_mounts
125
130
  )
126
131
  from toil.exceptions import FailedJobsException
127
132
  from toil.fileStores import FileID
@@ -149,6 +154,7 @@ from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
149
154
  from toil.lib.io import mkdtemp
150
155
  from toil.lib.threading import ExceptionalThread, global_mutex
151
156
  from toil.statsAndLogging import DEFAULT_LOGLEVEL
157
+ from toil.lib.url import URLAccess
152
158
 
153
159
  logger = logging.getLogger(__name__)
154
160
 
@@ -1155,7 +1161,7 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
1155
1161
  """Subclass the cwltool command line tool to provide the custom ToilPathMapper."""
1156
1162
 
1157
1163
  def _initialworkdir(
1158
- self, j: cwltool.job.JobBase, builder: cwltool.builder.Builder
1164
+ self, j: Optional[cwltool.job.JobBase], builder: cwltool.builder.Builder
1159
1165
  ) -> None:
1160
1166
  """
1161
1167
  Hook the InitialWorkDirRequirement setup to make sure that there are no
@@ -1165,6 +1171,9 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
1165
1171
  # Set up the initial work dir with all its files
1166
1172
  super()._initialworkdir(j, builder)
1167
1173
 
1174
+ if j is None:
1175
+ return # Only testing
1176
+
1168
1177
  # The initial work dir listing is now in j.generatefiles["listing"]
1169
1178
  # Also j.generatefiles is a CWL Directory.
1170
1179
  # So check the initial working directory.
@@ -1218,79 +1227,6 @@ def toil_make_tool(
1218
1227
  # URI instead of raising an error right away, in case it is optional.
1219
1228
  MISSING_FILE = "missing://"
1220
1229
 
1221
- DirectoryContents = dict[str, Union[str, "DirectoryContents"]]
1222
-
1223
-
1224
- def check_directory_dict_invariants(contents: DirectoryContents) -> None:
1225
- """
1226
- Make sure a directory structure dict makes sense. Throws an error
1227
- otherwise.
1228
-
1229
- Currently just checks to make sure no empty-string keys exist.
1230
- """
1231
-
1232
- for name, item in contents.items():
1233
- if name == "":
1234
- raise RuntimeError(
1235
- "Found nameless entry in directory: " + json.dumps(contents, indent=2)
1236
- )
1237
- if isinstance(item, dict):
1238
- check_directory_dict_invariants(item)
1239
-
1240
-
1241
- def decode_directory(
1242
- dir_path: str,
1243
- ) -> tuple[DirectoryContents, Optional[str], str]:
1244
- """
1245
- Decode a directory from a "toildir:" path to a directory (or a file in it).
1246
-
1247
- Returns the decoded directory dict, the remaining part of the path (which may be
1248
- None), and the deduplication key string that uniquely identifies the
1249
- directory.
1250
- """
1251
- if not dir_path.startswith("toildir:"):
1252
- raise RuntimeError(f"Cannot decode non-directory path: {dir_path}")
1253
-
1254
- # We will decode the directory and then look inside it
1255
-
1256
- # Since this was encoded by upload_directory we know the
1257
- # next piece is encoded JSON describing the directory structure,
1258
- # and it can't contain any slashes.
1259
- parts = dir_path[len("toildir:") :].split("/", 1)
1260
-
1261
- # Before the first slash is the encoded data describing the directory contents
1262
- dir_data = parts[0]
1263
-
1264
- # Decode what to download
1265
- contents = json.loads(
1266
- base64.urlsafe_b64decode(dir_data.encode("utf-8")).decode("utf-8")
1267
- )
1268
-
1269
- check_directory_dict_invariants(contents)
1270
-
1271
- if len(parts) == 1 or parts[1] == "/":
1272
- # We didn't have any subdirectory
1273
- return contents, None, dir_data
1274
- else:
1275
- # We have a path below this
1276
- return contents, parts[1], dir_data
1277
-
1278
-
1279
- def encode_directory(contents: DirectoryContents) -> str:
1280
- """
1281
- Encode a directory from a "toildir:" path to a directory (or a file in it).
1282
-
1283
- Takes the directory dict, which is a dict from name to URI for a file or
1284
- dict for a subdirectory.
1285
- """
1286
-
1287
- check_directory_dict_invariants(contents)
1288
-
1289
- return "toildir:" + base64.urlsafe_b64encode(
1290
- json.dumps(contents).encode("utf-8")
1291
- ).decode("utf-8")
1292
-
1293
-
1294
1230
  class ToilFsAccess(StdFsAccess):
1295
1231
  """
1296
1232
  Custom filesystem access class which handles toil filestore references.
@@ -1359,7 +1295,7 @@ class ToilFsAccess(StdFsAccess):
1359
1295
 
1360
1296
  # Decode its contents, the path inside it to the file (if any), and
1361
1297
  # the key to use for caching the directory.
1362
- contents, subpath, cache_key = decode_directory(path)
1298
+ contents, subpath, cache_key, _, _ = decode_directory(path)
1363
1299
  logger.debug("Decoded directory contents: %s", contents)
1364
1300
 
1365
1301
  if cache_key not in self.dir_to_download:
@@ -1395,7 +1331,7 @@ class ToilFsAccess(StdFsAccess):
1395
1331
  destination = path
1396
1332
  else:
1397
1333
  # The destination is something else.
1398
- if AbstractJobStore.get_is_directory(path):
1334
+ if URLAccess.get_is_directory(path):
1399
1335
  # Treat this as a directory
1400
1336
  if path not in self.dir_to_download:
1401
1337
  logger.debug(
@@ -1405,14 +1341,14 @@ class ToilFsAccess(StdFsAccess):
1405
1341
 
1406
1342
  # Recursively fetch all the files in the directory.
1407
1343
  def download_to(url: str, dest: str) -> None:
1408
- if AbstractJobStore.get_is_directory(url):
1344
+ if URLAccess.get_is_directory(url):
1409
1345
  os.mkdir(dest)
1410
- for part in AbstractJobStore.list_url(url):
1346
+ for part in URLAccess.list_url(url):
1411
1347
  download_to(
1412
1348
  os.path.join(url, part), os.path.join(dest, part)
1413
1349
  )
1414
1350
  else:
1415
- AbstractJobStore.read_from_url(url, open(dest, "wb"))
1351
+ URLAccess.read_from_url(url, open(dest, "wb"))
1416
1352
 
1417
1353
  download_to(path, dest_dir)
1418
1354
  self.dir_to_download[path] = dest_dir
@@ -1425,7 +1361,7 @@ class ToilFsAccess(StdFsAccess):
1425
1361
  # Try to grab it with a jobstore implementation, and save it
1426
1362
  # somewhere arbitrary.
1427
1363
  dest_file = NamedTemporaryFile(delete=False)
1428
- AbstractJobStore.read_from_url(path, dest_file)
1364
+ URLAccess.read_from_url(path, dest_file)
1429
1365
  dest_file.close()
1430
1366
  self.dir_to_download[path] = dest_file.name
1431
1367
  destination = self.dir_to_download[path]
@@ -1461,7 +1397,7 @@ class ToilFsAccess(StdFsAccess):
1461
1397
  # Handle local files
1462
1398
  return open(self._abs(fn), mode)
1463
1399
  elif parse.scheme == "toildir":
1464
- contents, subpath, cache_key = decode_directory(fn)
1400
+ contents, subpath, cache_key, _, _ = decode_directory(fn)
1465
1401
  if cache_key in self.dir_to_download:
1466
1402
  # This is already available locally, so fall back on the local copy
1467
1403
  return open(self._abs(fn), mode)
@@ -1483,7 +1419,7 @@ class ToilFsAccess(StdFsAccess):
1483
1419
  return open(self._abs(fn), mode)
1484
1420
  else:
1485
1421
  # This should be supported by a job store.
1486
- byte_stream = AbstractJobStore.open_url(fn)
1422
+ byte_stream = URLAccess.open_url(fn)
1487
1423
  if "b" in mode:
1488
1424
  # Pass stream along in binary
1489
1425
  return byte_stream
@@ -1502,7 +1438,7 @@ class ToilFsAccess(StdFsAccess):
1502
1438
  except NoSuchFileException:
1503
1439
  return False
1504
1440
  elif parse.scheme == "toildir":
1505
- contents, subpath, cache_key = decode_directory(path)
1441
+ contents, subpath, cache_key, _, _ = decode_directory(path)
1506
1442
  if subpath is None:
1507
1443
  # The toildir directory itself exists
1508
1444
  return True
@@ -1520,7 +1456,7 @@ class ToilFsAccess(StdFsAccess):
1520
1456
  return True
1521
1457
  else:
1522
1458
  # This should be supported by a job store.
1523
- return AbstractJobStore.url_exists(path)
1459
+ return URLAccess.url_exists(path)
1524
1460
 
1525
1461
  def size(self, path: str) -> int:
1526
1462
  parse = urlparse(path)
@@ -1529,7 +1465,7 @@ class ToilFsAccess(StdFsAccess):
1529
1465
  elif parse.scheme == "toildir":
1530
1466
  # Decode its contents, the path inside it to the file (if any), and
1531
1467
  # the key to use for caching the directory.
1532
- contents, subpath, cache_key = decode_directory(path)
1468
+ contents, subpath, cache_key, _, _ = decode_directory(path)
1533
1469
 
1534
1470
  # We can't get the size of just a directory.
1535
1471
  if subpath is None:
@@ -1549,7 +1485,7 @@ class ToilFsAccess(StdFsAccess):
1549
1485
  )
1550
1486
  else:
1551
1487
  # This should be supported by a job store.
1552
- size = AbstractJobStore.get_size(path)
1488
+ size = URLAccess.get_size(path)
1553
1489
  if size is None:
1554
1490
  # get_size can be unimplemented or unavailable
1555
1491
  raise RuntimeError(f"Could not get size of {path}")
@@ -1563,7 +1499,7 @@ class ToilFsAccess(StdFsAccess):
1563
1499
  # TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
1564
1500
  return True
1565
1501
  elif parse.scheme == "toildir":
1566
- contents, subpath, cache_key = decode_directory(fn)
1502
+ contents, subpath, cache_key, _, _ = decode_directory(fn)
1567
1503
  if subpath is None:
1568
1504
  # This is the toildir directory itself
1569
1505
  return False
@@ -1572,7 +1508,7 @@ class ToilFsAccess(StdFsAccess):
1572
1508
  # TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
1573
1509
  return isinstance(found, str)
1574
1510
  else:
1575
- return self.exists(fn) and not AbstractJobStore.get_is_directory(fn)
1511
+ return self.exists(fn) and not URLAccess.get_is_directory(fn)
1576
1512
 
1577
1513
  def isdir(self, fn: str) -> bool:
1578
1514
  logger.debug("ToilFsAccess checking type of %s", fn)
@@ -1582,7 +1518,7 @@ class ToilFsAccess(StdFsAccess):
1582
1518
  elif parse.scheme == "toilfile":
1583
1519
  return False
1584
1520
  elif parse.scheme == "toildir":
1585
- contents, subpath, cache_key = decode_directory(fn)
1521
+ contents, subpath, cache_key, _, _ = decode_directory(fn)
1586
1522
  if subpath is None:
1587
1523
  # This is the toildir directory itself.
1588
1524
  # TODO: We assume directories can't be deleted.
@@ -1592,7 +1528,7 @@ class ToilFsAccess(StdFsAccess):
1592
1528
  # TODO: We assume directories can't be deleted.
1593
1529
  return isinstance(found, dict)
1594
1530
  else:
1595
- status = AbstractJobStore.get_is_directory(fn)
1531
+ status = URLAccess.get_is_directory(fn)
1596
1532
  logger.debug("AbstractJobStore said: %s", status)
1597
1533
  return status
1598
1534
 
@@ -1610,7 +1546,7 @@ class ToilFsAccess(StdFsAccess):
1610
1546
  elif parse.scheme == "toilfile":
1611
1547
  raise RuntimeError(f"Cannot list a file: {fn}")
1612
1548
  elif parse.scheme == "toildir":
1613
- contents, subpath, cache_key = decode_directory(fn)
1549
+ contents, subpath, cache_key, _, _ = decode_directory(fn)
1614
1550
  here = contents
1615
1551
  if subpath is not None:
1616
1552
  got = get_from_structure(contents, subpath)
@@ -1626,7 +1562,7 @@ class ToilFsAccess(StdFsAccess):
1626
1562
  else:
1627
1563
  return [
1628
1564
  os.path.join(fn, entry.rstrip("/"))
1629
- for entry in AbstractJobStore.list_url(fn)
1565
+ for entry in URLAccess.list_url(fn)
1630
1566
  ]
1631
1567
 
1632
1568
  def join(self, path: str, *paths: str) -> str:
@@ -1736,7 +1672,7 @@ def toil_get_file(
1736
1672
  pipe.write(data)
1737
1673
  else:
1738
1674
  # Stream from some other URI
1739
- AbstractJobStore.read_from_url(uri, pipe)
1675
+ URLAccess.read_from_url(uri, pipe)
1740
1676
  except OSError as e:
1741
1677
  # The other side of the pipe may have been closed by the
1742
1678
  # reading thread, which is OK.
@@ -1779,7 +1715,7 @@ def toil_get_file(
1779
1715
  # Open that path exclusively to make sure we created it
1780
1716
  with open(src_path, "xb") as fh:
1781
1717
  # Download into the file
1782
- size, executable = AbstractJobStore.read_from_url(uri, fh)
1718
+ size, executable = URLAccess.read_from_url(uri, fh)
1783
1719
  if executable:
1784
1720
  # Set the execute bit in the file's permissions
1785
1721
  os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
@@ -2401,7 +2337,7 @@ def toilStageFiles(
2401
2337
 
2402
2338
  if file_id_or_contents.startswith("toildir:"):
2403
2339
  # Get the directory contents and the path into them, if any
2404
- here, subpath, _ = decode_directory(file_id_or_contents)
2340
+ here, subpath, _, _, _ = decode_directory(file_id_or_contents)
2405
2341
  if subpath is not None:
2406
2342
  for part in subpath.split("/"):
2407
2343
  here = cast(DirectoryContents, here[part])
@@ -2583,7 +2519,7 @@ class CWLJob(CWLNamedJob):
2583
2519
  resources={},
2584
2520
  mutation_manager=runtime_context.mutation_manager,
2585
2521
  formatgraph=tool.formatgraph,
2586
- make_fs_access=cast(type[StdFsAccess], runtime_context.make_fs_access),
2522
+ make_fs_access=runtime_context.make_fs_access,
2587
2523
  fs_access=runtime_context.make_fs_access(""),
2588
2524
  job_script_provider=runtime_context.job_script_provider,
2589
2525
  timeout=runtime_context.eval_timeout,
@@ -2613,6 +2549,12 @@ class CWLJob(CWLNamedJob):
2613
2549
  else:
2614
2550
  # We use a None requirement and the Toil default applies.
2615
2551
  memory = None
2552
+
2553
+ # Imposing a minimum memory limit
2554
+ min_ram = getattr(runtime_context, "cwl_min_ram")
2555
+ if min_ram is not None and memory is not None:
2556
+ # Note: if the job is using the toil default memory, it won't be increased
2557
+ memory = max(memory, min_ram)
2616
2558
 
2617
2559
  accelerators: Optional[list[AcceleratorRequirement]] = None
2618
2560
  if req.get("cudaDeviceCount", 0) > 0:
@@ -2751,6 +2693,9 @@ class CWLJob(CWLNamedJob):
2751
2693
 
2752
2694
  cwljob = resolve_dict_w_promises(self.cwljob, file_store)
2753
2695
 
2696
+ # Deletes duplicate listings
2697
+ remove_redundant_mounts(cwljob)
2698
+
2754
2699
  if self.conditional.is_false(cwljob):
2755
2700
  return self.conditional.skipped_outputs()
2756
2701
 
@@ -2984,24 +2929,23 @@ def makeRootJob(
2984
2929
  else:
2985
2930
  worker_metadata[filename] = file_data
2986
2931
 
2932
+ if worker_metadata:
2933
+ logger.info(
2934
+ "Planning to import %s files on workers",
2935
+ len(worker_metadata),
2936
+ )
2937
+
2987
2938
  # import the files for the leader first
2988
2939
  path_to_fileid = WorkerImportJob.import_files(
2989
2940
  list(leader_metadata.keys()), toil._jobStore
2990
2941
  )
2991
2942
 
2992
- # then install the imported files before importing the other files
2993
- # this way the control flow can fall from the leader to workers
2994
- tool, initialized_job_order = CWLInstallImportsJob.fill_in_files(
2995
- initialized_job_order,
2996
- tool,
2997
- path_to_fileid,
2998
- options.basedir,
2999
- options.reference_inputs,
3000
- options.bypass_file_store,
3001
- )
2943
+ # Because installing the imported files expects all files to have been
2944
+ # imported, we don't do that here; we combine the leader imports and
2945
+ # the worker imports and install them all at once.
3002
2946
 
3003
2947
  import_job = CWLImportWrapper(
3004
- initialized_job_order, tool, runtime_context, worker_metadata, options
2948
+ initialized_job_order, tool, runtime_context, worker_metadata, path_to_fileid, options
3005
2949
  )
3006
2950
  return import_job
3007
2951
  else:
@@ -3573,7 +3517,7 @@ class CWLInstallImportsJob(Job):
3573
3517
  basedir: str,
3574
3518
  skip_remote: bool,
3575
3519
  bypass_file_store: bool,
3576
- import_data: Promised[dict[str, FileID]],
3520
+ import_data: list[Promised[dict[str, FileID]]],
3577
3521
  **kwargs: Any,
3578
3522
  ) -> None:
3579
3523
  """
@@ -3581,6 +3525,8 @@ class CWLInstallImportsJob(Job):
3581
3525
  to convert all file locations to URIs.
3582
3526
 
3583
3527
  This class is only used when runImportsOnWorkers is enabled.
3528
+
3529
+ :param import_data: List of mappings from file URI to imported file ID.
3584
3530
  """
3585
3531
  super().__init__(local=True, **kwargs)
3586
3532
  self.initialized_job_order = initialized_job_order
@@ -3590,6 +3536,8 @@ class CWLInstallImportsJob(Job):
3590
3536
  self.bypass_file_store = bypass_file_store
3591
3537
  self.import_data = import_data
3592
3538
 
3539
+ # TODO: Since we only call this from the class itself now it doesn't really
3540
+ # need to be static anymore.
3593
3541
  @staticmethod
3594
3542
  def fill_in_files(
3595
3543
  initialized_job_order: CWLObjectType,
@@ -3607,7 +3555,12 @@ class CWLInstallImportsJob(Job):
3607
3555
  """
3608
3556
  Return the file name's associated Toil file ID
3609
3557
  """
3610
- return candidate_to_fileid[filename]
3558
+ try:
3559
+ return candidate_to_fileid[filename]
3560
+ except KeyError:
3561
+ # Give something more useful than a KeyError if something went
3562
+ # wrong with the importing.
3563
+ raise RuntimeError(f"File at \"{filename}\" was never imported.")
3611
3564
 
3612
3565
  file_convert_function = functools.partial(
3613
3566
  extract_and_convert_file_to_toil_uri, fill_in_file
@@ -3654,11 +3607,19 @@ class CWLInstallImportsJob(Job):
3654
3607
  Convert the filenames in the workflow inputs into the URIs
3655
3608
  :return: Promise of transformed workflow inputs. A tuple of the job order and process
3656
3609
  """
3657
- candidate_to_fileid: dict[str, FileID] = unwrap(self.import_data)
3610
+
3611
+ # Merge all the input dicts down to one to check.
3612
+ candidate_to_fileid: dict[str, FileID] = {
3613
+ k: v for mapping in unwrap(
3614
+ self.import_data
3615
+ ) for k, v in unwrap(mapping).items()
3616
+ }
3658
3617
 
3659
3618
  initialized_job_order = unwrap(self.initialized_job_order)
3660
3619
  tool = unwrap(self.tool)
3661
- return CWLInstallImportsJob.fill_in_files(
3620
+
3621
+ # Install the imported files in the tool and job order
3622
+ return self.fill_in_files(
3662
3623
  initialized_job_order,
3663
3624
  tool,
3664
3625
  candidate_to_fileid,
@@ -3682,33 +3643,46 @@ class CWLImportWrapper(CWLNamedJob):
3682
3643
  tool: Process,
3683
3644
  runtime_context: cwltool.context.RuntimeContext,
3684
3645
  file_to_data: dict[str, FileMetadata],
3646
+ imported_files: dict[str, FileID],
3685
3647
  options: Namespace,
3686
3648
  ):
3687
- super().__init__(local=False, disk=options.import_workers_threshold)
3649
+ """
3650
+ Make a job to do file imports on workers and then run the workflow.
3651
+
3652
+ :param file_to_data: Metadata for files that need to be imported on the
3653
+ worker.
3654
+ :param imported_files: Files already imported on the leader.
3655
+ """
3656
+ super().__init__(local=False, disk=options.import_workers_batchsize)
3688
3657
  self.initialized_job_order = initialized_job_order
3689
3658
  self.tool = tool
3690
- self.options = options
3691
3659
  self.runtime_context = runtime_context
3692
3660
  self.file_to_data = file_to_data
3661
+ self.imported_files = imported_files
3662
+ self.options = options
3693
3663
 
3694
3664
  def run(self, file_store: AbstractFileStore) -> Any:
3665
+ # Do the worker-based imports
3695
3666
  imports_job = ImportsJob(
3696
3667
  self.file_to_data,
3697
- self.options.import_workers_threshold,
3668
+ self.options.import_workers_batchsize,
3698
3669
  self.options.import_workers_disk,
3699
3670
  )
3700
3671
  self.addChild(imports_job)
3672
+
3673
+ # Install the worker imports and any leader imports
3701
3674
  install_imports_job = CWLInstallImportsJob(
3702
3675
  initialized_job_order=self.initialized_job_order,
3703
3676
  tool=self.tool,
3704
3677
  basedir=self.options.basedir,
3705
3678
  skip_remote=self.options.reference_inputs,
3706
3679
  bypass_file_store=self.options.bypass_file_store,
3707
- import_data=imports_job.rv(0),
3680
+ import_data=[self.imported_files, imports_job.rv(0)],
3708
3681
  )
3709
3682
  self.addChild(install_imports_job)
3710
3683
  imports_job.addFollowOn(install_imports_job)
3711
3684
 
3685
+ # Run the workflow
3712
3686
  start_job = CWLStartJob(
3713
3687
  install_imports_job.rv(0),
3714
3688
  install_imports_job.rv(1),
@@ -4212,6 +4186,8 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
4212
4186
  options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
4213
4187
  )
4214
4188
  tmp_outdir_prefix = options.tmp_outdir_prefix or tmpdir_prefix
4189
+ # tmpdir_prefix and tmp_outdir_prefix must not be checked for existence as they may exist on a worker only path
4190
+ # See https://github.com/DataBiosphere/toil/issues/5310
4215
4191
  workdir = options.workDir or tmp_outdir_prefix
4216
4192
 
4217
4193
  if options.jobStore is None:
@@ -4262,6 +4238,7 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
4262
4238
  runtime_context.workdir = workdir # type: ignore[attr-defined]
4263
4239
  runtime_context.outdir = outdir
4264
4240
  setattr(runtime_context, "cwl_default_ram", options.cwl_default_ram)
4241
+ setattr(runtime_context, "cwl_min_ram", options.cwl_min_ram)
4265
4242
  runtime_context.move_outputs = "leave"
4266
4243
  runtime_context.rm_tmpdir = False
4267
4244
  runtime_context.streaming_allowed = not options.disable_streaming
@@ -4272,11 +4249,12 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
4272
4249
  # of filestore files and caches those.
4273
4250
  logger.debug("CWL task caching is turned on. Bypassing file store.")
4274
4251
  options.bypass_file_store = True
4252
+
4253
+ # Ensure the cache directory exists
4254
+ # Only ensure the caching directory exists as that must be local.
4255
+ os.makedirs(os.path.abspath(options.cachedir), exist_ok=True)
4275
4256
  if options.mpi_config_file is not None:
4276
4257
  runtime_context.mpi_config = MpiConfig.load(options.mpi_config_file)
4277
- if cwltool.main.check_working_directories(runtime_context) is not None:
4278
- logger.error("Failed to create directory. If using tmpdir_prefix, tmpdir_outdir_prefix, or cachedir, consider changing directory locations.")
4279
- return 1
4280
4258
  setattr(runtime_context, "bypass_file_store", options.bypass_file_store)
4281
4259
  if options.bypass_file_store and options.destBucket:
4282
4260
  # We use the file store to write to buckets, so we can't do this (yet?)