toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/batchSystems/abstractBatchSystem.py +13 -5
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
- toil/batchSystems/kubernetes.py +13 -2
- toil/batchSystems/mesos/batchSystem.py +33 -2
- toil/batchSystems/registry.py +15 -118
- toil/batchSystems/slurm.py +191 -16
- toil/common.py +20 -1
- toil/cwl/cwltoil.py +97 -119
- toil/cwl/utils.py +103 -3
- toil/fileStores/__init__.py +1 -1
- toil/fileStores/abstractFileStore.py +5 -2
- toil/fileStores/cachingFileStore.py +1 -1
- toil/job.py +30 -14
- toil/jobStores/abstractJobStore.py +35 -255
- toil/jobStores/aws/jobStore.py +864 -1964
- toil/jobStores/aws/utils.py +24 -270
- toil/jobStores/fileJobStore.py +2 -1
- toil/jobStores/googleJobStore.py +32 -13
- toil/jobStores/utils.py +0 -327
- toil/leader.py +27 -22
- toil/lib/accelerators.py +1 -1
- toil/lib/aws/config.py +22 -0
- toil/lib/aws/s3.py +477 -9
- toil/lib/aws/utils.py +22 -33
- toil/lib/checksum.py +88 -0
- toil/lib/conversions.py +33 -31
- toil/lib/directory.py +217 -0
- toil/lib/ec2.py +97 -29
- toil/lib/exceptions.py +2 -1
- toil/lib/expando.py +2 -2
- toil/lib/generatedEC2Lists.py +138 -19
- toil/lib/io.py +33 -2
- toil/lib/memoize.py +21 -7
- toil/lib/misc.py +1 -1
- toil/lib/pipes.py +385 -0
- toil/lib/plugins.py +106 -0
- toil/lib/retry.py +1 -1
- toil/lib/threading.py +1 -1
- toil/lib/url.py +320 -0
- toil/lib/web.py +4 -5
- toil/options/cwl.py +13 -1
- toil/options/runner.py +17 -10
- toil/options/wdl.py +12 -1
- toil/provisioners/__init__.py +5 -2
- toil/provisioners/aws/__init__.py +43 -36
- toil/provisioners/aws/awsProvisioner.py +47 -15
- toil/provisioners/node.py +60 -12
- toil/resource.py +3 -13
- toil/server/app.py +12 -6
- toil/server/cli/wes_cwl_runner.py +2 -2
- toil/server/wes/abstract_backend.py +21 -43
- toil/server/wes/toil_backend.py +2 -2
- toil/test/__init__.py +16 -18
- toil/test/batchSystems/batchSystemTest.py +2 -9
- toil/test/batchSystems/batch_system_plugin_test.py +7 -0
- toil/test/batchSystems/test_slurm.py +103 -14
- toil/test/cwl/cwlTest.py +181 -8
- toil/test/cwl/staging_cat.cwl +27 -0
- toil/test/cwl/staging_make_file.cwl +25 -0
- toil/test/cwl/staging_workflow.cwl +43 -0
- toil/test/cwl/zero_default.cwl +61 -0
- toil/test/docs/scripts/tutorial_staging.py +17 -8
- toil/test/docs/scriptsTest.py +2 -1
- toil/test/jobStores/jobStoreTest.py +23 -133
- toil/test/lib/aws/test_iam.py +7 -7
- toil/test/lib/aws/test_s3.py +30 -33
- toil/test/lib/aws/test_utils.py +9 -9
- toil/test/lib/test_url.py +69 -0
- toil/test/lib/url_plugin_test.py +105 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +60 -7
- toil/test/provisioners/clusterTest.py +15 -2
- toil/test/provisioners/gceProvisionerTest.py +1 -1
- toil/test/server/serverTest.py +78 -36
- toil/test/src/autoDeploymentTest.py +2 -3
- toil/test/src/fileStoreTest.py +89 -87
- toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
- toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
- toil/test/utils/toilKillTest.py +35 -28
- toil/test/wdl/md5sum/md5sum-gs.json +1 -1
- toil/test/wdl/md5sum/md5sum.json +1 -1
- toil/test/wdl/testfiles/read_file.wdl +18 -0
- toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
- toil/test/wdl/wdltoil_test.py +171 -162
- toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
- toil/utils/toilDebugFile.py +6 -3
- toil/utils/toilSshCluster.py +23 -0
- toil/utils/toilStats.py +17 -2
- toil/utils/toilUpdateEC2Instances.py +1 -0
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +1179 -825
- toil/worker.py +16 -8
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/METADATA +32 -32
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/RECORD +97 -85
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/WHEEL +1 -1
- toil/lib/iterables.py +0 -112
- toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
toil/common.py
CHANGED
|
@@ -86,6 +86,7 @@ from toil.provisioners import add_provisioner_options, cluster_factory
|
|
|
86
86
|
from toil.realtimeLogger import RealtimeLogger
|
|
87
87
|
from toil.statsAndLogging import add_logging_options, set_logging_from_options
|
|
88
88
|
from toil.version import dockerRegistry, dockerTag, version, baseVersion
|
|
89
|
+
from toil.lib.url import URLAccess
|
|
89
90
|
|
|
90
91
|
if TYPE_CHECKING:
|
|
91
92
|
from toil.batchSystems.abstractBatchSystem import AbstractBatchSystem
|
|
@@ -449,6 +450,11 @@ class Config:
|
|
|
449
450
|
|
|
450
451
|
self.check_configuration_consistency()
|
|
451
452
|
|
|
453
|
+
# Check for deprecated Toil built-in autoscaling
|
|
454
|
+
# --provisioner is guaranteed to be set
|
|
455
|
+
if self.provisioner is not None and self.batchSystem == "mesos":
|
|
456
|
+
logger.warning("Toil built-in autoscaling with Mesos is deprecated as Mesos is no longer active. Please use Kubernetes-based autoscaling instead.")
|
|
457
|
+
|
|
452
458
|
def check_configuration_consistency(self) -> None:
|
|
453
459
|
"""Old checks that cannot be fit into an action class for argparse"""
|
|
454
460
|
if self.writeLogs and self.writeLogsGzip:
|
|
@@ -545,6 +551,19 @@ def generate_config(filepath: str) -> None:
|
|
|
545
551
|
"enableCaching",
|
|
546
552
|
"disableCaching",
|
|
547
553
|
"version",
|
|
554
|
+
# Toil built-in autoscaling with mesos is deprecated as mesos has not been updated since Python 3.10
|
|
555
|
+
"provisioner",
|
|
556
|
+
"nodeTypes"
|
|
557
|
+
"minNodes",
|
|
558
|
+
"maxNodes",
|
|
559
|
+
"targetTime",
|
|
560
|
+
"betaInertia",
|
|
561
|
+
"scaleInterval",
|
|
562
|
+
"preemtibleCompensation",
|
|
563
|
+
"nodeStorage",
|
|
564
|
+
"nodeStorageOverrides",
|
|
565
|
+
"metrics",
|
|
566
|
+
"assumeZeroOverhead"
|
|
548
567
|
)
|
|
549
568
|
|
|
550
569
|
def create_config_dict_from_parser(parser: ArgumentParser) -> CommentedMap:
|
|
@@ -1397,7 +1416,7 @@ class Toil(ContextManager["Toil"]):
|
|
|
1397
1416
|
self._batchSystem.setUserScript(userScriptResource)
|
|
1398
1417
|
|
|
1399
1418
|
def url_exists(self, src_uri: str) -> bool:
|
|
1400
|
-
return
|
|
1419
|
+
return URLAccess.url_exists(self.normalize_uri(src_uri))
|
|
1401
1420
|
|
|
1402
1421
|
# Importing a file with a shared file name returns None, but without one it
|
|
1403
1422
|
# returns a file ID. Explain this to MyPy.
|
toil/cwl/cwltoil.py
CHANGED
|
@@ -34,7 +34,6 @@ import stat
|
|
|
34
34
|
import sys
|
|
35
35
|
import textwrap
|
|
36
36
|
import uuid
|
|
37
|
-
from collections.abc import Iterator, Mapping, MutableMapping, MutableSequence
|
|
38
37
|
from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
|
|
39
38
|
from threading import Thread
|
|
40
39
|
from typing import (
|
|
@@ -111,6 +110,11 @@ from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
|
|
|
111
110
|
from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
|
|
112
111
|
from toil.common import Config, Toil, addOptions
|
|
113
112
|
from toil.cwl import check_cwltool_version
|
|
113
|
+
from toil.lib.directory import (
|
|
114
|
+
DirectoryContents,
|
|
115
|
+
decode_directory,
|
|
116
|
+
encode_directory,
|
|
117
|
+
)
|
|
114
118
|
from toil.lib.trs import resolve_workflow
|
|
115
119
|
from toil.lib.misc import call_command
|
|
116
120
|
from toil.provisioners.clusterScaler import JobTooBigError
|
|
@@ -122,6 +126,7 @@ from toil.cwl.utils import (
|
|
|
122
126
|
download_structure,
|
|
123
127
|
get_from_structure,
|
|
124
128
|
visit_cwl_class_and_reduce,
|
|
129
|
+
remove_redundant_mounts
|
|
125
130
|
)
|
|
126
131
|
from toil.exceptions import FailedJobsException
|
|
127
132
|
from toil.fileStores import FileID
|
|
@@ -149,6 +154,7 @@ from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
|
|
|
149
154
|
from toil.lib.io import mkdtemp
|
|
150
155
|
from toil.lib.threading import ExceptionalThread, global_mutex
|
|
151
156
|
from toil.statsAndLogging import DEFAULT_LOGLEVEL
|
|
157
|
+
from toil.lib.url import URLAccess
|
|
152
158
|
|
|
153
159
|
logger = logging.getLogger(__name__)
|
|
154
160
|
|
|
@@ -1155,7 +1161,7 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
|
|
|
1155
1161
|
"""Subclass the cwltool command line tool to provide the custom ToilPathMapper."""
|
|
1156
1162
|
|
|
1157
1163
|
def _initialworkdir(
|
|
1158
|
-
self, j: cwltool.job.JobBase, builder: cwltool.builder.Builder
|
|
1164
|
+
self, j: Optional[cwltool.job.JobBase], builder: cwltool.builder.Builder
|
|
1159
1165
|
) -> None:
|
|
1160
1166
|
"""
|
|
1161
1167
|
Hook the InitialWorkDirRequirement setup to make sure that there are no
|
|
@@ -1165,6 +1171,9 @@ class ToilCommandLineTool(ToilTool, cwltool.command_line_tool.CommandLineTool):
|
|
|
1165
1171
|
# Set up the initial work dir with all its files
|
|
1166
1172
|
super()._initialworkdir(j, builder)
|
|
1167
1173
|
|
|
1174
|
+
if j is None:
|
|
1175
|
+
return # Only testing
|
|
1176
|
+
|
|
1168
1177
|
# The initial work dir listing is now in j.generatefiles["listing"]
|
|
1169
1178
|
# Also j.generatefiles is a CWL Directory.
|
|
1170
1179
|
# So check the initial working directory.
|
|
@@ -1218,79 +1227,6 @@ def toil_make_tool(
|
|
|
1218
1227
|
# URI instead of raising an error right away, in case it is optional.
|
|
1219
1228
|
MISSING_FILE = "missing://"
|
|
1220
1229
|
|
|
1221
|
-
DirectoryContents = dict[str, Union[str, "DirectoryContents"]]
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
def check_directory_dict_invariants(contents: DirectoryContents) -> None:
|
|
1225
|
-
"""
|
|
1226
|
-
Make sure a directory structure dict makes sense. Throws an error
|
|
1227
|
-
otherwise.
|
|
1228
|
-
|
|
1229
|
-
Currently just checks to make sure no empty-string keys exist.
|
|
1230
|
-
"""
|
|
1231
|
-
|
|
1232
|
-
for name, item in contents.items():
|
|
1233
|
-
if name == "":
|
|
1234
|
-
raise RuntimeError(
|
|
1235
|
-
"Found nameless entry in directory: " + json.dumps(contents, indent=2)
|
|
1236
|
-
)
|
|
1237
|
-
if isinstance(item, dict):
|
|
1238
|
-
check_directory_dict_invariants(item)
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
def decode_directory(
|
|
1242
|
-
dir_path: str,
|
|
1243
|
-
) -> tuple[DirectoryContents, Optional[str], str]:
|
|
1244
|
-
"""
|
|
1245
|
-
Decode a directory from a "toildir:" path to a directory (or a file in it).
|
|
1246
|
-
|
|
1247
|
-
Returns the decoded directory dict, the remaining part of the path (which may be
|
|
1248
|
-
None), and the deduplication key string that uniquely identifies the
|
|
1249
|
-
directory.
|
|
1250
|
-
"""
|
|
1251
|
-
if not dir_path.startswith("toildir:"):
|
|
1252
|
-
raise RuntimeError(f"Cannot decode non-directory path: {dir_path}")
|
|
1253
|
-
|
|
1254
|
-
# We will decode the directory and then look inside it
|
|
1255
|
-
|
|
1256
|
-
# Since this was encoded by upload_directory we know the
|
|
1257
|
-
# next piece is encoded JSON describing the directory structure,
|
|
1258
|
-
# and it can't contain any slashes.
|
|
1259
|
-
parts = dir_path[len("toildir:") :].split("/", 1)
|
|
1260
|
-
|
|
1261
|
-
# Before the first slash is the encoded data describing the directory contents
|
|
1262
|
-
dir_data = parts[0]
|
|
1263
|
-
|
|
1264
|
-
# Decode what to download
|
|
1265
|
-
contents = json.loads(
|
|
1266
|
-
base64.urlsafe_b64decode(dir_data.encode("utf-8")).decode("utf-8")
|
|
1267
|
-
)
|
|
1268
|
-
|
|
1269
|
-
check_directory_dict_invariants(contents)
|
|
1270
|
-
|
|
1271
|
-
if len(parts) == 1 or parts[1] == "/":
|
|
1272
|
-
# We didn't have any subdirectory
|
|
1273
|
-
return contents, None, dir_data
|
|
1274
|
-
else:
|
|
1275
|
-
# We have a path below this
|
|
1276
|
-
return contents, parts[1], dir_data
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
def encode_directory(contents: DirectoryContents) -> str:
|
|
1280
|
-
"""
|
|
1281
|
-
Encode a directory from a "toildir:" path to a directory (or a file in it).
|
|
1282
|
-
|
|
1283
|
-
Takes the directory dict, which is a dict from name to URI for a file or
|
|
1284
|
-
dict for a subdirectory.
|
|
1285
|
-
"""
|
|
1286
|
-
|
|
1287
|
-
check_directory_dict_invariants(contents)
|
|
1288
|
-
|
|
1289
|
-
return "toildir:" + base64.urlsafe_b64encode(
|
|
1290
|
-
json.dumps(contents).encode("utf-8")
|
|
1291
|
-
).decode("utf-8")
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
1230
|
class ToilFsAccess(StdFsAccess):
|
|
1295
1231
|
"""
|
|
1296
1232
|
Custom filesystem access class which handles toil filestore references.
|
|
@@ -1359,7 +1295,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1359
1295
|
|
|
1360
1296
|
# Decode its contents, the path inside it to the file (if any), and
|
|
1361
1297
|
# the key to use for caching the directory.
|
|
1362
|
-
contents, subpath, cache_key = decode_directory(path)
|
|
1298
|
+
contents, subpath, cache_key, _, _ = decode_directory(path)
|
|
1363
1299
|
logger.debug("Decoded directory contents: %s", contents)
|
|
1364
1300
|
|
|
1365
1301
|
if cache_key not in self.dir_to_download:
|
|
@@ -1395,7 +1331,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1395
1331
|
destination = path
|
|
1396
1332
|
else:
|
|
1397
1333
|
# The destination is something else.
|
|
1398
|
-
if
|
|
1334
|
+
if URLAccess.get_is_directory(path):
|
|
1399
1335
|
# Treat this as a directory
|
|
1400
1336
|
if path not in self.dir_to_download:
|
|
1401
1337
|
logger.debug(
|
|
@@ -1405,14 +1341,14 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1405
1341
|
|
|
1406
1342
|
# Recursively fetch all the files in the directory.
|
|
1407
1343
|
def download_to(url: str, dest: str) -> None:
|
|
1408
|
-
if
|
|
1344
|
+
if URLAccess.get_is_directory(url):
|
|
1409
1345
|
os.mkdir(dest)
|
|
1410
|
-
for part in
|
|
1346
|
+
for part in URLAccess.list_url(url):
|
|
1411
1347
|
download_to(
|
|
1412
1348
|
os.path.join(url, part), os.path.join(dest, part)
|
|
1413
1349
|
)
|
|
1414
1350
|
else:
|
|
1415
|
-
|
|
1351
|
+
URLAccess.read_from_url(url, open(dest, "wb"))
|
|
1416
1352
|
|
|
1417
1353
|
download_to(path, dest_dir)
|
|
1418
1354
|
self.dir_to_download[path] = dest_dir
|
|
@@ -1425,7 +1361,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1425
1361
|
# Try to grab it with a jobstore implementation, and save it
|
|
1426
1362
|
# somewhere arbitrary.
|
|
1427
1363
|
dest_file = NamedTemporaryFile(delete=False)
|
|
1428
|
-
|
|
1364
|
+
URLAccess.read_from_url(path, dest_file)
|
|
1429
1365
|
dest_file.close()
|
|
1430
1366
|
self.dir_to_download[path] = dest_file.name
|
|
1431
1367
|
destination = self.dir_to_download[path]
|
|
@@ -1461,7 +1397,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1461
1397
|
# Handle local files
|
|
1462
1398
|
return open(self._abs(fn), mode)
|
|
1463
1399
|
elif parse.scheme == "toildir":
|
|
1464
|
-
contents, subpath, cache_key = decode_directory(fn)
|
|
1400
|
+
contents, subpath, cache_key, _, _ = decode_directory(fn)
|
|
1465
1401
|
if cache_key in self.dir_to_download:
|
|
1466
1402
|
# This is already available locally, so fall back on the local copy
|
|
1467
1403
|
return open(self._abs(fn), mode)
|
|
@@ -1483,7 +1419,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1483
1419
|
return open(self._abs(fn), mode)
|
|
1484
1420
|
else:
|
|
1485
1421
|
# This should be supported by a job store.
|
|
1486
|
-
byte_stream =
|
|
1422
|
+
byte_stream = URLAccess.open_url(fn)
|
|
1487
1423
|
if "b" in mode:
|
|
1488
1424
|
# Pass stream along in binary
|
|
1489
1425
|
return byte_stream
|
|
@@ -1502,7 +1438,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1502
1438
|
except NoSuchFileException:
|
|
1503
1439
|
return False
|
|
1504
1440
|
elif parse.scheme == "toildir":
|
|
1505
|
-
contents, subpath, cache_key = decode_directory(path)
|
|
1441
|
+
contents, subpath, cache_key, _, _ = decode_directory(path)
|
|
1506
1442
|
if subpath is None:
|
|
1507
1443
|
# The toildir directory itself exists
|
|
1508
1444
|
return True
|
|
@@ -1520,7 +1456,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1520
1456
|
return True
|
|
1521
1457
|
else:
|
|
1522
1458
|
# This should be supported by a job store.
|
|
1523
|
-
return
|
|
1459
|
+
return URLAccess.url_exists(path)
|
|
1524
1460
|
|
|
1525
1461
|
def size(self, path: str) -> int:
|
|
1526
1462
|
parse = urlparse(path)
|
|
@@ -1529,7 +1465,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1529
1465
|
elif parse.scheme == "toildir":
|
|
1530
1466
|
# Decode its contents, the path inside it to the file (if any), and
|
|
1531
1467
|
# the key to use for caching the directory.
|
|
1532
|
-
contents, subpath, cache_key = decode_directory(path)
|
|
1468
|
+
contents, subpath, cache_key, _, _ = decode_directory(path)
|
|
1533
1469
|
|
|
1534
1470
|
# We can't get the size of just a directory.
|
|
1535
1471
|
if subpath is None:
|
|
@@ -1549,7 +1485,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1549
1485
|
)
|
|
1550
1486
|
else:
|
|
1551
1487
|
# This should be supported by a job store.
|
|
1552
|
-
size =
|
|
1488
|
+
size = URLAccess.get_size(path)
|
|
1553
1489
|
if size is None:
|
|
1554
1490
|
# get_size can be unimplemented or unavailable
|
|
1555
1491
|
raise RuntimeError(f"Could not get size of {path}")
|
|
@@ -1563,7 +1499,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1563
1499
|
# TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
|
|
1564
1500
|
return True
|
|
1565
1501
|
elif parse.scheme == "toildir":
|
|
1566
|
-
contents, subpath, cache_key = decode_directory(fn)
|
|
1502
|
+
contents, subpath, cache_key, _, _ = decode_directory(fn)
|
|
1567
1503
|
if subpath is None:
|
|
1568
1504
|
# This is the toildir directory itself
|
|
1569
1505
|
return False
|
|
@@ -1572,7 +1508,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1572
1508
|
# TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
|
|
1573
1509
|
return isinstance(found, str)
|
|
1574
1510
|
else:
|
|
1575
|
-
return self.exists(fn) and not
|
|
1511
|
+
return self.exists(fn) and not URLAccess.get_is_directory(fn)
|
|
1576
1512
|
|
|
1577
1513
|
def isdir(self, fn: str) -> bool:
|
|
1578
1514
|
logger.debug("ToilFsAccess checking type of %s", fn)
|
|
@@ -1582,7 +1518,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1582
1518
|
elif parse.scheme == "toilfile":
|
|
1583
1519
|
return False
|
|
1584
1520
|
elif parse.scheme == "toildir":
|
|
1585
|
-
contents, subpath, cache_key = decode_directory(fn)
|
|
1521
|
+
contents, subpath, cache_key, _, _ = decode_directory(fn)
|
|
1586
1522
|
if subpath is None:
|
|
1587
1523
|
# This is the toildir directory itself.
|
|
1588
1524
|
# TODO: We assume directories can't be deleted.
|
|
@@ -1592,7 +1528,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1592
1528
|
# TODO: We assume directories can't be deleted.
|
|
1593
1529
|
return isinstance(found, dict)
|
|
1594
1530
|
else:
|
|
1595
|
-
status =
|
|
1531
|
+
status = URLAccess.get_is_directory(fn)
|
|
1596
1532
|
logger.debug("AbstractJobStore said: %s", status)
|
|
1597
1533
|
return status
|
|
1598
1534
|
|
|
@@ -1610,7 +1546,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1610
1546
|
elif parse.scheme == "toilfile":
|
|
1611
1547
|
raise RuntimeError(f"Cannot list a file: {fn}")
|
|
1612
1548
|
elif parse.scheme == "toildir":
|
|
1613
|
-
contents, subpath, cache_key = decode_directory(fn)
|
|
1549
|
+
contents, subpath, cache_key, _, _ = decode_directory(fn)
|
|
1614
1550
|
here = contents
|
|
1615
1551
|
if subpath is not None:
|
|
1616
1552
|
got = get_from_structure(contents, subpath)
|
|
@@ -1626,7 +1562,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1626
1562
|
else:
|
|
1627
1563
|
return [
|
|
1628
1564
|
os.path.join(fn, entry.rstrip("/"))
|
|
1629
|
-
for entry in
|
|
1565
|
+
for entry in URLAccess.list_url(fn)
|
|
1630
1566
|
]
|
|
1631
1567
|
|
|
1632
1568
|
def join(self, path: str, *paths: str) -> str:
|
|
@@ -1736,7 +1672,7 @@ def toil_get_file(
|
|
|
1736
1672
|
pipe.write(data)
|
|
1737
1673
|
else:
|
|
1738
1674
|
# Stream from some other URI
|
|
1739
|
-
|
|
1675
|
+
URLAccess.read_from_url(uri, pipe)
|
|
1740
1676
|
except OSError as e:
|
|
1741
1677
|
# The other side of the pipe may have been closed by the
|
|
1742
1678
|
# reading thread, which is OK.
|
|
@@ -1779,7 +1715,7 @@ def toil_get_file(
|
|
|
1779
1715
|
# Open that path exclusively to make sure we created it
|
|
1780
1716
|
with open(src_path, "xb") as fh:
|
|
1781
1717
|
# Download into the file
|
|
1782
|
-
size, executable =
|
|
1718
|
+
size, executable = URLAccess.read_from_url(uri, fh)
|
|
1783
1719
|
if executable:
|
|
1784
1720
|
# Set the execute bit in the file's permissions
|
|
1785
1721
|
os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
|
|
@@ -2401,7 +2337,7 @@ def toilStageFiles(
|
|
|
2401
2337
|
|
|
2402
2338
|
if file_id_or_contents.startswith("toildir:"):
|
|
2403
2339
|
# Get the directory contents and the path into them, if any
|
|
2404
|
-
here, subpath, _ = decode_directory(file_id_or_contents)
|
|
2340
|
+
here, subpath, _, _, _ = decode_directory(file_id_or_contents)
|
|
2405
2341
|
if subpath is not None:
|
|
2406
2342
|
for part in subpath.split("/"):
|
|
2407
2343
|
here = cast(DirectoryContents, here[part])
|
|
@@ -2583,7 +2519,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2583
2519
|
resources={},
|
|
2584
2520
|
mutation_manager=runtime_context.mutation_manager,
|
|
2585
2521
|
formatgraph=tool.formatgraph,
|
|
2586
|
-
make_fs_access=
|
|
2522
|
+
make_fs_access=runtime_context.make_fs_access,
|
|
2587
2523
|
fs_access=runtime_context.make_fs_access(""),
|
|
2588
2524
|
job_script_provider=runtime_context.job_script_provider,
|
|
2589
2525
|
timeout=runtime_context.eval_timeout,
|
|
@@ -2613,6 +2549,12 @@ class CWLJob(CWLNamedJob):
|
|
|
2613
2549
|
else:
|
|
2614
2550
|
# We use a None requirement and the Toil default applies.
|
|
2615
2551
|
memory = None
|
|
2552
|
+
|
|
2553
|
+
# Imposing a minimum memory limit
|
|
2554
|
+
min_ram = getattr(runtime_context, "cwl_min_ram")
|
|
2555
|
+
if min_ram is not None and memory is not None:
|
|
2556
|
+
# Note: if the job is using the toil default memory, it won't be increased
|
|
2557
|
+
memory = max(memory, min_ram)
|
|
2616
2558
|
|
|
2617
2559
|
accelerators: Optional[list[AcceleratorRequirement]] = None
|
|
2618
2560
|
if req.get("cudaDeviceCount", 0) > 0:
|
|
@@ -2751,6 +2693,9 @@ class CWLJob(CWLNamedJob):
|
|
|
2751
2693
|
|
|
2752
2694
|
cwljob = resolve_dict_w_promises(self.cwljob, file_store)
|
|
2753
2695
|
|
|
2696
|
+
# Deletes duplicate listings
|
|
2697
|
+
remove_redundant_mounts(cwljob)
|
|
2698
|
+
|
|
2754
2699
|
if self.conditional.is_false(cwljob):
|
|
2755
2700
|
return self.conditional.skipped_outputs()
|
|
2756
2701
|
|
|
@@ -2984,24 +2929,23 @@ def makeRootJob(
|
|
|
2984
2929
|
else:
|
|
2985
2930
|
worker_metadata[filename] = file_data
|
|
2986
2931
|
|
|
2932
|
+
if worker_metadata:
|
|
2933
|
+
logger.info(
|
|
2934
|
+
"Planning to import %s files on workers",
|
|
2935
|
+
len(worker_metadata),
|
|
2936
|
+
)
|
|
2937
|
+
|
|
2987
2938
|
# import the files for the leader first
|
|
2988
2939
|
path_to_fileid = WorkerImportJob.import_files(
|
|
2989
2940
|
list(leader_metadata.keys()), toil._jobStore
|
|
2990
2941
|
)
|
|
2991
2942
|
|
|
2992
|
-
#
|
|
2993
|
-
#
|
|
2994
|
-
|
|
2995
|
-
initialized_job_order,
|
|
2996
|
-
tool,
|
|
2997
|
-
path_to_fileid,
|
|
2998
|
-
options.basedir,
|
|
2999
|
-
options.reference_inputs,
|
|
3000
|
-
options.bypass_file_store,
|
|
3001
|
-
)
|
|
2943
|
+
# Because installing the imported files expects all files to have been
|
|
2944
|
+
# imported, we don't do that here; we combine the leader imports and
|
|
2945
|
+
# the worker imports and install them all at once.
|
|
3002
2946
|
|
|
3003
2947
|
import_job = CWLImportWrapper(
|
|
3004
|
-
initialized_job_order, tool, runtime_context, worker_metadata, options
|
|
2948
|
+
initialized_job_order, tool, runtime_context, worker_metadata, path_to_fileid, options
|
|
3005
2949
|
)
|
|
3006
2950
|
return import_job
|
|
3007
2951
|
else:
|
|
@@ -3573,7 +3517,7 @@ class CWLInstallImportsJob(Job):
|
|
|
3573
3517
|
basedir: str,
|
|
3574
3518
|
skip_remote: bool,
|
|
3575
3519
|
bypass_file_store: bool,
|
|
3576
|
-
import_data: Promised[dict[str, FileID]],
|
|
3520
|
+
import_data: list[Promised[dict[str, FileID]]],
|
|
3577
3521
|
**kwargs: Any,
|
|
3578
3522
|
) -> None:
|
|
3579
3523
|
"""
|
|
@@ -3581,6 +3525,8 @@ class CWLInstallImportsJob(Job):
|
|
|
3581
3525
|
to convert all file locations to URIs.
|
|
3582
3526
|
|
|
3583
3527
|
This class is only used when runImportsOnWorkers is enabled.
|
|
3528
|
+
|
|
3529
|
+
:param import_data: List of mappings from file URI to imported file ID.
|
|
3584
3530
|
"""
|
|
3585
3531
|
super().__init__(local=True, **kwargs)
|
|
3586
3532
|
self.initialized_job_order = initialized_job_order
|
|
@@ -3590,6 +3536,8 @@ class CWLInstallImportsJob(Job):
|
|
|
3590
3536
|
self.bypass_file_store = bypass_file_store
|
|
3591
3537
|
self.import_data = import_data
|
|
3592
3538
|
|
|
3539
|
+
# TODO: Since we only call this from the class itself now it doesn't really
|
|
3540
|
+
# need to be static anymore.
|
|
3593
3541
|
@staticmethod
|
|
3594
3542
|
def fill_in_files(
|
|
3595
3543
|
initialized_job_order: CWLObjectType,
|
|
@@ -3607,7 +3555,12 @@ class CWLInstallImportsJob(Job):
|
|
|
3607
3555
|
"""
|
|
3608
3556
|
Return the file name's associated Toil file ID
|
|
3609
3557
|
"""
|
|
3610
|
-
|
|
3558
|
+
try:
|
|
3559
|
+
return candidate_to_fileid[filename]
|
|
3560
|
+
except KeyError:
|
|
3561
|
+
# Give something more useful than a KeyError if something went
|
|
3562
|
+
# wrong with the importing.
|
|
3563
|
+
raise RuntimeError(f"File at \"{filename}\" was never imported.")
|
|
3611
3564
|
|
|
3612
3565
|
file_convert_function = functools.partial(
|
|
3613
3566
|
extract_and_convert_file_to_toil_uri, fill_in_file
|
|
@@ -3654,11 +3607,19 @@ class CWLInstallImportsJob(Job):
|
|
|
3654
3607
|
Convert the filenames in the workflow inputs into the URIs
|
|
3655
3608
|
:return: Promise of transformed workflow inputs. A tuple of the job order and process
|
|
3656
3609
|
"""
|
|
3657
|
-
|
|
3610
|
+
|
|
3611
|
+
# Merge all the input dicts down to one to check.
|
|
3612
|
+
candidate_to_fileid: dict[str, FileID] = {
|
|
3613
|
+
k: v for mapping in unwrap(
|
|
3614
|
+
self.import_data
|
|
3615
|
+
) for k, v in unwrap(mapping).items()
|
|
3616
|
+
}
|
|
3658
3617
|
|
|
3659
3618
|
initialized_job_order = unwrap(self.initialized_job_order)
|
|
3660
3619
|
tool = unwrap(self.tool)
|
|
3661
|
-
|
|
3620
|
+
|
|
3621
|
+
# Install the imported files in the tool and job order
|
|
3622
|
+
return self.fill_in_files(
|
|
3662
3623
|
initialized_job_order,
|
|
3663
3624
|
tool,
|
|
3664
3625
|
candidate_to_fileid,
|
|
@@ -3682,33 +3643,46 @@ class CWLImportWrapper(CWLNamedJob):
|
|
|
3682
3643
|
tool: Process,
|
|
3683
3644
|
runtime_context: cwltool.context.RuntimeContext,
|
|
3684
3645
|
file_to_data: dict[str, FileMetadata],
|
|
3646
|
+
imported_files: dict[str, FileID],
|
|
3685
3647
|
options: Namespace,
|
|
3686
3648
|
):
|
|
3687
|
-
|
|
3649
|
+
"""
|
|
3650
|
+
Make a job to do file imports on workers and then run the workflow.
|
|
3651
|
+
|
|
3652
|
+
:param file_to_data: Metadata for files that need to be imported on the
|
|
3653
|
+
worker.
|
|
3654
|
+
:param imported_files: Files already imported on the leader.
|
|
3655
|
+
"""
|
|
3656
|
+
super().__init__(local=False, disk=options.import_workers_batchsize)
|
|
3688
3657
|
self.initialized_job_order = initialized_job_order
|
|
3689
3658
|
self.tool = tool
|
|
3690
|
-
self.options = options
|
|
3691
3659
|
self.runtime_context = runtime_context
|
|
3692
3660
|
self.file_to_data = file_to_data
|
|
3661
|
+
self.imported_files = imported_files
|
|
3662
|
+
self.options = options
|
|
3693
3663
|
|
|
3694
3664
|
def run(self, file_store: AbstractFileStore) -> Any:
|
|
3665
|
+
# Do the worker-based imports
|
|
3695
3666
|
imports_job = ImportsJob(
|
|
3696
3667
|
self.file_to_data,
|
|
3697
|
-
self.options.
|
|
3668
|
+
self.options.import_workers_batchsize,
|
|
3698
3669
|
self.options.import_workers_disk,
|
|
3699
3670
|
)
|
|
3700
3671
|
self.addChild(imports_job)
|
|
3672
|
+
|
|
3673
|
+
# Install the worker imports and any leader imports
|
|
3701
3674
|
install_imports_job = CWLInstallImportsJob(
|
|
3702
3675
|
initialized_job_order=self.initialized_job_order,
|
|
3703
3676
|
tool=self.tool,
|
|
3704
3677
|
basedir=self.options.basedir,
|
|
3705
3678
|
skip_remote=self.options.reference_inputs,
|
|
3706
3679
|
bypass_file_store=self.options.bypass_file_store,
|
|
3707
|
-
import_data=imports_job.rv(0),
|
|
3680
|
+
import_data=[self.imported_files, imports_job.rv(0)],
|
|
3708
3681
|
)
|
|
3709
3682
|
self.addChild(install_imports_job)
|
|
3710
3683
|
imports_job.addFollowOn(install_imports_job)
|
|
3711
3684
|
|
|
3685
|
+
# Run the workflow
|
|
3712
3686
|
start_job = CWLStartJob(
|
|
3713
3687
|
install_imports_job.rv(0),
|
|
3714
3688
|
install_imports_job.rv(1),
|
|
@@ -4212,6 +4186,8 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
4212
4186
|
options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
|
|
4213
4187
|
)
|
|
4214
4188
|
tmp_outdir_prefix = options.tmp_outdir_prefix or tmpdir_prefix
|
|
4189
|
+
# tmpdir_prefix and tmp_outdir_prefix must not be checked for existence as they may exist on a worker only path
|
|
4190
|
+
# See https://github.com/DataBiosphere/toil/issues/5310
|
|
4215
4191
|
workdir = options.workDir or tmp_outdir_prefix
|
|
4216
4192
|
|
|
4217
4193
|
if options.jobStore is None:
|
|
@@ -4262,6 +4238,7 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
4262
4238
|
runtime_context.workdir = workdir # type: ignore[attr-defined]
|
|
4263
4239
|
runtime_context.outdir = outdir
|
|
4264
4240
|
setattr(runtime_context, "cwl_default_ram", options.cwl_default_ram)
|
|
4241
|
+
setattr(runtime_context, "cwl_min_ram", options.cwl_min_ram)
|
|
4265
4242
|
runtime_context.move_outputs = "leave"
|
|
4266
4243
|
runtime_context.rm_tmpdir = False
|
|
4267
4244
|
runtime_context.streaming_allowed = not options.disable_streaming
|
|
@@ -4272,11 +4249,12 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
4272
4249
|
# of filestore files and caches those.
|
|
4273
4250
|
logger.debug("CWL task caching is turned on. Bypassing file store.")
|
|
4274
4251
|
options.bypass_file_store = True
|
|
4252
|
+
|
|
4253
|
+
# Ensure the cache directory exists
|
|
4254
|
+
# Only ensure the caching directory exists as that must be local.
|
|
4255
|
+
os.makedirs(os.path.abspath(options.cachedir), exist_ok=True)
|
|
4275
4256
|
if options.mpi_config_file is not None:
|
|
4276
4257
|
runtime_context.mpi_config = MpiConfig.load(options.mpi_config_file)
|
|
4277
|
-
if cwltool.main.check_working_directories(runtime_context) is not None:
|
|
4278
|
-
logger.error("Failed to create directory. If using tmpdir_prefix, tmpdir_outdir_prefix, or cachedir, consider changing directory locations.")
|
|
4279
|
-
return 1
|
|
4280
4258
|
setattr(runtime_context, "bypass_file_store", options.bypass_file_store)
|
|
4281
4259
|
if options.bypass_file_store and options.destBucket:
|
|
4282
4260
|
# We use the file store to write to buckets, so we can't do this (yet?)
|