toil 9.0.0__py3-none-any.whl → 9.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/batchSystems/abstractBatchSystem.py +13 -5
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
- toil/batchSystems/kubernetes.py +13 -2
- toil/batchSystems/mesos/batchSystem.py +33 -2
- toil/batchSystems/slurm.py +191 -16
- toil/cwl/cwltoil.py +17 -82
- toil/fileStores/__init__.py +1 -1
- toil/fileStores/abstractFileStore.py +5 -2
- toil/fileStores/cachingFileStore.py +1 -1
- toil/job.py +30 -14
- toil/jobStores/abstractJobStore.py +24 -19
- toil/jobStores/aws/jobStore.py +862 -1963
- toil/jobStores/aws/utils.py +24 -270
- toil/jobStores/googleJobStore.py +25 -9
- toil/jobStores/utils.py +0 -327
- toil/leader.py +27 -22
- toil/lib/aws/config.py +22 -0
- toil/lib/aws/s3.py +477 -9
- toil/lib/aws/utils.py +22 -33
- toil/lib/checksum.py +88 -0
- toil/lib/conversions.py +33 -31
- toil/lib/directory.py +217 -0
- toil/lib/ec2.py +97 -29
- toil/lib/exceptions.py +2 -1
- toil/lib/expando.py +2 -2
- toil/lib/generatedEC2Lists.py +73 -16
- toil/lib/io.py +33 -2
- toil/lib/memoize.py +21 -7
- toil/lib/pipes.py +385 -0
- toil/lib/retry.py +1 -1
- toil/lib/threading.py +1 -1
- toil/lib/web.py +4 -5
- toil/provisioners/__init__.py +5 -2
- toil/provisioners/aws/__init__.py +43 -36
- toil/provisioners/aws/awsProvisioner.py +22 -13
- toil/provisioners/node.py +60 -12
- toil/resource.py +3 -13
- toil/test/__init__.py +14 -16
- toil/test/batchSystems/test_slurm.py +103 -14
- toil/test/cwl/staging_cat.cwl +27 -0
- toil/test/cwl/staging_make_file.cwl +25 -0
- toil/test/cwl/staging_workflow.cwl +43 -0
- toil/test/cwl/zero_default.cwl +61 -0
- toil/test/docs/scripts/tutorial_staging.py +17 -8
- toil/test/jobStores/jobStoreTest.py +23 -133
- toil/test/lib/aws/test_iam.py +7 -7
- toil/test/lib/aws/test_s3.py +30 -33
- toil/test/lib/aws/test_utils.py +9 -9
- toil/test/provisioners/aws/awsProvisionerTest.py +59 -6
- toil/test/src/autoDeploymentTest.py +2 -3
- toil/test/src/fileStoreTest.py +89 -87
- toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
- toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
- toil/test/utils/toilKillTest.py +35 -28
- toil/test/wdl/md5sum/md5sum.json +1 -1
- toil/test/wdl/testfiles/gather.wdl +52 -0
- toil/test/wdl/wdltoil_test.py +120 -38
- toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
- toil/utils/toilDebugFile.py +6 -3
- toil/utils/toilStats.py +17 -2
- toil/version.py +6 -6
- toil/wdl/wdltoil.py +1038 -549
- toil/worker.py +5 -2
- {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/METADATA +12 -12
- {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/RECORD +69 -61
- toil/lib/iterables.py +0 -112
- toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
- {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/WHEEL +0 -0
- {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/entry_points.txt +0 -0
- {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/licenses/LICENSE +0 -0
- {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/top_level.txt +0 -0
toil/test/wdl/wdltoil_test.py
CHANGED

@@ -41,7 +41,7 @@ logger = logging.getLogger(__name__)
 
 
 WDL_CONFORMANCE_TEST_REPO = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
-WDL_CONFORMANCE_TEST_COMMIT = "
+WDL_CONFORMANCE_TEST_COMMIT = "46b5f85ee38ec60d0b8b9c35928b5104a2af83d5"
 # These tests are known to require things not implemented by
 # Toil and will not be run in CI.
 WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL = [
@@ -110,6 +110,15 @@ class TestWDLConformance:
                 "Failed process standard error: %s",
                 p.stderr.decode("utf-8", errors="replace"),
             )
+        else:
+            logger.debug(
+                "Successful process standard output: %s",
+                p.stdout.decode("utf-8", errors="replace"),
+            )
+            logger.debug(
+                "Successful process standard error: %s",
+                p.stderr.decode("utf-8", errors="replace"),
+            )
 
         p.check_returncode()
 
@@ -195,6 +204,30 @@ class TestWDLConformance:
 
         self.check(p)
 
+    # estimated running time: 10 minutes (once all the appropriate tests get
+    # marked as "development")
+    @slow
+    def test_conformance_tests_development(self, wdl_conformance_test_repo: Path) -> None:
+        os.chdir(wdl_conformance_test_repo)
+        commands = [
+            exactPython,
+            "run.py",
+            "--runner",
+            "toil-wdl-runner",
+            "--conformance-file",
+            "conformance.yaml",
+            "-v",
+            "development",
+        ]
+        if WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL:
+            commands.append("--exclude-numbers")
+            commands.append(
+                ",".join([str(t) for t in WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL])
+            )
+        p = subprocess.run(commands, capture_output=True)
+
+        self.check(p)
+
     @slow
     def test_conformance_tests_integration(
         self, wdl_conformance_test_repo: Path
@@ -288,6 +321,28 @@ class TestWDL:
 
         assert "StringFileCoercion.output_file" in result
 
+    @needs_docker
+    def test_gather(self, tmp_path: Path) -> None:
+        """
+        Test files with the same name from different scatter tasks.
+        """
+        with get_data("test/wdl/testfiles/gather.wdl") as wdl:
+            result_json = subprocess.check_output(
+                self.base_command
+                + [
+                    str(wdl),
+                    "-o",
+                    str(tmp_path),
+                    "--logInfo",
+                    "--retryCount=0"
+                ]
+            )
+        result = json.loads(result_json)
+
+        assert "gather.outfile" in result
+        assert isinstance(result["gather.outfile"], str)
+        assert open(result["gather.outfile"]).read() == "1\n2\n3\n"
+
     @needs_docker
     def test_wait(self, tmp_path: Path) -> None:
         """
@@ -856,25 +911,29 @@
         env["TOIL_DOCKSTORE_TOKEN"] = "99cf5578ebe94b194d7864630a86258fa3d6cedcc17d757b5dd49e64ee3b68c3"
         # Enable history for when <https://github.com/DataBiosphere/toil/pull/5258> merges
         env["TOIL_HISTORY"] = "True"
+
+        try:
+            output_log = subprocess.check_output(
+                self.base_command
+                + [
+                    wdl_file,
+                    json_input,
+                    "--logDebug",
+                    "-o",
+                    str(tmp_path),
+                    "--outputDialect",
+                    "miniwdl",
+                    "--publishWorkflowMetrics=current",
+                ]
+                + (extra_args or []),
+                stderr=subprocess.STDOUT,
+                env=env,
+            ).decode("utf-8", errors="replace")
+        except subprocess.CalledProcessError as e:
+            logger.error("Test run of Toil failed: %s", e.stdout.decode("utf-8", errors="replace"))
+            raise
 
-        output_log
-            self.base_command
-            + [
-                wdl_file,
-                json_input,
-                "--logDebug",
-                "-o",
-                str(tmp_path),
-                "--outputDialect",
-                "miniwdl",
-                "--publishWorkflowMetrics=current",
-            ]
-            + (extra_args or []),
-            stderr=subprocess.STDOUT,
-            env=env,
-        )
-
-        assert b'Workflow metrics were accepted by Dockstore.' in output_log
+        assert "Workflow metrics were accepted by Dockstore." in output_log, f"No acceptance message in log: {output_log}"
 
     @slow
     @needs_docker_cuda
@@ -1138,34 +1197,57 @@ class TestWDLToilBench(unittest.TestCase):
         """
 
         from toil.wdl.wdltoil import (
-            DirectoryNamingStateDict,
             choose_human_readable_directory,
         )
 
-
-
-        # The first time we should get a path with the task name and without the ID
+        # The first time we should get a path with the task name
         first_chosen = choose_human_readable_directory(
-            "root", "taskname", "
+            "root", "taskname", "https://example.com/some/directory"
         )
         assert first_chosen.startswith("root")
-        assert "taskname" in first_chosen
-        assert "111-222-333" not in first_chosen
 
-        # If we use the same
-
-            "root", "taskname", "
+        # If we use the same parent we should get the same result
+        same_parent = choose_human_readable_directory(
+            "root", "taskname", "https://example.com/some/directory"
+        )
+        assert same_parent == first_chosen
+
+        # If we use a lower parent with a URL, we do not necessarily need to be
+        # inside the higher parent.
+
+        # If we use a URL with a creative number of slashes, it should be distinct.
+        slash_parent = choose_human_readable_directory(
+            "root", "taskname", "https://example.com/some/directory//////"
+        )
+        assert slash_parent != first_chosen
+
+        # If we use the same parent URL but a different task we should get the same result
+        other_task = choose_human_readable_directory(
+            "root", "taskname2", "https://example.com/some/directory"
+        )
+        assert other_task == first_chosen
+
+        # If we use a different parent we should get a different result still obeying the constraints
+        diff_parent = choose_human_readable_directory(
+            "root", "taskname", "/data/tmp/files/somewhere"
+        )
+        assert diff_parent != first_chosen
+        assert diff_parent.startswith("root")
+        assert "taskname" in diff_parent
+
+        # If we use a subpath parent with a filename we should get a path inside it.
+        diff_parent_subpath = choose_human_readable_directory(
+            "root", "taskname", "/data/tmp/files/somewhere/else"
        )
-        assert
+        assert os.path.dirname(diff_parent_subpath) == diff_parent
 
-        # If we use a different
-
-            "root", "
+        # If we use the same parent path but a different task we should get a different result.
+        other_task_directory = choose_human_readable_directory(
+            "root", "taskname2", "/data/tmp/files/somewhere"
        )
-        assert
-        assert
-        assert "
-        assert "222-333-444" not in diff_id
+        assert other_task_directory != diff_parent
+        assert other_task_directory.startswith("root")
+        assert "taskname2" in other_task_directory
 
     def test_uri_packing(self) -> None:
         """
@@ -1181,7 +1263,7 @@ class TestWDLToilBench(unittest.TestCase):
         file_basename = "thefile.txt"
 
         # Pack and unpack it
-        uri = pack_toil_uri(file_id, task_path, dir_id, file_basename)
+        uri = pack_toil_uri(file_id, task_path, str(dir_id), file_basename)
         unpacked = unpack_toil_uri(uri)
 
         # Make sure we got what we put in
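The hunk above reworks the TestWDLToilBench unit test so that choose_human_readable_directory is keyed on a parent path or URL rather than a directory ID. As a quick orientation (not part of the package), the following condensed sketch restates the properties the new assertions pin down, reusing the illustrative parent values from the test and assuming the function is importable exactly as the test imports it:

import os

from toil.wdl.wdltoil import choose_human_readable_directory

root = "root"
url_parent = "https://example.com/some/directory"

# Choices are deterministic: the same (root, task, parent) triple maps to the same directory.
first = choose_human_readable_directory(root, "taskname", url_parent)
again = choose_human_readable_directory(root, "taskname", url_parent)
assert again == first

# A local parent path gets its own directory, and a deeper parent path nests inside it.
outer = choose_human_readable_directory(root, "taskname", "/data/tmp/files/somewhere")
inner = choose_human_readable_directory(root, "taskname", "/data/tmp/files/somewhere/else")
assert os.path.dirname(inner) == outer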
toil/test/wdl/wdltoil_test_kubernetes.py
CHANGED

@@ -1,6 +1,7 @@
 import unittest
 from uuid import uuid4
 
+import logging
 import pytest
 
 from toil.provisioners import cluster_factory
@@ -12,6 +13,8 @@ from toil.test.wdl.wdltoil_test import (
 )
 
 
+logger = logging.getLogger(__name__)
+
 @integrative
 @slow
 @pytest.mark.timeout(1800)
@@ -52,6 +55,7 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
         workflow that performs an image pull on the worker.
         :return:
         """
+
         self.numWorkers = "1"
         self.requestedLeaderStorage = 30
         # create the cluster
@@ -64,6 +68,8 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
 
         wdl_dir = "wdl_conformance_tests"
 
+        logger.info("Cloning WDL tests onto cluster...")
+
         # get the wdl-conformance-tests repo to get WDL tasks to run
         self.sshUtil(
             [
@@ -78,6 +84,9 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
 
         # run WDL workflow that will run singularity
         test_options = [f"tests/md5sum/md5sum.wdl", f"tests/md5sum/md5sum.json"]
+
+        logger.info("Running workflow...")
+
         self.sshUtil(
             [
                 "bash",
toil/utils/toilDebugFile.py
CHANGED

@@ -47,12 +47,12 @@ def fetchJobStoreFiles(jobStore: FileJobStore, options: argparse.Namespace) -> N
     jobStoreHits = glob(directoryname=options.jobStore, glob_pattern=jobStoreFile)
     for jobStoreFileID in jobStoreHits:
         logger.debug(
-            f"Copying job store file: {jobStoreFileID} to {options.localFilePath
+            f"Copying job store file: {jobStoreFileID} to {options.localFilePath}"
         )
         jobStore.read_file(
             jobStoreFileID,
             os.path.join(
-                options.localFilePath
+                options.localFilePath, os.path.basename(jobStoreFileID)
             ),
             symlink=options.useSymlinks,
         )
@@ -97,7 +97,10 @@ def printContentsOfJobStore(
 def main() -> None:
     parser = parser_with_common_options(jobstore_option=True, prog="toil debug-file")
     parser.add_argument(
-        "--localFilePath",
+        "--localFilePath",
+        type=str,
+        default=".",
+        help="Location to which to copy job store files."
     )
     parser.add_argument(
         "--fetch",
toil/utils/toilStats.py
CHANGED

@@ -326,6 +326,8 @@ def sprint_tag(
     out_str += header + "\n"
     out_str += sub_header + "\n"
     out_str += tag_str + "\n"
+    if tag.excess_cpu > 0:
+        out_str += f" ({tag.excess_cpu} used more CPU than requested!)\n"
     return out_str
 
 
@@ -507,13 +509,25 @@ def build_element(
             float(item.get(category_key, defaults[category])), category
         )
         values.append(category_value)
-
+
+    excess_cpu_items = 0
     for index in range(0, len(item_values[CATEGORIES[0]])):
         # For each item, compute the computed categories
-
+
+        # Compute wait time (allocated CPU time wasted).
+        # Note that if any item uses *more* CPU cores than requested, at any
+        # time, that decreases the amount of wait we're able to see from that
+        # item. If it hapens a lot, our computed wait could go negative, so we
+        # bound it below at 0.
+        wait_value = (
             item_values["time"][index] * item_values["cores"][index]
             - item_values["clock"][index]
         )
+        if wait_value < 0:
+            # Remember an item used more CPU than allocated.
+            excess_cpu_items += 1
+            wait_value = 0
+        item_values["wait"].append(wait_value)
 
     for category, values in item_values.items():
         values.sort()
@@ -531,6 +545,7 @@ def build_element(
         item_element["average_" + category] = float(sum(values) / len(values))
         item_element["min_" + category] = float(min(values))
         item_element["max_" + category] = float(max(values))
+        item_element["excess_cpu"] = excess_cpu_items
 
         element[item_name] = item_element
 
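The build_element change computes each item's wait as allocated core-seconds (wall time multiplied by requested cores) minus measured CPU seconds, clamps it at zero, and counts how many items went negative so sprint_tag can flag jobs that used more CPU than they requested. A standalone sketch of that arithmetic, using made-up numbers rather than real stats records:

# Hypothetical items: time is wall-clock seconds, cores is requested cores,
# clock is measured CPU seconds.
items = [
    {"time": 100.0, "cores": 2, "clock": 150.0},  # 50 core-seconds of wait
    {"time": 100.0, "cores": 1, "clock": 130.0},  # used more CPU than requested
]

excess_cpu_items = 0
waits = []
for item in items:
    wait_value = item["time"] * item["cores"] - item["clock"]
    if wait_value < 0:
        # The item burned more CPU than it was allocated; clamp so totals stay non-negative.
        excess_cpu_items += 1
        wait_value = 0
    waits.append(wait_value)

assert waits == [50.0, 0]
assert excess_cpu_items == 1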
toil/version.py
CHANGED

@@ -1,14 +1,14 @@
-baseVersion = '9.
+baseVersion = '9.1.1'
 cgcloudVersion = '1.6.0a1.dev393'
-version = '9.
+version = '9.1.1-f654a3c80f5318ae2ec2e5ecb278a05e1e844a58'
 cacheTag = 'cache-local-py3.9'
 mainCacheTag = 'cache-master-py3.9'
-distVersion = '9.
+distVersion = '9.1.1'
 exactPython = 'python3.9'
 python = 'python3.9'
-dockerTag = '9.
-currentCommit = '
+dockerTag = '9.1.1-f654a3c80f5318ae2ec2e5ecb278a05e1e844a58-py3.9'
+currentCommit = 'f654a3c80f5318ae2ec2e5ecb278a05e1e844a58'
 dockerRegistry = 'quay.io/ucsc_cgl'
 dockerName = 'toil'
 dirty = False
-cwltool_version = '3.1.
+cwltool_version = '3.1.20250715140722'