toil 9.0.0-py3-none-any.whl → 9.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/batchSystems/abstractBatchSystem.py +13 -5
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
- toil/batchSystems/kubernetes.py +13 -2
- toil/batchSystems/mesos/batchSystem.py +33 -2
- toil/batchSystems/slurm.py +191 -16
- toil/cwl/cwltoil.py +17 -82
- toil/fileStores/__init__.py +1 -1
- toil/fileStores/abstractFileStore.py +5 -2
- toil/fileStores/cachingFileStore.py +1 -1
- toil/job.py +30 -14
- toil/jobStores/abstractJobStore.py +24 -19
- toil/jobStores/aws/jobStore.py +862 -1963
- toil/jobStores/aws/utils.py +24 -270
- toil/jobStores/googleJobStore.py +25 -9
- toil/jobStores/utils.py +0 -327
- toil/leader.py +27 -22
- toil/lib/aws/config.py +22 -0
- toil/lib/aws/s3.py +477 -9
- toil/lib/aws/utils.py +22 -33
- toil/lib/checksum.py +88 -0
- toil/lib/conversions.py +33 -31
- toil/lib/directory.py +217 -0
- toil/lib/ec2.py +97 -29
- toil/lib/exceptions.py +2 -1
- toil/lib/expando.py +2 -2
- toil/lib/generatedEC2Lists.py +73 -16
- toil/lib/io.py +33 -2
- toil/lib/memoize.py +21 -7
- toil/lib/pipes.py +385 -0
- toil/lib/retry.py +1 -1
- toil/lib/threading.py +1 -1
- toil/lib/web.py +4 -5
- toil/provisioners/__init__.py +5 -2
- toil/provisioners/aws/__init__.py +43 -36
- toil/provisioners/aws/awsProvisioner.py +22 -13
- toil/provisioners/node.py +60 -12
- toil/resource.py +3 -13
- toil/test/__init__.py +14 -16
- toil/test/batchSystems/test_slurm.py +103 -14
- toil/test/cwl/staging_cat.cwl +27 -0
- toil/test/cwl/staging_make_file.cwl +25 -0
- toil/test/cwl/staging_workflow.cwl +43 -0
- toil/test/cwl/zero_default.cwl +61 -0
- toil/test/docs/scripts/tutorial_staging.py +17 -8
- toil/test/jobStores/jobStoreTest.py +23 -133
- toil/test/lib/aws/test_iam.py +7 -7
- toil/test/lib/aws/test_s3.py +30 -33
- toil/test/lib/aws/test_utils.py +9 -9
- toil/test/provisioners/aws/awsProvisionerTest.py +59 -6
- toil/test/src/autoDeploymentTest.py +2 -3
- toil/test/src/fileStoreTest.py +89 -87
- toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
- toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
- toil/test/utils/toilKillTest.py +35 -28
- toil/test/wdl/md5sum/md5sum.json +1 -1
- toil/test/wdl/wdltoil_test.py +98 -38
- toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
- toil/utils/toilDebugFile.py +6 -3
- toil/utils/toilStats.py +17 -2
- toil/version.py +6 -6
- toil/wdl/wdltoil.py +1032 -546
- toil/worker.py +5 -2
- {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/METADATA +12 -12
- {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/RECORD +68 -61
- toil/lib/iterables.py +0 -112
- toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
- {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/WHEEL +0 -0
- {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
- {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
- {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
--- a/toil/test/batchSystems/test_slurm.py
+++ b/toil/test/batchSystems/test_slurm.py
@@ -6,6 +6,8 @@ import logging
 import pytest
 import sys
 
+from datetime import datetime, timedelta
+
 import toil.batchSystems.slurm
 from toil.batchSystems.abstractBatchSystem import (
     EXIT_STATUS_UNAVAILABLE_VALUE,
@@ -22,10 +24,25 @@ logger = logging.getLogger(__name__)
 # TODO: Come up with a better way to mock the commands then monkey-patching the
 # command-calling functions.
 
+# To convincingly test jobs in the past relative to the time Toil goes looking
+# for them, we give our fake jobs times relative to the module load time, which
+# we hope is not days and days away from the time the tests actually run.
+JOB_BASE_TIME = datetime.now().astimezone(None) - timedelta(days=5)
+
+def call_either(args, **_) -> str:
+    """
+    Pretend to call either sacct or scontrol as appropriate.
+    """
+    if args[0] == "sacct":
+        return call_sacct(args)
+    elif args[0] == "scontrol":
+        return call_scontrol(args)
+    else:
+        raise RuntimeError(f"Cannot fake command call: {args}")
 
 def call_sacct(args, **_) -> str:
     """
-    The arguments passed to `call_command` when executing `sacct` are:
+    The arguments passed to `call_command` when executing `sacct` are something like:
     ['sacct', '-n', '-j', '<comma-separated list of job-ids>', '--format',
     'JobIDRaw,State,ExitCode', '-P', '-S', '1970-01-01']
     The multi-line output is something like::
@@ -36,6 +53,7 @@ def call_sacct(args, **_) -> str:
         1236|FAILED|0:2
         1236.extern|COMPLETED|0:0
     """
+    logger.info("sacct call: %s", args)
     if sum(len(a) for a in args) > 1000:
         # Simulate if the argument list is too long
         raise OSError(errno.E2BIG, "Argument list is too long")
@@ -51,11 +69,58 @@ def call_sacct(args, **_) -> str:
         789868: "789868|PENDING|0:0\n",
         789869: "789869|COMPLETED|0:0\n789869.batch|COMPLETED|0:0\n789869.extern|COMPLETED|0:0\n",
     }
-
+    # And time we say the job was at
+    job_time = {
+        609663: JOB_BASE_TIME + timedelta(days=1),
+        754725: JOB_BASE_TIME + timedelta(days=1),
+        765096: JOB_BASE_TIME + timedelta(days=2),
+        767925: JOB_BASE_TIME + timedelta(days=2),
+        785023: JOB_BASE_TIME + timedelta(days=3),
+        789456: JOB_BASE_TIME + timedelta(days=3),
+        789724: JOB_BASE_TIME + timedelta(days=4),
+        789868: JOB_BASE_TIME + timedelta(days=4),
+        789869: JOB_BASE_TIME + timedelta(days=4),
+    }
+
+    # See if they asked for a job list
+    try:
+        j_index = args.index('-j')
+        job_ids = [int(job_id) for job_id in args[j_index + 1].split(",")]
+    except ValueError:
+        # We're not restricting to a list of jobs.
+        job_ids = list(sacct_info.keys())
+    # See if they asked for start or end times
+    try:
+        flag_index = args.index('-S')
+        begin_time = datetime.fromisoformat(args[flag_index + 1]).astimezone(None)
+    except ValueError:
+        # By default, Slurm uses today at midnight
+        begin_time = datetime.now().astimezone(None).replace(
+            hour=0,
+            minute=0,
+            second=0,
+            microsecond=0,
+            fold=0
+        )
+    try:
+        flag_index = args.index('-E')
+        end_time = datetime.fromisoformat(args[flag_index + 1]).astimezone(None)
+    except ValueError:
+        end_time = None
+
     stdout = ""
     # Glue the fake outputs for the request job-ids together in a single string
     for job_id in job_ids:
-
+        if job_id not in sacct_info:
+            # Not a job we know of.
+            continue
+        if begin_time is not None and begin_time > job_time[job_id]:
+            # Skip this job as being too early
+            continue
+        if end_time is not None and end_time < job_time[job_id]:
+            # Skip this job as being too late
+            continue
+        stdout += sacct_info[job_id]
     return stdout
 
 
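The heart of this hunk is the new time-window filter: the fake `sacct` only reports a job if it falls between the `-S` start (defaulting, per the comment above and real `sacct` behavior, to today at local midnight) and the `-E` end. Isolated as a runnable sketch, with illustrative names rather than toil's:

```python
from datetime import datetime, timedelta

# Default window start: today at local midnight, mirroring sacct's -S default.
now = datetime.now().astimezone(None)
begin_time = now.replace(hour=0, minute=0, second=0, microsecond=0, fold=0)
end_time = None  # no -E given: the window is open-ended

# A job stamped five days ago, like JOB_BASE_TIME in the mock above.
job_time = now - timedelta(days=5)

too_early = begin_time is not None and begin_time > job_time
too_late = end_time is not None and end_time < job_time
assert too_early and not too_late  # stale jobs drop out of the default window
```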
@@ -64,8 +129,11 @@ def call_scontrol(args, **_) -> str:
     The arguments passed to `call_command` when executing `scontrol` are:
     ``['scontrol', 'show', 'job']`` or ``['scontrol', 'show', 'job', '<job-id>']``
     """
+    logger.info("scontrol call: %s", args)
     job_id = int(args[3]) if len(args) > 3 else None
     # Fake output per fake job-id.
+    # scontrol only shows recent jobs, so we have fewer/different jobs here
+    # than for sacct.
     scontrol_info = {
         787204: textwrap.dedent(
             """\
@@ -211,6 +279,9 @@ class FakeBatchSystem(BatchSystemSupport):
 
     def __init__(self):
         super().__init__(self.__fake_config(), float("inf"), sys.maxsize, sys.maxsize)
+        # Pretend to be a workflow that started before we pretend the jobs
+        # we pretend to have ran.
+        self.start_time = JOB_BASE_TIME - timedelta(hours=2)
 
     def getWaitDuration(self):
         return 10
@@ -358,14 +429,14 @@ class SlurmTest(ToilTest):
     ###
 
     def test_getJobExitCode_job_exists(self):
-        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command",
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
         job_id = "785023" # FAILED
         expected_result = (127, BatchJobExitReason.FAILED)
         result = self.worker.getJobExitCode(job_id)
         assert result == expected_result, f"{result} != {expected_result}"
 
     def test_getJobExitCode_job_not_exists(self):
-        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command",
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
         job_id = "1234" # Non-existent
         expected_result = None
         result = self.worker.getJobExitCode(job_id)
@@ -379,7 +450,7 @@ class SlurmTest(ToilTest):
         self.monkeypatch.setattr(
             self.worker, "_getJobDetailsFromSacct", call_sacct_raises
         )
-        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command",
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
         job_id = "787204" # COMPLETED
         expected_result = (0, BatchJobExitReason.FINISHED)
         result = self.worker.getJobExitCode(job_id)
@@ -393,7 +464,7 @@ class SlurmTest(ToilTest):
         self.monkeypatch.setattr(
             self.worker, "_getJobDetailsFromSacct", call_sacct_raises
         )
-        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command",
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
         job_id = "1234" # Non-existent
         try:
             _ = self.worker.getJobExitCode(job_id)
@@ -407,21 +478,21 @@ class SlurmTest(ToilTest):
     ###
 
     def test_coalesce_job_exit_codes_one_exists(self):
-        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command",
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
         job_ids = ["785023"] # FAILED
         expected_result = [(127, BatchJobExitReason.FAILED)]
         result = self.worker.coalesce_job_exit_codes(job_ids)
         assert result == expected_result, f"{result} != {expected_result}"
 
     def test_coalesce_job_exit_codes_one_not_exists(self):
-        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command",
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
         job_ids = ["1234"] # Non-existent
         expected_result = [None]
         result = self.worker.coalesce_job_exit_codes(job_ids)
         assert result == expected_result, f"{result} != {expected_result}"
 
     def test_coalesce_job_exit_codes_many_all_exist(self):
-        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command",
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
         job_ids = [
             "754725", # TIMEOUT,
             "789456", # FAILED,
@@ -440,8 +511,26 @@ class SlurmTest(ToilTest):
         result = self.worker.coalesce_job_exit_codes(job_ids)
         assert result == expected_result, f"{result} != {expected_result}"
 
+    def test_coalesce_job_exit_codes_mix_sacct_scontrol(self):
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
+        job_ids = [
+            "609663", # FAILED, in sacct only
+            "789869", # COMPLETED, in sacct only
+            "787204", # COMPLETED, in scontrol only
+            "789724", # RUNNING, in scontrol only
+        ]
+        # RUNNING and PENDING jobs should return None
+        expected_result = [
+            (130, BatchJobExitReason.FAILED),
+            (0, BatchJobExitReason.FINISHED),
+            (0, BatchJobExitReason.FINISHED),
+            None
+        ]
+        result = self.worker.coalesce_job_exit_codes(job_ids)
+        assert result == expected_result, f"{result} != {expected_result}"
+
     def test_coalesce_job_exit_codes_some_exists(self):
-        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command",
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
         job_ids = [
             "609663", # FAILED (SIGINT)
             "767925", # FAILED,
@@ -468,7 +557,7 @@ class SlurmTest(ToilTest):
         self.monkeypatch.setattr(
             self.worker, "_getJobDetailsFromSacct", call_sacct_raises
         )
-        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command",
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
         job_ids = ["787204"] # COMPLETED
         expected_result = [(0, BatchJobExitReason.FINISHED)]
         result = self.worker.coalesce_job_exit_codes(job_ids)
@@ -482,7 +571,7 @@ class SlurmTest(ToilTest):
         self.monkeypatch.setattr(
             self.worker, "_getJobDetailsFromSacct", call_sacct_raises
         )
-        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command",
+        self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
         job_ids = ["1234"] # Non-existent
         try:
             _ = self.worker.coalesce_job_exit_codes(job_ids)
@@ -644,5 +733,5 @@ class SlurmTest(ToilTest):
         self.assertTrue(detector("-B"))
         self.assertFalse(detector("--no-bazz"))
         self.assertFalse(detector("--foo-bar=--bazz-only"))
-
+
 
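One number in the new mixed test is worth decoding: job 609663, labelled FAILED (SIGINT) elsewhere in this file, is expected to surface as exit code 130. That is consistent with the common shell convention of reporting a signal-killed process as 128 plus the signal number, applied to sacct's exitcode:signal pairs. A quick check of the arithmetic, as a sketch of the convention rather than toil's actual parser:

```python
import signal

# sacct's ExitCode column is "exitcode:signal"; "0:2" means killed by signal 2.
exit_code, signal_number = (int(s) for s in "0:2".split(":"))

# Shell convention: a process killed by signal N is reported as 128 + N.
reported = 128 + signal_number if signal_number > 0 else exit_code

assert signal_number == signal.SIGINT
assert reported == 130
```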
--- /dev/null
+++ b/toil/test/cwl/staging_cat.cwl
@@ -0,0 +1,27 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: CommandLineTool
+baseCommand: [ "bash", "run.sh" ]
+
+requirements:
+  DockerRequirement: # <- this is the part that causes the error
+    dockerPull: ubuntu:latest
+  InitialWorkDirRequirement:
+    listing:
+      - entryname: some_dir # <- put all the input files into a dir
+        writable: true
+        entry: "$({class: 'Directory', listing: inputs.input_files})"
+      - entryname: run.sh
+        entry: |-
+          for i in \$(find some_dir -type f); do cat \$i ; done
+
+stdout: output.txt
+
+inputs:
+  input_files:
+    type: File[]
+
+outputs:
+  output_file:
+    type: stdout
--- /dev/null
+++ b/toil/test/cwl/staging_make_file.cwl
@@ -0,0 +1,25 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: CommandLineTool
+baseCommand: [ "bash", "run.sh" ]
+
+stdout: output.txt
+
+requirements:
+  InitialWorkDirRequirement:
+    listing:
+      - entryname: run.sh
+        entry: |-
+          echo "$1"
+
+inputs:
+  sampleId:
+    type: string
+    inputBinding:
+      position: 1
+
+outputs:
+  output_file:
+    type: stdout
+
--- /dev/null
+++ b/toil/test/cwl/staging_workflow.cwl
@@ -0,0 +1,43 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow
+requirements:
+  MultipleInputFeatureRequirement: {}
+  ScatterFeatureRequirement: {}
+  StepInputExpressionRequirement: {}
+  InlineJavascriptRequirement: {}
+  SubworkflowFeatureRequirement: {}
+
+inputs:
+  samples:
+    type:
+      type: array
+      items:
+        type: record
+        fields:
+          sampleId: string
+
+steps:
+  make_file:
+    run: staging_make_file.cwl
+    scatter: sample
+    in:
+      sample: samples
+      sampleId:
+        valueFrom: ${ return inputs.sample['sampleId']; }
+    out:
+      [ output_file ]
+
+  gather_files:
+    run: staging_cat.cwl
+    in:
+      input_files: make_file/output_file
+    out:
+      [ output_file ]
+
+outputs:
+  output_file:
+    type: File
+    outputSource: gather_files/output_file
+
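For context, the three staging files form one scatter/gather pipeline: make_file is scattered over the samples, and gather_files cats the per-sample outputs together. A hypothetical driver such as the following would exercise it, assuming toil is installed with its CWL extra (which provides the toil-cwl-runner entry point) and the staging_*.cwl files sit in the working directory:

```python
import json
import subprocess
import tempfile

# Job order matching the workflow's declared input: an array of records,
# each carrying a sampleId string.
job_order = {"samples": [{"sampleId": "sample_A"}, {"sampleId": "sample_B"}]}

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as inputs_file:
    json.dump(job_order, inputs_file)

# Runs the scatter over both samples, then gathers into one output.txt.
subprocess.run(
    ["toil-cwl-runner", "staging_workflow.cwl", inputs_file.name],
    check=True,
)
```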
--- /dev/null
+++ b/toil/test/cwl/zero_default.cwl
@@ -0,0 +1,61 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.2
+class: Workflow
+requirements:
+  MultipleInputFeatureRequirement: {}
+  InlineJavascriptRequirement: {}
+inputs:
+  valueA:
+    type: int?
+    default: 0
+  valueB:
+    type: int?
+  someFile:
+    type: File?
+steps:
+  prev_step:
+    run:
+      class: ExpressionTool
+      requirements:
+        InlineJavascriptRequirement: {}
+      inputs:
+        someFile:
+          type: File
+      outputs:
+        valueA:
+          type: int
+      expression: |
+        ${
+          return {valueA: 1};
+        }
+    in:
+      someFile: someFile
+    out: [valueA]
+    when: $(inputs.someFile != null)
+
+  main_step:
+    run:
+      class: ExpressionTool
+      requirements:
+        InlineJavascriptRequirement: {}
+      inputs:
+        valueA:
+          type: int
+      outputs:
+        valueA:
+          type: int
+      expression: |
+        ${
+          return {valueA: inputs.valueA};
+        }
+    in:
+      valueA:
+        source: [prev_step/valueA, valueA]
+        pickValue: first_non_null
+    out: [valueA]
+outputs:
+  valueA:
+    type: int
+    outputSource: main_step/valueA
+
+
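The point of zero_default.cwl is easy to miss: valueA defaults to 0, a falsy value, and flows through pickValue: first_non_null after a conditional step, so a runner must distinguish null from zero. The behaviour under test, sketched in Python (an illustration, not toil's implementation):

```python
def first_non_null(values):
    # Must test "is not None": a truthiness test would wrongly skip 0.
    for value in values:
        if value is not None:
            return value
    return None

# prev_step is skipped when someFile is null, so its output is null and
# the workflow's 0 default must win.
assert first_non_null([None, 0]) == 0
# When prev_step does run, its value takes precedence over the default.
assert first_non_null([1, 0]) == 1
```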
--- a/toil/test/docs/scripts/tutorial_staging.py
+++ b/toil/test/docs/scripts/tutorial_staging.py
@@ -4,7 +4,6 @@ from toil.common import Toil
 from toil.job import Job
 from toil.lib.io import mkdtemp
 
-
 class HelloWorld(Job):
     def __init__(self, id):
         Job.__init__(self)
@@ -22,6 +21,7 @@ class HelloWorld(Job):
 
 if __name__ == "__main__":
     jobstore: str = mkdtemp("tutorial_staging")
+    tmp: str = mkdtemp("tutorial_staging_tmp")
     os.rmdir(jobstore)
     options = Job.Runner.getDefaultOptions(jobstore)
     options.logLevel = "INFO"
@@ -29,17 +29,26 @@ if __name__ == "__main__":
 
     with Toil(options) as toil:
         if not toil.options.restart:
-
-
-            )
-
-
-
+            # Prepare an input file
+            path = os.path.join(tmp, "in.txt")
+            with open(path, "w") as f:
+                f.write("Hello,\n")
+            # In a real workflow, you would obtain an input file path from the
+            # user.
+
+            # Stage it into the Toil job store.
+            #
+            # Note: this may create a symlink depending on the value of the
+            # --linkImports command line option, in which case the original
+            # input file needs to still exist if the workflow is restarted.
+            inputFileID = toil.importFile(f"file://{path}")
+
+            # Run the workflow
             outputFileID = toil.start(HelloWorld(inputFileID))
         else:
             outputFileID = toil.restart()
 
     toil.exportFile(
         outputFileID,
-        "file://" + os.path.
+        "file://" + os.path.join(tmp, "out.txt"),
     )
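An aside on the export destination: "file://" + os.path.join(tmp, "out.txt") yields a valid URL here because mkdtemp returns an absolute POSIX path. pathlib can build such URIs more defensively; an illustrative alternative with a hypothetical path, not what the tutorial does:

```python
from pathlib import Path

# Path.as_uri() requires an absolute path and handles percent-encoding,
# producing a well-formed file:// URL (POSIX path shown).
destination = Path("/tmp/tutorial_staging_tmp") / "out.txt"
print(destination.as_uri())  # file:///tmp/tutorial_staging_tmp/out.txt
```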
--- a/toil/test/jobStores/jobStoreTest.py
+++ b/toil/test/jobStores/jobStoreTest.py
@@ -420,27 +420,31 @@ class AbstractJobStoreTest:
 
     def testReadWriteFileStreamTextMode(self):
         """Checks if text mode is compatible for file streams."""
-
+        jobstore1 = self.jobstore_initialized
+        jobstore2 = self.jobstore_resumed_noconfig
         job = self.arbitraryJob()
-
-
+        jobstore1.assign_job_id(job)
+        jobstore1.create_job(job)
 
         foo = "foo"
         bar = "bar"
 
-        with
+        with jobstore1.write_file_stream(job.jobStoreID, encoding="utf-8") as (
             f,
             fileID,
         ):
             f.write(foo)
 
-        with
+        with jobstore1.read_file_stream(fileID, encoding="utf-8") as f:
             self.assertEqual(foo, f.read())
 
-        with
+        with jobstore1.update_file_stream(fileID, encoding="utf-8") as f:
             f.write(bar)
 
-        with
+        with jobstore1.read_file_stream(fileID, encoding="utf-8") as f:
+            self.assertEqual(bar, f.read())
+
+        with jobstore2.read_file_stream(fileID, encoding="utf-8") as f:
             self.assertEqual(bar, f.read())
 
     def testPerJobFiles(self):
@@ -1173,6 +1177,9 @@ class AbstractEncryptedJobStoreTest:
         Create an encrypted file. Read it in encrypted mode then try with encryption off
         to ensure that it fails.
         """
+
+        from toil.lib.aws.s3 import AWSBadEncryptionKeyError
+
         phrase = b"This file is encrypted."
         fileName = "foo"
         with self.jobstore_initialized.write_shared_file_stream(
@@ -1186,13 +1193,14 @@ class AbstractEncryptedJobStoreTest:
         self.jobstore_initialized.config.sseKey = None
         try:
             with self.jobstore_initialized.read_shared_file_stream(fileName) as f:
-
-
-
-
-
-
-
+                # If the read goes through, we should fail the assert because
+                # we read the cyphertext
+                assert f.read() != phrase, (
+                    "Managed to read plaintext content with encryption off."
+                )
+        except AWSBadEncryptionKeyError as e:
+            # If the read doesn't go through, we get this.
+            assert "Your AWS encryption key is most likely configured incorrectly" in str(e)
 
 
 class FileJobStoreTest(AbstractJobStoreTest.Test):
@@ -1435,113 +1443,6 @@ class AWSJobStoreTest(AbstractJobStoreTest.Test):
         assert isinstance(self.jobstore_initialized, AWSJobStore) # type hinting
         self.jobstore_initialized.destroy()
 
-    def testSDBDomainsDeletedOnFailedJobstoreBucketCreation(self):
-        """
-        This test ensures that SDB domains bound to a jobstore are deleted if the jobstore bucket
-        failed to be created. We simulate a failed jobstore bucket creation by using a bucket in a
-        different region with the same name.
-        """
-        from botocore.exceptions import ClientError
-
-        from toil.jobStores.aws.jobStore import BucketLocationConflictException
-        from toil.lib.aws.session import establish_boto3_session
-        from toil.lib.aws.utils import retry_s3
-
-        externalAWSLocation = "us-west-1"
-        for testRegion in "us-east-1", "us-west-2":
-            # We run this test twice, once with the default s3 server us-east-1 as the test region
-            # and once with another server (us-west-2). The external server is always us-west-1.
-            # This incidentally tests that the BucketLocationConflictException is thrown when using
-            # both the default, and a non-default server.
-            testJobStoreUUID = str(uuid.uuid4())
-            # Create the bucket at the external region
-            bucketName = "domain-test-" + testJobStoreUUID + "--files"
-            client = establish_boto3_session().client(
-                "s3", region_name=externalAWSLocation
-            )
-            resource = establish_boto3_session().resource(
-                "s3", region_name=externalAWSLocation
-            )
-
-            for attempt in retry_s3(delays=(2, 5, 10, 30, 60), timeout=600):
-                with attempt:
-                    # Create the bucket at the home region
-                    client.create_bucket(
-                        Bucket=bucketName,
-                        CreateBucketConfiguration={
-                            "LocationConstraint": externalAWSLocation
-                        },
-                    )
-
-            owner_tag = os.environ.get("TOIL_OWNER_TAG")
-            if owner_tag:
-                for attempt in retry_s3(delays=(1, 1, 2, 4, 8, 16), timeout=33):
-                    with attempt:
-                        bucket_tagging = resource.BucketTagging(bucketName)
-                        bucket_tagging.put(
-                            Tagging={"TagSet": [{"Key": "Owner", "Value": owner_tag}]}
-                        )
-
-            options = Job.Runner.getDefaultOptions(
-                "aws:" + testRegion + ":domain-test-" + testJobStoreUUID
-            )
-            options.logLevel = "DEBUG"
-            try:
-                with Toil(options) as toil:
-                    pass
-            except BucketLocationConflictException:
-                # Catch the expected BucketLocationConflictException and ensure that the bound
-                # domains don't exist in SDB.
-                sdb = establish_boto3_session().client(
-                    region_name=self.awsRegion(), service_name="sdb"
-                )
-                next_token = None
-                allDomainNames = []
-                while True:
-                    if next_token is None:
-                        domains = sdb.list_domains(MaxNumberOfDomains=100)
-                    else:
-                        domains = sdb.list_domains(
-                            MaxNumberOfDomains=100, NextToken=next_token
-                        )
-                    allDomainNames.extend(domains["DomainNames"])
-                    next_token = domains.get("NextToken")
-                    if next_token is None:
-                        break
-                self.assertFalse([d for d in allDomainNames if testJobStoreUUID in d])
-            else:
-                self.fail()
-            finally:
-                try:
-                    for attempt in retry_s3():
-                        with attempt:
-                            client.delete_bucket(Bucket=bucketName)
-                except ClientError as e:
-                    # The actual HTTP code of the error is in status.
-                    if (
-                        e.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
-                        == 404
-                    ):
-                        # The bucket doesn't exist; maybe a failed delete actually succeeded.
-                        pass
-                    else:
-                        raise
-
-    @slow
-    def testInlinedFiles(self):
-        from toil.jobStores.aws.jobStore import AWSJobStore
-
-        jobstore = self.jobstore_initialized
-        for encrypted in (True, False):
-            n = AWSJobStore.FileInfo.maxInlinedSize()
-            sizes = (1, n // 2, n - 1, n, n + 1, 2 * n)
-            for size in chain(sizes, islice(reversed(sizes), 1)):
-                s = os.urandom(size)
-                with jobstore.write_shared_file_stream("foo") as f:
-                    f.write(s)
-                with jobstore.read_shared_file_stream("foo") as f:
-                    self.assertEqual(s, f.read())
-
     def testOverlargeJob(self):
         jobstore = self.jobstore_initialized
         jobRequirements = dict(memory=12, cores=34, disk=35, preemptible=True)
@@ -1661,19 +1562,8 @@ class AWSJobStoreTest(AbstractJobStoreTest.Test):
         )
         delete_s3_bucket(resource, bucket.name)
 
-    def _largeLogEntrySize(self):
-        from toil.jobStores.aws.jobStore import AWSJobStore
-
-        # So we get into the else branch of reader() in uploadStream(multiPart=False):
-        return AWSJobStore.FileInfo.maxBinarySize() * 2
-
-    def _batchDeletionSize(self):
-        from toil.jobStores.aws.jobStore import AWSJobStore
-
-        return AWSJobStore.itemsPerBatchDelete
-
 
-@needs_aws_s3
+# @needs_aws_s3
 class InvalidAWSJobStoreTest(ToilTest):
     def testInvalidJobStoreName(self):
         from toil.jobStores.aws.jobStore import AWSJobStore