toil 9.0.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. toil/batchSystems/abstractBatchSystem.py +13 -5
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
  3. toil/batchSystems/kubernetes.py +13 -2
  4. toil/batchSystems/mesos/batchSystem.py +33 -2
  5. toil/batchSystems/slurm.py +191 -16
  6. toil/cwl/cwltoil.py +17 -82
  7. toil/fileStores/__init__.py +1 -1
  8. toil/fileStores/abstractFileStore.py +5 -2
  9. toil/fileStores/cachingFileStore.py +1 -1
  10. toil/job.py +30 -14
  11. toil/jobStores/abstractJobStore.py +24 -19
  12. toil/jobStores/aws/jobStore.py +862 -1963
  13. toil/jobStores/aws/utils.py +24 -270
  14. toil/jobStores/googleJobStore.py +25 -9
  15. toil/jobStores/utils.py +0 -327
  16. toil/leader.py +27 -22
  17. toil/lib/aws/config.py +22 -0
  18. toil/lib/aws/s3.py +477 -9
  19. toil/lib/aws/utils.py +22 -33
  20. toil/lib/checksum.py +88 -0
  21. toil/lib/conversions.py +33 -31
  22. toil/lib/directory.py +217 -0
  23. toil/lib/ec2.py +97 -29
  24. toil/lib/exceptions.py +2 -1
  25. toil/lib/expando.py +2 -2
  26. toil/lib/generatedEC2Lists.py +73 -16
  27. toil/lib/io.py +33 -2
  28. toil/lib/memoize.py +21 -7
  29. toil/lib/pipes.py +385 -0
  30. toil/lib/retry.py +1 -1
  31. toil/lib/threading.py +1 -1
  32. toil/lib/web.py +4 -5
  33. toil/provisioners/__init__.py +5 -2
  34. toil/provisioners/aws/__init__.py +43 -36
  35. toil/provisioners/aws/awsProvisioner.py +22 -13
  36. toil/provisioners/node.py +60 -12
  37. toil/resource.py +3 -13
  38. toil/test/__init__.py +14 -16
  39. toil/test/batchSystems/test_slurm.py +103 -14
  40. toil/test/cwl/staging_cat.cwl +27 -0
  41. toil/test/cwl/staging_make_file.cwl +25 -0
  42. toil/test/cwl/staging_workflow.cwl +43 -0
  43. toil/test/cwl/zero_default.cwl +61 -0
  44. toil/test/docs/scripts/tutorial_staging.py +17 -8
  45. toil/test/jobStores/jobStoreTest.py +23 -133
  46. toil/test/lib/aws/test_iam.py +7 -7
  47. toil/test/lib/aws/test_s3.py +30 -33
  48. toil/test/lib/aws/test_utils.py +9 -9
  49. toil/test/provisioners/aws/awsProvisionerTest.py +59 -6
  50. toil/test/src/autoDeploymentTest.py +2 -3
  51. toil/test/src/fileStoreTest.py +89 -87
  52. toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
  53. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
  54. toil/test/utils/toilKillTest.py +35 -28
  55. toil/test/wdl/md5sum/md5sum.json +1 -1
  56. toil/test/wdl/wdltoil_test.py +98 -38
  57. toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
  58. toil/utils/toilDebugFile.py +6 -3
  59. toil/utils/toilStats.py +17 -2
  60. toil/version.py +6 -6
  61. toil/wdl/wdltoil.py +1032 -546
  62. toil/worker.py +5 -2
  63. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/METADATA +12 -12
  64. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/RECORD +68 -61
  65. toil/lib/iterables.py +0 -112
  66. toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
  67. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/WHEEL +0 -0
  68. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
  69. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
  70. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
@@ -6,6 +6,8 @@ import logging
6
6
  import pytest
7
7
  import sys
8
8
 
9
+ from datetime import datetime, timedelta
10
+
9
11
  import toil.batchSystems.slurm
10
12
  from toil.batchSystems.abstractBatchSystem import (
11
13
  EXIT_STATUS_UNAVAILABLE_VALUE,
@@ -22,10 +24,25 @@ logger = logging.getLogger(__name__)
22
24
  # TODO: Come up with a better way to mock the commands than monkey-patching the
23
25
  # command-calling functions.
24
26
 
27
+ # To convincingly test jobs in the past relative to the time Toil goes looking
28
+ # for them, we give our fake jobs times relative to the module load time, which
29
+ # we hope is not days and days away from the time the tests actually run.
30
+ JOB_BASE_TIME = datetime.now().astimezone(None) - timedelta(days=5)
31
+
32
+ def call_either(args, **_) -> str:
33
+ """
34
+ Pretend to call either sacct or scontrol as appropriate.
35
+ """
36
+ if args[0] == "sacct":
37
+ return call_sacct(args)
38
+ elif args[0] == "scontrol":
39
+ return call_scontrol(args)
40
+ else:
41
+ raise RuntimeError(f"Cannot fake command call: {args}")
25
42
 
26
43
  def call_sacct(args, **_) -> str:
27
44
  """
28
- The arguments passed to `call_command` when executing `sacct` are:
45
+ The arguments passed to `call_command` when executing `sacct` are something like:
29
46
  ['sacct', '-n', '-j', '<comma-separated list of job-ids>', '--format',
30
47
  'JobIDRaw,State,ExitCode', '-P', '-S', '1970-01-01']
31
48
  The multi-line output is something like::
@@ -36,6 +53,7 @@ def call_sacct(args, **_) -> str:
36
53
  1236|FAILED|0:2
37
54
  1236.extern|COMPLETED|0:0
38
55
  """
56
+ logger.info("sacct call: %s", args)
39
57
  if sum(len(a) for a in args) > 1000:
40
58
  # Simulate if the argument list is too long
41
59
  raise OSError(errno.E2BIG, "Argument list is too long")
@@ -51,11 +69,58 @@ def call_sacct(args, **_) -> str:
51
69
  789868: "789868|PENDING|0:0\n",
52
70
  789869: "789869|COMPLETED|0:0\n789869.batch|COMPLETED|0:0\n789869.extern|COMPLETED|0:0\n",
53
71
  }
54
- job_ids = [int(job_id) for job_id in args[3].split(",")]
72
+ # And the time we say each job was at
73
+ job_time = {
74
+ 609663: JOB_BASE_TIME + timedelta(days=1),
75
+ 754725: JOB_BASE_TIME + timedelta(days=1),
76
+ 765096: JOB_BASE_TIME + timedelta(days=2),
77
+ 767925: JOB_BASE_TIME + timedelta(days=2),
78
+ 785023: JOB_BASE_TIME + timedelta(days=3),
79
+ 789456: JOB_BASE_TIME + timedelta(days=3),
80
+ 789724: JOB_BASE_TIME + timedelta(days=4),
81
+ 789868: JOB_BASE_TIME + timedelta(days=4),
82
+ 789869: JOB_BASE_TIME + timedelta(days=4),
83
+ }
84
+
85
+ # See if they asked for a job list
86
+ try:
87
+ j_index = args.index('-j')
88
+ job_ids = [int(job_id) for job_id in args[j_index + 1].split(",")]
89
+ except ValueError:
90
+ # We're not restricting to a list of jobs.
91
+ job_ids = list(sacct_info.keys())
92
+ # See if they asked for start or end times
93
+ try:
94
+ flag_index = args.index('-S')
95
+ begin_time = datetime.fromisoformat(args[flag_index + 1]).astimezone(None)
96
+ except ValueError:
97
+ # By default, Slurm uses today at midnight
98
+ begin_time = datetime.now().astimezone(None).replace(
99
+ hour=0,
100
+ minute=0,
101
+ second=0,
102
+ microsecond=0,
103
+ fold=0
104
+ )
105
+ try:
106
+ flag_index = args.index('-E')
107
+ end_time = datetime.fromisoformat(args[flag_index + 1]).astimezone(None)
108
+ except ValueError:
109
+ end_time = None
110
+
55
111
  stdout = ""
56
112
  # Glue the fake outputs for the request job-ids together in a single string
57
113
  for job_id in job_ids:
58
- stdout += sacct_info.get(job_id, "")
114
+ if job_id not in sacct_info:
115
+ # Not a job we know of.
116
+ continue
117
+ if begin_time is not None and begin_time > job_time[job_id]:
118
+ # Skip this job as being too early
119
+ continue
120
+ if end_time is not None and end_time < job_time[job_id]:
121
+ # Skip this job as being too late
122
+ continue
123
+ stdout += sacct_info[job_id]
59
124
  return stdout
60
125
 
61
126
 
@@ -64,8 +129,11 @@ def call_scontrol(args, **_) -> str:
64
129
  The arguments passed to `call_command` when executing `scontrol` are:
65
130
  ``['scontrol', 'show', 'job']`` or ``['scontrol', 'show', 'job', '<job-id>']``
66
131
  """
132
+ logger.info("scontrol call: %s", args)
67
133
  job_id = int(args[3]) if len(args) > 3 else None
68
134
  # Fake output per fake job-id.
135
+ # scontrol only shows recent jobs, so we have fewer/different jobs here
136
+ # than for sacct.
69
137
  scontrol_info = {
70
138
  787204: textwrap.dedent(
71
139
  """\
@@ -211,6 +279,9 @@ class FakeBatchSystem(BatchSystemSupport):
211
279
 
212
280
  def __init__(self):
213
281
  super().__init__(self.__fake_config(), float("inf"), sys.maxsize, sys.maxsize)
282
+ # Pretend to be a workflow that started before the jobs
283
+ # we pretend to have run.
284
+ self.start_time = JOB_BASE_TIME - timedelta(hours=2)
214
285
 
215
286
  def getWaitDuration(self):
216
287
  return 10
@@ -358,14 +429,14 @@ class SlurmTest(ToilTest):
358
429
  ###
359
430
 
360
431
  def test_getJobExitCode_job_exists(self):
361
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
432
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
362
433
  job_id = "785023" # FAILED
363
434
  expected_result = (127, BatchJobExitReason.FAILED)
364
435
  result = self.worker.getJobExitCode(job_id)
365
436
  assert result == expected_result, f"{result} != {expected_result}"
366
437
 
367
438
  def test_getJobExitCode_job_not_exists(self):
368
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
439
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
369
440
  job_id = "1234" # Non-existent
370
441
  expected_result = None
371
442
  result = self.worker.getJobExitCode(job_id)
@@ -379,7 +450,7 @@ class SlurmTest(ToilTest):
379
450
  self.monkeypatch.setattr(
380
451
  self.worker, "_getJobDetailsFromSacct", call_sacct_raises
381
452
  )
382
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
453
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
383
454
  job_id = "787204" # COMPLETED
384
455
  expected_result = (0, BatchJobExitReason.FINISHED)
385
456
  result = self.worker.getJobExitCode(job_id)
@@ -393,7 +464,7 @@ class SlurmTest(ToilTest):
393
464
  self.monkeypatch.setattr(
394
465
  self.worker, "_getJobDetailsFromSacct", call_sacct_raises
395
466
  )
396
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
467
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
397
468
  job_id = "1234" # Non-existent
398
469
  try:
399
470
  _ = self.worker.getJobExitCode(job_id)
@@ -407,21 +478,21 @@ class SlurmTest(ToilTest):
407
478
  ###
408
479
 
409
480
  def test_coalesce_job_exit_codes_one_exists(self):
410
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
481
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
411
482
  job_ids = ["785023"] # FAILED
412
483
  expected_result = [(127, BatchJobExitReason.FAILED)]
413
484
  result = self.worker.coalesce_job_exit_codes(job_ids)
414
485
  assert result == expected_result, f"{result} != {expected_result}"
415
486
 
416
487
  def test_coalesce_job_exit_codes_one_not_exists(self):
417
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
488
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
418
489
  job_ids = ["1234"] # Non-existent
419
490
  expected_result = [None]
420
491
  result = self.worker.coalesce_job_exit_codes(job_ids)
421
492
  assert result == expected_result, f"{result} != {expected_result}"
422
493
 
423
494
  def test_coalesce_job_exit_codes_many_all_exist(self):
424
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
495
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
425
496
  job_ids = [
426
497
  "754725", # TIMEOUT,
427
498
  "789456", # FAILED,
@@ -440,8 +511,26 @@ class SlurmTest(ToilTest):
440
511
  result = self.worker.coalesce_job_exit_codes(job_ids)
441
512
  assert result == expected_result, f"{result} != {expected_result}"
442
513
 
514
+ def test_coalesce_job_exit_codes_mix_sacct_scontrol(self):
515
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
516
+ job_ids = [
517
+ "609663", # FAILED, in sacct only
518
+ "789869", # COMPLETED, in sacct only
519
+ "787204", # COMPLETED, in scontrol only
520
+ "789724", # RUNNING, in scontrol only
521
+ ]
522
+ # RUNNING and PENDING jobs should return None
523
+ expected_result = [
524
+ (130, BatchJobExitReason.FAILED),
525
+ (0, BatchJobExitReason.FINISHED),
526
+ (0, BatchJobExitReason.FINISHED),
527
+ None
528
+ ]
529
+ result = self.worker.coalesce_job_exit_codes(job_ids)
530
+ assert result == expected_result, f"{result} != {expected_result}"
531
+
443
532
  def test_coalesce_job_exit_codes_some_exists(self):
444
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
533
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
445
534
  job_ids = [
446
535
  "609663", # FAILED (SIGINT)
447
536
  "767925", # FAILED,
@@ -468,7 +557,7 @@ class SlurmTest(ToilTest):
468
557
  self.monkeypatch.setattr(
469
558
  self.worker, "_getJobDetailsFromSacct", call_sacct_raises
470
559
  )
471
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
560
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
472
561
  job_ids = ["787204"] # COMPLETED
473
562
  expected_result = [(0, BatchJobExitReason.FINISHED)]
474
563
  result = self.worker.coalesce_job_exit_codes(job_ids)
@@ -482,7 +571,7 @@ class SlurmTest(ToilTest):
482
571
  self.monkeypatch.setattr(
483
572
  self.worker, "_getJobDetailsFromSacct", call_sacct_raises
484
573
  )
485
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
574
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
486
575
  job_ids = ["1234"] # Non-existent
487
576
  try:
488
577
  _ = self.worker.coalesce_job_exit_codes(job_ids)
@@ -644,5 +733,5 @@ class SlurmTest(ToilTest):
644
733
  self.assertTrue(detector("-B"))
645
734
  self.assertFalse(detector("--no-bazz"))
646
735
  self.assertFalse(detector("--foo-bar=--bazz-only"))
647
-
736
+
648
737
 
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env cwl-runner
2
+
3
+ cwlVersion: v1.0
4
+ class: CommandLineTool
5
+ baseCommand: [ "bash", "run.sh" ]
6
+
7
+ requirements:
8
+ DockerRequirement: # <- this is the part that causes the error
9
+ dockerPull: ubuntu:latest
10
+ InitialWorkDirRequirement:
11
+ listing:
12
+ - entryname: some_dir # <- put all the input files into a dir
13
+ writable: true
14
+ entry: "$({class: 'Directory', listing: inputs.input_files})"
15
+ - entryname: run.sh
16
+ entry: |-
17
+ for i in \$(find some_dir -type f); do cat \$i ; done
18
+
19
+ stdout: output.txt
20
+
21
+ inputs:
22
+ input_files:
23
+ type: File[]
24
+
25
+ outputs:
26
+ output_file:
27
+ type: stdout
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env cwl-runner
2
+
3
+ cwlVersion: v1.0
4
+ class: CommandLineTool
5
+ baseCommand: [ "bash", "run.sh" ]
6
+
7
+ stdout: output.txt
8
+
9
+ requirements:
10
+ InitialWorkDirRequirement:
11
+ listing:
12
+ - entryname: run.sh
13
+ entry: |-
14
+ echo "$1"
15
+
16
+ inputs:
17
+ sampleId:
18
+ type: string
19
+ inputBinding:
20
+ position: 1
21
+
22
+ outputs:
23
+ output_file:
24
+ type: stdout
25
+
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env cwl-runner
2
+
3
+ cwlVersion: v1.0
4
+ class: Workflow
5
+ requirements:
6
+ MultipleInputFeatureRequirement: {}
7
+ ScatterFeatureRequirement: {}
8
+ StepInputExpressionRequirement: {}
9
+ InlineJavascriptRequirement: {}
10
+ SubworkflowFeatureRequirement: {}
11
+
12
+ inputs:
13
+ samples:
14
+ type:
15
+ type: array
16
+ items:
17
+ type: record
18
+ fields:
19
+ sampleId: string
20
+
21
+ steps:
22
+ make_file:
23
+ run: staging_make_file.cwl
24
+ scatter: sample
25
+ in:
26
+ sample: samples
27
+ sampleId:
28
+ valueFrom: ${ return inputs.sample['sampleId']; }
29
+ out:
30
+ [ output_file ]
31
+
32
+ gather_files:
33
+ run: staging_cat.cwl
34
+ in:
35
+ input_files: make_file/output_file
36
+ out:
37
+ [ output_file ]
38
+
39
+ outputs:
40
+ output_file:
41
+ type: File
42
+ outputSource: gather_files/output_file
43
+
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env cwl-runner
2
+ cwlVersion: v1.2
3
+ class: Workflow
4
+ requirements:
5
+ MultipleInputFeatureRequirement: {}
6
+ InlineJavascriptRequirement: {}
7
+ inputs:
8
+ valueA:
9
+ type: int?
10
+ default: 0
11
+ valueB:
12
+ type: int?
13
+ someFile:
14
+ type: File?
15
+ steps:
16
+ prev_step:
17
+ run:
18
+ class: ExpressionTool
19
+ requirements:
20
+ InlineJavascriptRequirement: {}
21
+ inputs:
22
+ someFile:
23
+ type: File
24
+ outputs:
25
+ valueA:
26
+ type: int
27
+ expression: |
28
+ ${
29
+ return {valueA: 1};
30
+ }
31
+ in:
32
+ someFile: someFile
33
+ out: [valueA]
34
+ when: $(inputs.someFile != null)
35
+
36
+ main_step:
37
+ run:
38
+ class: ExpressionTool
39
+ requirements:
40
+ InlineJavascriptRequirement: {}
41
+ inputs:
42
+ valueA:
43
+ type: int
44
+ outputs:
45
+ valueA:
46
+ type: int
47
+ expression: |
48
+ ${
49
+ return {valueA: inputs.valueA};
50
+ }
51
+ in:
52
+ valueA:
53
+ source: [prev_step/valueA, valueA]
54
+ pickValue: first_non_null
55
+ out: [valueA]
56
+ outputs:
57
+ valueA:
58
+ type: int
59
+ outputSource: main_step/valueA
60
+
61
+
@@ -4,7 +4,6 @@ from toil.common import Toil
4
4
  from toil.job import Job
5
5
  from toil.lib.io import mkdtemp
6
6
 
7
-
8
7
  class HelloWorld(Job):
9
8
  def __init__(self, id):
10
9
  Job.__init__(self)
@@ -22,6 +21,7 @@ class HelloWorld(Job):
22
21
 
23
22
  if __name__ == "__main__":
24
23
  jobstore: str = mkdtemp("tutorial_staging")
24
+ tmp: str = mkdtemp("tutorial_staging_tmp")
25
25
  os.rmdir(jobstore)
26
26
  options = Job.Runner.getDefaultOptions(jobstore)
27
27
  options.logLevel = "INFO"
@@ -29,17 +29,26 @@ if __name__ == "__main__":
29
29
 
30
30
  with Toil(options) as toil:
31
31
  if not toil.options.restart:
32
- ioFileDirectory = os.path.join(
33
- os.path.dirname(os.path.abspath(__file__)), "stagingExampleFiles"
34
- )
35
- inputFileID = toil.importFile(
36
- "file://" + os.path.abspath(os.path.join(ioFileDirectory, "in.txt"))
37
- )
32
+ # Prepare an input file
33
+ path = os.path.join(tmp, "in.txt")
34
+ with open(path, "w") as f:
35
+ f.write("Hello,\n")
36
+ # In a real workflow, you would obtain an input file path from the
37
+ # user.
38
+
39
+ # Stage it into the Toil job store.
40
+ #
41
+ # Note: this may create a symlink depending on the value of the
42
+ # --linkImports command line option, in which case the original
43
+ # input file needs to still exist if the workflow is restarted.
44
+ inputFileID = toil.importFile(f"file://{path}")
45
+
46
+ # Run the workflow
38
47
  outputFileID = toil.start(HelloWorld(inputFileID))
39
48
  else:
40
49
  outputFileID = toil.restart()
41
50
 
42
51
  toil.exportFile(
43
52
  outputFileID,
44
- "file://" + os.path.abspath(os.path.join(ioFileDirectory, "out.txt")),
53
+ "file://" + os.path.join(tmp, "out.txt"),
45
54
  )
@@ -420,27 +420,31 @@ class AbstractJobStoreTest:
420
420
 
421
421
  def testReadWriteFileStreamTextMode(self):
422
422
  """Checks if text mode is compatible for file streams."""
423
- jobstore = self.jobstore_initialized
423
+ jobstore1 = self.jobstore_initialized
424
+ jobstore2 = self.jobstore_resumed_noconfig
424
425
  job = self.arbitraryJob()
425
- jobstore.assign_job_id(job)
426
- jobstore.create_job(job)
426
+ jobstore1.assign_job_id(job)
427
+ jobstore1.create_job(job)
427
428
 
428
429
  foo = "foo"
429
430
  bar = "bar"
430
431
 
431
- with jobstore.write_file_stream(job.jobStoreID, encoding="utf-8") as (
432
+ with jobstore1.write_file_stream(job.jobStoreID, encoding="utf-8") as (
432
433
  f,
433
434
  fileID,
434
435
  ):
435
436
  f.write(foo)
436
437
 
437
- with jobstore.read_file_stream(fileID, encoding="utf-8") as f:
438
+ with jobstore1.read_file_stream(fileID, encoding="utf-8") as f:
438
439
  self.assertEqual(foo, f.read())
439
440
 
440
- with jobstore.update_file_stream(fileID, encoding="utf-8") as f:
441
+ with jobstore1.update_file_stream(fileID, encoding="utf-8") as f:
441
442
  f.write(bar)
442
443
 
443
- with jobstore.read_file_stream(fileID, encoding="utf-8") as f:
444
+ with jobstore1.read_file_stream(fileID, encoding="utf-8") as f:
445
+ self.assertEqual(bar, f.read())
446
+
447
+ with jobstore2.read_file_stream(fileID, encoding="utf-8") as f:
444
448
  self.assertEqual(bar, f.read())
445
449
 
446
450
  def testPerJobFiles(self):
@@ -1173,6 +1177,9 @@ class AbstractEncryptedJobStoreTest:
1173
1177
  Create an encrypted file. Read it in encrypted mode then try with encryption off
1174
1178
  to ensure that it fails.
1175
1179
  """
1180
+
1181
+ from toil.lib.aws.s3 import AWSBadEncryptionKeyError
1182
+
1176
1183
  phrase = b"This file is encrypted."
1177
1184
  fileName = "foo"
1178
1185
  with self.jobstore_initialized.write_shared_file_stream(
@@ -1186,13 +1193,14 @@ class AbstractEncryptedJobStoreTest:
1186
1193
  self.jobstore_initialized.config.sseKey = None
1187
1194
  try:
1188
1195
  with self.jobstore_initialized.read_shared_file_stream(fileName) as f:
1189
- self.assertEqual(phrase, f.read())
1190
- except AssertionError as e:
1191
- self.assertEqual(
1192
- "Content is encrypted but no key was provided.", e.args[0]
1193
- )
1194
- else:
1195
- self.fail("Read encryption content with encryption off.")
1196
+ # If the read goes through, we should fail the assert because
1197
+ # we read the ciphertext
1198
+ assert f.read() != phrase, (
1199
+ "Managed to read plaintext content with encryption off."
1200
+ )
1201
+ except AWSBadEncryptionKeyError as e:
1202
+ # If the read doesn't go through, we get this.
1203
+ assert "Your AWS encryption key is most likely configured incorrectly" in str(e)
1196
1204
 
1197
1205
 
1198
1206
  class FileJobStoreTest(AbstractJobStoreTest.Test):
@@ -1435,113 +1443,6 @@ class AWSJobStoreTest(AbstractJobStoreTest.Test):
1435
1443
  assert isinstance(self.jobstore_initialized, AWSJobStore) # type hinting
1436
1444
  self.jobstore_initialized.destroy()
1437
1445
 
1438
- def testSDBDomainsDeletedOnFailedJobstoreBucketCreation(self):
1439
- """
1440
- This test ensures that SDB domains bound to a jobstore are deleted if the jobstore bucket
1441
- failed to be created. We simulate a failed jobstore bucket creation by using a bucket in a
1442
- different region with the same name.
1443
- """
1444
- from botocore.exceptions import ClientError
1445
-
1446
- from toil.jobStores.aws.jobStore import BucketLocationConflictException
1447
- from toil.lib.aws.session import establish_boto3_session
1448
- from toil.lib.aws.utils import retry_s3
1449
-
1450
- externalAWSLocation = "us-west-1"
1451
- for testRegion in "us-east-1", "us-west-2":
1452
- # We run this test twice, once with the default s3 server us-east-1 as the test region
1453
- # and once with another server (us-west-2). The external server is always us-west-1.
1454
- # This incidentally tests that the BucketLocationConflictException is thrown when using
1455
- # both the default, and a non-default server.
1456
- testJobStoreUUID = str(uuid.uuid4())
1457
- # Create the bucket at the external region
1458
- bucketName = "domain-test-" + testJobStoreUUID + "--files"
1459
- client = establish_boto3_session().client(
1460
- "s3", region_name=externalAWSLocation
1461
- )
1462
- resource = establish_boto3_session().resource(
1463
- "s3", region_name=externalAWSLocation
1464
- )
1465
-
1466
- for attempt in retry_s3(delays=(2, 5, 10, 30, 60), timeout=600):
1467
- with attempt:
1468
- # Create the bucket at the home region
1469
- client.create_bucket(
1470
- Bucket=bucketName,
1471
- CreateBucketConfiguration={
1472
- "LocationConstraint": externalAWSLocation
1473
- },
1474
- )
1475
-
1476
- owner_tag = os.environ.get("TOIL_OWNER_TAG")
1477
- if owner_tag:
1478
- for attempt in retry_s3(delays=(1, 1, 2, 4, 8, 16), timeout=33):
1479
- with attempt:
1480
- bucket_tagging = resource.BucketTagging(bucketName)
1481
- bucket_tagging.put(
1482
- Tagging={"TagSet": [{"Key": "Owner", "Value": owner_tag}]}
1483
- )
1484
-
1485
- options = Job.Runner.getDefaultOptions(
1486
- "aws:" + testRegion + ":domain-test-" + testJobStoreUUID
1487
- )
1488
- options.logLevel = "DEBUG"
1489
- try:
1490
- with Toil(options) as toil:
1491
- pass
1492
- except BucketLocationConflictException:
1493
- # Catch the expected BucketLocationConflictException and ensure that the bound
1494
- # domains don't exist in SDB.
1495
- sdb = establish_boto3_session().client(
1496
- region_name=self.awsRegion(), service_name="sdb"
1497
- )
1498
- next_token = None
1499
- allDomainNames = []
1500
- while True:
1501
- if next_token is None:
1502
- domains = sdb.list_domains(MaxNumberOfDomains=100)
1503
- else:
1504
- domains = sdb.list_domains(
1505
- MaxNumberOfDomains=100, NextToken=next_token
1506
- )
1507
- allDomainNames.extend(domains["DomainNames"])
1508
- next_token = domains.get("NextToken")
1509
- if next_token is None:
1510
- break
1511
- self.assertFalse([d for d in allDomainNames if testJobStoreUUID in d])
1512
- else:
1513
- self.fail()
1514
- finally:
1515
- try:
1516
- for attempt in retry_s3():
1517
- with attempt:
1518
- client.delete_bucket(Bucket=bucketName)
1519
- except ClientError as e:
1520
- # The actual HTTP code of the error is in status.
1521
- if (
1522
- e.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
1523
- == 404
1524
- ):
1525
- # The bucket doesn't exist; maybe a failed delete actually succeeded.
1526
- pass
1527
- else:
1528
- raise
1529
-
1530
- @slow
1531
- def testInlinedFiles(self):
1532
- from toil.jobStores.aws.jobStore import AWSJobStore
1533
-
1534
- jobstore = self.jobstore_initialized
1535
- for encrypted in (True, False):
1536
- n = AWSJobStore.FileInfo.maxInlinedSize()
1537
- sizes = (1, n // 2, n - 1, n, n + 1, 2 * n)
1538
- for size in chain(sizes, islice(reversed(sizes), 1)):
1539
- s = os.urandom(size)
1540
- with jobstore.write_shared_file_stream("foo") as f:
1541
- f.write(s)
1542
- with jobstore.read_shared_file_stream("foo") as f:
1543
- self.assertEqual(s, f.read())
1544
-
1545
1446
  def testOverlargeJob(self):
1546
1447
  jobstore = self.jobstore_initialized
1547
1448
  jobRequirements = dict(memory=12, cores=34, disk=35, preemptible=True)
@@ -1661,19 +1562,8 @@ class AWSJobStoreTest(AbstractJobStoreTest.Test):
1661
1562
  )
1662
1563
  delete_s3_bucket(resource, bucket.name)
1663
1564
 
1664
- def _largeLogEntrySize(self):
1665
- from toil.jobStores.aws.jobStore import AWSJobStore
1666
-
1667
- # So we get into the else branch of reader() in uploadStream(multiPart=False):
1668
- return AWSJobStore.FileInfo.maxBinarySize() * 2
1669
-
1670
- def _batchDeletionSize(self):
1671
- from toil.jobStores.aws.jobStore import AWSJobStore
1672
-
1673
- return AWSJobStore.itemsPerBatchDelete
1674
-
1675
1565
 
1676
- @needs_aws_s3
1566
+ # @needs_aws_s3
1677
1567
  class InvalidAWSJobStoreTest(ToilTest):
1678
1568
  def testInvalidJobStoreName(self):
1679
1569
  from toil.jobStores.aws.jobStore import AWSJobStore