toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. toil/batchSystems/abstractBatchSystem.py +13 -5
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
  3. toil/batchSystems/kubernetes.py +13 -2
  4. toil/batchSystems/mesos/batchSystem.py +33 -2
  5. toil/batchSystems/registry.py +15 -118
  6. toil/batchSystems/slurm.py +191 -16
  7. toil/common.py +20 -1
  8. toil/cwl/cwltoil.py +97 -119
  9. toil/cwl/utils.py +103 -3
  10. toil/fileStores/__init__.py +1 -1
  11. toil/fileStores/abstractFileStore.py +5 -2
  12. toil/fileStores/cachingFileStore.py +1 -1
  13. toil/job.py +30 -14
  14. toil/jobStores/abstractJobStore.py +35 -255
  15. toil/jobStores/aws/jobStore.py +864 -1964
  16. toil/jobStores/aws/utils.py +24 -270
  17. toil/jobStores/fileJobStore.py +2 -1
  18. toil/jobStores/googleJobStore.py +32 -13
  19. toil/jobStores/utils.py +0 -327
  20. toil/leader.py +27 -22
  21. toil/lib/accelerators.py +1 -1
  22. toil/lib/aws/config.py +22 -0
  23. toil/lib/aws/s3.py +477 -9
  24. toil/lib/aws/utils.py +22 -33
  25. toil/lib/checksum.py +88 -0
  26. toil/lib/conversions.py +33 -31
  27. toil/lib/directory.py +217 -0
  28. toil/lib/ec2.py +97 -29
  29. toil/lib/exceptions.py +2 -1
  30. toil/lib/expando.py +2 -2
  31. toil/lib/generatedEC2Lists.py +138 -19
  32. toil/lib/io.py +33 -2
  33. toil/lib/memoize.py +21 -7
  34. toil/lib/misc.py +1 -1
  35. toil/lib/pipes.py +385 -0
  36. toil/lib/plugins.py +106 -0
  37. toil/lib/retry.py +1 -1
  38. toil/lib/threading.py +1 -1
  39. toil/lib/url.py +320 -0
  40. toil/lib/web.py +4 -5
  41. toil/options/cwl.py +13 -1
  42. toil/options/runner.py +17 -10
  43. toil/options/wdl.py +12 -1
  44. toil/provisioners/__init__.py +5 -2
  45. toil/provisioners/aws/__init__.py +43 -36
  46. toil/provisioners/aws/awsProvisioner.py +47 -15
  47. toil/provisioners/node.py +60 -12
  48. toil/resource.py +3 -13
  49. toil/server/app.py +12 -6
  50. toil/server/cli/wes_cwl_runner.py +2 -2
  51. toil/server/wes/abstract_backend.py +21 -43
  52. toil/server/wes/toil_backend.py +2 -2
  53. toil/test/__init__.py +16 -18
  54. toil/test/batchSystems/batchSystemTest.py +2 -9
  55. toil/test/batchSystems/batch_system_plugin_test.py +7 -0
  56. toil/test/batchSystems/test_slurm.py +103 -14
  57. toil/test/cwl/cwlTest.py +181 -8
  58. toil/test/cwl/staging_cat.cwl +27 -0
  59. toil/test/cwl/staging_make_file.cwl +25 -0
  60. toil/test/cwl/staging_workflow.cwl +43 -0
  61. toil/test/cwl/zero_default.cwl +61 -0
  62. toil/test/docs/scripts/tutorial_staging.py +17 -8
  63. toil/test/docs/scriptsTest.py +2 -1
  64. toil/test/jobStores/jobStoreTest.py +23 -133
  65. toil/test/lib/aws/test_iam.py +7 -7
  66. toil/test/lib/aws/test_s3.py +30 -33
  67. toil/test/lib/aws/test_utils.py +9 -9
  68. toil/test/lib/test_url.py +69 -0
  69. toil/test/lib/url_plugin_test.py +105 -0
  70. toil/test/provisioners/aws/awsProvisionerTest.py +60 -7
  71. toil/test/provisioners/clusterTest.py +15 -2
  72. toil/test/provisioners/gceProvisionerTest.py +1 -1
  73. toil/test/server/serverTest.py +78 -36
  74. toil/test/src/autoDeploymentTest.py +2 -3
  75. toil/test/src/fileStoreTest.py +89 -87
  76. toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
  77. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
  78. toil/test/utils/toilKillTest.py +35 -28
  79. toil/test/wdl/md5sum/md5sum-gs.json +1 -1
  80. toil/test/wdl/md5sum/md5sum.json +1 -1
  81. toil/test/wdl/testfiles/read_file.wdl +18 -0
  82. toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
  83. toil/test/wdl/wdltoil_test.py +171 -162
  84. toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
  85. toil/utils/toilDebugFile.py +6 -3
  86. toil/utils/toilSshCluster.py +23 -0
  87. toil/utils/toilStats.py +17 -2
  88. toil/utils/toilUpdateEC2Instances.py +1 -0
  89. toil/version.py +10 -10
  90. toil/wdl/wdltoil.py +1179 -825
  91. toil/worker.py +16 -8
  92. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/METADATA +32 -32
  93. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/RECORD +97 -85
  94. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/WHEEL +1 -1
  95. toil/lib/iterables.py +0 -112
  96. toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
  97. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
  98. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
  99. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
@@ -6,6 +6,8 @@ import logging
6
6
  import pytest
7
7
  import sys
8
8
 
9
+ from datetime import datetime, timedelta
10
+
9
11
  import toil.batchSystems.slurm
10
12
  from toil.batchSystems.abstractBatchSystem import (
11
13
  EXIT_STATUS_UNAVAILABLE_VALUE,
@@ -22,10 +24,25 @@ logger = logging.getLogger(__name__)
22
24
  # TODO: Come up with a better way to mock the commands than monkey-patching the
23
25
  # command-calling functions.
24
26
 
27
+ # To convincingly test jobs in the past relative to the time Toil goes looking
28
+ # for them, we give our fake jobs times relative to the module load time, which
29
+ # we hope is not days and days away from the time the tests actually run.
30
+ JOB_BASE_TIME = datetime.now().astimezone(None) - timedelta(days=5)
31
+
32
+ def call_either(args, **_) -> str:
33
+ """
34
+ Pretend to call either sacct or scontrol as appropriate.
35
+ """
36
+ if args[0] == "sacct":
37
+ return call_sacct(args)
38
+ elif args[0] == "scontrol":
39
+ return call_scontrol(args)
40
+ else:
41
+ raise RuntimeError(f"Cannot fake command call: {args}")
25
42
 
26
43
  def call_sacct(args, **_) -> str:
27
44
  """
28
- The arguments passed to `call_command` when executing `sacct` are:
45
+ The arguments passed to `call_command` when executing `sacct` are something like:
29
46
  ['sacct', '-n', '-j', '<comma-separated list of job-ids>', '--format',
30
47
  'JobIDRaw,State,ExitCode', '-P', '-S', '1970-01-01']
31
48
  The multi-line output is something like::
@@ -36,6 +53,7 @@ def call_sacct(args, **_) -> str:
36
53
  1236|FAILED|0:2
37
54
  1236.extern|COMPLETED|0:0
38
55
  """
56
+ logger.info("sacct call: %s", args)
39
57
  if sum(len(a) for a in args) > 1000:
40
58
  # Simulate if the argument list is too long
41
59
  raise OSError(errno.E2BIG, "Argument list is too long")
@@ -51,11 +69,58 @@ def call_sacct(args, **_) -> str:
51
69
  789868: "789868|PENDING|0:0\n",
52
70
  789869: "789869|COMPLETED|0:0\n789869.batch|COMPLETED|0:0\n789869.extern|COMPLETED|0:0\n",
53
71
  }
54
- job_ids = [int(job_id) for job_id in args[3].split(",")]
72
+ # And the time we say each job was at
73
+ job_time = {
74
+ 609663: JOB_BASE_TIME + timedelta(days=1),
75
+ 754725: JOB_BASE_TIME + timedelta(days=1),
76
+ 765096: JOB_BASE_TIME + timedelta(days=2),
77
+ 767925: JOB_BASE_TIME + timedelta(days=2),
78
+ 785023: JOB_BASE_TIME + timedelta(days=3),
79
+ 789456: JOB_BASE_TIME + timedelta(days=3),
80
+ 789724: JOB_BASE_TIME + timedelta(days=4),
81
+ 789868: JOB_BASE_TIME + timedelta(days=4),
82
+ 789869: JOB_BASE_TIME + timedelta(days=4),
83
+ }
84
+
85
+ # See if they asked for a job list
86
+ try:
87
+ j_index = args.index('-j')
88
+ job_ids = [int(job_id) for job_id in args[j_index + 1].split(",")]
89
+ except ValueError:
90
+ # We're not restricting to a list of jobs.
91
+ job_ids = list(sacct_info.keys())
92
+ # See if they asked for start or end times
93
+ try:
94
+ flag_index = args.index('-S')
95
+ begin_time = datetime.fromisoformat(args[flag_index + 1]).astimezone(None)
96
+ except ValueError:
97
+ # By default, Slurm uses today at midnight
98
+ begin_time = datetime.now().astimezone(None).replace(
99
+ hour=0,
100
+ minute=0,
101
+ second=0,
102
+ microsecond=0,
103
+ fold=0
104
+ )
105
+ try:
106
+ flag_index = args.index('-E')
107
+ end_time = datetime.fromisoformat(args[flag_index + 1]).astimezone(None)
108
+ except ValueError:
109
+ end_time = None
110
+
55
111
  stdout = ""
56
112
  # Glue the fake outputs for the request job-ids together in a single string
57
113
  for job_id in job_ids:
58
- stdout += sacct_info.get(job_id, "")
114
+ if job_id not in sacct_info:
115
+ # Not a job we know of.
116
+ continue
117
+ if begin_time is not None and begin_time > job_time[job_id]:
118
+ # Skip this job as being too early
119
+ continue
120
+ if end_time is not None and end_time < job_time[job_id]:
121
+ # Skip this job as being too late
122
+ continue
123
+ stdout += sacct_info[job_id]
59
124
  return stdout
60
125
 
61
126
 
@@ -64,8 +129,11 @@ def call_scontrol(args, **_) -> str:
64
129
  The arguments passed to `call_command` when executing `scontrol` are:
65
130
  ``['scontrol', 'show', 'job']`` or ``['scontrol', 'show', 'job', '<job-id>']``
66
131
  """
132
+ logger.info("scontrol call: %s", args)
67
133
  job_id = int(args[3]) if len(args) > 3 else None
68
134
  # Fake output per fake job-id.
135
+ # scontrol only shows recent jobs, so we have fewer/different jobs here
136
+ # than for sacct.
69
137
  scontrol_info = {
70
138
  787204: textwrap.dedent(
71
139
  """\
@@ -211,6 +279,9 @@ class FakeBatchSystem(BatchSystemSupport):
211
279
 
212
280
  def __init__(self):
213
281
  super().__init__(self.__fake_config(), float("inf"), sys.maxsize, sys.maxsize)
282
+ # Pretend to be a workflow that started before we pretend the jobs
283
+ # we pretend to have run.
284
+ self.start_time = JOB_BASE_TIME - timedelta(hours=2)
214
285
 
215
286
  def getWaitDuration(self):
216
287
  return 10
@@ -358,14 +429,14 @@ class SlurmTest(ToilTest):
358
429
  ###
359
430
 
360
431
  def test_getJobExitCode_job_exists(self):
361
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
432
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
362
433
  job_id = "785023" # FAILED
363
434
  expected_result = (127, BatchJobExitReason.FAILED)
364
435
  result = self.worker.getJobExitCode(job_id)
365
436
  assert result == expected_result, f"{result} != {expected_result}"
366
437
 
367
438
  def test_getJobExitCode_job_not_exists(self):
368
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
439
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
369
440
  job_id = "1234" # Non-existent
370
441
  expected_result = None
371
442
  result = self.worker.getJobExitCode(job_id)
@@ -379,7 +450,7 @@ class SlurmTest(ToilTest):
379
450
  self.monkeypatch.setattr(
380
451
  self.worker, "_getJobDetailsFromSacct", call_sacct_raises
381
452
  )
382
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
453
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
383
454
  job_id = "787204" # COMPLETED
384
455
  expected_result = (0, BatchJobExitReason.FINISHED)
385
456
  result = self.worker.getJobExitCode(job_id)
@@ -393,7 +464,7 @@ class SlurmTest(ToilTest):
393
464
  self.monkeypatch.setattr(
394
465
  self.worker, "_getJobDetailsFromSacct", call_sacct_raises
395
466
  )
396
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
467
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
397
468
  job_id = "1234" # Non-existent
398
469
  try:
399
470
  _ = self.worker.getJobExitCode(job_id)
@@ -407,21 +478,21 @@ class SlurmTest(ToilTest):
407
478
  ###
408
479
 
409
480
  def test_coalesce_job_exit_codes_one_exists(self):
410
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
481
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
411
482
  job_ids = ["785023"] # FAILED
412
483
  expected_result = [(127, BatchJobExitReason.FAILED)]
413
484
  result = self.worker.coalesce_job_exit_codes(job_ids)
414
485
  assert result == expected_result, f"{result} != {expected_result}"
415
486
 
416
487
  def test_coalesce_job_exit_codes_one_not_exists(self):
417
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
488
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
418
489
  job_ids = ["1234"] # Non-existent
419
490
  expected_result = [None]
420
491
  result = self.worker.coalesce_job_exit_codes(job_ids)
421
492
  assert result == expected_result, f"{result} != {expected_result}"
422
493
 
423
494
  def test_coalesce_job_exit_codes_many_all_exist(self):
424
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
495
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
425
496
  job_ids = [
426
497
  "754725", # TIMEOUT,
427
498
  "789456", # FAILED,
@@ -440,8 +511,26 @@ class SlurmTest(ToilTest):
440
511
  result = self.worker.coalesce_job_exit_codes(job_ids)
441
512
  assert result == expected_result, f"{result} != {expected_result}"
442
513
 
514
+ def test_coalesce_job_exit_codes_mix_sacct_scontrol(self):
515
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
516
+ job_ids = [
517
+ "609663", # FAILED, in sacct only
518
+ "789869", # COMPLETED, in sacct only
519
+ "787204", # COMPLETED, in scontrol only
520
+ "789724", # RUNNING, in scontrol only
521
+ ]
522
+ # RUNNING and PENDING jobs should return None
523
+ expected_result = [
524
+ (130, BatchJobExitReason.FAILED),
525
+ (0, BatchJobExitReason.FINISHED),
526
+ (0, BatchJobExitReason.FINISHED),
527
+ None
528
+ ]
529
+ result = self.worker.coalesce_job_exit_codes(job_ids)
530
+ assert result == expected_result, f"{result} != {expected_result}"
531
+
443
532
  def test_coalesce_job_exit_codes_some_exists(self):
444
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_sacct)
533
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
445
534
  job_ids = [
446
535
  "609663", # FAILED (SIGINT)
447
536
  "767925", # FAILED,
@@ -468,7 +557,7 @@ class SlurmTest(ToilTest):
468
557
  self.monkeypatch.setattr(
469
558
  self.worker, "_getJobDetailsFromSacct", call_sacct_raises
470
559
  )
471
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
560
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
472
561
  job_ids = ["787204"] # COMPLETED
473
562
  expected_result = [(0, BatchJobExitReason.FINISHED)]
474
563
  result = self.worker.coalesce_job_exit_codes(job_ids)
@@ -482,7 +571,7 @@ class SlurmTest(ToilTest):
482
571
  self.monkeypatch.setattr(
483
572
  self.worker, "_getJobDetailsFromSacct", call_sacct_raises
484
573
  )
485
- self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_scontrol)
574
+ self.monkeypatch.setattr(toil.batchSystems.slurm, "call_command", call_either)
486
575
  job_ids = ["1234"] # Non-existent
487
576
  try:
488
577
  _ = self.worker.coalesce_job_exit_codes(job_ids)
@@ -644,5 +733,5 @@ class SlurmTest(ToilTest):
644
733
  self.assertTrue(detector("-B"))
645
734
  self.assertFalse(detector("--no-bazz"))
646
735
  self.assertFalse(detector("--foo-bar=--bazz-only"))
647
-
736
+
648
737
 
toil/test/cwl/cwlTest.py CHANGED
@@ -45,9 +45,11 @@ from toil.cwl.utils import (
45
45
  download_structure,
46
46
  visit_cwl_class_and_reduce,
47
47
  visit_top_cwl_class,
48
+ remove_redundant_mounts
48
49
  )
49
50
  from toil.fileStores import FileID
50
51
  from toil.fileStores.abstractFileStore import AbstractFileStore
52
+ from toil.job import WorkerImportJob
51
53
  from toil.lib.threading import cpu_count
52
54
  from toil.test import (
53
55
  get_data,
@@ -1112,12 +1114,13 @@ def cwl_v1_0_spec(tmp_path: Path) -> Generator[Path]:
1112
1114
  finally:
1113
1115
  pass # no cleanup
1114
1116
 
1115
-
1117
+ @pytest.mark.integrative
1118
+ @pytest.mark.conformance
1116
1119
  @needs_cwl
1117
1120
  @needs_online
1118
1121
  @pytest.mark.cwl
1119
1122
  @pytest.mark.online
1120
- class TestCWLv10:
1123
+ class TestCWLv10Conformance:
1121
1124
  """
1122
1125
  Run the CWL 1.0 conformance tests in various environments.
1123
1126
  """
@@ -1295,11 +1298,13 @@ def cwl_v1_1_spec(tmp_path: Path) -> Generator[Path]:
1295
1298
  pass # no cleanup
1296
1299
 
1297
1300
 
1301
+ @pytest.mark.integrative
1302
+ @pytest.mark.conformance
1298
1303
  @needs_cwl
1299
1304
  @needs_online
1300
1305
  @pytest.mark.cwl
1301
1306
  @pytest.mark.online
1302
- class TestCWLv11:
1307
+ class TestCWLv11Conformance:
1303
1308
  """
1304
1309
  Run the CWL 1.1 conformance tests in various environments.
1305
1310
  """
@@ -1383,11 +1388,13 @@ def cwl_v1_2_spec(tmp_path: Path) -> Generator[Path]:
1383
1388
  pass # no cleanup
1384
1389
 
1385
1390
 
1391
+ @pytest.mark.integrative
1392
+ @pytest.mark.conformance
1386
1393
  @needs_cwl
1387
1394
  @needs_online
1388
1395
  @pytest.mark.cwl
1389
1396
  @pytest.mark.online
1390
- class TestCWLv12:
1397
+ class TestCWLv12Conformance:
1391
1398
  """
1392
1399
  Run the CWL 1.2 conformance tests in various environments.
1393
1400
  """
@@ -1525,7 +1532,7 @@ class TestCWLv12:
1525
1532
  TOIL_WES_ENDPOINT=http://localhost:8080 \
1526
1533
  TOIL_WES_USER=test \
1527
1534
  TOIL_WES_PASSWORD=password \
1528
- python -m pytest src/toil/test/cwl/cwlTest.py::TestCWLv12::test_wes_server_cwl_conformance -vv --log-level INFO --log-cli-level INFO
1535
+ python -m pytest src/toil/test/cwl/cwlTest.py::TestCWLv12Conformance::test_wes_server_cwl_conformance -vv --log-level INFO --log-cli-level INFO
1529
1536
  """
1530
1537
  endpoint = os.environ.get("TOIL_WES_ENDPOINT")
1531
1538
  extra_args = [f"--wes_endpoint={endpoint}"]
@@ -1906,6 +1913,134 @@ def test_visit_cwl_class_and_reduce() -> None:
1906
1913
  assert up_child_count == 2
1907
1914
 
1908
1915
 
1916
+ @needs_cwl
1917
+ @pytest.mark.cwl
1918
+ @pytest.mark.cwl_small
1919
+ def test_trim_mounts_op_nonredundant() -> None:
1920
+ """
1921
+ Make sure we don't remove any non-duplicate listings
1922
+ """
1923
+ s: CWLObjectType = {"class": "Directory", "basename": "directory", "listing": [{"class": "File", "basename": "file", "contents": "hello world"}]}
1924
+ remove_redundant_mounts(s)
1925
+
1926
+ # nothing should have been removed
1927
+ assert isinstance(s['listing'], list)
1928
+ assert len(s['listing']) == 1
1929
+
1930
+ @needs_cwl
1931
+ @pytest.mark.cwl
1932
+ @pytest.mark.cwl_small
1933
+ def test_trim_mounts_op_redundant() -> None:
1934
+ """
1935
+ Make sure we remove all duplicate listings
1936
+ """
1937
+ s: CWLObjectType = {
1938
+ "class": "Directory",
1939
+ "location": "file:///home/heaucques/Documents/toil/test_dir",
1940
+ "basename": "test_dir",
1941
+ "listing": [
1942
+ {
1943
+ "class": "Directory",
1944
+ "location": "file:///home/heaucques/Documents/toil/test_dir/nested_dir",
1945
+ "basename": "nested_dir",
1946
+ "listing": [],
1947
+ "path": "/home/heaucques/Documents/toil/test_dir/nested_dir"
1948
+ },
1949
+ {
1950
+ "class": "File",
1951
+ "location": "file:///home/heaucques/Documents/toil/test_dir/test_file",
1952
+ "basename": "test_file",
1953
+ "size": 0,
1954
+ "nameroot": "test_file",
1955
+ "nameext": "",
1956
+ "path": "/home/heaucques/Documents/toil/test_dir/test_file",
1957
+ "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709"
1958
+ }
1959
+ ],
1960
+ "path": "/home/heaucques/Documents/toil/test_dir"
1961
+ }
1962
+ remove_redundant_mounts(s)
1963
+
1964
+ # everything should have been removed
1965
+ assert isinstance(s['listing'], list)
1966
+ assert len(s['listing']) == 0
1967
+
1968
+ @needs_cwl
1969
+ @pytest.mark.cwl
1970
+ @pytest.mark.cwl_small
1971
+ def test_trim_mounts_op_partially_redundant() -> None:
1972
+ """
1973
+ Make sure we remove only the redundant listings in the CWL object and leave nonredundant listings intact
1974
+ """
1975
+ s: CWLObjectType = {
1976
+ "class": "Directory",
1977
+ "location": "file:///home/heaucques/Documents/toil/test_dir",
1978
+ "basename": "test_dir",
1979
+ "listing": [
1980
+ {
1981
+ "class": "Directory",
1982
+ "location": "file:///home/heaucques/Documents/thing",
1983
+ "basename": "thing2",
1984
+ "listing": [],
1985
+ "path": "/home/heaucques/Documents/toil/thing2"
1986
+ },
1987
+ {
1988
+ "class": "File",
1989
+ "location": "file:///home/heaucques/Documents/toil/test_dir/test_file",
1990
+ "basename": "test_file",
1991
+ "size": 0,
1992
+ "nameroot": "test_file",
1993
+ "nameext": "",
1994
+ "path": "/home/heaucques/Documents/toil/test_dir/test_file",
1995
+ "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709"
1996
+ }
1997
+ ],
1998
+ "path": "/home/heaucques/Documents/toil/test_dir"
1999
+ }
2000
+ remove_redundant_mounts(s)
2001
+
2002
+ # everything except the directory located outside test_dir should be removed
2003
+ assert isinstance(s['listing'], list)
2004
+ assert len(s['listing']) == 1
2005
+
2006
+ @needs_cwl
2007
+ @pytest.mark.cwl
2008
+ @pytest.mark.cwl_small
2009
+ def test_trim_mounts_op_mixed_urls_and_paths() -> None:
2010
+ """
2011
+ Ensure we remove redundant listings in certain edge cases
2012
+ """
2013
+ # Edge cases around encoding:
2014
+ # Ensure URL decoded file URIs match the bare path equivalent. Both of these paths should have the same shared directory
2015
+ s: CWLObjectType = {"class": "Directory", "basename": "123", "location": "file:///tmp/%25/123", "listing": [{"class": "File", "path": "/tmp/%/123/456", "basename": "456"}]}
2016
+ remove_redundant_mounts(s)
2017
+ assert isinstance(s['listing'], list)
2018
+ assert len(s['listing']) == 0
2019
+
2020
+ @needs_cwl
2021
+ @pytest.mark.cwl
2022
+ @pytest.mark.cwl_small
2023
+ def test_trim_mounts_op_decodable_paths() -> None:
2024
+ """"""
2025
+ # Ensure path names don't get unnecessarily decoded
2026
+ s: CWLObjectType = {"class": "Directory", "basename": "dir", "path": "/tmp/cat%2Ftag/dir", "listing": [{"class": "File", "path": "/tmp/cat/tag/dir/file", "basename": "file"}]}
2027
+ remove_redundant_mounts(s)
2028
+ assert isinstance(s['listing'], list)
2029
+ assert len(s['listing']) == 1
2030
+
2031
+ @needs_cwl
2032
+ @pytest.mark.cwl
2033
+ @pytest.mark.cwl_small
2034
+ def test_trim_mounts_op_multiple_encodings() -> None:
2035
+ # Ensure differently encoded URLs are properly decoded
2036
+ s: CWLObjectType = {"class": "Directory", "basename": "dir", "location": "file:///tmp/cat%2Ftag/dir", "listing": [{"class": "File", "location": "file:///tmp/cat%2ftag/dir/file", "basename": "file"}]}
2037
+ remove_redundant_mounts(s)
2038
+ assert isinstance(s['listing'], list)
2039
+ assert len(s['listing']) == 0
2040
+
2041
+
2042
+
2043
+
1909
2044
  @needs_cwl
1910
2045
  @pytest.mark.cwl
1911
2046
  @pytest.mark.cwl_small
@@ -2010,12 +2145,16 @@ def test_import_on_workers() -> None:
2010
2145
 
2011
2146
  with get_data("test/cwl/download.cwl") as cwl_file:
2012
2147
  with get_data("test/cwl/directory/directory/file.txt") as file_path:
2148
+ # To make sure we see every job issued with a leader log message
2149
+ # that we can then detect for the test, we need to turn off
2150
+ # chaining.
2013
2151
  args = [
2014
2152
  "--runImportsOnWorkers",
2015
2153
  "--importWorkersDisk=10MiB",
2016
2154
  "--realTimeLogging=True",
2017
2155
  "--logLevel=INFO",
2018
2156
  "--logColors=False",
2157
+ "--disableChaining=True",
2019
2158
  str(cwl_file),
2020
2159
  "--input",
2021
2160
  str(file_path),
@@ -2024,6 +2163,29 @@ def test_import_on_workers() -> None:
2024
2163
 
2025
2164
  assert detector.detected is True
2026
2165
 
2166
+ @needs_cwl
2167
+ @pytest.mark.cwl
2168
+ @pytest.mark.cwl_small
2169
+ def test_missing_tmpdir_and_tmp_outdir(tmp_path: Path) -> None:
2170
+ """
2171
+ tmpdir_prefix and tmp_outdir_prefix do not need to exist prior to running the workflow
2172
+ """
2173
+ tmpdir_prefix = os.path.join(tmp_path, "tmpdir/blah")
2174
+ tmp_outdir_prefix = os.path.join(tmp_path, "tmp_outdir/blah")
2175
+
2176
+ assert not os.path.exists(os.path.dirname(tmpdir_prefix))
2177
+ assert not os.path.exists(os.path.dirname(tmp_outdir_prefix))
2178
+ with get_data("test/cwl/echo_string.cwl") as cwl_file:
2179
+ cmd = [
2180
+ "toil-cwl-runner",
2181
+ f"--jobStore=file:{tmp_path / 'jobstore'}",
2182
+ "--strict-memory-limit",
2183
+ f'--tmpdir-prefix={tmpdir_prefix}',
2184
+ f'--tmp-outdir-prefix={tmp_outdir_prefix}',
2185
+ str(cwl_file),
2186
+ ]
2187
+ p = subprocess.run(cmd)
2188
+ assert p.returncode == 0
2027
2189
 
2028
2190
  # StreamHandler is generic, _typeshed doesn't exist at runtime, do a bit of typing trickery, see https://github.com/python/typeshed/issues/5680
2029
2191
  if TYPE_CHECKING:
@@ -2036,7 +2198,7 @@ else:
2036
2198
 
2037
2199
  class ImportWorkersMessageHandler(_stream_handler):
2038
2200
  """
2039
- Detect the import workers log message and set a flag.
2201
+ Detect whether any WorkerImportJob jobs ran during a workflow.
2040
2202
  """
2041
2203
 
2042
2204
  def __init__(self) -> None:
@@ -2045,7 +2207,18 @@ class ImportWorkersMessageHandler(_stream_handler):
2045
2207
  super().__init__(sys.stderr)
2046
2208
 
2047
2209
  def emit(self, record: logging.LogRecord) -> None:
2048
- if (record.msg % record.args).startswith(
2049
- "Issued job 'CWLImportJob' CWLImportJob"
2210
+ # We get the job name from the class since we already started failing
2211
+ # this test once due to it being renamed.
2212
+ try:
2213
+ formatted = record.getMessage()
2214
+ except TypeError as e:
2215
+ # The log message has the wrong number of items for its fields.
2216
+ # Complain in a way we could figure out.
2217
+ raise RuntimeError(
2218
+ f"Log message {record.msg} has wrong number of "
2219
+ f"fields in {record.args}"
2220
+ ) from e
2221
+ if formatted.startswith(
2222
+ f"Issued job '{WorkerImportJob.__name__}'"
2050
2223
  ):
2051
2224
  self.detected = True
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env cwl-runner
2
+
3
+ cwlVersion: v1.0
4
+ class: CommandLineTool
5
+ baseCommand: [ "bash", "run.sh" ]
6
+
7
+ requirements:
8
+ DockerRequirement: # <- this is the part that causes the error
9
+ dockerPull: ubuntu:latest
10
+ InitialWorkDirRequirement:
11
+ listing:
12
+ - entryname: some_dir # <- put all the input files into a dir
13
+ writable: true
14
+ entry: "$({class: 'Directory', listing: inputs.input_files})"
15
+ - entryname: run.sh
16
+ entry: |-
17
+ for i in \$(find some_dir -type f); do cat \$i ; done
18
+
19
+ stdout: output.txt
20
+
21
+ inputs:
22
+ input_files:
23
+ type: File[]
24
+
25
+ outputs:
26
+ output_file:
27
+ type: stdout
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env cwl-runner
2
+
3
+ cwlVersion: v1.0
4
+ class: CommandLineTool
5
+ baseCommand: [ "bash", "run.sh" ]
6
+
7
+ stdout: output.txt
8
+
9
+ requirements:
10
+ InitialWorkDirRequirement:
11
+ listing:
12
+ - entryname: run.sh
13
+ entry: |-
14
+ echo "$1"
15
+
16
+ inputs:
17
+ sampleId:
18
+ type: string
19
+ inputBinding:
20
+ position: 1
21
+
22
+ outputs:
23
+ output_file:
24
+ type: stdout
25
+
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env cwl-runner
2
+
3
+ cwlVersion: v1.0
4
+ class: Workflow
5
+ requirements:
6
+ MultipleInputFeatureRequirement: {}
7
+ ScatterFeatureRequirement: {}
8
+ StepInputExpressionRequirement: {}
9
+ InlineJavascriptRequirement: {}
10
+ SubworkflowFeatureRequirement: {}
11
+
12
+ inputs:
13
+ samples:
14
+ type:
15
+ type: array
16
+ items:
17
+ type: record
18
+ fields:
19
+ sampleId: string
20
+
21
+ steps:
22
+ make_file:
23
+ run: staging_make_file.cwl
24
+ scatter: sample
25
+ in:
26
+ sample: samples
27
+ sampleId:
28
+ valueFrom: ${ return inputs.sample['sampleId']; }
29
+ out:
30
+ [ output_file ]
31
+
32
+ gather_files:
33
+ run: staging_cat.cwl
34
+ in:
35
+ input_files: make_file/output_file
36
+ out:
37
+ [ output_file ]
38
+
39
+ outputs:
40
+ output_file:
41
+ type: File
42
+ outputSource: gather_files/output_file
43
+
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env cwl-runner
2
+ cwlVersion: v1.2
3
+ class: Workflow
4
+ requirements:
5
+ MultipleInputFeatureRequirement: {}
6
+ InlineJavascriptRequirement: {}
7
+ inputs:
8
+ valueA:
9
+ type: int?
10
+ default: 0
11
+ valueB:
12
+ type: int?
13
+ someFile:
14
+ type: File?
15
+ steps:
16
+ prev_step:
17
+ run:
18
+ class: ExpressionTool
19
+ requirements:
20
+ InlineJavascriptRequirement: {}
21
+ inputs:
22
+ someFile:
23
+ type: File
24
+ outputs:
25
+ valueA:
26
+ type: int
27
+ expression: |
28
+ ${
29
+ return {valueA: 1};
30
+ }
31
+ in:
32
+ someFile: someFile
33
+ out: [valueA]
34
+ when: $(inputs.someFile != null)
35
+
36
+ main_step:
37
+ run:
38
+ class: ExpressionTool
39
+ requirements:
40
+ InlineJavascriptRequirement: {}
41
+ inputs:
42
+ valueA:
43
+ type: int
44
+ outputs:
45
+ valueA:
46
+ type: int
47
+ expression: |
48
+ ${
49
+ return {valueA: inputs.valueA};
50
+ }
51
+ in:
52
+ valueA:
53
+ source: [prev_step/valueA, valueA]
54
+ pickValue: first_non_null
55
+ out: [valueA]
56
+ outputs:
57
+ valueA:
58
+ type: int
59
+ outputSource: main_step/valueA
60
+
61
+