toil 9.0.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. toil/batchSystems/abstractBatchSystem.py +13 -5
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
  3. toil/batchSystems/kubernetes.py +13 -2
  4. toil/batchSystems/mesos/batchSystem.py +33 -2
  5. toil/batchSystems/slurm.py +191 -16
  6. toil/cwl/cwltoil.py +17 -82
  7. toil/fileStores/__init__.py +1 -1
  8. toil/fileStores/abstractFileStore.py +5 -2
  9. toil/fileStores/cachingFileStore.py +1 -1
  10. toil/job.py +30 -14
  11. toil/jobStores/abstractJobStore.py +24 -19
  12. toil/jobStores/aws/jobStore.py +862 -1963
  13. toil/jobStores/aws/utils.py +24 -270
  14. toil/jobStores/googleJobStore.py +25 -9
  15. toil/jobStores/utils.py +0 -327
  16. toil/leader.py +27 -22
  17. toil/lib/aws/config.py +22 -0
  18. toil/lib/aws/s3.py +477 -9
  19. toil/lib/aws/utils.py +22 -33
  20. toil/lib/checksum.py +88 -0
  21. toil/lib/conversions.py +33 -31
  22. toil/lib/directory.py +217 -0
  23. toil/lib/ec2.py +97 -29
  24. toil/lib/exceptions.py +2 -1
  25. toil/lib/expando.py +2 -2
  26. toil/lib/generatedEC2Lists.py +73 -16
  27. toil/lib/io.py +33 -2
  28. toil/lib/memoize.py +21 -7
  29. toil/lib/pipes.py +385 -0
  30. toil/lib/retry.py +1 -1
  31. toil/lib/threading.py +1 -1
  32. toil/lib/web.py +4 -5
  33. toil/provisioners/__init__.py +5 -2
  34. toil/provisioners/aws/__init__.py +43 -36
  35. toil/provisioners/aws/awsProvisioner.py +22 -13
  36. toil/provisioners/node.py +60 -12
  37. toil/resource.py +3 -13
  38. toil/test/__init__.py +14 -16
  39. toil/test/batchSystems/test_slurm.py +103 -14
  40. toil/test/cwl/staging_cat.cwl +27 -0
  41. toil/test/cwl/staging_make_file.cwl +25 -0
  42. toil/test/cwl/staging_workflow.cwl +43 -0
  43. toil/test/cwl/zero_default.cwl +61 -0
  44. toil/test/docs/scripts/tutorial_staging.py +17 -8
  45. toil/test/jobStores/jobStoreTest.py +23 -133
  46. toil/test/lib/aws/test_iam.py +7 -7
  47. toil/test/lib/aws/test_s3.py +30 -33
  48. toil/test/lib/aws/test_utils.py +9 -9
  49. toil/test/provisioners/aws/awsProvisionerTest.py +59 -6
  50. toil/test/src/autoDeploymentTest.py +2 -3
  51. toil/test/src/fileStoreTest.py +89 -87
  52. toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
  53. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
  54. toil/test/utils/toilKillTest.py +35 -28
  55. toil/test/wdl/md5sum/md5sum.json +1 -1
  56. toil/test/wdl/wdltoil_test.py +98 -38
  57. toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
  58. toil/utils/toilDebugFile.py +6 -3
  59. toil/utils/toilStats.py +17 -2
  60. toil/version.py +6 -6
  61. toil/wdl/wdltoil.py +1032 -546
  62. toil/worker.py +5 -2
  63. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/METADATA +12 -12
  64. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/RECORD +68 -61
  65. toil/lib/iterables.py +0 -112
  66. toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
  67. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/WHEEL +0 -0
  68. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
  69. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
  70. {toil-9.0.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
@@ -41,7 +41,7 @@ logger = logging.getLogger(__name__)
41
41
 
42
42
 
43
43
  WDL_CONFORMANCE_TEST_REPO = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
44
- WDL_CONFORMANCE_TEST_COMMIT = "baf44bcc7e6f6927540adf77d91b26a5558ae4b7"
44
+ WDL_CONFORMANCE_TEST_COMMIT = "46b5f85ee38ec60d0b8b9c35928b5104a2af83d5"
45
45
  # These tests are known to require things not implemented by
46
46
  # Toil and will not be run in CI.
47
47
  WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL = [
@@ -110,6 +110,15 @@ class TestWDLConformance:
110
110
  "Failed process standard error: %s",
111
111
  p.stderr.decode("utf-8", errors="replace"),
112
112
  )
113
+ else:
114
+ logger.debug(
115
+ "Successful process standard output: %s",
116
+ p.stdout.decode("utf-8", errors="replace"),
117
+ )
118
+ logger.debug(
119
+ "Successful process standard error: %s",
120
+ p.stderr.decode("utf-8", errors="replace"),
121
+ )
113
122
 
114
123
  p.check_returncode()
115
124
 
@@ -195,6 +204,30 @@ class TestWDLConformance:
195
204
 
196
205
  self.check(p)
197
206
 
207
+ # estimated running time: 10 minutes (once all the appropriate tests get
208
+ # marked as "development")
209
+ @slow
210
+ def test_conformance_tests_development(self, wdl_conformance_test_repo: Path) -> None:
211
+ os.chdir(wdl_conformance_test_repo)
212
+ commands = [
213
+ exactPython,
214
+ "run.py",
215
+ "--runner",
216
+ "toil-wdl-runner",
217
+ "--conformance-file",
218
+ "conformance.yaml",
219
+ "-v",
220
+ "development",
221
+ ]
222
+ if WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL:
223
+ commands.append("--exclude-numbers")
224
+ commands.append(
225
+ ",".join([str(t) for t in WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL])
226
+ )
227
+ p = subprocess.run(commands, capture_output=True)
228
+
229
+ self.check(p)
230
+
198
231
  @slow
199
232
  def test_conformance_tests_integration(
200
233
  self, wdl_conformance_test_repo: Path
@@ -856,25 +889,29 @@ class TestWDL:
856
889
  env["TOIL_DOCKSTORE_TOKEN"] = "99cf5578ebe94b194d7864630a86258fa3d6cedcc17d757b5dd49e64ee3b68c3"
857
890
  # Enable history for when <https://github.com/DataBiosphere/toil/pull/5258> merges
858
891
  env["TOIL_HISTORY"] = "True"
892
+
893
+ try:
894
+ output_log = subprocess.check_output(
895
+ self.base_command
896
+ + [
897
+ wdl_file,
898
+ json_input,
899
+ "--logDebug",
900
+ "-o",
901
+ str(tmp_path),
902
+ "--outputDialect",
903
+ "miniwdl",
904
+ "--publishWorkflowMetrics=current",
905
+ ]
906
+ + (extra_args or []),
907
+ stderr=subprocess.STDOUT,
908
+ env=env,
909
+ ).decode("utf-8", errors="replace")
910
+ except subprocess.CalledProcessError as e:
911
+ logger.error("Test run of Toil failed: %s", e.stdout.decode("utf-8", errors="replace"))
912
+ raise
859
913
 
860
- output_log = subprocess.check_output(
861
- self.base_command
862
- + [
863
- wdl_file,
864
- json_input,
865
- "--logDebug",
866
- "-o",
867
- str(tmp_path),
868
- "--outputDialect",
869
- "miniwdl",
870
- "--publishWorkflowMetrics=current",
871
- ]
872
- + (extra_args or []),
873
- stderr=subprocess.STDOUT,
874
- env=env,
875
- )
876
-
877
- assert b'Workflow metrics were accepted by Dockstore.' in output_log
914
+ assert "Workflow metrics were accepted by Dockstore." in output_log, f"No acceptance message in log: {output_log}"
878
915
 
879
916
  @slow
880
917
  @needs_docker_cuda
@@ -1138,34 +1175,57 @@ class TestWDLToilBench(unittest.TestCase):
1138
1175
  """
1139
1176
 
1140
1177
  from toil.wdl.wdltoil import (
1141
- DirectoryNamingStateDict,
1142
1178
  choose_human_readable_directory,
1143
1179
  )
1144
1180
 
1145
- state: DirectoryNamingStateDict = {}
1146
-
1147
- # The first time we should get a path with the task name and without the ID
1181
+ # The first time we should get a path with the task name
1148
1182
  first_chosen = choose_human_readable_directory(
1149
- "root", "taskname", "111-222-333", state
1183
+ "root", "taskname", "https://example.com/some/directory"
1150
1184
  )
1151
1185
  assert first_chosen.startswith("root")
1152
- assert "taskname" in first_chosen
1153
- assert "111-222-333" not in first_chosen
1154
1186
 
1155
- # If we use the same ID we should get the same result
1156
- same_id = choose_human_readable_directory(
1157
- "root", "taskname", "111-222-333", state
1187
+ # If we use the same parent we should get the same result
1188
+ same_parent = choose_human_readable_directory(
1189
+ "root", "taskname", "https://example.com/some/directory"
1190
+ )
1191
+ assert same_parent == first_chosen
1192
+
1193
+ # If we use a lower parent with a URL, we do not necessarily need to be
1194
+ # inside the higher parent.
1195
+
1196
+ # If we use a URL with a creative number of slashes, it should be distinct.
1197
+ slash_parent = choose_human_readable_directory(
1198
+ "root", "taskname", "https://example.com/some/directory//////"
1199
+ )
1200
+ assert slash_parent != first_chosen
1201
+
1202
+ # If we use the same parent URL but a different task we should get the same result
1203
+ other_task = choose_human_readable_directory(
1204
+ "root", "taskname2", "https://example.com/some/directory"
1205
+ )
1206
+ assert other_task == first_chosen
1207
+
1208
+ # If we use a different parent we should get a different result still obeying the constraints
1209
+ diff_parent = choose_human_readable_directory(
1210
+ "root", "taskname", "/data/tmp/files/somewhere"
1211
+ )
1212
+ assert diff_parent != first_chosen
1213
+ assert diff_parent.startswith("root")
1214
+ assert "taskname" in diff_parent
1215
+
1216
+ # If we use a subpath parent with a filename we should get a path inside it.
1217
+ diff_parent_subpath = choose_human_readable_directory(
1218
+ "root", "taskname", "/data/tmp/files/somewhere/else"
1158
1219
  )
1159
- assert same_id == first_chosen
1220
+ assert os.path.dirname(diff_parent_subpath) == diff_parent
1160
1221
 
1161
- # If we use a different ID we should get a different result still obeying the constraints
1162
- diff_id = choose_human_readable_directory(
1163
- "root", "taskname", "222-333-444", state
1222
+ # If we use the same parent path but a different task we should get a different result.
1223
+ other_task_directory = choose_human_readable_directory(
1224
+ "root", "taskname2", "/data/tmp/files/somewhere"
1164
1225
  )
1165
- assert diff_id != first_chosen
1166
- assert diff_id.startswith("root")
1167
- assert "taskname" in diff_id
1168
- assert "222-333-444" not in diff_id
1226
+ assert other_task_directory != diff_parent
1227
+ assert other_task_directory.startswith("root")
1228
+ assert "taskname2" in other_task_directory
1169
1229
 
1170
1230
  def test_uri_packing(self) -> None:
1171
1231
  """
@@ -1181,7 +1241,7 @@ class TestWDLToilBench(unittest.TestCase):
1181
1241
  file_basename = "thefile.txt"
1182
1242
 
1183
1243
  # Pack and unpack it
1184
- uri = pack_toil_uri(file_id, task_path, dir_id, file_basename)
1244
+ uri = pack_toil_uri(file_id, task_path, str(dir_id), file_basename)
1185
1245
  unpacked = unpack_toil_uri(uri)
1186
1246
 
1187
1247
  # Make sure we got what we put in
@@ -1,6 +1,7 @@
1
1
  import unittest
2
2
  from uuid import uuid4
3
3
 
4
+ import logging
4
5
  import pytest
5
6
 
6
7
  from toil.provisioners import cluster_factory
@@ -12,6 +13,8 @@ from toil.test.wdl.wdltoil_test import (
12
13
  )
13
14
 
14
15
 
16
+ logger = logging.getLogger(__name__)
17
+
15
18
  @integrative
16
19
  @slow
17
20
  @pytest.mark.timeout(1800)
@@ -52,6 +55,7 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
52
55
  workflow that performs an image pull on the worker.
53
56
  :return:
54
57
  """
58
+
55
59
  self.numWorkers = "1"
56
60
  self.requestedLeaderStorage = 30
57
61
  # create the cluster
@@ -64,6 +68,8 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
64
68
 
65
69
  wdl_dir = "wdl_conformance_tests"
66
70
 
71
+ logger.info("Cloning WDL tests onto cluster...")
72
+
67
73
  # get the wdl-conformance-tests repo to get WDL tasks to run
68
74
  self.sshUtil(
69
75
  [
@@ -78,6 +84,9 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
78
84
 
79
85
  # run WDL workflow that will run singularity
80
86
  test_options = [f"tests/md5sum/md5sum.wdl", f"tests/md5sum/md5sum.json"]
87
+
88
+ logger.info("Running workflow...")
89
+
81
90
  self.sshUtil(
82
91
  [
83
92
  "bash",
@@ -47,12 +47,12 @@ def fetchJobStoreFiles(jobStore: FileJobStore, options: argparse.Namespace) -> N
47
47
  jobStoreHits = glob(directoryname=options.jobStore, glob_pattern=jobStoreFile)
48
48
  for jobStoreFileID in jobStoreHits:
49
49
  logger.debug(
50
- f"Copying job store file: {jobStoreFileID} to {options.localFilePath[0]}"
50
+ f"Copying job store file: {jobStoreFileID} to {options.localFilePath}"
51
51
  )
52
52
  jobStore.read_file(
53
53
  jobStoreFileID,
54
54
  os.path.join(
55
- options.localFilePath[0], os.path.basename(jobStoreFileID)
55
+ options.localFilePath, os.path.basename(jobStoreFileID)
56
56
  ),
57
57
  symlink=options.useSymlinks,
58
58
  )
@@ -97,7 +97,10 @@ def printContentsOfJobStore(
97
97
  def main() -> None:
98
98
  parser = parser_with_common_options(jobstore_option=True, prog="toil debug-file")
99
99
  parser.add_argument(
100
- "--localFilePath", nargs=1, help="Location to which to copy job store files."
100
+ "--localFilePath",
101
+ type=str,
102
+ default=".",
103
+ help="Location to which to copy job store files."
101
104
  )
102
105
  parser.add_argument(
103
106
  "--fetch",
toil/utils/toilStats.py CHANGED
@@ -326,6 +326,8 @@ def sprint_tag(
326
326
  out_str += header + "\n"
327
327
  out_str += sub_header + "\n"
328
328
  out_str += tag_str + "\n"
329
+ if tag.excess_cpu > 0:
330
+ out_str += f" ({tag.excess_cpu} used more CPU than requested!)\n"
329
331
  return out_str
330
332
 
331
333
 
@@ -507,13 +509,25 @@ def build_element(
507
509
  float(item.get(category_key, defaults[category])), category
508
510
  )
509
511
  values.append(category_value)
510
-
512
+
513
+ excess_cpu_items = 0
511
514
  for index in range(0, len(item_values[CATEGORIES[0]])):
512
515
  # For each item, compute the computed categories
513
- item_values["wait"].append(
516
+
517
+ # Compute wait time (allocated CPU time wasted).
518
+ # Note that if any item uses *more* CPU cores than requested, at any
519
+ # time, that decreases the amount of wait we're able to see from that
520
+ # item. If it happens a lot, our computed wait could go negative, so we
521
+ # bound it below at 0.
522
+ wait_value = (
514
523
  item_values["time"][index] * item_values["cores"][index]
515
524
  - item_values["clock"][index]
516
525
  )
526
+ if wait_value < 0:
527
+ # Remember an item used more CPU than allocated.
528
+ excess_cpu_items += 1
529
+ wait_value = 0
530
+ item_values["wait"].append(wait_value)
517
531
 
518
532
  for category, values in item_values.items():
519
533
  values.sort()
@@ -531,6 +545,7 @@ def build_element(
531
545
  item_element["average_" + category] = float(sum(values) / len(values))
532
546
  item_element["min_" + category] = float(min(values))
533
547
  item_element["max_" + category] = float(max(values))
548
+ item_element["excess_cpu"] = excess_cpu_items
534
549
 
535
550
  element[item_name] = item_element
536
551
 
toil/version.py CHANGED
@@ -1,14 +1,14 @@
1
- baseVersion = '9.0.0'
1
+ baseVersion = '9.1.0'
2
2
  cgcloudVersion = '1.6.0a1.dev393'
3
- version = '9.0.0-fa1e35a17670e1927036914ca624bfd591f8349c'
3
+ version = '9.1.0-e341bb669efe78f93308e5ff1f02f7e375973511'
4
4
  cacheTag = 'cache-local-py3.9'
5
5
  mainCacheTag = 'cache-master-py3.9'
6
- distVersion = '9.0.0'
6
+ distVersion = '9.1.0'
7
7
  exactPython = 'python3.9'
8
8
  python = 'python3.9'
9
- dockerTag = '9.0.0-fa1e35a17670e1927036914ca624bfd591f8349c-py3.9'
10
- currentCommit = 'fa1e35a17670e1927036914ca624bfd591f8349c'
9
+ dockerTag = '9.1.0-e341bb669efe78f93308e5ff1f02f7e375973511-py3.9'
10
+ currentCommit = 'e341bb669efe78f93308e5ff1f02f7e375973511'
11
11
  dockerRegistry = 'quay.io/ucsc_cgl'
12
12
  dockerName = 'toil'
13
13
  dirty = False
14
- cwltool_version = '3.1.20250110105449'
14
+ cwltool_version = '3.1.20250715140722'