toil 9.0.0__py3-none-any.whl → 9.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. toil/batchSystems/abstractBatchSystem.py +13 -5
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
  3. toil/batchSystems/kubernetes.py +13 -2
  4. toil/batchSystems/mesos/batchSystem.py +33 -2
  5. toil/batchSystems/slurm.py +191 -16
  6. toil/cwl/cwltoil.py +17 -82
  7. toil/fileStores/__init__.py +1 -1
  8. toil/fileStores/abstractFileStore.py +5 -2
  9. toil/fileStores/cachingFileStore.py +1 -1
  10. toil/job.py +30 -14
  11. toil/jobStores/abstractJobStore.py +24 -19
  12. toil/jobStores/aws/jobStore.py +862 -1963
  13. toil/jobStores/aws/utils.py +24 -270
  14. toil/jobStores/googleJobStore.py +25 -9
  15. toil/jobStores/utils.py +0 -327
  16. toil/leader.py +27 -22
  17. toil/lib/aws/config.py +22 -0
  18. toil/lib/aws/s3.py +477 -9
  19. toil/lib/aws/utils.py +22 -33
  20. toil/lib/checksum.py +88 -0
  21. toil/lib/conversions.py +33 -31
  22. toil/lib/directory.py +217 -0
  23. toil/lib/ec2.py +97 -29
  24. toil/lib/exceptions.py +2 -1
  25. toil/lib/expando.py +2 -2
  26. toil/lib/generatedEC2Lists.py +73 -16
  27. toil/lib/io.py +33 -2
  28. toil/lib/memoize.py +21 -7
  29. toil/lib/pipes.py +385 -0
  30. toil/lib/retry.py +1 -1
  31. toil/lib/threading.py +1 -1
  32. toil/lib/web.py +4 -5
  33. toil/provisioners/__init__.py +5 -2
  34. toil/provisioners/aws/__init__.py +43 -36
  35. toil/provisioners/aws/awsProvisioner.py +22 -13
  36. toil/provisioners/node.py +60 -12
  37. toil/resource.py +3 -13
  38. toil/test/__init__.py +14 -16
  39. toil/test/batchSystems/test_slurm.py +103 -14
  40. toil/test/cwl/staging_cat.cwl +27 -0
  41. toil/test/cwl/staging_make_file.cwl +25 -0
  42. toil/test/cwl/staging_workflow.cwl +43 -0
  43. toil/test/cwl/zero_default.cwl +61 -0
  44. toil/test/docs/scripts/tutorial_staging.py +17 -8
  45. toil/test/jobStores/jobStoreTest.py +23 -133
  46. toil/test/lib/aws/test_iam.py +7 -7
  47. toil/test/lib/aws/test_s3.py +30 -33
  48. toil/test/lib/aws/test_utils.py +9 -9
  49. toil/test/provisioners/aws/awsProvisionerTest.py +59 -6
  50. toil/test/src/autoDeploymentTest.py +2 -3
  51. toil/test/src/fileStoreTest.py +89 -87
  52. toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
  53. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
  54. toil/test/utils/toilKillTest.py +35 -28
  55. toil/test/wdl/md5sum/md5sum.json +1 -1
  56. toil/test/wdl/testfiles/gather.wdl +52 -0
  57. toil/test/wdl/wdltoil_test.py +120 -38
  58. toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
  59. toil/utils/toilDebugFile.py +6 -3
  60. toil/utils/toilStats.py +17 -2
  61. toil/version.py +6 -6
  62. toil/wdl/wdltoil.py +1038 -549
  63. toil/worker.py +5 -2
  64. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/METADATA +12 -12
  65. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/RECORD +69 -61
  66. toil/lib/iterables.py +0 -112
  67. toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
  68. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/WHEEL +0 -0
  69. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/entry_points.txt +0 -0
  70. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/licenses/LICENSE +0 -0
  71. {toil-9.0.0.dist-info → toil-9.1.1.dist-info}/top_level.txt +0 -0
@@ -41,7 +41,7 @@ logger = logging.getLogger(__name__)
41
41
 
42
42
 
43
43
  WDL_CONFORMANCE_TEST_REPO = "https://github.com/DataBiosphere/wdl-conformance-tests.git"
44
- WDL_CONFORMANCE_TEST_COMMIT = "baf44bcc7e6f6927540adf77d91b26a5558ae4b7"
44
+ WDL_CONFORMANCE_TEST_COMMIT = "46b5f85ee38ec60d0b8b9c35928b5104a2af83d5"
45
45
  # These tests are known to require things not implemented by
46
46
  # Toil and will not be run in CI.
47
47
  WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL = [
@@ -110,6 +110,15 @@ class TestWDLConformance:
110
110
  "Failed process standard error: %s",
111
111
  p.stderr.decode("utf-8", errors="replace"),
112
112
  )
113
+ else:
114
+ logger.debug(
115
+ "Successful process standard output: %s",
116
+ p.stdout.decode("utf-8", errors="replace"),
117
+ )
118
+ logger.debug(
119
+ "Successful process standard error: %s",
120
+ p.stderr.decode("utf-8", errors="replace"),
121
+ )
113
122
 
114
123
  p.check_returncode()
115
124
 
@@ -195,6 +204,30 @@ class TestWDLConformance:
195
204
 
196
205
  self.check(p)
197
206
 
207
+ # estimated running time: 10 minutes (once all the appropriate tests get
208
+ # marked as "development")
209
+ @slow
210
+ def test_conformance_tests_development(self, wdl_conformance_test_repo: Path) -> None:
211
+ os.chdir(wdl_conformance_test_repo)
212
+ commands = [
213
+ exactPython,
214
+ "run.py",
215
+ "--runner",
216
+ "toil-wdl-runner",
217
+ "--conformance-file",
218
+ "conformance.yaml",
219
+ "-v",
220
+ "development",
221
+ ]
222
+ if WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL:
223
+ commands.append("--exclude-numbers")
224
+ commands.append(
225
+ ",".join([str(t) for t in WDL_CONFORMANCE_TESTS_UNSUPPORTED_BY_TOIL])
226
+ )
227
+ p = subprocess.run(commands, capture_output=True)
228
+
229
+ self.check(p)
230
+
198
231
  @slow
199
232
  def test_conformance_tests_integration(
200
233
  self, wdl_conformance_test_repo: Path
@@ -288,6 +321,28 @@ class TestWDL:
288
321
 
289
322
  assert "StringFileCoercion.output_file" in result
290
323
 
324
+ @needs_docker
325
+ def test_gather(self, tmp_path: Path) -> None:
326
+ """
327
+ Test files with the same name from different scatter tasks.
328
+ """
329
+ with get_data("test/wdl/testfiles/gather.wdl") as wdl:
330
+ result_json = subprocess.check_output(
331
+ self.base_command
332
+ + [
333
+ str(wdl),
334
+ "-o",
335
+ str(tmp_path),
336
+ "--logInfo",
337
+ "--retryCount=0"
338
+ ]
339
+ )
340
+ result = json.loads(result_json)
341
+
342
+ assert "gather.outfile" in result
343
+ assert isinstance(result["gather.outfile"], str)
344
+ assert open(result["gather.outfile"]).read() == "1\n2\n3\n"
345
+
291
346
  @needs_docker
292
347
  def test_wait(self, tmp_path: Path) -> None:
293
348
  """
@@ -856,25 +911,29 @@ class TestWDL:
856
911
  env["TOIL_DOCKSTORE_TOKEN"] = "99cf5578ebe94b194d7864630a86258fa3d6cedcc17d757b5dd49e64ee3b68c3"
857
912
  # Enable history for when <https://github.com/DataBiosphere/toil/pull/5258> merges
858
913
  env["TOIL_HISTORY"] = "True"
914
+
915
+ try:
916
+ output_log = subprocess.check_output(
917
+ self.base_command
918
+ + [
919
+ wdl_file,
920
+ json_input,
921
+ "--logDebug",
922
+ "-o",
923
+ str(tmp_path),
924
+ "--outputDialect",
925
+ "miniwdl",
926
+ "--publishWorkflowMetrics=current",
927
+ ]
928
+ + (extra_args or []),
929
+ stderr=subprocess.STDOUT,
930
+ env=env,
931
+ ).decode("utf-8", errors="replace")
932
+ except subprocess.CalledProcessError as e:
933
+ logger.error("Test run of Toil failed: %s", e.stdout.decode("utf-8", errors="replace"))
934
+ raise
859
935
 
860
- output_log = subprocess.check_output(
861
- self.base_command
862
- + [
863
- wdl_file,
864
- json_input,
865
- "--logDebug",
866
- "-o",
867
- str(tmp_path),
868
- "--outputDialect",
869
- "miniwdl",
870
- "--publishWorkflowMetrics=current",
871
- ]
872
- + (extra_args or []),
873
- stderr=subprocess.STDOUT,
874
- env=env,
875
- )
876
-
877
- assert b'Workflow metrics were accepted by Dockstore.' in output_log
936
+ assert "Workflow metrics were accepted by Dockstore." in output_log, f"No acceptance message in log: {output_log}"
878
937
 
879
938
  @slow
880
939
  @needs_docker_cuda
@@ -1138,34 +1197,57 @@ class TestWDLToilBench(unittest.TestCase):
1138
1197
  """
1139
1198
 
1140
1199
  from toil.wdl.wdltoil import (
1141
- DirectoryNamingStateDict,
1142
1200
  choose_human_readable_directory,
1143
1201
  )
1144
1202
 
1145
- state: DirectoryNamingStateDict = {}
1146
-
1147
- # The first time we should get a path with the task name and without the ID
1203
+ # The first time we should get a path with the task name
1148
1204
  first_chosen = choose_human_readable_directory(
1149
- "root", "taskname", "111-222-333", state
1205
+ "root", "taskname", "https://example.com/some/directory"
1150
1206
  )
1151
1207
  assert first_chosen.startswith("root")
1152
- assert "taskname" in first_chosen
1153
- assert "111-222-333" not in first_chosen
1154
1208
 
1155
- # If we use the same ID we should get the same result
1156
- same_id = choose_human_readable_directory(
1157
- "root", "taskname", "111-222-333", state
1209
+ # If we use the same parent we should get the same result
1210
+ same_parent = choose_human_readable_directory(
1211
+ "root", "taskname", "https://example.com/some/directory"
1212
+ )
1213
+ assert same_parent == first_chosen
1214
+
1215
+ # If we use a lower parent with a URL, we do not necessarily need to be
1216
+ # inside the higher parent.
1217
+
1218
+ # If we use a URL with a creative number of slashes, it should be distinct.
1219
+ slash_parent = choose_human_readable_directory(
1220
+ "root", "taskname", "https://example.com/some/directory//////"
1221
+ )
1222
+ assert slash_parent != first_chosen
1223
+
1224
+ # If we use the same parent URL but a different task we should get the same result
1225
+ other_task = choose_human_readable_directory(
1226
+ "root", "taskname2", "https://example.com/some/directory"
1227
+ )
1228
+ assert other_task == first_chosen
1229
+
1230
+ # If we use a different parent we should get a different result still obeying the constraints
1231
+ diff_parent = choose_human_readable_directory(
1232
+ "root", "taskname", "/data/tmp/files/somewhere"
1233
+ )
1234
+ assert diff_parent != first_chosen
1235
+ assert diff_parent.startswith("root")
1236
+ assert "taskname" in diff_parent
1237
+
1238
+ # If we use a subpath parent with a filename we should get a path inside it.
1239
+ diff_parent_subpath = choose_human_readable_directory(
1240
+ "root", "taskname", "/data/tmp/files/somewhere/else"
1158
1241
  )
1159
- assert same_id == first_chosen
1242
+ assert os.path.dirname(diff_parent_subpath) == diff_parent
1160
1243
 
1161
- # If we use a different ID we should get a different result still obeying the constraints
1162
- diff_id = choose_human_readable_directory(
1163
- "root", "taskname", "222-333-444", state
1244
+ # If we use the same parent path but a different task we should get a different result.
1245
+ other_task_directory = choose_human_readable_directory(
1246
+ "root", "taskname2", "/data/tmp/files/somewhere"
1164
1247
  )
1165
- assert diff_id != first_chosen
1166
- assert diff_id.startswith("root")
1167
- assert "taskname" in diff_id
1168
- assert "222-333-444" not in diff_id
1248
+ assert other_task_directory != diff_parent
1249
+ assert other_task_directory.startswith("root")
1250
+ assert "taskname2" in other_task_directory
1169
1251
 
1170
1252
  def test_uri_packing(self) -> None:
1171
1253
  """
@@ -1181,7 +1263,7 @@ class TestWDLToilBench(unittest.TestCase):
1181
1263
  file_basename = "thefile.txt"
1182
1264
 
1183
1265
  # Pack and unpack it
1184
- uri = pack_toil_uri(file_id, task_path, dir_id, file_basename)
1266
+ uri = pack_toil_uri(file_id, task_path, str(dir_id), file_basename)
1185
1267
  unpacked = unpack_toil_uri(uri)
1186
1268
 
1187
1269
  # Make sure we got what we put in
@@ -1,6 +1,7 @@
1
1
  import unittest
2
2
  from uuid import uuid4
3
3
 
4
+ import logging
4
5
  import pytest
5
6
 
6
7
  from toil.provisioners import cluster_factory
@@ -12,6 +13,8 @@ from toil.test.wdl.wdltoil_test import (
12
13
  )
13
14
 
14
15
 
16
+ logger = logging.getLogger(__name__)
17
+
15
18
  @integrative
16
19
  @slow
17
20
  @pytest.mark.timeout(1800)
@@ -52,6 +55,7 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
52
55
  workflow that performs an image pull on the worker.
53
56
  :return:
54
57
  """
58
+
55
59
  self.numWorkers = "1"
56
60
  self.requestedLeaderStorage = 30
57
61
  # create the cluster
@@ -64,6 +68,8 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
64
68
 
65
69
  wdl_dir = "wdl_conformance_tests"
66
70
 
71
+ logger.info("Cloning WDL tests onto cluster...")
72
+
67
73
  # get the wdl-conformance-tests repo to get WDL tasks to run
68
74
  self.sshUtil(
69
75
  [
@@ -78,6 +84,9 @@ class WDLKubernetesClusterTest(AbstractClusterTest):
78
84
 
79
85
  # run WDL workflow that will run singularity
80
86
  test_options = [f"tests/md5sum/md5sum.wdl", f"tests/md5sum/md5sum.json"]
87
+
88
+ logger.info("Running workflow...")
89
+
81
90
  self.sshUtil(
82
91
  [
83
92
  "bash",
@@ -47,12 +47,12 @@ def fetchJobStoreFiles(jobStore: FileJobStore, options: argparse.Namespace) -> N
47
47
  jobStoreHits = glob(directoryname=options.jobStore, glob_pattern=jobStoreFile)
48
48
  for jobStoreFileID in jobStoreHits:
49
49
  logger.debug(
50
- f"Copying job store file: {jobStoreFileID} to {options.localFilePath[0]}"
50
+ f"Copying job store file: {jobStoreFileID} to {options.localFilePath}"
51
51
  )
52
52
  jobStore.read_file(
53
53
  jobStoreFileID,
54
54
  os.path.join(
55
- options.localFilePath[0], os.path.basename(jobStoreFileID)
55
+ options.localFilePath, os.path.basename(jobStoreFileID)
56
56
  ),
57
57
  symlink=options.useSymlinks,
58
58
  )
@@ -97,7 +97,10 @@ def printContentsOfJobStore(
97
97
  def main() -> None:
98
98
  parser = parser_with_common_options(jobstore_option=True, prog="toil debug-file")
99
99
  parser.add_argument(
100
- "--localFilePath", nargs=1, help="Location to which to copy job store files."
100
+ "--localFilePath",
101
+ type=str,
102
+ default=".",
103
+ help="Location to which to copy job store files."
101
104
  )
102
105
  parser.add_argument(
103
106
  "--fetch",
toil/utils/toilStats.py CHANGED
@@ -326,6 +326,8 @@ def sprint_tag(
326
326
  out_str += header + "\n"
327
327
  out_str += sub_header + "\n"
328
328
  out_str += tag_str + "\n"
329
+ if tag.excess_cpu > 0:
330
+ out_str += f" ({tag.excess_cpu} used more CPU than requested!)\n"
329
331
  return out_str
330
332
 
331
333
 
@@ -507,13 +509,25 @@ def build_element(
507
509
  float(item.get(category_key, defaults[category])), category
508
510
  )
509
511
  values.append(category_value)
510
-
512
+
513
+ excess_cpu_items = 0
511
514
  for index in range(0, len(item_values[CATEGORIES[0]])):
512
515
  # For each item, compute the computed categories
513
- item_values["wait"].append(
516
+
517
+ # Compute wait time (allocated CPU time wasted).
518
+ # Note that if any item uses *more* CPU cores than requested, at any
519
+ # time, that decreases the amount of wait we're able to see from that
520
+ item. If it happens a lot, our computed wait could go negative, so we
521
+ # bound it below at 0.
522
+ wait_value = (
514
523
  item_values["time"][index] * item_values["cores"][index]
515
524
  - item_values["clock"][index]
516
525
  )
526
+ if wait_value < 0:
527
+ # Remember an item used more CPU than allocated.
528
+ excess_cpu_items += 1
529
+ wait_value = 0
530
+ item_values["wait"].append(wait_value)
517
531
 
518
532
  for category, values in item_values.items():
519
533
  values.sort()
@@ -531,6 +545,7 @@ def build_element(
531
545
  item_element["average_" + category] = float(sum(values) / len(values))
532
546
  item_element["min_" + category] = float(min(values))
533
547
  item_element["max_" + category] = float(max(values))
548
+ item_element["excess_cpu"] = excess_cpu_items
534
549
 
535
550
  element[item_name] = item_element
536
551
 
toil/version.py CHANGED
@@ -1,14 +1,14 @@
1
- baseVersion = '9.0.0'
1
+ baseVersion = '9.1.1'
2
2
  cgcloudVersion = '1.6.0a1.dev393'
3
- version = '9.0.0-fa1e35a17670e1927036914ca624bfd591f8349c'
3
+ version = '9.1.1-f654a3c80f5318ae2ec2e5ecb278a05e1e844a58'
4
4
  cacheTag = 'cache-local-py3.9'
5
5
  mainCacheTag = 'cache-master-py3.9'
6
- distVersion = '9.0.0'
6
+ distVersion = '9.1.1'
7
7
  exactPython = 'python3.9'
8
8
  python = 'python3.9'
9
- dockerTag = '9.0.0-fa1e35a17670e1927036914ca624bfd591f8349c-py3.9'
10
- currentCommit = 'fa1e35a17670e1927036914ca624bfd591f8349c'
9
+ dockerTag = '9.1.1-f654a3c80f5318ae2ec2e5ecb278a05e1e844a58-py3.9'
10
+ currentCommit = 'f654a3c80f5318ae2ec2e5ecb278a05e1e844a58'
11
11
  dockerRegistry = 'quay.io/ucsc_cgl'
12
12
  dockerName = 'toil'
13
13
  dirty = False
14
- cwltool_version = '3.1.20250110105449'
14
+ cwltool_version = '3.1.20250715140722'