toil 6.0.0__py3-none-any.whl → 6.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. toil/batchSystems/abstractBatchSystem.py +19 -4
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +22 -22
  3. toil/batchSystems/cleanup_support.py +7 -3
  4. toil/batchSystems/lsf.py +7 -7
  5. toil/batchSystems/slurm.py +85 -14
  6. toil/bus.py +38 -0
  7. toil/common.py +20 -18
  8. toil/cwl/cwltoil.py +81 -63
  9. toil/exceptions.py +1 -1
  10. toil/fileStores/abstractFileStore.py +53 -4
  11. toil/fileStores/cachingFileStore.py +4 -20
  12. toil/fileStores/nonCachingFileStore.py +5 -14
  13. toil/job.py +46 -30
  14. toil/jobStores/abstractJobStore.py +21 -23
  15. toil/jobStores/aws/utils.py +5 -4
  16. toil/jobStores/fileJobStore.py +1 -1
  17. toil/leader.py +17 -14
  18. toil/lib/conversions.py +19 -0
  19. toil/lib/generatedEC2Lists.py +8 -8
  20. toil/lib/io.py +28 -2
  21. toil/lib/resources.py +8 -1
  22. toil/lib/threading.py +27 -12
  23. toil/options/common.py +5 -7
  24. toil/options/wdl.py +5 -0
  25. toil/provisioners/abstractProvisioner.py +8 -0
  26. toil/statsAndLogging.py +36 -8
  27. toil/test/batchSystems/test_slurm.py +21 -6
  28. toil/test/cactus/__init__.py +0 -0
  29. toil/test/cactus/test_cactus_integration.py +58 -0
  30. toil/test/cwl/cwlTest.py +243 -151
  31. toil/test/docs/scriptsTest.py +2 -2
  32. toil/test/jobStores/jobStoreTest.py +7 -5
  33. toil/test/lib/test_ec2.py +1 -1
  34. toil/test/options/__init__.py +13 -0
  35. toil/test/options/options.py +37 -0
  36. toil/test/provisioners/clusterTest.py +9 -8
  37. toil/test/utils/toilDebugTest.py +1 -1
  38. toil/test/utils/utilsTest.py +3 -3
  39. toil/test/wdl/wdltoil_test.py +91 -16
  40. toil/utils/toilDebugFile.py +1 -1
  41. toil/utils/toilStats.py +309 -266
  42. toil/utils/toilStatus.py +1 -1
  43. toil/version.py +9 -9
  44. toil/wdl/wdltoil.py +341 -189
  45. toil/worker.py +31 -16
  46. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/METADATA +6 -7
  47. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/RECORD +51 -47
  48. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
  49. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/WHEEL +0 -0
  50. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -0
  51. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/utils/toilStats.py CHANGED
@@ -14,18 +14,56 @@
14
14
  """Reports statistical data about a given Toil workflow."""
15
15
  import json
16
16
  import logging
17
+ import math
18
+ import sys
17
19
  from argparse import ArgumentParser, Namespace
18
20
  from functools import partial
19
- from typing import Any, Callable, Dict, List, Optional, TextIO
21
+ from typing import Any, Callable, Dict, List, Optional, TextIO, Union
20
22
 
21
23
  from toil.common import Config, Toil, parser_with_common_options
22
24
  from toil.job import Job
23
- from toil.jobStores.abstractJobStore import AbstractJobStore
25
+ from toil.jobStores.abstractJobStore import AbstractJobStore, NoSuchJobStoreException
24
26
  from toil.lib.expando import Expando
27
+ from toil.options.common import SYS_MAX_SIZE
25
28
  from toil.statsAndLogging import set_logging_from_options
26
29
 
27
30
  logger = logging.getLogger(__name__)
28
31
 
32
+ # These categories of stat will be reported
33
+ CATEGORIES = ["time", "clock", "wait", "memory", "disk"]
34
+ # These are the units they are stored in
35
+ CATEGORY_UNITS = {
36
+ "time": "s",
37
+ "clock": "core-s",
38
+ "wait": "core-s",
39
+ "memory": "KiB",
40
+ "disk": "B"
41
+ }
42
+ # These are what we call them to the user
43
+ TITLES = {
44
+ "time": "Real Time",
45
+ "clock": "CPU Time",
46
+ "wait": "CPU Wait",
47
+ "memory": "Memory",
48
+ "disk": "Disk"
49
+ }
50
+
51
+ # Of those, these are in time
52
+ TIME_CATEGORIES = {"time", "clock", "wait"}
53
+ # And these are in space
54
+ SPACE_CATEGORIES = {"memory", "disk"}
55
+ # These categories aren't stored and need to be computed
56
+ COMPUTED_CATEGORIES = {"wait"}
57
+
58
+ # The different kinds of summaries have both short and long names, and we need
59
+ # to convert between them.
60
+ LONG_FORMS = {
61
+ "med": "median",
62
+ "ave": "average",
63
+ "min": "min",
64
+ "total": "total",
65
+ "max": "max",
66
+ }
29
67
 
30
68
  class ColumnWidths:
31
69
  """
@@ -33,33 +71,33 @@ class ColumnWidths:
33
71
  """
34
72
 
35
73
  def __init__(self) -> None:
36
- self.categories = ["time", "clock", "wait", "memory"]
74
+ self.categories = CATEGORIES
37
75
  self.fields_count = ["count", "min", "med", "ave", "max", "total"]
38
76
  self.fields = ["min", "med", "ave", "max", "total"]
39
77
  self.data: Dict[str, int] = {}
40
78
  for category in self.categories:
41
79
  for field in self.fields_count:
42
- self.setWidth(category, field, 8)
80
+ self.set_width(category, field, 8)
43
81
 
44
82
  def title(self, category: str) -> int:
45
83
  """Return the total printed length of this category item."""
46
- return sum(self.getWidth(category, x) for x in self.fields)
84
+ return sum(self.get_width(category, x) for x in self.fields)
47
85
 
48
- def getWidth(self, category: str, field: str) -> int:
86
+ def get_width(self, category: str, field: str) -> int:
49
87
  category = category.lower()
50
88
  return self.data[f"{category}_{field}"]
51
89
 
52
- def setWidth(self, category: str, field: str, width: int) -> None:
90
+ def set_width(self, category: str, field: str, width: int) -> None:
53
91
  category = category.lower()
54
92
  self.data[f"{category}_{field}"] = width
55
93
 
56
94
  def report(self) -> None:
57
95
  for c in self.categories:
58
96
  for f in self.fields:
59
- print("%s %s %d" % (c, f, self.getWidth(c, f)))
97
+ print("%s %s %d" % (c, f, self.get_width(c, f)))
60
98
 
61
99
 
62
- def padStr(s: str, field: Optional[int] = None) -> str:
100
+ def pad_str(s: str, field: Optional[int] = None) -> str:
63
101
  """Pad the beginning of a string with spaces, if necessary."""
64
102
  if field is None or len(s) >= field:
65
103
  return s
@@ -67,48 +105,54 @@ def padStr(s: str, field: Optional[int] = None) -> str:
67
105
  return " " * (field - len(s)) + s
68
106
 
69
107
 
70
- def prettyMemory(k: float, field: Optional[int] = None, isBytes: bool = False) -> str:
71
- """Given input k as kilobytes, return a nicely formatted string."""
72
- if isBytes:
73
- k /= 1024
108
+ def pretty_space(k: float, field: Optional[int] = None, alone: bool = False) -> str:
109
+ """Given input k as kibibytes, return a nicely formatted string."""
110
+ # If we don't have a header to say bytes, include the B.
111
+ trailer = "B" if alone else ""
74
112
  if k < 1024:
75
- return padStr("%gK" % k, field)
113
+ return pad_str("%gKi%s" % (k, trailer), field)
76
114
  if k < (1024 * 1024):
77
- return padStr("%.1fM" % (k / 1024.0), field)
115
+ return pad_str("%.1fMi%s" % (k / 1024.0, trailer), field)
78
116
  if k < (1024 * 1024 * 1024):
79
- return padStr("%.1fG" % (k / 1024.0 / 1024.0), field)
117
+ return pad_str("%.1fGi%s" % (k / 1024.0 / 1024.0, trailer), field)
80
118
  if k < (1024 * 1024 * 1024 * 1024):
81
- return padStr("%.1fT" % (k / 1024.0 / 1024.0 / 1024.0), field)
119
+ return pad_str("%.1fTi%s" % (k / 1024.0 / 1024.0 / 1024.0, trailer), field)
82
120
  if k < (1024 * 1024 * 1024 * 1024 * 1024):
83
- return padStr("%.1fP" % (k / 1024.0 / 1024.0 / 1024.0 / 1024.0), field)
121
+ return pad_str("%.1fPi%s" % (k / 1024.0 / 1024.0 / 1024.0 / 1024.0, trailer), field)
84
122
 
85
123
  # due to https://stackoverflow.com/questions/47149154
86
124
  assert False
87
125
 
88
126
 
89
- def prettyTime(t: float, field: Optional[int] = None) -> str:
90
- """Given input t as seconds, return a nicely formatted string."""
127
+ def pretty_time(t: float, field: Optional[int] = None, unit: str = "s", alone: bool = False) -> str:
128
+ """
129
+ Given input t as seconds, return a nicely formatted string.
130
+ """
131
+ assert unit in ("s", "core-s")
132
+ # Qualify our CPU times as CPU time if we aren't in a table that does that
133
+ unit_str = report_unit(unit) if alone else "s"
134
+
91
135
  from math import floor
92
136
 
93
137
  pluralDict = {True: "s", False: ""}
94
138
  if t < 120:
95
- return padStr("%ds" % t, field)
139
+ return pad_str("%d%s" % (t, unit_str), field)
96
140
  if t < 120 * 60:
97
141
  m = floor(t / 60.0)
98
142
  s = t % 60
99
- return padStr("%dm%ds" % (m, s), field)
143
+ return pad_str("%dm%d%s" % (m, s, unit_str), field)
100
144
  if t < 25 * 60 * 60:
101
145
  h = floor(t / 60.0 / 60.0)
102
146
  m = floor((t - (h * 60.0 * 60.0)) / 60.0)
103
147
  s = t % 60
104
- return padStr("%dh%gm%ds" % (h, m, s), field)
148
+ return pad_str("%dh%gm%d%s" % (h, m, s, unit_str), field)
105
149
  if t < 7 * 24 * 60 * 60:
106
150
  d = floor(t / 24.0 / 60.0 / 60.0)
107
151
  h = floor((t - (d * 24.0 * 60.0 * 60.0)) / 60.0 / 60.0)
108
152
  m = floor((t - (d * 24.0 * 60.0 * 60.0) - (h * 60.0 * 60.0)) / 60.0)
109
153
  s = t % 60
110
154
  dPlural = pluralDict[d > 1]
111
- return padStr("%dday%s%dh%dm%ds" % (d, dPlural, h, m, s), field)
155
+ return pad_str("%dday%s%dh%dm%d%s" % (d, dPlural, h, m, s, unit_str), field)
112
156
  w = floor(t / 7.0 / 24.0 / 60.0 / 60.0)
113
157
  d = floor((t - (w * 7 * 24 * 60 * 60)) / 24.0 / 60.0 / 60.0)
114
158
  h = floor(
@@ -126,39 +170,85 @@ def prettyTime(t: float, field: Optional[int] = None) -> str:
126
170
  s = t % 60
127
171
  wPlural = pluralDict[w > 1]
128
172
  dPlural = pluralDict[d > 1]
129
- return padStr("%dweek%s%dday%s%dh%dm%ds" % (w, wPlural, d, dPlural, h, m, s), field)
173
+ return pad_str("%dweek%s%dday%s%dh%dm%d%s" % (w, wPlural, d, dPlural, h, m, s, unit_str), field)
130
174
 
175
+ def report_unit(unit: str) -> str:
176
+ """
177
+ Format a unit name for display.
178
+ """
179
+ if unit == "core-s":
180
+ return "core·s"
181
+ return unit
131
182
 
132
- def reportTime(t: float, options: Namespace, field: Optional[int] = None) -> str:
183
+ def report_time(t: float, options: Namespace, field: Optional[int] = None, unit: str = "s", alone: bool = False) -> str:
133
184
  """Given t seconds, report back the correct format as string."""
185
+ assert unit in ("s", "core-s")
134
186
  if options.pretty:
135
- return prettyTime(t, field=field)
136
- elif field is not None:
137
- return "%*.2f" % (field, t)
138
- return "%.2f" % t
187
+ return pretty_time(t, field=field, unit=unit, alone=alone)
188
+ unit_text = f" {report_unit(unit)}" if alone else ""
189
+ if field is not None:
190
+ assert field >= len(unit_text)
191
+ return "%*.2f%s" % (field - len(unit_text), t, unit_text)
192
+ return "%.2f%s" % (t, unit_text)
139
193
 
140
194
 
141
- def reportMemory(
142
- k: float, options: Namespace, field: Optional[int] = None, isBytes: bool = False
195
+ def report_space(
196
+ k: float, options: Namespace, field: Optional[int] = None, unit: str = "KiB", alone: bool = False
143
197
  ) -> str:
144
- """Given k kilobytes, report back the correct format as string."""
198
+ """
199
+ Given k kibibytes, report back the correct format as string.
200
+
201
+ If unit is set to B, convert to KiB first.
202
+ """
203
+ if unit == "B":
204
+ k /= 1024.0
205
+ unit = "KiB"
206
+ assert unit == "KiB"
145
207
  if options.pretty:
146
- return prettyMemory(int(k), field=field, isBytes=isBytes)
208
+ return pretty_space(int(k), field=field, alone=alone)
147
209
  else:
148
- if isBytes:
149
- k /= 1024.0
210
+ # If we don't have a heading to say bytes, include the B
211
+ trailer = "KiB" if alone else "Ki"
150
212
  if field is not None:
151
- return "%*dK" % (field - 1, k) # -1 for the "K"
213
+ assert field >= len(trailer)
214
+ return "%*d%s" % (field - len(trailer), k, trailer)
152
215
  else:
153
- return "%dK" % int(k)
216
+ return "%d%s" % (int(k), trailer)
217
+
218
+
219
+ def report_number(n: Union[int, float, None], field: Optional[int] = None, nan_value: str = "NaN") -> str:
220
+ """
221
+ Given a number, report back the correct format as string.
222
+
223
+ If it is a NaN or None, use nan_value to represent it instead.
224
+ """
225
+ if n is None or math.isnan(n):
226
+ return pad_str(nan_value, field=field)
227
+ else:
228
+ # Make sure not to format with too much precision for the field size;
229
+ # leave room for . and the spacing to the previous field.
230
+ return "%*.*g" % (field, field - 2, n) if field else "%g" % n
231
+
232
+ def report(v: float, category: str, options: Namespace, field: Optional[int] = None, alone=False) -> str:
233
+ """
234
+ Report a value of the given category formatted as a string.
154
235
 
236
+ Uses the given field width if set.
155
237
 
156
- def reportNumber(n: float, field: Optional[int] = None) -> str:
157
- """Given n an integer, report back the correct format as string."""
158
- return "%*g" % (field, n) if field else "%g" % n
238
+ If alone is set, the field is being formatted outside a table and might need a unit.
239
+ """
159
240
 
241
+ unit = CATEGORY_UNITS.get(category)
242
+ if unit in ("s", "core-s"):
243
+ # This is time.
244
+ return report_time(v, options, field=field, unit=unit, alone=alone)
245
+ elif unit in ("B", "KiB"):
246
+ # This is space.
247
+ return report_space(v, options, field=field, unit=unit, alone=alone)
248
+ else:
249
+ raise ValueError(f"Unimplemented unit {unit} for category {category}")
160
250
 
161
- def sprintTag(
251
+ def sprint_tag(
162
252
  key: str,
163
253
  tag: Expando,
164
254
  options: Namespace,
@@ -167,9 +257,9 @@ def sprintTag(
167
257
  """Generate a pretty-print ready string from a JTTag()."""
168
258
  if columnWidths is None:
169
259
  columnWidths = ColumnWidths()
170
- header = " %7s " % decorateTitle("Count", options)
260
+ header = " %7s " % decorate_title("count", "Count", options)
171
261
  sub_header = " %7s " % "n"
172
- tag_str = f" {reportNumber(n=tag.total_number, field=7)}"
262
+ tag_str = f" {report_number(n=tag.total_number, field=7)}"
173
263
  out_str = ""
174
264
  if key == "job":
175
265
  out_str += " {:<12} | {:>7}{:>7}{:>7}{:>7}\n".format(
@@ -182,109 +272,82 @@ def sprintTag(
182
272
  tag.average_number_per_worker,
183
273
  tag.max_number_per_worker,
184
274
  ]:
185
- worker_str += reportNumber(n=t, field=7)
275
+ worker_str += report_number(n=t, field=7)
186
276
  out_str += worker_str + "\n"
187
- if "time" in options.categories:
188
- header += "| %*s " % (
189
- columnWidths.title("time"),
190
- decorateTitle("Time", options),
191
- )
192
- sub_header += decorateSubHeader("Time", columnWidths, options)
193
- tag_str += " | "
194
- for t, width in [
195
- (tag.min_time, columnWidths.getWidth("time", "min")),
196
- (tag.median_time, columnWidths.getWidth("time", "med")),
197
- (tag.average_time, columnWidths.getWidth("time", "ave")),
198
- (tag.max_time, columnWidths.getWidth("time", "max")),
199
- (tag.total_time, columnWidths.getWidth("time", "total")),
200
- ]:
201
- tag_str += reportTime(t, options, field=width)
202
- if "clock" in options.categories:
203
- header += "| %*s " % (
204
- columnWidths.title("clock"),
205
- decorateTitle("Clock", options),
206
- )
207
- sub_header += decorateSubHeader("Clock", columnWidths, options)
208
- tag_str += " | "
209
- for t, width in [
210
- (tag.min_clock, columnWidths.getWidth("clock", "min")),
211
- (tag.median_clock, columnWidths.getWidth("clock", "med")),
212
- (tag.average_clock, columnWidths.getWidth("clock", "ave")),
213
- (tag.max_clock, columnWidths.getWidth("clock", "max")),
214
- (tag.total_clock, columnWidths.getWidth("clock", "total")),
215
- ]:
216
- tag_str += reportTime(t, options, field=width)
217
- if "wait" in options.categories:
218
- header += "| %*s " % (
219
- columnWidths.title("wait"),
220
- decorateTitle("Wait", options),
221
- )
222
- sub_header += decorateSubHeader("Wait", columnWidths, options)
223
- tag_str += " | "
224
- for t, width in [
225
- (tag.min_wait, columnWidths.getWidth("wait", "min")),
226
- (tag.median_wait, columnWidths.getWidth("wait", "med")),
227
- (tag.average_wait, columnWidths.getWidth("wait", "ave")),
228
- (tag.max_wait, columnWidths.getWidth("wait", "max")),
229
- (tag.total_wait, columnWidths.getWidth("wait", "total")),
230
- ]:
231
- tag_str += reportTime(t, options, field=width)
232
- if "memory" in options.categories:
277
+
278
+ for category in CATEGORIES:
279
+ if category not in options.categories:
280
+ continue
281
+
233
282
  header += "| %*s " % (
234
- columnWidths.title("memory"),
235
- decorateTitle("Memory", options),
283
+ columnWidths.title(category),
284
+ decorate_title(category, TITLES[category], options),
236
285
  )
237
- sub_header += decorateSubHeader("Memory", columnWidths, options)
286
+ sub_header += decorate_subheader(category, columnWidths, options)
238
287
  tag_str += " | "
239
- for t, width in [
240
- (tag.min_memory, columnWidths.getWidth("memory", "min")),
241
- (tag.median_memory, columnWidths.getWidth("memory", "med")),
242
- (tag.average_memory, columnWidths.getWidth("memory", "ave")),
243
- (tag.max_memory, columnWidths.getWidth("memory", "max")),
244
- (tag.total_memory, columnWidths.getWidth("memory", "total")),
245
- ]:
246
- tag_str += reportMemory(t, options, field=width)
288
+
289
+ for field in ["min", "med", "ave", "max", "total"]:
290
+ t = getattr(tag, f"{LONG_FORMS[field]}_{category}")
291
+ width = columnWidths.get_width(category, field)
292
+ s = report(t, category, options, field=width)
293
+ tag_str += s
294
+
247
295
  out_str += header + "\n"
248
296
  out_str += sub_header + "\n"
249
297
  out_str += tag_str + "\n"
250
298
  return out_str
251
299
 
252
300
 
253
- def decorateTitle(title: str, options: Namespace) -> str:
254
- """Add a marker to TITLE if the TITLE is sorted on."""
255
- if title.lower() == options.sortCategory:
301
+ def decorate_title(category: str, title: str, options: Namespace) -> str:
302
+ """
303
+ Add extra parts to the category titles.
304
+
305
+ Add units to title if they won't appear in the formatted values.
306
+ Add a marker to TITLE if the TITLE is sorted on.
307
+ """
308
+ unit = CATEGORY_UNITS.get(category)
309
+ if unit in ("s", "core-s") and not options.pretty:
310
+ # This is a time and we won't write it out as text, so add a unit.
311
+ title = f"{title} ({report_unit(unit)})"
312
+ elif unit == "core-s" and options.pretty:
313
+ # This is a core-second category and we won't be putting the core unit
314
+ # in the value, so note that here.
315
+ title = f"{title} (core)"
316
+ elif unit in ("B", "KiB"):
317
+ # The Ki part will appear in the cell so we need a B
318
+ title = f"{title} (B)"
319
+ if category.lower() == options.sortCategory:
256
320
  return "%s*" % title
257
321
  else:
258
322
  return title
259
323
 
260
324
 
261
- def decorateSubHeader(
262
- title: str, columnWidths: ColumnWidths, options: Namespace
325
+ def decorate_subheader(
326
+ category: str, columnWidths: ColumnWidths, options: Namespace
263
327
  ) -> str:
264
328
  """Add a marker to the correct field if the TITLE is sorted on."""
265
- title = title.lower()
266
- if title != options.sortCategory:
329
+ if category != options.sortCategory:
267
330
  s = "| %*s%*s%*s%*s%*s " % (
268
- columnWidths.getWidth(title, "min"),
331
+ columnWidths.get_width(category, "min"),
269
332
  "min",
270
- columnWidths.getWidth(title, "med"),
333
+ columnWidths.get_width(category, "med"),
271
334
  "med",
272
- columnWidths.getWidth(title, "ave"),
335
+ columnWidths.get_width(category, "ave"),
273
336
  "ave",
274
- columnWidths.getWidth(title, "max"),
337
+ columnWidths.get_width(category, "max"),
275
338
  "max",
276
- columnWidths.getWidth(title, "total"),
339
+ columnWidths.get_width(category, "total"),
277
340
  "total",
278
341
  )
279
342
  return s
280
343
  else:
281
344
  s = "| "
282
345
  for field, width in [
283
- ("min", columnWidths.getWidth(title, "min")),
284
- ("med", columnWidths.getWidth(title, "med")),
285
- ("ave", columnWidths.getWidth(title, "ave")),
286
- ("max", columnWidths.getWidth(title, "max")),
287
- ("total", columnWidths.getWidth(title, "total")),
346
+ ("min", columnWidths.get_width(category, "min")),
347
+ ("med", columnWidths.get_width(category, "med")),
348
+ ("ave", columnWidths.get_width(category, "ave")),
349
+ ("max", columnWidths.get_width(category, "max")),
350
+ ("total", columnWidths.get_width(category, "total")),
288
351
  ]:
289
352
  if options.sortField == field:
290
353
  s += "%*s*" % (width - 1, field)
@@ -302,114 +365,93 @@ def get(tree: Expando, name: str) -> float:
302
365
  return float("nan")
303
366
 
304
367
 
305
- def sortJobs(jobTypes: List[Any], options: Namespace) -> List[Any]:
368
+ def sort_jobs(jobTypes: List[Any], options: Namespace) -> List[Any]:
306
369
  """Return the job types sorted by the category, field and order chosen in options."""
307
- longforms = {
308
- "med": "median",
309
- "ave": "average",
310
- "min": "min",
311
- "total": "total",
312
- "max": "max",
313
- }
314
- sortField = longforms[options.sortField]
370
+ sortField = LONG_FORMS[options.sortField]
315
371
  if (
316
- options.sortCategory == "time"
317
- or options.sortCategory == "clock"
318
- or options.sortCategory == "wait"
319
- or options.sortCategory == "memory"
372
+ options.sortCategory in CATEGORIES
320
373
  ):
321
374
  return sorted(
322
375
  jobTypes,
323
376
  key=lambda tag: getattr(tag, "%s_%s" % (sortField, options.sortCategory)),
324
- reverse=options.sortReverse,
377
+ reverse=options.sort == "decending",
325
378
  )
326
379
  elif options.sortCategory == "alpha":
327
380
  return sorted(
328
381
  jobTypes,
329
382
  key=lambda tag: tag.name, # type: ignore
330
- reverse=options.sortReverse,
383
+ reverse=options.sort == "decending",
331
384
  )
332
385
  elif options.sortCategory == "count":
333
386
  return sorted(
334
387
  jobTypes,
335
388
  key=lambda tag: tag.total_number, # type: ignore
336
- reverse=options.sortReverse,
389
+ reverse=options.sort == "decending",
337
390
  )
338
391
 
339
392
  # due to https://stackoverflow.com/questions/47149154
340
393
  assert False
341
394
 
342
395
 
343
- def reportPrettyData(
396
+ def report_pretty_data(
344
397
  root: Expando,
345
- worker: List[Job],
346
- job: List[Job],
398
+ worker: Expando,
399
+ job: Expando,
347
400
  job_types: List[Any],
348
401
  options: Namespace,
349
402
  ) -> str:
350
403
  """Print the important bits out."""
351
404
  out_str = "Batch System: %s\n" % root.batch_system
352
405
  out_str += "Default Cores: %s Default Memory: %s\n" "Max Cores: %s\n" % (
353
- reportNumber(n=get(root, "default_cores")),
354
- reportMemory(get(root, "default_memory"), options, isBytes=True),
355
- reportNumber(n=get(root, "max_cores")),
406
+ report_number(n=get(root, "default_cores")),
407
+ # Although per-job memory usage is in KiB, our default is stored in bytes.
408
+ report_space(get(root, "default_memory"), options, unit="B", alone=True),
409
+ report_number(n=get(root, "max_cores"), nan_value="unlimited"),
356
410
  )
357
- out_str += "Total Clock: {} Total Runtime: {}\n".format(
358
- reportTime(get(root, "total_clock"), options),
359
- reportTime(get(root, "total_run_time"), options),
411
+ out_str += "Local CPU Time: {} Overall Runtime: {}\n".format(
412
+ report(get(root, "total_clock"), "clock", options, alone=True),
413
+ report(get(root, "total_run_time"), "time", options, alone=True),
360
414
  )
361
- job_types = sortJobs(job_types, options)
362
- columnWidths = computeColumnWidths(job_types, worker, job, options)
415
+ job_types = sort_jobs(job_types, options)
416
+ columnWidths = compute_column_widths(job_types, worker, job, options)
363
417
  out_str += "Worker\n"
364
- out_str += sprintTag("worker", worker, options, columnWidths=columnWidths)
418
+ out_str += sprint_tag("worker", worker, options, columnWidths=columnWidths)
365
419
  out_str += "Job\n"
366
- out_str += sprintTag("job", job, options, columnWidths=columnWidths)
420
+ out_str += sprint_tag("job", job, options, columnWidths=columnWidths)
367
421
  for t in job_types:
368
422
  out_str += f" {t.name}\n"
369
423
  out_str += f" Total Cores: {t.total_cores}\n"
370
- out_str += sprintTag(t.name, t, options, columnWidths=columnWidths)
424
+ out_str += sprint_tag(t.name, t, options, columnWidths=columnWidths)
371
425
  return out_str
372
426
 
373
427
 
374
- def computeColumnWidths(
375
- job_types: List[Any], worker: List[Job], job: List[Job], options: Expando
428
+ def compute_column_widths(
429
+ job_types: List[Any], worker: Expando, job: Expando, options: Namespace
376
430
  ) -> ColumnWidths:
377
431
  """Return a ColumnWidths() object with the correct max widths."""
378
432
  cw = ColumnWidths()
379
433
  for t in job_types:
380
- updateColumnWidths(t, cw, options)
381
- updateColumnWidths(worker, cw, options)
382
- updateColumnWidths(job, cw, options)
434
+ update_column_widths(t, cw, options)
435
+ update_column_widths(worker, cw, options)
436
+ update_column_widths(job, cw, options)
383
437
  return cw
384
438
 
385
439
 
386
- def updateColumnWidths(tag: Expando, cw: ColumnWidths, options: Expando) -> None:
440
+ def update_column_widths(tag: Expando, cw: ColumnWidths, options: Namespace) -> None:
387
441
  """Update the column width attributes for this tag's fields."""
388
- longforms = {
389
- "med": "median",
390
- "ave": "average",
391
- "min": "min",
392
- "total": "total",
393
- "max": "max",
394
- }
395
- for category in ["time", "clock", "wait", "memory"]:
442
+ # TODO: Deduplicate with actual printing code!
443
+ for category in CATEGORIES:
396
444
  if category in options.categories:
397
445
  for field in ["min", "med", "ave", "max", "total"]:
398
- t = getattr(tag, f"{longforms[field]}_{category}")
399
- if category in ["time", "clock", "wait"]:
400
- s = reportTime(
401
- t, options, field=cw.getWidth(category, field)
402
- ).strip()
403
- else:
404
- s = reportMemory(
405
- t, options, field=cw.getWidth(category, field), isBytes=True
406
- ).strip()
407
- if len(s) >= cw.getWidth(category, field):
446
+ t = getattr(tag, f"{LONG_FORMS[field]}_{category}")
447
+ width = cw.get_width(category, field)
448
+ s = report(t, category, options, field=width).strip()
449
+ if len(s) >= cw.get_width(category, field):
408
450
  # this string is larger than max, width must be increased
409
- cw.setWidth(category, field, len(s) + 1)
451
+ cw.set_width(category, field, len(s) + 1)
410
452
 
411
453
 
412
- def buildElement(element: Expando, items: List[Job], itemName: str) -> Expando:
454
+ def build_element(element: Expando, items: List[Job], item_name: str, defaults: dict[str, float]) -> Expando:
413
455
  """Create an element for output."""
414
456
 
415
457
  def assertNonnegative(i: float, name: str) -> float:
@@ -418,74 +460,66 @@ def buildElement(element: Expando, items: List[Job], itemName: str) -> Expando:
418
460
  else:
419
461
  return float(i)
420
462
 
421
- totalCores = 0
422
-
423
- itemTimes = []
424
- itemClocks = []
425
- itemMemory = []
463
+ # Make lists of all values for all items in each category, plus requested cores.
464
+ item_values = {category: [] for category in (CATEGORIES + ["cores"])}
426
465
 
427
466
  for item in items:
428
467
  # If something lacks an entry, assume it used none of that thing.
429
468
  # This avoids crashing when jobs e.g. aren't done.
430
- itemTimes.append(assertNonnegative(float(item.get("time", 0)), "time"))
431
- itemClocks.append(assertNonnegative(float(item.get("clock", 0)), "clock"))
432
- itemMemory.append(assertNonnegative(float(item.get("memory", 0)), "memory"))
433
- totalCores += assertNonnegative(
434
- float(item.get("requested_cores", 0)), "requested_cores"
435
- )
436
-
437
- assert len(itemClocks) == len(itemTimes) == len(itemMemory)
469
+ for category, values in item_values.items():
470
+ if category in COMPUTED_CATEGORIES:
471
+ continue
472
+ category_key = category if category != "cores" else "requested_cores"
473
+ category_value = assertNonnegative(float(item.get(category_key, defaults[category])), category)
474
+ values.append(category_value)
475
+
476
+ for index in range(0, len(item_values[CATEGORIES[0]])):
477
+ # For each item, compute the computed categories
478
+ item_values["wait"].append(item_values["time"][index] * item_values["cores"][index] - item_values["clock"][index])
479
+
480
+ for category, values in item_values.items():
481
+ values.sort()
482
+
483
+ if len(item_values[CATEGORIES[0]]) == 0:
484
+ # Nothing actually there so make a 0 value
485
+ for k, v in item_values.items():
486
+ v.append(0)
487
+
488
+ item_element = Expando(
489
+ total_number=float(len(items)),
490
+ name=item_name
491
+ )
438
492
 
439
- itemWaits = []
440
- for index in range(0, len(itemTimes)):
441
- itemWaits.append(itemTimes[index] - itemClocks[index])
493
+ for category, values in item_values.items():
494
+ item_element["total_" + category] = float(sum(values))
495
+ item_element["median_" + category] = float(values[len(values) // 2])
496
+ item_element["average_" + category] = float(sum(values) / len(values))
497
+ item_element["min_" + category] = float(min(values))
498
+ item_element["max_" + category] = float(max(values))
442
499
 
443
- itemWaits.sort()
444
- itemTimes.sort()
445
- itemClocks.sort()
446
- itemMemory.sort()
500
+ element[item_name] = item_element
447
501
 
448
- if len(itemTimes) == 0:
449
- itemTimes.append(0)
450
- itemClocks.append(0)
451
- itemWaits.append(0)
452
- itemMemory.append(0)
502
+ return item_element
453
503
 
454
- element[itemName] = Expando(
455
- total_number=float(len(items)),
456
- total_time=float(sum(itemTimes)),
457
- median_time=float(itemTimes[len(itemTimes) // 2]),
458
- average_time=float(sum(itemTimes) / len(itemTimes)),
459
- min_time=float(min(itemTimes)),
460
- max_time=float(max(itemTimes)),
461
- total_clock=float(sum(itemClocks)),
462
- median_clock=float(itemClocks[len(itemClocks) // 2]),
463
- average_clock=float(sum(itemClocks) / len(itemClocks)),
464
- min_clock=float(min(itemClocks)),
465
- max_clock=float(max(itemClocks)),
466
- total_wait=float(sum(itemWaits)),
467
- median_wait=float(itemWaits[len(itemWaits) // 2]),
468
- average_wait=float(sum(itemWaits) / len(itemWaits)),
469
- min_wait=float(min(itemWaits)),
470
- max_wait=float(max(itemWaits)),
471
- total_memory=float(sum(itemMemory)),
472
- median_memory=float(itemMemory[len(itemMemory) // 2]),
473
- average_memory=float(sum(itemMemory) / len(itemMemory)),
474
- min_memory=float(min(itemMemory)),
475
- max_memory=float(max(itemMemory)),
476
- total_cores=totalCores,
477
- name=itemName,
478
- )
479
- return element[itemName]
480
504
 
481
-
482
- def createSummary(
505
+ def create_summary(
483
506
  element: Expando,
484
- containingItems: List[Job],
507
+ containingItems: List[Expando],
485
508
  containingItemName: str,
486
- getFn: Callable[[Job], List[Optional[Job]]],
509
+ count_contained: Callable[[Expando], int],
487
510
  ) -> None:
488
- itemCounts = [len(getFn(containingItem)) for containingItem in containingItems]
511
+ """
512
+ Figure out how many jobs (or contained items) ran on each worker (or containing item).
513
+
514
+ Stick a bunch of xxx_number_per_xxx stats into element to describe this.
515
+
516
+ :param count_contained: function that maps from containing item to number of contained items.
517
+ """
518
+
519
+ # TODO: this still thinks like the old XML stats, even though now the
520
+ # worker records no longer actually contain the job records.
521
+
522
+ itemCounts = [count_contained(containingItem) for containingItem in containingItems]
489
523
  itemCounts.sort()
490
524
  if len(itemCounts) == 0:
491
525
  itemCounts.append(0)
@@ -499,10 +533,14 @@ def createSummary(
499
533
  element["max_number_per_%s" % containingItemName] = max(itemCounts)
500
534
 
501
535
 
502
- def getStats(jobStore: AbstractJobStore) -> Expando:
503
- """Collect and return the stats and config data."""
536
+ def get_stats(jobStore: AbstractJobStore) -> Expando:
537
+ """
538
+ Sum together all the stats information in the job store.
539
+
540
+ Produces one object containing lists of the values from all the summed objects.
541
+ """
504
542
 
505
- def aggregateStats(fileHandle: TextIO, aggregateObject: Expando) -> None:
543
+ def aggregate_stats(fileHandle: TextIO, aggregateObject: Expando) -> None:
506
544
  try:
507
545
  stats = json.load(fileHandle, object_hook=Expando)
508
546
  for key in list(stats.keys()):
@@ -517,12 +555,12 @@ def getStats(jobStore: AbstractJobStore) -> Expando:
517
555
  pass # The file is corrupted.
518
556
 
519
557
  aggregateObject = Expando()
520
- callBack = partial(aggregateStats, aggregateObject=aggregateObject)
558
+ callBack = partial(aggregate_stats, aggregateObject=aggregateObject)
521
559
  jobStore.read_logs(callBack, read_all=True)
522
560
  return aggregateObject
523
561
 
524
562
 
525
- def processData(config: Config, stats: Expando) -> Expando:
563
+ def process_data(config: Config, stats: Expando) -> Expando:
526
564
  """
527
565
  Collate the stats and report
528
566
  """
@@ -531,7 +569,11 @@ def processData(config: Config, stats: Expando) -> Expando:
531
569
  stats.total_time = [0.0]
532
570
  stats.total_clock = [0.0]
533
571
 
572
+ # This is actually the sum of *overall* wall clock time as measured by the
573
+ # leader in each leader invocation, not a sum over jobs.
534
574
  stats.total_time = sum(float(number) for number in stats.total_time)
575
+ # And this is CPU clock as measured by the leader, so it will count time
576
+ # used in local jobs but not remote ones.
535
577
  stats.total_clock = sum(float(number) for number in stats.total_clock)
536
578
 
537
579
  collatedStatsTag = Expando(
@@ -540,7 +582,7 @@ def processData(config: Config, stats: Expando) -> Expando:
540
582
  batch_system=config.batchSystem,
541
583
  default_memory=str(config.defaultMemory),
542
584
  default_cores=str(config.defaultCores),
543
- max_cores=str(config.maxCores),
585
+ max_cores=str(config.maxCores if config.maxCores != SYS_MAX_SIZE else None),
544
586
  )
545
587
 
546
588
  # Add worker info
@@ -548,18 +590,16 @@ def processData(config: Config, stats: Expando) -> Expando:
548
590
  jobs = [_f for _f in getattr(stats, "jobs", []) if _f]
549
591
  jobs = [item for sublist in jobs for item in sublist]
550
592
 
551
- def fn4(job: Job) -> List[Optional[Job]]:
552
- try:
553
- return list(jobs)
554
- except TypeError:
555
- return []
593
+ # Work out what usage to assume for things that didn't report
594
+ defaults = {category: 0 for category in CATEGORIES}
595
+ defaults["cores"] = config.defaultCores
556
596
 
557
- buildElement(collatedStatsTag, worker, "worker")
558
- createSummary(
559
- buildElement(collatedStatsTag, jobs, "jobs"),
597
+ build_element(collatedStatsTag, worker, "worker", defaults)
598
+ create_summary(
599
+ build_element(collatedStatsTag, jobs, "jobs", defaults),
560
600
  getattr(stats, "workers", []),
561
601
  "worker",
562
- fn4,
602
+ lambda worker: getattr(worker, "jobs_run", 0)
563
603
  )
564
604
  # Get info for each job
565
605
  jobNames = set()
@@ -569,17 +609,17 @@ def processData(config: Config, stats: Expando) -> Expando:
569
609
  collatedStatsTag.job_types = jobTypesTag
570
610
  for jobName in jobNames:
571
611
  jobTypes = [job for job in jobs if job.class_name == jobName]
572
- buildElement(jobTypesTag, jobTypes, jobName)
612
+ build_element(jobTypesTag, jobTypes, jobName, defaults)
573
613
  collatedStatsTag.name = "collatedStatsTag"
574
614
  return collatedStatsTag
575
615
 
576
616
 
577
- def reportData(tree: Expando, options: Namespace) -> None:
617
+ def report_data(tree: Expando, options: Namespace) -> None:
578
618
  # Now dump it all out to file
579
619
  if options.raw:
580
620
  out_str = json.dumps(tree, indent=4, separators=(",", ": "))
581
621
  else:
582
- out_str = reportPrettyData(
622
+ out_str = report_pretty_data(
583
623
  tree, tree.worker, tree.jobs, tree.job_types.values(), options
584
624
  )
585
625
  if options.outputFile is not None:
@@ -589,8 +629,7 @@ def reportData(tree: Expando, options: Namespace) -> None:
589
629
  print(out_str)
590
630
 
591
631
 
592
- category_choices = ["time", "clock", "wait", "memory"]
593
- sort_category_choices = ["time", "clock", "wait", "memory", "alpha", "count"]
632
+ sort_category_choices = CATEGORIES + ["alpha", "count"]
594
633
  sort_field_choices = ["min", "med", "ave", "max", "total"]
595
634
 
596
635
 
@@ -612,29 +651,28 @@ def add_stats_options(parser: ArgumentParser) -> None:
612
651
  help="if not raw, prettify the numbers to be human readable.",
613
652
  )
614
653
  parser.add_argument(
615
- "--sortReverse",
616
- "--reverseSort",
617
- default=False,
618
- action="store_true",
619
- help="Reverse sort.",
654
+ "--sort",
655
+ default="decending",
656
+ choices=["ascending", "decending"],
657
+ help="Sort direction.",
620
658
  )
621
659
  parser.add_argument(
622
660
  "--categories",
623
- default=",".join(category_choices),
661
+ default=",".join(CATEGORIES),
624
662
  type=str,
625
- help=f"Comma separated list of any of the following: {category_choices}.",
663
+ help=f"Comma separated list of any of the following: {CATEGORIES}.",
626
664
  )
627
665
  parser.add_argument(
628
666
  "--sortCategory",
629
667
  default="time",
630
668
  choices=sort_category_choices,
631
- help=f"How to sort job categories. Choices: {sort_category_choices}. Default: time.",
669
+ help=f"How to sort job categories.",
632
670
  )
633
671
  parser.add_argument(
634
672
  "--sortField",
635
673
  default="med",
636
674
  choices=sort_field_choices,
637
- help=f"How to sort job fields. Choices: {sort_field_choices}. Default: med.",
675
+ help=f"How to sort job fields.",
638
676
  )
639
677
 
640
678
 
@@ -645,14 +683,19 @@ def main() -> None:
645
683
  options = parser.parse_args()
646
684
 
647
685
  for c in options.categories.split(","):
648
- if c.strip() not in category_choices:
649
- raise ValueError(f"{c} not in {category_choices}!")
686
+ if c.strip().lower() not in CATEGORIES:
687
+ logger.critical("Cannot use category %s, options are: %s", c.strip().lower(), CATEGORIES)
688
+ sys.exit(1)
650
689
  options.categories = [x.strip().lower() for x in options.categories.split(",")]
651
690
 
652
691
  set_logging_from_options(options)
653
692
  config = Config()
654
693
  config.setOptions(options)
655
- jobStore = Toil.resumeJobStore(config.jobStore)
656
- stats = getStats(jobStore)
657
- collatedStatsTag = processData(jobStore.config, stats)
658
- reportData(collatedStatsTag, options)
694
+ try:
695
+ jobStore = Toil.resumeJobStore(config.jobStore)
696
+ except NoSuchJobStoreException:
697
+ logger.critical("The job store %s does not exist", config.jobStore)
698
+ sys.exit(1)
699
+ stats = get_stats(jobStore)
700
+ collatedStatsTag = process_data(jobStore.config, stats)
701
+ report_data(collatedStatsTag, options)