toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (164)
  1. toil/__init__.py +18 -13
  2. toil/batchSystems/abstractBatchSystem.py +39 -13
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
  4. toil/batchSystems/awsBatch.py +14 -14
  5. toil/batchSystems/cleanup_support.py +7 -3
  6. toil/batchSystems/contained_executor.py +3 -3
  7. toil/batchSystems/htcondor.py +0 -1
  8. toil/batchSystems/kubernetes.py +34 -31
  9. toil/batchSystems/local_support.py +3 -1
  10. toil/batchSystems/lsf.py +7 -7
  11. toil/batchSystems/mesos/batchSystem.py +7 -7
  12. toil/batchSystems/options.py +32 -83
  13. toil/batchSystems/registry.py +104 -23
  14. toil/batchSystems/singleMachine.py +16 -13
  15. toil/batchSystems/slurm.py +87 -16
  16. toil/batchSystems/torque.py +0 -1
  17. toil/bus.py +44 -8
  18. toil/common.py +544 -753
  19. toil/cwl/__init__.py +28 -32
  20. toil/cwl/cwltoil.py +595 -574
  21. toil/cwl/utils.py +55 -10
  22. toil/exceptions.py +1 -1
  23. toil/fileStores/__init__.py +2 -2
  24. toil/fileStores/abstractFileStore.py +88 -14
  25. toil/fileStores/cachingFileStore.py +610 -549
  26. toil/fileStores/nonCachingFileStore.py +46 -22
  27. toil/job.py +182 -101
  28. toil/jobStores/abstractJobStore.py +161 -95
  29. toil/jobStores/aws/jobStore.py +23 -9
  30. toil/jobStores/aws/utils.py +6 -6
  31. toil/jobStores/fileJobStore.py +116 -18
  32. toil/jobStores/googleJobStore.py +16 -7
  33. toil/jobStores/utils.py +5 -6
  34. toil/leader.py +87 -56
  35. toil/lib/accelerators.py +10 -5
  36. toil/lib/aws/__init__.py +3 -14
  37. toil/lib/aws/ami.py +22 -9
  38. toil/lib/aws/iam.py +21 -13
  39. toil/lib/aws/session.py +2 -16
  40. toil/lib/aws/utils.py +4 -5
  41. toil/lib/compatibility.py +1 -1
  42. toil/lib/conversions.py +26 -3
  43. toil/lib/docker.py +22 -23
  44. toil/lib/ec2.py +10 -6
  45. toil/lib/ec2nodes.py +106 -100
  46. toil/lib/encryption/_nacl.py +2 -1
  47. toil/lib/generatedEC2Lists.py +325 -18
  48. toil/lib/io.py +49 -2
  49. toil/lib/misc.py +1 -1
  50. toil/lib/resources.py +9 -2
  51. toil/lib/threading.py +101 -38
  52. toil/options/common.py +736 -0
  53. toil/options/cwl.py +336 -0
  54. toil/options/wdl.py +37 -0
  55. toil/provisioners/abstractProvisioner.py +9 -4
  56. toil/provisioners/aws/__init__.py +3 -6
  57. toil/provisioners/aws/awsProvisioner.py +6 -0
  58. toil/provisioners/clusterScaler.py +3 -2
  59. toil/provisioners/gceProvisioner.py +2 -2
  60. toil/realtimeLogger.py +2 -1
  61. toil/resource.py +24 -18
  62. toil/server/app.py +2 -3
  63. toil/server/cli/wes_cwl_runner.py +4 -4
  64. toil/server/utils.py +1 -1
  65. toil/server/wes/abstract_backend.py +3 -2
  66. toil/server/wes/amazon_wes_utils.py +5 -4
  67. toil/server/wes/tasks.py +2 -3
  68. toil/server/wes/toil_backend.py +2 -10
  69. toil/server/wsgi_app.py +2 -0
  70. toil/serviceManager.py +12 -10
  71. toil/statsAndLogging.py +41 -9
  72. toil/test/__init__.py +29 -54
  73. toil/test/batchSystems/batchSystemTest.py +11 -111
  74. toil/test/batchSystems/test_slurm.py +24 -8
  75. toil/test/cactus/__init__.py +0 -0
  76. toil/test/cactus/test_cactus_integration.py +58 -0
  77. toil/test/cwl/cwlTest.py +438 -223
  78. toil/test/cwl/glob_dir.cwl +15 -0
  79. toil/test/cwl/preemptible.cwl +21 -0
  80. toil/test/cwl/preemptible_expression.cwl +28 -0
  81. toil/test/cwl/revsort.cwl +1 -1
  82. toil/test/cwl/revsort2.cwl +1 -1
  83. toil/test/docs/scriptsTest.py +2 -3
  84. toil/test/jobStores/jobStoreTest.py +34 -21
  85. toil/test/lib/aws/test_iam.py +4 -14
  86. toil/test/lib/aws/test_utils.py +0 -3
  87. toil/test/lib/dockerTest.py +4 -4
  88. toil/test/lib/test_ec2.py +12 -17
  89. toil/test/mesos/helloWorld.py +4 -5
  90. toil/test/mesos/stress.py +1 -1
  91. toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
  92. toil/test/options/options.py +37 -0
  93. toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
  94. toil/test/provisioners/clusterScalerTest.py +6 -4
  95. toil/test/provisioners/clusterTest.py +23 -11
  96. toil/test/provisioners/gceProvisionerTest.py +0 -6
  97. toil/test/provisioners/restartScript.py +3 -2
  98. toil/test/server/serverTest.py +1 -1
  99. toil/test/sort/restart_sort.py +2 -1
  100. toil/test/sort/sort.py +2 -1
  101. toil/test/sort/sortTest.py +2 -13
  102. toil/test/src/autoDeploymentTest.py +45 -45
  103. toil/test/src/busTest.py +5 -5
  104. toil/test/src/checkpointTest.py +2 -2
  105. toil/test/src/deferredFunctionTest.py +1 -1
  106. toil/test/src/fileStoreTest.py +32 -16
  107. toil/test/src/helloWorldTest.py +1 -1
  108. toil/test/src/importExportFileTest.py +1 -1
  109. toil/test/src/jobDescriptionTest.py +2 -1
  110. toil/test/src/jobServiceTest.py +1 -1
  111. toil/test/src/jobTest.py +18 -18
  112. toil/test/src/miscTests.py +5 -3
  113. toil/test/src/promisedRequirementTest.py +3 -3
  114. toil/test/src/realtimeLoggerTest.py +1 -1
  115. toil/test/src/resourceTest.py +2 -2
  116. toil/test/src/restartDAGTest.py +1 -1
  117. toil/test/src/resumabilityTest.py +36 -2
  118. toil/test/src/retainTempDirTest.py +1 -1
  119. toil/test/src/systemTest.py +2 -2
  120. toil/test/src/toilContextManagerTest.py +2 -2
  121. toil/test/src/userDefinedJobArgTypeTest.py +1 -1
  122. toil/test/utils/toilDebugTest.py +98 -32
  123. toil/test/utils/toilKillTest.py +2 -2
  124. toil/test/utils/utilsTest.py +23 -3
  125. toil/test/wdl/wdltoil_test.py +223 -45
  126. toil/toilState.py +7 -6
  127. toil/utils/toilClean.py +1 -1
  128. toil/utils/toilConfig.py +36 -0
  129. toil/utils/toilDebugFile.py +60 -33
  130. toil/utils/toilDebugJob.py +39 -12
  131. toil/utils/toilDestroyCluster.py +1 -1
  132. toil/utils/toilKill.py +1 -1
  133. toil/utils/toilLaunchCluster.py +13 -2
  134. toil/utils/toilMain.py +3 -2
  135. toil/utils/toilRsyncCluster.py +1 -1
  136. toil/utils/toilSshCluster.py +1 -1
  137. toil/utils/toilStats.py +445 -305
  138. toil/utils/toilStatus.py +2 -5
  139. toil/version.py +10 -10
  140. toil/wdl/utils.py +2 -122
  141. toil/wdl/wdltoil.py +1257 -492
  142. toil/worker.py +55 -46
  143. toil-6.1.0.dist-info/METADATA +124 -0
  144. toil-6.1.0.dist-info/RECORD +241 -0
  145. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
  146. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
  147. toil/batchSystems/parasol.py +0 -379
  148. toil/batchSystems/tes.py +0 -459
  149. toil/test/batchSystems/parasolTestSupport.py +0 -117
  150. toil/test/wdl/builtinTest.py +0 -506
  151. toil/test/wdl/toilwdlTest.py +0 -522
  152. toil/wdl/toilwdl.py +0 -141
  153. toil/wdl/versions/dev.py +0 -107
  154. toil/wdl/versions/draft2.py +0 -980
  155. toil/wdl/versions/v1.py +0 -794
  156. toil/wdl/wdl_analysis.py +0 -116
  157. toil/wdl/wdl_functions.py +0 -997
  158. toil/wdl/wdl_synthesis.py +0 -1011
  159. toil/wdl/wdl_types.py +0 -243
  160. toil-5.12.0.dist-info/METADATA +0 -118
  161. toil-5.12.0.dist-info/RECORD +0 -244
  162. /toil/{wdl/versions → options}/__init__.py +0 -0
  163. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
  164. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/utils/toilStats.py CHANGED
@@ -14,52 +14,90 @@
  """Reports statistical data about a given Toil workflow."""
  import json
  import logging
+ import math
+ import sys
  from argparse import ArgumentParser, Namespace
  from functools import partial
- from typing import Any, Callable, Dict, List, Optional, TextIO
+ from typing import Any, Callable, Dict, List, Optional, TextIO, Union

  from toil.common import Config, Toil, parser_with_common_options
  from toil.job import Job
- from toil.jobStores.abstractJobStore import AbstractJobStore
+ from toil.jobStores.abstractJobStore import AbstractJobStore, NoSuchJobStoreException
  from toil.lib.expando import Expando
+ from toil.options.common import SYS_MAX_SIZE
  from toil.statsAndLogging import set_logging_from_options

  logger = logging.getLogger(__name__)

+ # These categories of stat will be reported
+ CATEGORIES = ["time", "clock", "wait", "memory", "disk"]
+ # These are the units they are stored in
+ CATEGORY_UNITS = {
+     "time": "s",
+     "clock": "core-s",
+     "wait": "core-s",
+     "memory": "KiB",
+     "disk": "B"
+ }
+ # These are what we call them to the user
+ TITLES = {
+     "time": "Real Time",
+     "clock": "CPU Time",
+     "wait": "CPU Wait",
+     "memory": "Memory",
+     "disk": "Disk"
+ }
+
+ # Of those, these are in time
+ TIME_CATEGORIES = {"time", "clock", "wait"}
+ # And these are in space
+ SPACE_CATEGORIES = {"memory", "disk"}
+ # These categories aren't stored and need to be computed
+ COMPUTED_CATEGORIES = {"wait"}
+
+ # The different kinds of summaries have both short and long names, and we need
+ # to convert between them.
+ LONG_FORMS = {
+     "med": "median",
+     "ave": "average",
+     "min": "min",
+     "total": "total",
+     "max": "max",
+ }

  class ColumnWidths:
      """
      Convenience object that stores the width of columns for printing. Helps make things pretty.
      """
+
      def __init__(self) -> None:
-         self.categories = ["time", "clock", "wait", "memory"]
+         self.categories = CATEGORIES
          self.fields_count = ["count", "min", "med", "ave", "max", "total"]
          self.fields = ["min", "med", "ave", "max", "total"]
          self.data: Dict[str, int] = {}
          for category in self.categories:
              for field in self.fields_count:
-                 self.setWidth(category, field, 8)
+                 self.set_width(category, field, 8)

      def title(self, category: str) -> int:
-         """ Return the total printed length of this category item.
-         """
-         return sum(self.getWidth(category, x) for x in self.fields)
+         """Return the total printed length of this category item."""
+         return sum(self.get_width(category, x) for x in self.fields)

-     def getWidth(self, category: str, field: str ) -> int:
+     def get_width(self, category: str, field: str) -> int:
          category = category.lower()
          return self.data[f"{category}_{field}"]

-     def setWidth(self, category: str, field: str, width: int) -> None:
+     def set_width(self, category: str, field: str, width: int) -> None:
          category = category.lower()
          self.data[f"{category}_{field}"] = width

      def report(self) -> None:
          for c in self.categories:
              for f in self.fields:
-                 print('%s %s %d' % (c, f, self.getWidth(c, f)))
+                 print("%s %s %d" % (c, f, self.get_width(c, f)))


- def padStr(s: str, field: Optional[int] = None) -> str:
+ def pad_str(s: str, field: Optional[int] = None) -> str:
      """Pad the beginning of a string with spaces, if necessary."""
      if field is None or len(s) >= field:
          return s
@@ -67,193 +105,250 @@ def padStr(s: str, field: Optional[int] = None) -> str:
      return " " * (field - len(s)) + s


- def prettyMemory(k: float, field: Optional[int] = None, isBytes: bool = False) -> str:
-     """Given input k as kilobytes, return a nicely formatted string."""
-     if isBytes:
-         k /= 1024
+ def pretty_space(k: float, field: Optional[int] = None, alone: bool = False) -> str:
+     """Given input k as kibibytes, return a nicely formatted string."""
+     # If we don't have a header to say bytes, include the B.
+     trailer = "B" if alone else ""
      if k < 1024:
-         return padStr("%gK" % k, field)
+         return pad_str("%gKi%s" % (k, trailer), field)
      if k < (1024 * 1024):
-         return padStr("%.1fM" % (k / 1024.0), field)
+         return pad_str("%.1fMi%s" % (k / 1024.0, trailer), field)
      if k < (1024 * 1024 * 1024):
-         return padStr("%.1fG" % (k / 1024.0 / 1024.0), field)
+         return pad_str("%.1fGi%s" % (k / 1024.0 / 1024.0, trailer), field)
      if k < (1024 * 1024 * 1024 * 1024):
-         return padStr("%.1fT" % (k / 1024.0 / 1024.0 / 1024.0), field)
+         return pad_str("%.1fTi%s" % (k / 1024.0 / 1024.0 / 1024.0, trailer), field)
      if k < (1024 * 1024 * 1024 * 1024 * 1024):
-         return padStr("%.1fP" % (k / 1024.0 / 1024.0 / 1024.0 / 1024.0), field)
+         return pad_str("%.1fPi%s" % (k / 1024.0 / 1024.0 / 1024.0 / 1024.0, trailer), field)

      # due to https://stackoverflow.com/questions/47149154
      assert False


- def prettyTime(t: float, field: Optional[int] = None) -> str:
-     """ Given input t as seconds, return a nicely formatted string.
+ def pretty_time(t: float, field: Optional[int] = None, unit: str = "s", alone: bool = False) -> str:
+     """
+     Given input t as seconds, return a nicely formatted string.
      """
+     assert unit in ("s", "core-s")
+     # Qualify our CPU times as CPU time if we aren't in a table that does that
+     unit_str = report_unit(unit) if alone else "s"
+
      from math import floor
+
      pluralDict = {True: "s", False: ""}
      if t < 120:
-         return padStr("%ds" % t, field)
+         return pad_str("%d%s" % (t, unit_str), field)
      if t < 120 * 60:
-         m = floor(t / 60.)
+         m = floor(t / 60.0)
          s = t % 60
-         return padStr("%dm%ds" % (m, s), field)
+         return pad_str("%dm%d%s" % (m, s, unit_str), field)
      if t < 25 * 60 * 60:
-         h = floor(t / 60. / 60.)
-         m = floor((t - (h * 60. * 60.)) / 60.)
+         h = floor(t / 60.0 / 60.0)
+         m = floor((t - (h * 60.0 * 60.0)) / 60.0)
          s = t % 60
-         return padStr("%dh%gm%ds" % (h, m, s), field)
+         return pad_str("%dh%gm%d%s" % (h, m, s, unit_str), field)
      if t < 7 * 24 * 60 * 60:
-         d = floor(t / 24. / 60. / 60.)
-         h = floor((t - (d * 24. * 60. * 60.)) / 60. / 60.)
-         m = floor((t
-                    - (d * 24. * 60. * 60.)
-                    - (h * 60. * 60.)) / 60.)
+         d = floor(t / 24.0 / 60.0 / 60.0)
+         h = floor((t - (d * 24.0 * 60.0 * 60.0)) / 60.0 / 60.0)
+         m = floor((t - (d * 24.0 * 60.0 * 60.0) - (h * 60.0 * 60.0)) / 60.0)
          s = t % 60
          dPlural = pluralDict[d > 1]
-         return padStr("%dday%s%dh%dm%ds" % (d, dPlural, h, m, s), field)
-     w = floor(t / 7. / 24. / 60. / 60.)
-     d = floor((t - (w * 7 * 24 * 60 * 60)) / 24. / 60. / 60.)
-     h = floor((t - (w * 7. * 24. * 60. * 60.)
-                - (d * 24. * 60. * 60.))
-               / 60. / 60.)
-     m = floor((t - (w * 7. * 24. * 60. * 60.)
-                - (d * 24. * 60. * 60.)
-                - (h * 60. * 60.)) / 60.)
+         return pad_str("%dday%s%dh%dm%d%s" % (d, dPlural, h, m, s, unit_str), field)
+     w = floor(t / 7.0 / 24.0 / 60.0 / 60.0)
+     d = floor((t - (w * 7 * 24 * 60 * 60)) / 24.0 / 60.0 / 60.0)
+     h = floor(
+         (t - (w * 7.0 * 24.0 * 60.0 * 60.0) - (d * 24.0 * 60.0 * 60.0)) / 60.0 / 60.0
+     )
+     m = floor(
+         (
+             t
+             - (w * 7.0 * 24.0 * 60.0 * 60.0)
+             - (d * 24.0 * 60.0 * 60.0)
+             - (h * 60.0 * 60.0)
+         )
+         / 60.0
+     )
      s = t % 60
      wPlural = pluralDict[w > 1]
      dPlural = pluralDict[d > 1]
-     return padStr("%dweek%s%dday%s%dh%dm%ds" % (w, wPlural, d,
-                                                 dPlural, h, m, s), field)
+     return pad_str("%dweek%s%dday%s%dh%dm%d%s" % (w, wPlural, d, dPlural, h, m, s, unit_str), field)

+ def report_unit(unit: str) -> str:
+     """
+     Format a unit name for display.
+     """
+     if unit == "core-s":
+         return "core·s"
+     return unit

- def reportTime(t: float, options: Namespace, field: Optional[int] = None) -> str:
+ def report_time(t: float, options: Namespace, field: Optional[int] = None, unit: str = "s", alone: bool = False) -> str:
      """Given t seconds, report back the correct format as string."""
+     assert unit in ("s", "core-s")
      if options.pretty:
-         return prettyTime(t, field=field)
-     elif field is not None:
-         return "%*.2f" % (field, t)
-     return "%.2f" % t
+         return pretty_time(t, field=field, unit=unit, alone=alone)
+     unit_text = f" {report_unit(unit)}" if alone else ""
+     if field is not None:
+         assert field >= len(unit_text)
+         return "%*.2f%s" % (field - len(unit_text), t, unit_text)
+     return "%.2f%s" % (t, unit_text)


- def reportMemory(k: float, options: Namespace, field: Optional[int] = None, isBytes: bool = False) -> str:
-     """Given k kilobytes, report back the correct format as string."""
+ def report_space(
+     k: float, options: Namespace, field: Optional[int] = None, unit: str = "KiB", alone: bool = False
+ ) -> str:
+     """
+     Given k kibibytes, report back the correct format as string.
+
+     If unit is set to B, convert to KiB first.
+     """
+     if unit == "B":
+         k /= 1024.0
+         unit = "KiB"
+     assert unit == "KiB"
      if options.pretty:
-         return prettyMemory(int(k), field=field, isBytes=isBytes)
+         return pretty_space(int(k), field=field, alone=alone)
      else:
-         if isBytes:
-             k /= 1024.
+         # If we don't have a heading to say bytes, include the B
+         trailer = "KiB" if alone else "Ki"
          if field is not None:
-             return "%*dK" % (field - 1, k) # -1 for the "K"
+             assert field >= len(trailer)
+             return "%*d%s" % (field - len(trailer), k, trailer)
          else:
-             return "%dK" % int(k)
+             return "%d%s" % (int(k), trailer)
+

+ def report_number(n: Union[int, float, None], field: Optional[int] = None, nan_value: str = "NaN") -> str:
+     """
+     Given a number, report back the correct format as string.
+
+     If it is a NaN or None, use nan_value to represent it instead.
+     """
+     if n is None or math.isnan(n):
+         return pad_str(nan_value, field=field)
+     else:
+         # Make sure not to format with too much precision for the field size;
+         # leave room for . and the spacing to the previous field.
+         return "%*.*g" % (field, field - 2, n) if field else "%g" % n

- def reportNumber(n: float, field: Optional[int] = None) -> str:
-     """Given n an integer, report back the correct format as string."""
-     return "%*g" % (field, n) if field else "%g" % n
+ def report(v: float, category: str, options: Namespace, field: Optional[int] = None, alone=False) -> str:
+     """
+     Report a value of the given category formatted as a string.

+     Uses the given field width if set.

- def sprintTag(key: str, tag: Expando, options: Namespace, columnWidths: Optional[ColumnWidths] = None) -> str:
-     """ Generate a pretty-print ready string from a JTTag().
+     If alone is set, the field is being formatted outside a table and might need a unit.
      """
+
+     unit = CATEGORY_UNITS.get(category)
+     if unit in ("s", "core-s"):
+         # This is time.
+         return report_time(v, options, field=field, unit=unit, alone=alone)
+     elif unit in ("B", "KiB"):
+         # This is space.
+         return report_space(v, options, field=field, unit=unit, alone=alone)
+     else:
+         raise ValueError(f"Unimplemented unit {unit} for category {category}")
+
+ def sprint_tag(
+     key: str,
+     tag: Expando,
+     options: Namespace,
+     columnWidths: Optional[ColumnWidths] = None,
+ ) -> str:
+     """Generate a pretty-print ready string from a JTTag()."""
      if columnWidths is None:
          columnWidths = ColumnWidths()
-     header = " %7s " % decorateTitle("Count", options)
+     header = " %7s " % decorate_title("count", "Count", options)
      sub_header = " %7s " % "n"
-     tag_str = f" {reportNumber(n=tag.total_number, field=7)}"
+     tag_str = f" {report_number(n=tag.total_number, field=7)}"
      out_str = ""
      if key == "job":
-         out_str += " {:<12} | {:>7}{:>7}{:>7}{:>7}\n".format("Worker Jobs", "min",
-                                                              "med", "ave", "max")
+         out_str += " {:<12} | {:>7}{:>7}{:>7}{:>7}\n".format(
+             "Worker Jobs", "min", "med", "ave", "max"
+         )
          worker_str = "%s| " % (" " * 14)
-         for t in [tag.min_number_per_worker, tag.median_number_per_worker,
-                   tag.average_number_per_worker, tag.max_number_per_worker]:
-             worker_str += reportNumber(n=t, field=7)
+         for t in [
+             tag.min_number_per_worker,
+             tag.median_number_per_worker,
+             tag.average_number_per_worker,
+             tag.max_number_per_worker,
+         ]:
+             worker_str += report_number(n=t, field=7)
          out_str += worker_str + "\n"
-     if "time" in options.categories:
-         header += "| %*s " % (columnWidths.title("time"),
-                               decorateTitle("Time", options))
-         sub_header += decorateSubHeader("Time", columnWidths, options)
-         tag_str += " | "
-         for t, width in [
-             (tag.min_time, columnWidths.getWidth("time", "min")),
-             (tag.median_time, columnWidths.getWidth("time", "med")),
-             (tag.average_time, columnWidths.getWidth("time", "ave")),
-             (tag.max_time, columnWidths.getWidth("time", "max")),
-             (tag.total_time, columnWidths.getWidth("time", "total")),
-         ]:
-             tag_str += reportTime(t, options, field=width)
-     if "clock" in options.categories:
-         header += "| %*s " % (columnWidths.title("clock"),
-                               decorateTitle("Clock", options))
-         sub_header += decorateSubHeader("Clock", columnWidths, options)
-         tag_str += " | "
-         for t, width in [
-             (tag.min_clock, columnWidths.getWidth("clock", "min")),
-             (tag.median_clock, columnWidths.getWidth("clock", "med")),
-             (tag.average_clock, columnWidths.getWidth("clock", "ave")),
-             (tag.max_clock, columnWidths.getWidth("clock", "max")),
-             (tag.total_clock, columnWidths.getWidth("clock", "total")),
-         ]:
-             tag_str += reportTime(t, options, field=width)
-     if "wait" in options.categories:
-         header += "| %*s " % (columnWidths.title("wait"),
-                               decorateTitle("Wait", options))
-         sub_header += decorateSubHeader("Wait", columnWidths, options)
-         tag_str += " | "
-         for t, width in [
-             (tag.min_wait, columnWidths.getWidth("wait", "min")),
-             (tag.median_wait, columnWidths.getWidth("wait", "med")),
-             (tag.average_wait, columnWidths.getWidth("wait", "ave")),
-             (tag.max_wait, columnWidths.getWidth("wait", "max")),
-             (tag.total_wait, columnWidths.getWidth("wait", "total")),
-         ]:
-             tag_str += reportTime(t, options, field=width)
-     if "memory" in options.categories:
-         header += "| %*s " % (columnWidths.title("memory"),
-                               decorateTitle("Memory", options))
-         sub_header += decorateSubHeader("Memory", columnWidths, options)
+
+     for category in CATEGORIES:
+         if category not in options.categories:
+             continue
+
+         header += "| %*s " % (
+             columnWidths.title(category),
+             decorate_title(category, TITLES[category], options),
+         )
+         sub_header += decorate_subheader(category, columnWidths, options)
          tag_str += " | "
-         for t, width in [
-             (tag.min_memory, columnWidths.getWidth("memory", "min")),
-             (tag.median_memory, columnWidths.getWidth("memory", "med")),
-             (tag.average_memory, columnWidths.getWidth("memory", "ave")),
-             (tag.max_memory, columnWidths.getWidth("memory", "max")),
-             (tag.total_memory, columnWidths.getWidth("memory", "total")),
-         ]:
-             tag_str += reportMemory(t, options, field=width)
+
+         for field in ["min", "med", "ave", "max", "total"]:
+             t = getattr(tag, f"{LONG_FORMS[field]}_{category}")
+             width = columnWidths.get_width(category, field)
+             s = report(t, category, options, field=width)
+             tag_str += s
+
      out_str += header + "\n"
      out_str += sub_header + "\n"
      out_str += tag_str + "\n"
      return out_str

- def decorateTitle(title: str, options: Namespace) -> str:
-     """ Add a marker to TITLE if the TITLE is sorted on.
+
+ def decorate_title(category: str, title: str, options: Namespace) -> str:
+     """
+     Add extra parts to the category titles.
+
+     Add units to title if they won't appear in the formatted values.
+     Add a marker to TITLE if the TITLE is sorted on.
      """
-     if title.lower() == options.sortCategory:
+     unit = CATEGORY_UNITS.get(category)
+     if unit in ("s", "core-s") and not options.pretty:
+         # This is a time and we won't write it out as text, so add a unit.
+         title = f"{title} ({report_unit(unit)})"
+     elif unit == "core-s" and options.pretty:
+         # This is a core-second category and we won't be putting the core unit
+         # in the value, so note that here.
+         title = f"{title} (core)"
+     elif unit in ("B", "KiB"):
+         # The Ki part will appear in the cell so we need a B
+         title = f"{title} (B)"
+     if category.lower() == options.sortCategory:
          return "%s*" % title
      else:
          return title

- def decorateSubHeader(title: str, columnWidths: ColumnWidths, options: Namespace) -> str:
-     """ Add a marker to the correct field if the TITLE is sorted on.
-     """
-     title = title.lower()
-     if title != options.sortCategory:
+
+ def decorate_subheader(
+     category: str, columnWidths: ColumnWidths, options: Namespace
+ ) -> str:
+     """Add a marker to the correct field if the TITLE is sorted on."""
+     if category != options.sortCategory:
          s = "| %*s%*s%*s%*s%*s " % (
-             columnWidths.getWidth(title, "min"), "min",
-             columnWidths.getWidth(title, "med"), "med",
-             columnWidths.getWidth(title, "ave"), "ave",
-             columnWidths.getWidth(title, "max"), "max",
-             columnWidths.getWidth(title, "total"), "total")
+             columnWidths.get_width(category, "min"),
+             "min",
+             columnWidths.get_width(category, "med"),
+             "med",
+             columnWidths.get_width(category, "ave"),
+             "ave",
+             columnWidths.get_width(category, "max"),
+             "max",
+             columnWidths.get_width(category, "total"),
+             "total",
+         )
          return s
      else:
          s = "| "
-         for field, width in [("min", columnWidths.getWidth(title, "min")),
-                              ("med", columnWidths.getWidth(title, "med")),
-                              ("ave", columnWidths.getWidth(title, "ave")),
-                              ("max", columnWidths.getWidth(title, "max")),
-                              ("total", columnWidths.getWidth(title, "total"))]:
+         for field, width in [
+             ("min", columnWidths.get_width(category, "min")),
+             ("med", columnWidths.get_width(category, "med")),
+             ("ave", columnWidths.get_width(category, "ave")),
+             ("max", columnWidths.get_width(category, "max")),
+             ("total", columnWidths.get_width(category, "total")),
+         ]:
              if options.sortField == field:
                  s += "%*s*" % (width - 1, field)
              else:
@@ -270,232 +365,242 @@ def get(tree: Expando, name: str) -> float:
          return float("nan")


- def sortJobs(jobTypes: List[Any], options: Namespace) -> List[Any]:
+ def sort_jobs(jobTypes: List[Any], options: Namespace) -> List[Any]:
      """Return a jobTypes all sorted."""
-     longforms = {"med": "median",
-                  "ave": "average",
-                  "min": "min",
-                  "total": "total",
-                  "max": "max",}
-     sortField = longforms[options.sortField]
-     if (options.sortCategory == "time" or
-         options.sortCategory == "clock" or
-         options.sortCategory == "wait" or
-         options.sortCategory == "memory"
-         ):
+     sortField = LONG_FORMS[options.sortField]
+     if (
+         options.sortCategory in CATEGORIES
+     ):
          return sorted(
              jobTypes,
-             # due to https://github.com/python/mypy/issues/9656
-             key=lambda tag: getattr(tag, "%s_%s" # type: ignore
-                                     % (sortField, options.sortCategory)),
-             reverse=options.sortReverse)
+             key=lambda tag: getattr(tag, "%s_%s" % (sortField, options.sortCategory)),
+             reverse=options.sort == "decending",
+         )
      elif options.sortCategory == "alpha":
          return sorted(
-             jobTypes, key=lambda tag: tag.name, # type: ignore
-             reverse=options.sortReverse)
+             jobTypes,
+             key=lambda tag: tag.name,  # type: ignore
+             reverse=options.sort == "decending",
+         )
      elif options.sortCategory == "count":
-         return sorted(jobTypes, key=lambda tag: tag.total_number, # type: ignore
-                       reverse=options.sortReverse)
+         return sorted(
+             jobTypes,
+             key=lambda tag: tag.total_number,  # type: ignore
+             reverse=options.sort == "decending",
+         )

      # due to https://stackoverflow.com/questions/47149154
      assert False


- def reportPrettyData(root: Expando, worker: List[Job], job: List[Job], job_types: List[Any], options: Namespace) -> str:
+ def report_pretty_data(
+     root: Expando,
+     worker: Expando,
+     job: Expando,
+     job_types: List[Any],
+     options: Namespace,
+ ) -> str:
      """Print the important bits out."""
      out_str = "Batch System: %s\n" % root.batch_system
-     out_str += ("Default Cores: %s Default Memory: %s\n"
-                 "Max Cores: %s\n" % (
-         reportNumber(n=get(root, "default_cores")),
-         reportMemory(get(root, "default_memory"), options, isBytes=True),
-         reportNumber(n=get(root, "max_cores")),
-     ))
-     out_str += ("Total Clock: {} Total Runtime: {}\n".format(
-         reportTime(get(root, "total_clock"), options),
-         reportTime(get(root, "total_run_time"), options),
-     ))
-     job_types = sortJobs(job_types, options)
-     columnWidths = computeColumnWidths(job_types, worker, job, options)
+     out_str += "Default Cores: %s Default Memory: %s\n" "Max Cores: %s\n" % (
+         report_number(n=get(root, "default_cores")),
+         # Although per-job memory usage is in KiB, our default is stored in bytes.
+         report_space(get(root, "default_memory"), options, unit="B", alone=True),
+         report_number(n=get(root, "max_cores"), nan_value="unlimited"),
+     )
+     out_str += "Local CPU Time: {} Overall Runtime: {}\n".format(
+         report(get(root, "total_clock"), "clock", options, alone=True),
+         report(get(root, "total_run_time"), "time", options, alone=True),
+     )
+     job_types = sort_jobs(job_types, options)
+     columnWidths = compute_column_widths(job_types, worker, job, options)
      out_str += "Worker\n"
-     out_str += sprintTag("worker", worker, options, columnWidths=columnWidths)
+     out_str += sprint_tag("worker", worker, options, columnWidths=columnWidths)
      out_str += "Job\n"
-     out_str += sprintTag("job", job, options, columnWidths=columnWidths)
+     out_str += sprint_tag("job", job, options, columnWidths=columnWidths)
      for t in job_types:
          out_str += f" {t.name}\n"
          out_str += f" Total Cores: {t.total_cores}\n"
-         out_str += sprintTag(t.name, t, options, columnWidths=columnWidths)
+         out_str += sprint_tag(t.name, t, options, columnWidths=columnWidths)
      return out_str


- def computeColumnWidths(job_types: List[Any], worker: List[Job], job: List[Job], options: Expando) -> ColumnWidths:
-     """ Return a ColumnWidths() object with the correct max widths.
-     """
+ def compute_column_widths(
+     job_types: List[Any], worker: Expando, job: Expando, options: Namespace
+ ) -> ColumnWidths:
+     """Return a ColumnWidths() object with the correct max widths."""
      cw = ColumnWidths()
      for t in job_types:
-         updateColumnWidths(t, cw, options)
-     updateColumnWidths(worker, cw, options)
-     updateColumnWidths(job, cw, options)
+         update_column_widths(t, cw, options)
+     update_column_widths(worker, cw, options)
+     update_column_widths(job, cw, options)
      return cw


- def updateColumnWidths(tag: Expando, cw: ColumnWidths, options: Expando) -> None:
-     """ Update the column width attributes for this tag's fields.
-     """
-     longforms = {"med": "median",
-                  "ave": "average",
-                  "min": "min",
-                  "total": "total",
-                  "max": "max",}
-     for category in ["time", "clock", "wait", "memory"]:
+ def update_column_widths(tag: Expando, cw: ColumnWidths, options: Namespace) -> None:
+     """Update the column width attributes for this tag's fields."""
+     # TODO: Deduplicate with actual printing code!
+     for category in CATEGORIES:
          if category in options.categories:
              for field in ["min", "med", "ave", "max", "total"]:
-                 t = getattr(tag, f"{longforms[field]}_{category}")
-                 if category in ["time", "clock", "wait"]:
-                     s = reportTime(t, options,
-                                    field=cw.getWidth(category, field)).strip()
-                 else:
-                     s = reportMemory(t, options,
-                                      field=cw.getWidth(category, field), isBytes=True).strip()
-                 if len(s) >= cw.getWidth(category, field):
+                 t = getattr(tag, f"{LONG_FORMS[field]}_{category}")
+                 width = cw.get_width(category, field)
+                 s = report(t, category, options, field=width).strip()
+                 if len(s) >= cw.get_width(category, field):
                      # this string is larger than max, width must be increased
-                     cw.setWidth(category, field, len(s) + 1)
+                     cw.set_width(category, field, len(s) + 1)


- def buildElement(element: Expando, items: List[Job], itemName: str) -> Expando:
-     """ Create an element for output.
-     """
+ def build_element(element: Expando, items: List[Job], item_name: str, defaults: dict[str, float]) -> Expando:
+     """Create an element for output."""
+
      def assertNonnegative(i: float, name: str) -> float:
          if i < 0:
-             raise RuntimeError("Negative value %s reported for %s" %(i,name) )
+             raise RuntimeError("Negative value %s reported for %s" % (i, name))
          else:
              return float(i)

-     totalCores = 0
-
-     itemTimes = []
-     itemClocks = []
-     itemMemory = []
+     # Make lists of all values for all items in each category, plus requested cores.
+     item_values = {category: [] for category in (CATEGORIES + ["cores"])}

      for item in items:
          # If something lacks an entry, assume it used none of that thing.
          # This avoids crashing when jobs e.g. aren't done.
-         itemTimes.append(assertNonnegative(float(item.get("time", 0)), "time"))
-         itemClocks.append(assertNonnegative(float(item.get("clock", 0)), "clock"))
-         itemMemory.append(assertNonnegative(float(item.get("memory", 0)), "memory"))
-         totalCores += assertNonnegative(float(item.get("requested_cores", 0)), "requested_cores")
+         for category, values in item_values.items():
+             if category in COMPUTED_CATEGORIES:
+                 continue
+             category_key = category if category != "cores" else "requested_cores"
+             category_value = assertNonnegative(float(item.get(category_key, defaults[category])), category)
+             values.append(category_value)
+
+     for index in range(0, len(item_values[CATEGORIES[0]])):
+         # For each item, compute the computed categories
+         item_values["wait"].append(item_values["time"][index] * item_values["cores"][index] - item_values["clock"][index])
+
+     for category, values in item_values.items():
+         values.sort()
+
+     if len(item_values[CATEGORIES[0]]) == 0:
+         # Nothing actually there so make a 0 value
+         for k, v in item_values.items():
+             v.append(0)
+
+     item_element = Expando(
+         total_number=float(len(items)),
+         name=item_name
+     )

-     assert len(itemClocks) == len(itemTimes) == len(itemMemory)
+     for category, values in item_values.items():
+         item_element["total_" + category] = float(sum(values))
+         item_element["median_" + category] = float(values[len(values) // 2])
+         item_element["average_" + category] = float(sum(values) / len(values))
+         item_element["min_" + category] = float(min(values))
+         item_element["max_" + category] = float(max(values))

-     itemWaits = []
-     for index in range(0,len(itemTimes)):
-         itemWaits.append(itemTimes[index] - itemClocks[index])
+     element[item_name] = item_element

-     itemWaits.sort()
-     itemTimes.sort()
-     itemClocks.sort()
-     itemMemory.sort()
+     return item_element

-     if len(itemTimes) == 0:
-         itemTimes.append(0)
-         itemClocks.append(0)
-         itemWaits.append(0)
-         itemMemory.append(0)

-     element[itemName]=Expando(
-         total_number=float(len(items)),
-         total_time=float(sum(itemTimes)),
-         median_time=float(itemTimes[len(itemTimes) // 2]),
-         average_time=float(sum(itemTimes) / len(itemTimes)),
-         min_time=float(min(itemTimes)),
-         max_time=float(max(itemTimes)),
-         total_clock=float(sum(itemClocks)),
-         median_clock=float(itemClocks[len(itemClocks) // 2]),
-         average_clock=float(sum(itemClocks) / len(itemClocks)),
-         min_clock=float(min(itemClocks)),
-         max_clock=float(max(itemClocks)),
-         total_wait=float(sum(itemWaits)),
-         median_wait=float(itemWaits[len(itemWaits) // 2]),
-         average_wait=float(sum(itemWaits) / len(itemWaits)),
-         min_wait=float(min(itemWaits)),
-         max_wait=float(max(itemWaits)),
-         total_memory=float(sum(itemMemory)),
-         median_memory=float(itemMemory[len(itemMemory) // 2]),
-         average_memory=float(sum(itemMemory) / len(itemMemory)),
-         min_memory=float(min(itemMemory)),
-         max_memory=float(max(itemMemory)),
-         total_cores=totalCores,
-         name=itemName
-     )
-     return element[itemName]
+ def create_summary(
+     element: Expando,
+     containingItems: List[Expando],
+     containingItemName: str,
+     count_contained: Callable[[Expando], int],
+ ) -> None:
+     """
+     Figure out how many jobs (or contained items) ran on each worker (or containing item).
+
+     Stick a bunch of xxx_number_per_xxx stats into element to describe this.
+
+     :param count_contained: function that maps from containing item to number of contained items.
+     """

+     # TODO: this still thinks like the old XML stats, even though now the
+     # worker records no longer actually contain the job records.

- def createSummary(element: Expando, containingItems: List[Job], containingItemName: str, getFn: Callable[[Job], List[Optional[Job]]]) -> None:
-     itemCounts = [len(getFn(containingItem)) for
-                   containingItem in containingItems]
+     itemCounts = [count_contained(containingItem) for containingItem in containingItems]
      itemCounts.sort()
      if len(itemCounts) == 0:
          itemCounts.append(0)
-     element["median_number_per_%s" % containingItemName] = itemCounts[len(itemCounts) // 2]
-     element["average_number_per_%s" % containingItemName] = float(sum(itemCounts) / len(itemCounts))
+     element["median_number_per_%s" % containingItemName] = itemCounts[
+         len(itemCounts) // 2
+     ]
+     element["average_number_per_%s" % containingItemName] = float(
+         sum(itemCounts) / len(itemCounts)
+     )
      element["min_number_per_%s" % containingItemName] = min(itemCounts)
      element["max_number_per_%s" % containingItemName] = max(itemCounts)


- def getStats(jobStore: AbstractJobStore) -> Expando:
-     """ Collect and return the stats and config data.
+ def get_stats(jobStore: AbstractJobStore) -> Expando:
+     """
+     Sum together all the stats information in the job store.
+
+     Produces one object containing lists of the values from all the summed objects.
      """
-     def aggregateStats(fileHandle: TextIO, aggregateObject: Expando) -> None:
+
+     def aggregate_stats(fileHandle: TextIO, aggregateObject: Expando) -> None:
          try:
              stats = json.load(fileHandle, object_hook=Expando)
              for key in list(stats.keys()):
                  if key in aggregateObject:
                      aggregateObject[key].append(stats[key])
                  else:
-                     aggregateObject[key]=[stats[key]]
+                     aggregateObject[key] = [stats[key]]
          except ValueError:
-             logger.critical("File %s contains corrupted json. Skipping file." % fileHandle)
+             logger.critical(
                 "File %s contains corrupted json. Skipping file." % fileHandle
+             )
              pass # The file is corrupted.

      aggregateObject = Expando()
-     callBack = partial(aggregateStats, aggregateObject=aggregateObject)
+     callBack = partial(aggregate_stats, aggregateObject=aggregateObject)
      jobStore.read_logs(callBack, read_all=True)
      return aggregateObject


- def processData(config: Config, stats: Expando) -> Expando:
+ def process_data(config: Config, stats: Expando) -> Expando:
      """
      Collate the stats and report
      """
-     if 'total_time' not in stats or 'total_clock' not in stats:
+     if "total_time" not in stats or "total_clock" not in stats:
          # toil job not finished yet
          stats.total_time = [0.0]
          stats.total_clock = [0.0]

+     # This is actually the sum of *overall* wall clock time as measured by the
+     # leader in each leader invocation, not a sum over jobs.
      stats.total_time = sum(float(number) for number in stats.total_time)
+     # And this is CPU clock as measured by the leader, so it will count time
+     # used in local jobs but not remote ones.
      stats.total_clock = sum(float(number) for number in stats.total_clock)

-     collatedStatsTag = Expando(total_run_time=stats.total_time,
-                                total_clock=stats.total_clock,
-                                batch_system=config.batchSystem,
-                                default_memory=str(config.defaultMemory),
-                                default_cores=str(config.defaultCores),
-                                max_cores=str(config.maxCores)
-                                )
+     collatedStatsTag = Expando(
+         total_run_time=stats.total_time,
+         total_clock=stats.total_clock,
+         batch_system=config.batchSystem,
+         default_memory=str(config.defaultMemory),
+         default_cores=str(config.defaultCores),
+         max_cores=str(config.maxCores if config.maxCores != SYS_MAX_SIZE else None),
+     )

      # Add worker info
-     worker = [_f for _f in getattr(stats, 'workers', []) if _f]
-     jobs = [_f for _f in getattr(stats, 'jobs', []) if _f]
+     worker = [_f for _f in getattr(stats, "workers", []) if _f]
+     jobs = [_f for _f in getattr(stats, "jobs", []) if _f]
      jobs = [item for sublist in jobs for item in sublist]

-     def fn4(job: Job) -> List[Optional[Job]]:
-         try:
-             return list(jobs)
-         except TypeError:
-             return []
+     # Work out what usage to assume for things that didn't report
+     defaults = {category: 0 for category in CATEGORIES}
+     defaults["cores"] = config.defaultCores

-     buildElement(collatedStatsTag, worker, "worker")
-     createSummary(buildElement(collatedStatsTag, jobs, "jobs"),
-                   getattr(stats, 'workers', []), "worker", fn4)
+     build_element(collatedStatsTag, worker, "worker", defaults)
+     create_summary(
+         build_element(collatedStatsTag, jobs, "jobs", defaults),
+         getattr(stats, "workers", []),
+         "worker",
+         lambda worker: getattr(worker, "jobs_run", 0)
+     )
      # Get info for each job
      jobNames = set()
      for job in jobs:
@@ -503,18 +608,20 @@ def processData(config: Config, stats: Expando) -> Expando:
      jobTypesTag = Expando()
      collatedStatsTag.job_types = jobTypesTag
      for jobName in jobNames:
-         jobTypes = [ job for job in jobs if job.class_name == jobName ]
-         buildElement(jobTypesTag, jobTypes, jobName)
+         jobTypes = [job for job in jobs if job.class_name == jobName]
+         build_element(jobTypesTag, jobTypes, jobName, defaults)
      collatedStatsTag.name = "collatedStatsTag"
      return collatedStatsTag


- def reportData(tree: Expando, options: Namespace) -> None:
+ def report_data(tree: Expando, options: Namespace) -> None:
      # Now dump it all out to file
      if options.raw:
-         out_str = json.dumps(tree, indent=4, separators=(',', ': '))
+         out_str = json.dumps(tree, indent=4, separators=(",", ": "))
      else:
-         out_str = reportPrettyData(tree, tree.worker, tree.jobs, tree.job_types.values(), options)
+         out_str = report_pretty_data(
+             tree, tree.worker, tree.jobs, tree.job_types.values(), options
+         )
      if options.outputFile is not None:
          with open(options.outputFile, "w") as f:
              f.write(out_str)
@@ -522,40 +629,73 @@ def reportData(tree: Expando, options: Namespace) -> None:
          print(out_str)


- category_choices = ["time", "clock", "wait", "memory"]
- sort_category_choices = ["time", "clock", "wait", "memory", "alpha", "count"]
- sort_field_choices = ['min', 'med', 'ave', 'max', 'total']
+ sort_category_choices = CATEGORIES + ["alpha", "count"]
+ sort_field_choices = ["min", "med", "ave", "max", "total"]


  def add_stats_options(parser: ArgumentParser) -> None:
-     parser.add_argument("--outputFile", dest="outputFile", default=None, help="File in which to write results.")
-     parser.add_argument("--raw", action="store_true", default=False, help="Return raw json data.")
-     parser.add_argument("--pretty", "--human", action="store_true", default=False,
-                         help="if not raw, prettify the numbers to be human readable.")
-     parser.add_argument("--sortReverse", "--reverseSort", default=False, action="store_true", help="Reverse sort.")
-     parser.add_argument("--categories", default=','.join(category_choices), type=str,
-                         help=f"Comma separated list of any of the following: {category_choices}.")
-     parser.add_argument("--sortCategory", default="time", choices=sort_category_choices,
-                         help=f"How to sort job categories. Choices: {sort_category_choices}. Default: time.")
-     parser.add_argument("--sortField", default="med", choices=sort_field_choices,
-                         help=f"How to sort job fields. Choices: {sort_field_choices}. Default: med.")
+     parser.add_argument(
+         "--outputFile",
+         dest="outputFile",
+         default=None,
+         help="File in which to write results.",
+     )
+     parser.add_argument(
+         "--raw", action="store_true", default=False, help="Return raw json data."
+     )
+     parser.add_argument(
+         "--pretty",
+         "--human",
+         action="store_true",
+         default=False,
+         help="if not raw, prettify the numbers to be human readable.",
+     )
+     parser.add_argument(
+         "--sort",
+         default="decending",
+         choices=["ascending", "decending"],
+         help="Sort direction.",
+     )
+     parser.add_argument(
+         "--categories",
+         default=",".join(CATEGORIES),
+         type=str,
+         help=f"Comma separated list of any of the following: {CATEGORIES}.",
+     )
+     parser.add_argument(
+         "--sortCategory",
+         default="time",
+         choices=sort_category_choices,
+         help=f"How to sort job categories.",
+     )
+     parser.add_argument(
+         "--sortField",
+         default="med",
+         choices=sort_field_choices,
+         help=f"How to sort job fields.",
+     )


  def main() -> None:
      """Reports stats on the workflow, use with --stats option to toil."""
-     parser = parser_with_common_options()
+     parser = parser_with_common_options(prog="toil stats")
      add_stats_options(parser)
      options = parser.parse_args()

      for c in options.categories.split(","):
-         if c.strip() not in category_choices:
-             raise ValueError(f'{c} not in {category_choices}!')
+         if c.strip().lower() not in CATEGORIES:
+             logger.critical("Cannot use category %s, options are: %s", c.strip().lower(), CATEGORIES)
+             sys.exit(1)
      options.categories = [x.strip().lower() for x in options.categories.split(",")]

      set_logging_from_options(options)
      config = Config()
      config.setOptions(options)
-     jobStore = Toil.resumeJobStore(config.jobStore)
-     stats = getStats(jobStore)
-     collatedStatsTag = processData(jobStore.config, stats)
-     reportData(collatedStatsTag, options)
+     try:
+         jobStore = Toil.resumeJobStore(config.jobStore)
+     except NoSuchJobStoreException:
+         logger.critical("The job store %s does not exist", config.jobStore)
+         sys.exit(1)
+     stats = get_stats(jobStore)
+     collatedStatsTag = process_data(jobStore.config, stats)
+     report_data(collatedStatsTag, options)
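A minimal sketch of how the renamed reporting helpers added in this file compose, assuming toil 6.1.0 is installed; the Namespace here is a hypothetical stand-in carrying only the pretty flag that the formatters read, and the commented outputs follow the formatting logic shown in this diff:

    from argparse import Namespace

    from toil.utils.toilStats import report, report_number

    # Only the attribute the formatters read needs to exist on the namespace.
    options = Namespace(pretty=True)

    # "clock" values are stored in core-seconds (see CATEGORY_UNITS); alone=True
    # appends the unit because the value is rendered outside a table.
    print(report(5400.0, "clock", options, alone=True))   # 90m0core·s
    print(report(2048.0, "memory", options, alone=True))  # 2.0MiB
    print(report_number(None, nan_value="unlimited"))     # unlimited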