triton-model-analyzer 1.48.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,658 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import logging
18
+ from copy import deepcopy
19
+ from functools import total_ordering
20
+ from typing import Any, Dict, List, Optional
21
+
22
+ from model_analyzer.constants import COMPARISON_SCORE_THRESHOLD, LOGGER_NAME
23
+ from model_analyzer.record.record import Record, RecordType
24
+ from model_analyzer.result.constraint_manager import ConstraintManager
25
+ from model_analyzer.result.model_config_measurement import ModelConfigMeasurement
26
+
27
+ logger = logging.getLogger(LOGGER_NAME)
28
+
29
+
30
@total_ordering
class RunConfigMeasurement:
    """
    Encapsulates the set of metrics obtained from all model configs
    in a single RunConfig

    Ordering: ``__eq__`` plus ``__lt__`` (expanded by ``@total_ordering``)
    rank measurements by a weighted score across the per-model-config
    measurements -- see ``_compare_measurements``.

    NOTE: because ``__eq__`` is defined without ``__hash__``, instances are
    unhashable (Python sets ``__hash__`` to ``None``).
    """

    def __init__(
        self, model_variants_name: Optional[str], gpu_data: Dict[int, List[Record]]
    ):
        """
        model_variants_name: str
            Name of the model variants this measurement was collected for

        gpu_data : dict of list of Records
            Metrics from the monitors that have a GPU UUID
            associated with them
        """
        self._model_variants_name = model_variants_name

        self._gpu_data = gpu_data
        # Per-metric averages across all GPUs, plus a tag -> Record index
        # over those averages for O(1) lookup in get_gpu_metric().
        self._avg_gpu_data = self._average_list(list(self._gpu_data.values()))
        self._avg_gpu_data_from_tag = self._get_avg_gpu_data_from_tag()

        # Populated incrementally via add_model_config_measurement();
        # weights default to 1 per model and can be replaced wholesale by
        # set_model_config_weighting().
        self._model_config_measurements: List[ModelConfigMeasurement] = []
        self._model_config_weights: List[float] = []
        self._constraint_manager: Optional[ConstraintManager] = None

    def to_dict(self):
        """
        Serialize to a plain dict for checkpointing.

        WARNING: this deep-copies ``__dict__`` directly, so the private
        attribute names above ARE the on-disk format -- renaming any of
        them breaks checkpoint compatibility with from_dict().
        Weights and the constraint manager are transient and excluded.
        """
        rcm_dict = deepcopy(self.__dict__)
        del rcm_dict["_model_config_weights"]
        del rcm_dict["_constraint_manager"]

        return rcm_dict

    @classmethod
    def from_dict(cls, run_config_measurement_dict: Dict) -> "RunConfigMeasurement"):
        """
        Reconstruct a RunConfigMeasurement from a to_dict() checkpoint.

        NOTE: the helpers below are instance methods invoked unbound, with
        the freshly-built instance passed explicitly as ``self``.

        The returned object has NO model-config weights and NO constraint
        manager -- callers must invoke set_model_config_weighting() and
        set_constraint_manager() before comparing measurements.
        """
        run_config_measurement = RunConfigMeasurement(None, {})

        run_config_measurement._model_variants_name = run_config_measurement_dict[
            "_model_variants_name"
        ]

        run_config_measurement._gpu_data = cls._deserialize_gpu_data(
            run_config_measurement, run_config_measurement_dict["_gpu_data"]
        )

        # Recompute the derived averages rather than trusting the
        # serialized copies.
        run_config_measurement._avg_gpu_data = cls._average_list(
            run_config_measurement, list(run_config_measurement._gpu_data.values())
        )

        run_config_measurement._avg_gpu_data_from_tag = cls._get_avg_gpu_data_from_tag(
            run_config_measurement
        )

        run_config_measurement._model_config_measurements = (
            cls._deserialize_model_config_measurements(
                run_config_measurement,
                run_config_measurement_dict["_model_config_measurements"],
            )
        )

        return run_config_measurement

    def set_model_config_weighting(self, model_config_weights: List[int]) -> None:
        """
        Sets the model config weightings used when calculating
        weighted metrics

        Parameters
        ----------
        model_weights: list of ints
            Weights are the relative importance of the model_configs
            with respect to one another
        """
        # Normalize so the stored weights sum to 1.
        self._model_config_weights = [
            model_config_weight / sum(model_config_weights)
            for model_config_weight in model_config_weights
        ]

    def set_constraint_manager(self, constraint_manager: ConstraintManager) -> None:
        """

        Parameters
        ----------
        constraint_manager: ConstraintManager object
            Used to determine if an ModelConfigMeasurement passes or fails
        """
        self._constraint_manager = constraint_manager

    def add_model_config_measurement(
        self,
        model_config_name: str,
        model_specific_pa_params: Dict[str, int],
        non_gpu_data: List[Record],
    ) -> None:
        """
        Adds a measurement from a single model config in this PA's run

        model_config_name : string
            The model config name that was used for this PA run
        model_specific_pa_params: dict
            Dictionary of PA parameters that change between models
            in a multi-model run
        non_gpu_data : list of Records
            Metrics that do not have a GPU UUID associated with them,
            from either CPU or PA
        """
        self._model_config_measurements.append(
            ModelConfigMeasurement(
                model_config_name, model_specific_pa_params, non_gpu_data
            )
        )

        # By default setting all models to have equal weighting
        self._model_config_weights.append(1)

    def set_metric_weightings(self, metric_objectives: List[Dict[str, int]]) -> None:
        """
        Sets the metric weighting for all non-GPU measurements

        Parameters
        ----------
        metric_objectives : list of dict of RecordTypes
            One entry per ModelConfig
        """
        # Positional pairing: metric_objectives[i] applies to the i-th
        # added model config measurement.
        for index, measurement in enumerate(self._model_config_measurements):
            measurement.set_metric_weighting(metric_objectives[index])

    def model_variants_name(self) -> Optional[str]:
        """
        Returns: str
            The name of the model variants this measurement was collected for
        """

        return self._model_variants_name

    def model_name(self) -> Optional[str]:
        """
        Returns
        -------
        str: Model name for this RunConfigMeasurement

        NOTE: alias of model_variants_name() -- both return the same
        underlying attribute.
        """

        return self._model_variants_name

    def data(self) -> Dict[str, List[Record]]:
        """
        Returns
        -------
        dict
            keys are model names and values are list of Records per model
            All the metric values in each model's measurement
            for both GPU and non-GPU

        The (shared) GPU averages are prepended to each model's
        non-GPU records.
        """

        return {
            mcm.model_name(): self._avg_gpu_data + mcm.non_gpu_data()
            for mcm in self._model_config_measurements
        }

    def gpu_data(self) -> Dict[int, List[Record]]:
        """
        Returns
        -------
        Dict of List of Records
            GPU specific measurements
        """

        return self._gpu_data

    def non_gpu_data(self) -> List[List[Record]]:
        """
        Returns
        -------
        per model list of a list Records
            The non GPU specific measurements
        """

        return [
            model_config_measurement.non_gpu_data()
            for model_config_measurement in self._model_config_measurements
        ]

    def get_gpu_metric(self, tag: str) -> Optional[Record]:
        """
        Returns the average of Records associated with this GPU metric

        Parameters
        ----------
        tag : str
            A human readable tag that corresponds
            to a particular GPU metric

        Returns
        -------
        Record:
            of average GPU metric Records corresponding to this tag,
            or None if tag not found
        """
        if tag in self._avg_gpu_data_from_tag:
            return self._avg_gpu_data_from_tag[tag]
        else:
            # Missing tag is a warning, not an error: measurements taken
            # on different devices may legitimately lack a metric.
            logger.warning(
                f"No GPU metric corresponding to tag '{tag}' "
                "found in the model's measurement. Possibly comparing "
                "measurements across devices."
            )
            return None

    def get_non_gpu_metric(self, tag: str) -> List[Optional[Record]]:
        """
        Returns the Records associated with this non-GPU metric

        Parameters
        ----------
        tag : str
            A human readable tag that corresponds
            to a particular metric

        Returns
        -------
        list:
            of per model list:
            of non-GPU metric Records, or None if tag not found
        """
        # NOTE(review): entries are None when a model lacks the tag
        # (per ModelConfigMeasurement.get_metric docstring contract).
        return [
            model_config_measurement.get_metric(tag)
            for model_config_measurement in self._model_config_measurements
        ]

    def get_weighted_non_gpu_metric(self, tag: str) -> List[Record]:
        """
        Parameters
        ----------
        tag : str
            A human readable tag that corresponds
            to a particular non-GPU metric

        Returns
        -------
        list:
            of per model list:
            of weighted non-GPU metric Records,
            or None if tag not found

        NOTE(review): if get_metric(tag) returns None for any model the
        multiplication below will raise -- callers appear to rely on the
        tag being present; confirm before reuse.
        """
        assert len(self._model_config_weights) == len(self._model_config_measurements)

        return [
            model_config_measurement.get_metric(tag) * self._model_config_weights[index]
            for index, model_config_measurement in enumerate(
                self._model_config_measurements
            )
        ]

    def get_non_gpu_metric_value(self, tag: str, default_value: Any = 0) -> float:
        """
        Parameters
        ----------
        tag : str
            A human readable tag that corresponds
            to a particular non-GPU metric
        default_value : any
            Value to return if tag is not found

        Returns
        -------
        Float
            Computation of the values of the non-GPU metric Records
            corresponding to the tag, default_value if tag not found,
            based on the supplied aggregation function (usually mean or sum).
        """
        # The record type's own value_function decides how per-model
        # values aggregate (e.g. throughput sums, latency averages).
        return RecordType.get_all_record_types()[tag].value_function()(
            [
                default_value if m is None else m.value()
                for m in self.get_non_gpu_metric(tag)
            ]
        )

    def get_gpu_metric_value(self, tag: str, default_value: Any = 0) -> float:
        """
        Parameters
        ----------
        tag : str
            A human readable tag that corresponds
            to a particular GPU metric
        default_value : any
            Value to return if tag is not found

        Returns
        -------
        float :
            Average of the values of the GPU metric Records
            corresponding to the tag, default_value if tag not found.
        """
        metric = self.get_gpu_metric(tag)
        if metric is None:
            return default_value
        else:
            return metric.value()

    def get_weighted_non_gpu_metric_value(
        self,
        tag: str,
    ) -> float:
        """
        Parameters
        ----------
        tag : str
            A human readable tag that corresponds
            to a particular metric

        Returns
        -------
        float
            Weighted average of the values of the metric Record corresponding
            to the tag

        NOTE(review): return annotation corrected from List[float]; the
        record type's value_function aggregates the weighted per-model
        values into a single scalar, exactly as in
        get_non_gpu_metric_value().
        """
        assert len(self._model_config_weights) == len(self._model_config_measurements)

        weighted_non_gpu_metrics = [
            metric.value() * self._model_config_weights[index]
            for index, metric in enumerate(self.get_non_gpu_metric(tag))
        ]

        return RecordType.get_all_record_types()[tag].value_function()(
            weighted_non_gpu_metrics
        )

    def gpus_used(self) -> List[int]:
        """
        Returns
        -------
        list of ints
            list of device IDs used in this measurement
        """

        return list(self._gpu_data.keys())

    def model_specific_pa_params(self) -> List[Dict[str, int]]:
        """
        Returns
        -------
        list:
            of dicts:
            of model specific PA parameters
            used in this measurement
        """

        return [
            model_config_measurement.model_specific_pa_params()
            for model_config_measurement in self._model_config_measurements
        ]

    def is_better_than(self, other: "RunConfigMeasurement") -> bool:
        """
        Checks whether a measurement is better than another
        by using the weighted average across all model configs in the
        RunConfig

        If True, this means this RunConfig measurement is better
        than the other
        """
        # seems like this should be == -1 but we're using a min heap
        return self._compare_measurements(other) == 1

    def __eq__(self, other: object) -> bool:
        """
        Check whether two sets of measurements are equivalent

        Equality is score-based (within COMPARISON_SCORE_THRESHOLD),
        not structural.
        """
        if not isinstance(other, RunConfigMeasurement):
            return NotImplemented
        return self._compare_measurements(other) == 0

    def __lt__(self, other: "RunConfigMeasurement") -> bool:
        """
        Checks whether a measurement is better than another
        by using the weighted average across all model configs in the
        RunConfig

        This is used when sorting

        Returns
        -------
        bool:
            True if other is better than or equal to self
        """

        # Deliberately inverted ("better" sorts as smaller) so that the
        # min-heap used elsewhere pops the best measurement first.
        return not self.is_better_than(other)

    def is_passing_constraints(self) -> bool:
        """
        Returns true if all model measurements pass
        their respective constraints

        Requires set_constraint_manager() to have been called first.
        """

        assert self._constraint_manager is not None
        return self._constraint_manager.satisfies_constraints(self)

    def compare_measurements(self, other: "RunConfigMeasurement") -> float:
        """
        Compares two RunConfigMeasurements based on each
        ModelConfigs weighted metric objectives and the
        ModelConfigs weighted value within the RunConfigMeasurement

        Parameters
        ----------
        other: RunConfigMeasurement

        Returns
        -------
        float
            Positive value if other is better
            Negative value is self is better
            Zero if they are equal
        """
        # Step 1: for each ModelConfig determine the weighted score
        weighted_mcm_scores = self._calculate_weighted_mcm_score(other)

        # Step 2: combine these using the ModelConfig weighting
        weighted_rcm_score = self._calculate_weighted_rcm_score(weighted_mcm_scores)

        # Step 3: Reverse the polarity to match what is expected in the docstring return
        return -1 * weighted_rcm_score

    def calculate_weighted_percentage_gain(
        self, other: "RunConfigMeasurement"
    ) -> float:
        """
        Calculates the weighted percentage gain between
        two RunConfigMeasurements based on each
        ModelConfigs weighted metric objectives and the
        ModelConfigs weighted value within the RunConfigMeasurement

        Parameters
        ----------
        other: RunConfigMeasurement

        Returns
        -------
        float
            The weighted percentage gain. A positive value indicates
            this ModelConfig measurement is better than the other
        """
        # for each ModelConfig determine the weighted percentage gain
        weighted_mcm_pct = self._calculate_weighted_mcm_percentage_gain(other)

        # combine these using the ModelConfig weighting
        weighted_rcm_pct = self._calculate_weighted_rcm_score(weighted_mcm_pct)

        return weighted_rcm_pct

    def compare_constraints(self, other: "RunConfigMeasurement") -> Optional[float]:
        """
        Compares two RunConfigMeasurements based on how close
        each RCM is to passing their constraints

        Parameters
        ----------
        other: RunConfigMeasurement

        Returns
        -------
        float
            Positive value if other is closer to passing constraints
            Negative value if self is closer to passing constraints
            Zero if they are equally close to passing constraints
            None if either RCM is passing constraints
        """

        assert (
            self._constraint_manager is not None
            and other._constraint_manager is not None
        )

        # This comparison is only meaningful between two failing
        # measurements; a passing one is handled elsewhere.
        if self.is_passing_constraints() or other.is_passing_constraints():
            return None

        self_failing_pct = self._constraint_manager.constraint_failure_percentage(self)
        other_failing_pct = other._constraint_manager.constraint_failure_percentage(
            other
        )

        return (self_failing_pct - other_failing_pct) / 100

    def _compare_measurements(self, other: "RunConfigMeasurement") -> int:
        """
        Compares two RunConfigMeasurements based on each
        ModelConfigs weighted metric objectives and the
        ModelConfigs weighted value within the RunConfigMeasurement

        Parameters
        ----------
        other: RunConfigMeasurement

        Returns
        -------
        int
            0
                if the results are determined
                to be the same within a threshold
            1
                if self > other (is better than)
            -1
                if self < other (is worse than)
        """

        # Step 1: for each ModelConfig determine the weighted score
        weighted_mcm_scores = self._calculate_weighted_mcm_score(other)

        # Step 2: combine these using the ModelConfig weighting
        weighted_rcm_score = self._calculate_weighted_rcm_score(weighted_mcm_scores)

        # Step 3: check the polarity
        # Scores within +/- COMPARISON_SCORE_THRESHOLD are treated as a tie.
        if weighted_rcm_score > COMPARISON_SCORE_THRESHOLD:
            return 1
        elif weighted_rcm_score < -COMPARISON_SCORE_THRESHOLD:
            return -1
        return 0

    def _calculate_weighted_mcm_score(
        self, other: "RunConfigMeasurement"
    ) -> List[float]:
        """
        Parameters
        ----------
        other: RunConfigMeasurement

        Returns
        -------
        list of floats
            A weighted score for each ModelConfig measurement in the RunConfig

        Pairs self's and other's measurements positionally; assumes both
        RunConfigs contain the same model configs in the same order.
        """
        return [
            self_mcm.get_weighted_score(other_mcm)
            for self_mcm, other_mcm in zip(
                self._model_config_measurements, other._model_config_measurements
            )
        ]

    def _calculate_weighted_mcm_percentage_gain(
        self, other: "RunConfigMeasurement"
    ) -> List[float]:
        """
        Parameters
        ----------
        other: RunConfigMeasurement

        Returns
        -------
        list of floats
            A weighted percentage gain for each ModelConfig measurement in the RunConfig
        """
        return [
            self_mcm.calculate_weighted_percentage_gain(other_mcm)
            for self_mcm, other_mcm in zip(
                self._model_config_measurements, other._model_config_measurements
            )
        ]

    def _calculate_weighted_rcm_score(self, weighted_mcm_scores: List[float]) -> float:
        """
        Parameters
        ----------
        weighted_mcm_scores: list of floats
            A weighted score for each ModelConfig measurement in the RunConfig

        Returns
        -------
        float
            The weighted score. A positive value indicates this
            RunConfig measurement is better than the other
        """

        assert len(self._model_config_weights) == len(weighted_mcm_scores)

        # Dot product of per-model scores with the normalized model weights.
        return sum(
            [
                weighted_mcm_score * model_config_weight
                for weighted_mcm_score, model_config_weight in zip(
                    weighted_mcm_scores, self._model_config_weights
                )
            ]
        )

    def _average_list(self, row_list):
        """
        Average a 2d list

        Averages column-wise: element i of the result is the mean of
        row_list[*][i]. Rows are expected to be equal-length lists of
        Record objects (which support + and numeric *).
        """

        if not row_list:
            return row_list
        else:
            N = len(row_list)
            d = len(row_list[0])
            avg = [0 for _ in range(d)]
            for i in range(d):
                # sum() is seeded with the first row's Record via start=
                # because Records cannot be added to the default int 0;
                # * 1.0 forces float division of the aggregated value.
                avg[i] = (
                    sum([row_list[j][i] for j in range(1, N)], start=row_list[0][i])
                    * 1.0
                ) / N
            return avg

    def _deserialize_gpu_data(
        self, serialized_gpu_data: Dict
    ) -> Dict[int, List[Record]]:
        """
        Rebuild the gpu_data dict from its serialized form:
        {gpu_uuid: [[tag, record_dict], ...]} -> {gpu_uuid: [Record, ...]}.
        """
        gpu_data = {}
        for gpu_uuid, gpu_data_list in serialized_gpu_data.items():
            metric_list = []
            for [tag, record_dict] in gpu_data_list:
                # The tag selects the concrete Record subclass to revive.
                record_type = RecordType.get(tag)
                record = record_type.from_dict(record_dict)
                metric_list.append(record)
            gpu_data[gpu_uuid] = metric_list

        return gpu_data

    def _get_avg_gpu_data_from_tag(self) -> Dict[str, Record]:
        # Index the averaged GPU records by their class-level tag string.
        return {metric.tag: metric for metric in self._avg_gpu_data}

    def _deserialize_model_config_measurements(
        self, serialized_model_config_measurements: List[Dict]
    ) -> List[ModelConfigMeasurement]:
        """
        Revive each serialized ModelConfigMeasurement dict in order.
        """
        model_config_measurements = []
        for mcm_dict in serialized_model_config_measurements:
            model_config_measurements.append(ModelConfigMeasurement.from_dict(mcm_dict))

        return model_config_measurements