triton-model-analyzer 1.48.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1376 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from typing import TYPE_CHECKING, Any, DefaultDict, Dict, List, Tuple, Union
18
+
19
+ if TYPE_CHECKING:
20
+ from model_analyzer.result.run_config_measurement import RunConfigMeasurement
21
+
22
+ import logging
23
+ import os
24
+ from collections import defaultdict
25
+
26
+ from model_analyzer.config.generate.base_model_config_generator import (
27
+ BaseModelConfigGenerator,
28
+ )
29
+ from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
30
+ from model_analyzer.config.input.config_command_report import ConfigCommandReport
31
+ from model_analyzer.config.run.run_config import RunConfig
32
+ from model_analyzer.constants import (
33
+ GLOBAL_CONSTRAINTS_KEY,
34
+ LOGGER_NAME,
35
+ TOP_MODELS_REPORT_KEY,
36
+ )
37
+ from model_analyzer.plots.plot_manager import PlotManager
38
+ from model_analyzer.record.metrics_manager import MetricsManager
39
+ from model_analyzer.reports.html_report import HTMLReport
40
+ from model_analyzer.reports.pdf_report import PDFReport
41
+ from model_analyzer.result.constraint_manager import ConstraintManager
42
+ from model_analyzer.result.result_manager import ResultManager
43
+ from model_analyzer.result.result_table import ResultTable
44
+ from model_analyzer.result.run_config_measurement import RunConfigMeasurement
45
+
46
+ from .report_factory import ReportFactory
47
+
48
+ logger = logging.getLogger(LOGGER_NAME)
49
+
50
+
51
+ class ReportManager:
52
+ """
53
+ Manages the building and export of
54
+ various types of reports
55
+ """
56
+
57
    def __init__(
        self,
        mode: str,
        config: Union[ConfigCommandProfile, ConfigCommandReport],
        gpu_info: Dict[str, Dict[str, Any]],
        result_manager: ResultManager,
        constraint_manager: ConstraintManager,
    ):
        """
        Parameters
        ----------
        mode: str
            The mode in which Model Analyzer is operating
        config :ConfigCommandProfile or ConfigCommandReport
            The model analyzer's config containing information
            about the kind of reports to generate
        gpu_info: dict
            containing information about the GPUs used
            during profiling
        result_manager : ResultManager
            instance that manages the result tables and
            adding results
        constraint_manager: ConstraintManager
            instance that manages constraints
        """

        self._mode = mode
        self._config = config
        self._gpu_info = gpu_info
        self._result_manager = result_manager
        self._constraint_manager = constraint_manager

        # Create the plot manager
        self._plot_manager = PlotManager(
            config=self._config,
            result_manager=self._result_manager,
            constraint_manager=self._constraint_manager,
        )

        # Best (run config, measurement) pairs per report key; filled by
        # _add_summary_data() and consumed when building summary reports.
        self._summary_data: DefaultDict[
            str, List[Tuple[RunConfig, RunConfigMeasurement]]
        ] = defaultdict(list)
        self._summaries: Dict[str, Union[PDFReport, HTMLReport]] = {}

        # Per-model-config data for detailed reports; filled by
        # _add_detailed_report_data().
        self._detailed_report_data: Dict[
            str, Tuple[RunConfig, RunConfigMeasurement]
        ] = {}
        self._detailed_reports: Dict[str, Union[PDFReport, HTMLReport]] = {}

        # All reports are written under <export_path>/reports
        self._reports_export_directory = os.path.join(config.export_path, "reports")
        os.makedirs(self._reports_export_directory, exist_ok=True)

        # NOTE(review): presumably a cached result for
        # _cpu_metrics_were_gathered() (None = not yet computed) — confirm
        self._cpu_metrics_gathered_sticky = None
111
+ def report_keys(self):
112
+ """
113
+ Returns
114
+ -------
115
+ list of str
116
+ identifiers for all the
117
+ reports in this report manager
118
+ """
119
+
120
+ return list(self._summary_data.keys())
121
+
122
+ def data(self, report_key):
123
+ """
124
+ Parameters
125
+ ----------
126
+ report_key: str
127
+ An identifier for a particular report
128
+ Returns
129
+ -------
130
+ dict
131
+ The data in the report corresponding with
132
+ the report key
133
+ """
134
+
135
+ return self._summary_data[report_key]
136
+
137
    def create_summaries(self):
        """
        Add summary data and build summary report

        Gathers the top measurements, creates and exports the summary
        plots, then builds one summary report per profiled model (and,
        optionally, one report covering the top model configs overall).
        """

        self._add_summary_data()
        self._plot_manager.create_summary_plots()
        self._plot_manager.export_summary_plots()

        statistics = self._result_manager.get_result_statistics()
        # NOTE(review): reaches into a private member of ResultManager
        model_names = self._result_manager._profile_model_names

        at_least_one_summary = False
        for model_name in model_names:
            if model_name in self._summary_data:
                at_least_one_summary = True
                self._summaries[model_name] = self._build_summary_report(
                    report_key=model_name,
                    num_configs=self._config.num_configs_per_model,
                    statistics=statistics,
                )
            else:
                # A model with no summary data is skipped rather than failing
                logger.warning(
                    f"No data found for model {model_name}, skipping export summary."
                )

        # The cross-model "top models" report is only built when requested
        # and when at least one per-model summary exists
        if self._config.num_top_model_configs and at_least_one_summary:
            self._summaries[TOP_MODELS_REPORT_KEY] = self._build_summary_report(
                report_key=TOP_MODELS_REPORT_KEY,
                num_configs=self._config.num_top_model_configs,
                statistics=statistics,
            )
170
+ def export_summaries(self):
171
+ """
172
+ Write a summary to disk
173
+ """
174
+
175
+ for report_key, summary in self._summaries.items():
176
+ model_report_dir = os.path.join(
177
+ self._reports_export_directory, "summaries", report_key
178
+ )
179
+ os.makedirs(model_report_dir, exist_ok=True)
180
+ output_filename = os.path.join(
181
+ model_report_dir, f"result_summary.{summary.get_file_extension()}"
182
+ )
183
+ logger.info(f"Exporting Summary Report to {output_filename}")
184
+ summary.write_report(filename=output_filename)
185
+
186
    def create_detailed_reports(self):
        """
        Adds detailed report data and build detailed reports

        Gathers measurements for each requested model config, creates and
        exports the detailed plots, then builds one detailed report per
        requested model config.
        """

        self._add_detailed_report_data()
        self._plot_manager.create_detailed_plots()
        self._plot_manager.export_detailed_plots()

        for report_model_config in self._config.report_model_configs:
            model_config_name = report_model_config.model_config_name()
            self._detailed_reports[model_config_name] = self._build_detailed_report(
                report_model_config
            )
201
+ def export_detailed_reports(self):
202
+ """
203
+ Write a detailed report to disk
204
+ """
205
+
206
+ for report_key, report in self._detailed_reports.items():
207
+ model_report_dir = os.path.join(
208
+ self._reports_export_directory, "detailed", report_key
209
+ )
210
+ os.makedirs(model_report_dir, exist_ok=True)
211
+ output_filename = os.path.join(
212
+ model_report_dir, f"detailed_report.{report.get_file_extension()}"
213
+ )
214
+ logger.info(f"Exporting Detailed Report to {output_filename}")
215
+ report.write_report(filename=output_filename)
216
+
217
    def _add_summary_data(self):
        """
        Adds measurements on which the report manager
        can do complex analyses or with which it can
        build tables and add to reports

        For every profiled model, the top-N results (including the default
        config) are collected and the best measurement of each result is
        stored in self._summary_data keyed by model name. When a "top
        models" report is requested, the same is done across all models
        under TOP_MODELS_REPORT_KEY.
        """

        # NOTE(review): reaches into a private member of ResultManager
        model_names = self._result_manager._profile_model_names

        for model_name in model_names:
            top_results = self._result_manager.top_n_results(
                model_name=model_name,
                n=self._config.num_configs_per_model,
                include_default=True,
            )

            # Only the single best measurement of each result is kept
            for result in top_results:
                for measurement in result.top_n_measurements(n=1):
                    self._summary_data[model_name].append(
                        (result.run_config(), measurement)
                    )

        if self._config.num_top_model_configs:
            # Cross-model top results (no model_name filter)
            for result in self._result_manager.top_n_results(
                n=self._config.num_top_model_configs
            ):
                for measurement in result.top_n_measurements(n=1):
                    self._summary_data[TOP_MODELS_REPORT_KEY].append(
                        (result.run_config(), measurement)
                    )
248
+ def _add_detailed_report_data(self):
249
+ """
250
+ Adds data specific to the model configs
251
+ for which we want detailed reports
252
+ """
253
+
254
+ model_config_names = [
255
+ model.model_config_name() for model in self._config.report_model_configs
256
+ ]
257
+
258
+ # TODO-TMA-650 - this needs to be updated for multi-model
259
+ for model_config_name in model_config_names:
260
+ self._detailed_report_data[
261
+ model_config_name
262
+ ] = self._result_manager.get_model_configs_run_config_measurements(
263
+ model_config_name
264
+ )
265
+
266
    def _build_detailed_report(self, report_model_config):
        """
        Builder method for a detailed report

        Assembles a title, latency-breakdown image (online mode only),
        the configured simple plots (two per row), a measurement table,
        and explanatory paragraphs for one model config.
        """

        detailed_report = ReportFactory.create_report()

        report_key = report_model_config.model_config_name()
        model_config, _ = self._detailed_report_data[report_key]

        detailed_report.add_title(title="Detailed Report")
        detailed_report.add_subheading(subheading=f"Model Config: {report_key}")

        if self._mode == "online":
            # Add main latency breakdown image
            detailed_plot = os.path.join(
                self._config.export_path,
                "plots",
                "detailed",
                report_key,
                "latency_breakdown.png",
            )
            detailed_caption = (
                f"Latency Breakdown for Online Performance of {report_key}"
            )

            # First add row of detailed
            detailed_report.add_images([detailed_plot], [detailed_caption])

        # Next add the SimplePlots created for this detailed report
        plot_stack = []
        caption_stack = []
        plot_path = os.path.join(
            self._config.export_path, "plots", "simple", report_key
        )
        for plot_config in report_model_config.plots():
            # RAM plots are skipped when no CPU metrics were collected
            if (
                plot_config.title().startswith("RAM")
                and not self._cpu_metrics_were_gathered()
            ):
                continue
            # GPU-axis plots are skipped for CPU-only model configs
            if model_config.cpu_only() and (
                plot_config.y_axis().startswith("gpu_")
                or plot_config.x_axis().startswith("gpu_")
            ):
                continue
            plot_stack.append(os.path.join(plot_path, f"{plot_config.name()}.png"))
            caption_stack.append(
                f"{plot_config.title()} curves for config {report_key}"
            )
            # Flush plots two at a time so they render side by side
            if len(plot_stack) == 2:
                detailed_report.add_images(plot_stack, caption_stack, float="left")
                plot_stack = []
                caption_stack = []

        # Odd number of plots
        if plot_stack:
            detailed_report.add_images(plot_stack, caption_stack, float="left")

        # Next add table of measurements
        detailed_table = self._build_detailed_table(report_key)
        detailed_report.add_table(table=detailed_table)

        # Add some details about the config
        detailed_info = self._build_detailed_info(report_key)
        detailed_report.add_line_breaks(num_breaks=2)
        detailed_report.add_paragraph(detailed_info, font_size=18)

        # Measurements are presented in decreasing latency (online) or
        # throughput (offline) order
        sort_by_tag = "latency" if self._mode == "online" else "throughput"
        detailed_report.add_paragraph(
            "The first plot above shows the breakdown of the latencies in "
            "the latency throughput curve for this model config. Following that "
            "are the requested configurable plots showing the relationship between "
            "various metrics measured by the Model Analyzer. The above table contains "
            "detailed data for each of the measurements taken for this model config in "
            f"decreasing order of {sort_by_tag}.",
            font_size=18,
        )
        return detailed_report
346
    def _build_summary_report(self, report_key, num_configs, statistics):
        """
        Builder method for a summary
        report.

        Assembles title, GPU/constraint info, summary sentence, plots
        (GPU or CPU variants depending on what was gathered), and the
        summary result table for one report key.
        """

        summary = ReportFactory.create_report()

        total_measurements = statistics.total_measurements(report_key)
        total_configurations = statistics.total_configurations(report_key)
        num_best_configs = min(num_configs, total_configurations)

        # Get GPU names and memory
        run_config = self._summary_data[report_key][0][0]
        report_gpu_metrics = (
            self._config.always_report_gpu_metrics or not run_config.cpu_only()
        )

        (gpu_names, max_memories) = (None, None)
        if report_gpu_metrics:
            (gpu_names, max_memories) = self._get_gpu_stats(
                measurements=[v for _, v in self._summary_data[report_key]]
            )

        # Get constraints
        constraint_str = self._create_constraint_string(report_key)

        # Build summary table and info sentence
        table, summary_sentence = self._build_summary_table(
            report_key=report_key,
            num_configurations=total_configurations,
            num_measurements=total_measurements,
            gpu_name=gpu_names,
            report_gpu_metrics=report_gpu_metrics,
        )

        # Add summary sections
        summary.add_title(title=f"{self._mode.title()} Result Summary")
        # Multi-model report keys are comma separated model names
        summary.add_subheading(f"Model: {' and '.join(report_key.split(','))}")
        if report_gpu_metrics:
            summary.add_paragraph(f"GPU(s): {gpu_names}")
            summary.add_paragraph(f"Total Available GPU Memory: {max_memories}")
        summary.add_paragraph(f"Constraint targets: {constraint_str}")
        summary.add_paragraph(summary_sentence)
        summary.add_paragraph(
            f"Curves corresponding to the {num_best_configs} best model "
            f"configuration(s) out of a total of {total_configurations} are "
            "shown in the plots."
        )

        # NOTE(review): assumes the first configured plot is the
        # throughput-vs-latency plot — confirm against config defaults
        throughput_plot_config = self._config.plots[0]
        throughput_plot = os.path.join(
            self._config.export_path,
            "plots",
            "simple",
            report_key,
            f"{throughput_plot_config.name()}.png",
        )

        caption_throughput = f"{throughput_plot_config.title()} curves for {num_best_configs} best configurations."

        if report_gpu_metrics:
            summary.add_images([throughput_plot], [caption_throughput], image_width=66)
            if self._mode == "online":
                # GPU memory vs latency is only meaningful in online mode
                memory_latency_plot = os.path.join(
                    self._config.export_path,
                    "plots",
                    "simple",
                    report_key,
                    "gpu_mem_v_latency.png",
                )
                caption_memory_latency = f"GPU Memory vs. Latency curves for {num_best_configs} best configurations."
                summary.add_images(
                    [memory_latency_plot], [caption_memory_latency], image_width=66
                )
        else:
            summary.add_images([throughput_plot], [caption_throughput], image_width=66)
            if self._mode == "online" and self._cpu_metrics_were_gathered():
                # CPU-only configs get the CPU memory vs latency plot instead
                memory_latency_plot = os.path.join(
                    self._config.export_path,
                    "plots",
                    "simple",
                    report_key,
                    "cpu_mem_v_latency.png",
                )
                caption_memory_latency = f"CPU Memory vs. Latency curves for {num_best_configs} best configurations."
                summary.add_images(
                    [memory_latency_plot], [caption_memory_latency], image_width=66
                )

        caption_results_table = (
            '<div style = "display:block; clear:both; page-break-after:always;"></div>'
            "The following table summarizes each configuration at the measurement"
            " that optimizes the desired metrics under the given constraints."
        )

        # NOTE(review): reaches into a private member of ResultManager
        if self._result_manager._profiling_models_concurrently():
            caption_results_table = (
                caption_results_table + " Per model values are parenthetical."
            )

        if run_config.is_ensemble_model():
            caption_results_table = (
                caption_results_table
                + " The ensemble's composing model values are listed in the following order: "
            )
        elif run_config.is_bls_model():
            caption_results_table = (
                caption_results_table
                + " The BLS composing model values are listed in the following order: "
            )

        if run_config.is_ensemble_model() or run_config.is_bls_model():
            # Append the name of every composing model, comma separated
            for composing_config_name in run_config.model_run_configs()[
                0
            ].get_composing_config_names():
                caption_results_table = (
                    caption_results_table
                    + BaseModelConfigGenerator.extract_model_name_from_variant_name(
                        composing_config_name
                    )
                    + ", "
                )
            caption_results_table = caption_results_table[:-2]  # removes comma

        summary.add_paragraph(caption_results_table)
        summary.add_table(table=table)

        return summary
476
+ def _build_summary_table(
477
+ self,
478
+ report_key,
479
+ num_configurations,
480
+ num_measurements,
481
+ gpu_name=None,
482
+ report_gpu_metrics=True,
483
+ ):
484
+ """
485
+ Creates a result table corresponding
486
+ to the best measurements for a particular
487
+ model
488
+ """
489
+
490
+ (
491
+ best_run_config,
492
+ best_run_config_measurement,
493
+ sorted_measurements,
494
+ ) = self._find_best_configs(report_key)
495
+
496
+ multi_model = len(best_run_config.model_run_configs()) > 1
497
+ is_ensemble = best_run_config.is_ensemble_model()
498
+ is_bls = best_run_config.is_bls_model()
499
+ has_composing_models = is_ensemble or is_bls
500
+
501
+ summary_sentence = self._create_summary_sentence(
502
+ report_key,
503
+ num_configurations,
504
+ num_measurements,
505
+ best_run_config,
506
+ best_run_config_measurement,
507
+ gpu_name,
508
+ report_gpu_metrics,
509
+ multi_model,
510
+ is_ensemble,
511
+ is_bls,
512
+ )
513
+
514
+ summary_table = self._construct_summary_result_table(
515
+ sorted_measurements, multi_model, has_composing_models, report_gpu_metrics
516
+ )
517
+
518
+ return summary_table, summary_sentence
519
+
520
+ def _find_best_configs(self, report_key):
521
+ sorted_measurements = sorted(
522
+ self._summary_data[report_key], key=lambda x: x[1], reverse=True
523
+ )
524
+
525
+ best_run_config = sorted_measurements[0][0]
526
+ best_run_config_measurement = sorted_measurements[0][1]
527
+
528
+ return best_run_config, best_run_config_measurement, sorted_measurements
529
+
530
+ def _create_constraint_string(self, report_key: str) -> str:
531
+ constraint_strs = self._build_constraint_strings()
532
+
533
+ constraint_str = "None"
534
+ if constraint_strs:
535
+ if report_key == TOP_MODELS_REPORT_KEY:
536
+ constraint_str = constraint_strs[GLOBAL_CONSTRAINTS_KEY]
537
+ elif "," in report_key: # indicates multi-model
538
+ constraint_str = self._create_multi_model_constraint_string(
539
+ report_key, constraint_strs
540
+ )
541
+ else: # single-model
542
+ if report_key in constraint_strs:
543
+ constraint_str = constraint_strs[report_key]
544
+
545
+ return constraint_str
546
+
547
+ def _create_multi_model_constraint_string(
548
+ self, report_key: str, constraint_strs: Dict[str, str]
549
+ ) -> str:
550
+ constraint_str = ""
551
+ for model_name in report_key.split(","):
552
+ if model_name in constraint_strs:
553
+ if constraint_str:
554
+ constraint_str += "<br>"
555
+ for i in range(len("Constraint targets: ")):
556
+ constraint_str += "&ensp;"
557
+
558
+ constraint_str += (
559
+ "<strong>"
560
+ + model_name
561
+ + "</strong>: "
562
+ + constraint_strs[model_name]
563
+ )
564
+
565
+ return constraint_str
566
+
567
    def _create_summary_sentence(
        self,
        report_key,
        num_configurations,
        num_measurements,
        best_run_config,
        best_run_config_measurement,
        gpu_name,
        report_gpu_metrics,
        multi_model,
        is_ensemble,
        is_bls,
    ):
        """Assemble the HTML summary sentence shown at the top of a report.

        Builds the lead-in ("In N measurements across M configurations ...")
        from helper phrases, then appends a model-type-specific HTML list
        describing the best config (ensemble, BLS, or plain model), and
        finally closes the list tag opened by those helpers.

        NOTE(review): `multi_model` is accepted but not used here — the
        phrase helpers derive multi-model-ness from `best_run_config` itself;
        confirm before removing the parameter.
        """
        measurement_phrase = self._create_summary_measurement_phrase(num_measurements)
        config_phrase = self._create_summary_config_phrase(
            best_run_config, num_configurations
        )
        objective_phrase = self._create_summary_objective_phrase(
            report_key, best_run_config_measurement
        )
        gpu_name_phrase = self._create_summary_gpu_name_phrase(
            gpu_name, report_gpu_metrics
        )

        summary_sentence = (
            f"In {measurement_phrase} across {config_phrase} "
            f"{objective_phrase}, under the given constraints{gpu_name_phrase}."
        )

        # Each branch appends an HTML fragment that opens a <UL> list
        if is_ensemble:
            summary_sentence = (
                summary_sentence
                + self._create_ensemble_summary_sentence(best_run_config)
            )
        elif is_bls:
            summary_sentence = summary_sentence + self._create_bls_summary_sentence(
                best_run_config
            )
        else:
            summary_sentence = summary_sentence + self._create_model_summary_sentence(
                best_run_config
            )

        # Close the <UL> opened by the branch above
        summary_sentence = summary_sentence + " </UL>"
        return summary_sentence
+ def _create_ensemble_summary_sentence(self, run_config: RunConfig) -> str:
614
+ summary_sentence = "<BR><BR>"
615
+ best_config_name = run_config.model_run_configs()[0].model_variant_name()
616
+
617
+ summary_sentence = (
618
+ summary_sentence
619
+ + f"<strong>{best_config_name}</strong> is comprised of the following composing models: <UL> "
620
+ )
621
+ summary_sentence = (
622
+ summary_sentence + self._create_composing_model_summary_sentence(run_config)
623
+ )
624
+
625
+ return summary_sentence
626
+
627
+ def _create_bls_summary_sentence(self, run_config: RunConfig) -> str:
628
+ summary_sentence = self._create_model_summary_sentence(run_config)
629
+ summary_sentence = (
630
+ summary_sentence
631
+ + f"<BR>Which is comprised of the following composing models: <UL>"
632
+ )
633
+ summary_sentence = (
634
+ summary_sentence + self._create_composing_model_summary_sentence(run_config)
635
+ )
636
+
637
+ return summary_sentence
638
+
639
+ def _create_model_summary_sentence(self, run_config: RunConfig) -> str:
640
+ summary_sentence = "<UL>"
641
+ for model_run_config in run_config.model_run_configs():
642
+ summary_sentence = (
643
+ summary_sentence
644
+ + "<LI> "
645
+ + self._create_summary_config_info(
646
+ model_run_config.model_config_variant()
647
+ )
648
+ + " </LI>"
649
+ )
650
+
651
+ return summary_sentence
652
+
653
+ def _create_composing_model_summary_sentence(self, run_config: RunConfig) -> str:
654
+ summary_sentence = ""
655
+ for composing_config_variant in run_config.model_run_configs()[
656
+ 0
657
+ ].composing_config_variants():
658
+ summary_sentence = (
659
+ summary_sentence
660
+ + "<LI> "
661
+ + self._create_summary_config_info(composing_config_variant)
662
+ + " </LI>"
663
+ )
664
+
665
+ return summary_sentence
666
+
667
+ def _create_summary_measurement_phrase(self, num_measurements):
668
+ assert num_measurements > 0, "Number of measurements must be greater than 0"
669
+
670
+ return (
671
+ f"{num_measurements} measurements"
672
+ if num_measurements > 1
673
+ else "1 measurement"
674
+ )
675
+
676
+ def _create_summary_config_phrase(self, best_run_config, num_configurations):
677
+ config_names = [
678
+ f"<strong>{model_run_config.model_config_variant().variant_name}</strong>"
679
+ for model_run_config in best_run_config.model_run_configs()
680
+ ]
681
+
682
+ config_names_str = f"{' and '.join(config_names)}"
683
+
684
+ if len(config_names) > 1:
685
+ return f"{num_configurations} configurations, the combination of {config_names_str}"
686
+ else:
687
+ return f"{num_configurations} configurations, {config_names_str}"
688
+
689
+ def _create_summary_objective_phrase(
690
+ self, report_key: str, best_run_config_measurement: "RunConfigMeasurement"
691
+ ) -> str:
692
+ default_run_config_measurement = self._find_default_run_config_measurement(
693
+ report_key
694
+ )
695
+
696
+ if default_run_config_measurement:
697
+ objective_gain = self._get_objective_gain(
698
+ best_run_config_measurement, default_run_config_measurement
699
+ )
700
+ else:
701
+ objective_gain = 0
702
+
703
+ if objective_gain > 0:
704
+ if self._config.get_config()["objectives"].is_set_by_user():
705
+ objective_phrase = f"is <strong>{objective_gain}%</strong> better than the default configuration at meeting the objectives"
706
+ else:
707
+ if self._mode == "online":
708
+ objective_phrase = f"is <strong>{objective_gain}%</strong> better than the default configuration at maximizing throughput"
709
+ else:
710
+ objective_phrase = f"is <strong>{objective_gain}%</strong> better than the default configuration at minimizing latency"
711
+ else:
712
+ objective_phrase = "provides no gain over the default configuration"
713
+
714
+ return objective_phrase
715
+
716
+ def _get_objective_gain(
717
+ self,
718
+ run_config_measurement: "RunConfigMeasurement",
719
+ default_run_config_measurement: "RunConfigMeasurement",
720
+ ) -> float:
721
+ return round(
722
+ run_config_measurement.calculate_weighted_percentage_gain(
723
+ default_run_config_measurement
724
+ )
725
+ )
726
+
727
+ def _find_default_run_config_measurement(self, model_name):
728
+ # There is no single default config when comparing across
729
+ # multiple model runs
730
+ #
731
+ if model_name == TOP_MODELS_REPORT_KEY:
732
+ return None
733
+
734
+ sorted_results = self._result_manager.get_model_sorted_results(model_name)
735
+
736
+ for run_config_result in sorted_results.results():
737
+ run_config_measurements = run_config_result.passing_measurements()
738
+ if (
739
+ run_config_measurements
740
+ and "default" in run_config_measurements[0].model_variants_name()
741
+ ):
742
+ best_rcm = run_config_measurements[0]
743
+ for run_config_measurement in run_config_measurements:
744
+ if run_config_measurement > best_rcm:
745
+ best_rcm = run_config_measurement
746
+
747
+ return best_rcm
748
+
749
+ return None
750
+
751
+ def _create_summary_platform_phrase(self, model_config):
752
+ if model_config.get_field("backend"):
753
+ platform = model_config.get_field("backend")
754
+ else:
755
+ platform = model_config.get_field("platform")
756
+
757
+ return f"platform {platform}"
758
+
759
+ def _create_summary_max_batch_size_phrase(self, model_config):
760
+ return f"max batch size of {model_config.max_batch_size()}"
761
+
762
+ def _create_instance_group_phrase(self, model_config):
763
+ instance_group_str = model_config.instance_group_string(self._get_gpu_count())
764
+ kind_counts = instance_group_str.split("+")
765
+ ret_str = ""
766
+ for kind_count in kind_counts:
767
+ kind_count = kind_count.strip()
768
+ count, kind = kind_count.split(":")
769
+ if ret_str != "":
770
+ ret_str += " and "
771
+ ret_str += f"{count} {kind} instance"
772
+ if int(count) > 1:
773
+ ret_str += "s"
774
+ return ret_str
775
+
776
+ def _create_summary_gpu_name_phrase(self, gpu_name, report_gpu_metrics):
777
+ return f", on GPU(s) {gpu_name}" if report_gpu_metrics else ""
778
+
779
+ def _construct_summary_result_table(
780
+ self, sorted_measurements, multi_model, has_composing_models, report_gpu_metrics
781
+ ):
782
+ if report_gpu_metrics:
783
+ return self._construct_summary_result_table_with_gpu(
784
+ sorted_measurements, multi_model, has_composing_models
785
+ )
786
+ else:
787
+ return self._construct_summary_result_table_cpu_only(
788
+ sorted_measurements, multi_model, has_composing_models
789
+ )
790
+
791
+ def _construct_summary_result_table_cpu_only(
792
+ self, sorted_measurements, multi_model, has_composing_models
793
+ ):
794
+ summary_table = self._create_summary_result_table_header_cpu_only(multi_model)
795
+
796
+ for run_config, run_config_measurement in sorted_measurements:
797
+ row = self._create_summary_row_cpu_only(
798
+ run_config, run_config_measurement, has_composing_models
799
+ )
800
+ summary_table.insert_row_by_index(row)
801
+
802
+ return summary_table
803
+
804
+ def _construct_summary_result_table_with_gpu(
805
+ self, sorted_measurements, multi_model, has_composing_models
806
+ ):
807
+ summary_table = self._create_summary_result_table_header(multi_model)
808
+
809
+ for run_config, run_config_measurement in sorted_measurements:
810
+ row = self._create_summary_row(
811
+ run_config, run_config_measurement, has_composing_models
812
+ )
813
+ summary_table.insert_row_by_index(row)
814
+
815
+ return summary_table
816
+
817
+ def _create_summary_result_table_header_cpu_only(self, multi_model):
818
+ if multi_model:
819
+ header_values = [
820
+ "Model Config Name",
821
+ "Max Batch Size",
822
+ "Dynamic Batching",
823
+ "Total Instance Count",
824
+ "Average p99 Latency (ms)",
825
+ "Total Throughput (infer/sec)",
826
+ "Max CPU Memory Usage (MB)",
827
+ ]
828
+ else:
829
+ header_values = [
830
+ "Model Config Name",
831
+ "Max Batch Size",
832
+ "Dynamic Batching",
833
+ "Total Instance Count",
834
+ "p99 Latency (ms)",
835
+ "Throughput (infer/sec)",
836
+ "Max CPU Memory Usage (MB)",
837
+ ]
838
+ if not self._cpu_metrics_were_gathered():
839
+ header_values.remove("Max CPU Memory Usage (MB)")
840
+
841
+ return ResultTable(headers=header_values, title="Report Table")
842
+
843
+ def _create_summary_result_table_header(self, multi_model):
844
+ if multi_model:
845
+ header_values = [
846
+ "Model Config Name",
847
+ "Max Batch Size",
848
+ "Dynamic Batching",
849
+ "Total Instance Count",
850
+ "Average p99 Latency (ms)",
851
+ "Total Throughput (infer/sec)",
852
+ "Max CPU Memory Usage (MB)",
853
+ "Max GPU Memory Usage (MB)",
854
+ "Average GPU Utilization (%)",
855
+ ]
856
+ else:
857
+ header_values = [
858
+ "Model Config Name",
859
+ "Max Batch Size",
860
+ "Dynamic Batching",
861
+ "Total Instance Count",
862
+ "p99 Latency (ms)",
863
+ "Throughput (infer/sec)",
864
+ "Max CPU Memory Usage (MB)",
865
+ "Max GPU Memory Usage (MB)",
866
+ "Average GPU Utilization (%)",
867
+ ]
868
+
869
+ if not self._cpu_metrics_were_gathered():
870
+ header_values.remove("Max CPU Memory Usage (MB)")
871
+
872
+ return ResultTable(headers=header_values, title="Report Table")
873
+
874
    def _create_summary_row_cpu_only(
        self, run_config, run_config_measurement, has_composing_models
    ):
        """Build one CPU-only summary-table row for a (run config, measurement) pair.

        Column order must match _create_summary_result_table_header_cpu_only:
        names, max batch sizes, dynamic batching, instance counts, latency,
        throughput, and (only when CPU metrics were gathered) CPU RAM.
        When the model has composing models (ensemble/BLS), the batching,
        batch-size, and instance columns describe the composing configs
        instead of the top-level configs.
        """
        model_config_names = ", ".join(
            [
                model_run_config.model_config_variant().variant_name
                for model_run_config in run_config.model_run_configs()
            ]
        )

        if has_composing_models:
            dynamic_batching_string = self._create_summary_string(
                [
                    model_config.dynamic_batching_string()
                    for model_config in run_config.composing_configs()
                ]
            )
        else:
            dynamic_batching_string = self._create_summary_string(
                [
                    model_run_config.model_config().dynamic_batching_string()
                    for model_run_config in run_config.model_run_configs()
                ]
            )

        if has_composing_models:
            max_batch_sizes = ", ".join(
                [
                    str(model_config.max_batch_size())
                    for model_config in run_config.composing_configs()
                ]
            )
        else:
            max_batch_sizes = ", ".join(
                [
                    str(model_run_config.model_config().max_batch_size())
                    for model_run_config in run_config.model_run_configs()
                ]
            )

        if has_composing_models:
            # NOTE(review): this branch reads composing configs from the
            # first model run config, unlike the two branches above which use
            # run_config.composing_configs() — confirm both paths return the
            # same configs.
            instance_group_strings = ", ".join(
                [
                    model_config.instance_group_string(self._get_gpu_count())
                    for model_config in run_config.model_run_configs()[
                        0
                    ].composing_configs()
                ]
            )
        else:
            instance_group_strings = ", ".join(
                [
                    model_run_config.model_config().instance_group_string(
                        self._get_gpu_count()
                    )
                    for model_run_config in run_config.model_run_configs()
                ]
            )

        perf_latency_string = self._create_non_gpu_metric_string(
            run_config_measurement=run_config_measurement,
            non_gpu_metric="perf_latency_p99",
        )
        perf_throughput_string = self._create_non_gpu_metric_string(
            run_config_measurement=run_config_measurement,
            non_gpu_metric="perf_throughput",
        )

        row = [
            model_config_names,
            max_batch_sizes,
            dynamic_batching_string,
            instance_group_strings,
            perf_latency_string,
            perf_throughput_string,
        ]

        # CPU RAM column is only present when the metric was actually gathered
        if self._cpu_metrics_were_gathered():
            cpu_used_ram_string = self._create_non_gpu_metric_string(
                run_config_measurement=run_config_measurement,
                non_gpu_metric="cpu_used_ram",
            )
            row.append(cpu_used_ram_string)

        return row
    def _create_summary_row(
        self, run_config, run_config_measurement, has_composing_models
    ):
        """Build one GPU summary-table row for a (run config, measurement) pair.

        Column order must match _create_summary_result_table_header: names,
        max batch sizes, dynamic batching, instance counts, latency,
        throughput, optional CPU RAM, then GPU memory and GPU utilization.
        When the model has composing models (ensemble/BLS), the batching,
        instance, and batch-size columns describe the composing configs.
        """
        if has_composing_models:
            dynamic_batching_string = self._create_summary_string(
                [
                    model_config.dynamic_batching_string()
                    for model_config in run_config.composing_configs()
                ]
            )
        else:
            dynamic_batching_string = self._create_summary_string(
                [
                    model_run_config.model_config().dynamic_batching_string()
                    for model_run_config in run_config.model_run_configs()
                ]
            )

        if has_composing_models:
            # NOTE(review): composing configs read from the first model run
            # config here, but via run_config.composing_configs() elsewhere in
            # this method — confirm both paths return the same configs.
            instance_group_string = self._create_summary_string(
                [
                    model_config.instance_group_string(self._get_gpu_count())
                    for model_config in run_config.model_run_configs()[
                        0
                    ].composing_configs()
                ]
            )
        else:
            instance_group_string = self._create_summary_string(
                [
                    model_run_config.model_config().instance_group_string(
                        self._get_gpu_count()
                    )
                    for model_run_config in run_config.model_run_configs()
                ]
            )

        if has_composing_models:
            max_batch_sizes_string = self._create_summary_string(
                [
                    str(model_config.max_batch_size())
                    for model_config in run_config.composing_configs()
                ]
            )
        else:
            max_batch_sizes_string = self._create_summary_string(
                [
                    str(model_run_config.model_config().max_batch_size())
                    for model_run_config in run_config.model_run_configs()
                ]
            )

        model_config_names = "<br>".join(
            [
                model_run_config.model_config_variant().variant_name
                for model_run_config in run_config.model_run_configs()
            ]
        )

        perf_latency_string = self._create_non_gpu_metric_string(
            run_config_measurement=run_config_measurement,
            non_gpu_metric="perf_latency_p99",
        )
        perf_throughput_string = self._create_non_gpu_metric_string(
            run_config_measurement=run_config_measurement,
            non_gpu_metric="perf_throughput",
        )

        # CPU RAM column is only inserted when the metric was gathered;
        # the GPU columns are always appended at the end.
        if self._cpu_metrics_were_gathered():
            cpu_used_ram_string = self._create_non_gpu_metric_string(
                run_config_measurement=run_config_measurement,
                non_gpu_metric="cpu_used_ram",
            )

            row = [
                model_config_names,
                max_batch_sizes_string,
                dynamic_batching_string,
                instance_group_string,
                perf_latency_string,
                perf_throughput_string,
                cpu_used_ram_string,
                int(run_config_measurement.get_gpu_metric_value("gpu_used_memory")),
                round(
                    run_config_measurement.get_gpu_metric_value("gpu_utilization"), 1
                ),
            ]
        else:
            row = [
                model_config_names,
                max_batch_sizes_string,
                dynamic_batching_string,
                instance_group_string,
                perf_latency_string,
                perf_throughput_string,
                int(run_config_measurement.get_gpu_metric_value("gpu_used_memory")),
                round(
                    run_config_measurement.get_gpu_metric_value("gpu_utilization"), 1
                ),
            ]

        return row
+ def _create_summary_string(self, values):
1064
+ if len(values) > 1:
1065
+ return f"({', '.join(values)})"
1066
+ else:
1067
+ return f"{values[0]}"
1068
+
1069
+ def _create_non_gpu_metric_string(self, run_config_measurement, non_gpu_metric):
1070
+ non_gpu_metrics = run_config_measurement.get_non_gpu_metric(non_gpu_metric)
1071
+
1072
+ if non_gpu_metrics[0] is None:
1073
+ return "0"
1074
+ elif len(non_gpu_metrics) > 1:
1075
+ non_gpu_metric_config_string = ", ".join(
1076
+ [
1077
+ str(round(non_gpu_metric.value(), 1))
1078
+ for non_gpu_metric in non_gpu_metrics
1079
+ ]
1080
+ )
1081
+
1082
+ return (
1083
+ f"<strong>{round(run_config_measurement.get_non_gpu_metric_value(non_gpu_metric), 1)}</strong> "
1084
+ f"({non_gpu_metric_config_string})"
1085
+ )
1086
+ else:
1087
+ return f"{non_gpu_metrics[0].value()}"
1088
+
1089
+ def _create_summary_config_info(self, model_config_variant):
1090
+ model_config = model_config_variant.model_config
1091
+
1092
+ config_info = f"<strong>{model_config_variant.variant_name}</strong>: "
1093
+ config_info = (
1094
+ config_info + f"{self._create_instance_group_phrase(model_config)} with a "
1095
+ )
1096
+ config_info = (
1097
+ config_info
1098
+ + f"{self._create_summary_max_batch_size_phrase(model_config)} on "
1099
+ )
1100
+ config_info = (
1101
+ config_info + f"{self._create_summary_platform_phrase(model_config)}"
1102
+ )
1103
+
1104
+ return config_info
1105
+
1106
    def _build_detailed_table(self, model_config_name):
        """
        Build the table used in the detailed report.

        Sorts the measurements (by p99 latency when online, throughput when
        offline), picks the first column (request rate / concurrency /
        client batch size) from how the data was measured and the mode, and
        emits one row per measurement, with GPU and CPU-RAM columns included
        only when those metrics apply.
        """

        # NOTE(review): the stored object is named model_config here but
        # run_config in _build_detailed_info; only cpu_only() is called on it,
        # which both types appear to provide — confirm.
        model_config, measurements = self._detailed_report_data[model_config_name]
        sort_by_tag = (
            "perf_latency_p99" if self._mode == "online" else "perf_throughput"
        )
        measurements = sorted(
            measurements,
            key=lambda x: x.get_non_gpu_metric_value(sort_by_tag),
            reverse=True,
        )
        report_gpu_metrics = (
            self._config.always_report_gpu_metrics or not model_config.cpu_only()
        )

        # The first column reflects the swept parameter: request rate or
        # concurrency online, client batch size offline.
        if self._was_measured_with_request_rate(measurements[0]):
            first_column_header = (
                "Request Rate" if self._mode == "online" else "Client Batch Size"
            )
            first_column_tag = (
                "request-rate-range" if self._mode == "online" else "batch-size"
            )
        else:
            first_column_header = (
                "Request Concurrency" if self._mode == "online" else "Client Batch Size"
            )
            first_column_tag = (
                "concurrency-range" if self._mode == "online" else "batch-size"
            )

        if report_gpu_metrics:
            headers = [
                first_column_header,
                "p99 Latency (ms)",
                "Client Response Wait (ms)",
                "Server Queue (ms)",
                "Server Compute Input (ms)",
                "Server Compute Infer (ms)",
                "Throughput (infer/sec)",
                "Max CPU Memory Usage (MB)",
                "Max GPU Memory Usage (MB)",
                "Average GPU Utilization (%)",
            ]
        else:
            headers = [
                first_column_header,
                "p99 Latency (ms)",
                "Client Response Wait (ms)",
                "Server Queue (ms)",
                "Server Compute Input (ms)",
                "Server Compute Infer (ms)",
                "Throughput (infer/sec)",
                "Max CPU Memory Usage (MB)",
            ]

        # Drop the CPU RAM column when the metric was never collected
        if not self._cpu_metrics_were_gathered():
            headers.remove("Max CPU Memory Usage (MB)")

        detailed_table = ResultTable(headers, title="Detailed Table")

        # Construct table
        if report_gpu_metrics:
            for measurement in measurements:
                row = [
                    # TODO-TMA-568: This needs to be updated because there will be multiple model configs
                    measurement.model_specific_pa_params()[0][first_column_tag],
                    measurement.get_non_gpu_metric_value("perf_latency_p99"),
                    measurement.get_non_gpu_metric_value("perf_client_response_wait"),
                    measurement.get_non_gpu_metric_value("perf_server_queue"),
                    measurement.get_non_gpu_metric_value("perf_server_compute_input"),
                    measurement.get_non_gpu_metric_value("perf_server_compute_infer"),
                    measurement.get_non_gpu_metric_value("perf_throughput"),
                ]
                if self._cpu_metrics_were_gathered():
                    row.append(measurement.get_non_gpu_metric_value("cpu_used_ram"))

                row.append(measurement.get_gpu_metric_value("gpu_used_memory"))
                row.append(
                    round(measurement.get_gpu_metric_value("gpu_utilization"), 1)
                )

                detailed_table.insert_row_by_index(row)
        else:
            for measurement in measurements:
                row = [
                    # TODO-TMA-568: This needs to be updated because there will be multiple model configs
                    measurement.model_specific_pa_params()[0][first_column_tag],
                    measurement.get_non_gpu_metric_value("perf_latency_p99"),
                    measurement.get_non_gpu_metric_value("perf_client_response_wait"),
                    measurement.get_non_gpu_metric_value("perf_server_queue"),
                    measurement.get_non_gpu_metric_value("perf_server_compute_input"),
                    measurement.get_non_gpu_metric_value("perf_server_compute_infer"),
                    measurement.get_non_gpu_metric_value("perf_throughput"),
                ]
                if self._cpu_metrics_were_gathered():
                    row.append(measurement.get_non_gpu_metric_value("cpu_used_ram"))

                detailed_table.insert_row_by_index(row)
        return detailed_table
    def _build_detailed_info(self, model_config_name):
        """
        Constructs important info sentence about the model config
        specified.

        Ensemble and BLS models get a composing-model listing; plain models
        get a one-sentence description of instances, batch size, batching
        mode, and platform. The hardware clause names the GPU(s) when GPU
        metrics apply, otherwise "CPU".
        """

        run_config, measurements = self._detailed_report_data[model_config_name]

        # TODO-TMA-568 - add support for multi-model
        model_config = run_config.model_run_configs()[0].model_config()
        instance_group_string = self._create_instance_group_phrase(model_config)
        dynamic_batching = model_config.dynamic_batching_string()
        max_batch_size = model_config.max_batch_size()
        platform = model_config.get_field("platform")

        max_batch_size_string = f"a max batch size of {max_batch_size}"

        if dynamic_batching == "Disabled":
            dynamic_batching_string = "dynamic batching disabled"
        else:
            dynamic_batching_string = "dynamic batching enabled"

        gpu_cpu_string = "CPU"

        report_gpu_metrics = (
            self._config.always_report_gpu_metrics or not run_config.cpu_only()
        )

        if report_gpu_metrics:
            gpu_names, max_memories = self._get_gpu_stats(measurements)
            gpu_cpu_string = f"GPU(s) {gpu_names} with total memory {max_memories}"

        # NOTE(review): the ensemble and BLS branches below are identical;
        # kept separate, presumably for future divergence.
        if run_config.is_ensemble_model():
            sentence = f"<strong>{model_config_name}</strong> is comprised of the following composing models:"

            for composing_config_variant in run_config.composing_config_variants():
                sentence = (
                    sentence
                    + "<LI> "
                    + self._create_summary_config_info(composing_config_variant)
                    + " </LI>"
                )

            sentence = (
                sentence
                + f"<br>{len(measurements)} measurement(s) were obtained for the model config on {gpu_cpu_string}."
            )
        elif run_config.is_bls_model():
            sentence = f"<strong>{model_config_name}</strong> is comprised of the following composing models:"

            for composing_config_variant in run_config.composing_config_variants():
                sentence = (
                    sentence
                    + "<LI> "
                    + self._create_summary_config_info(composing_config_variant)
                    + " </LI>"
                )

            sentence = (
                sentence
                + f"<br>{len(measurements)} measurement(s) were obtained for the model config on {gpu_cpu_string}."
            )
        else:
            sentence = (
                f"The model config <strong>{model_config_name}</strong> uses {instance_group_string} "
                f"with {max_batch_size_string} and has {dynamic_batching_string}. "
                f"{len(measurements)} measurement(s) were obtained for the model config on "
                f"{gpu_cpu_string}. "
                f"This model uses the platform {platform}."
            )

        return sentence
+ def _get_gpu_count(self):
1283
+ return len(self._gpu_info)
1284
+
1285
+ def _get_gpu_stats(
1286
+ self, measurements: List["RunConfigMeasurement"]
1287
+ ) -> Tuple[str, str]:
1288
+ """
1289
+ Gets names and max total memory of GPUs used in measurements as a
1290
+ tuple of strings
1291
+
1292
+ Returns
1293
+ -------
1294
+ (gpu_names_str, max_memory_str):
1295
+ The GPU names as a string, and the total combined memory as a string
1296
+ """
1297
+
1298
+ gpu_dict: Dict[str, Any] = {}
1299
+ for gpu_uuid, gpu_info in self._gpu_info.items():
1300
+ for measurement in measurements:
1301
+ if gpu_uuid in measurement.gpus_used():
1302
+ gpu_name = gpu_info["name"]
1303
+ max_memory = round(gpu_info["total_memory"] / (2**30), 1)
1304
+ if gpu_name not in gpu_dict:
1305
+ gpu_dict[gpu_name] = {"memory": max_memory, "count": 1}
1306
+ else:
1307
+ gpu_dict[gpu_name]["count"] += 1
1308
+ break
1309
+
1310
+ gpu_names = ""
1311
+ max_memory = 0
1312
+ for name in gpu_dict.keys():
1313
+ count = gpu_dict[name]["count"]
1314
+ memory = gpu_dict[name]["memory"]
1315
+ if gpu_names != "":
1316
+ gpu_names += ", "
1317
+ gpu_names += f"{count} x {name}"
1318
+ max_memory += memory * count
1319
+
1320
+ max_mem_str = f"{max_memory} GB"
1321
+ return (gpu_names, max_mem_str)
1322
+
1323
    def _build_constraint_strings(self) -> Dict[str, str]:
        """
        Constructs constraint strings to show the constraints under which
        each model is being run.

        Returns a dict mapping model name to a comma-joined string of
        constraints, e.g. "Max p99 Latency: 99 ms". Models with no
        constraints map to an empty string.
        """

        constraint_strs = {}

        for (
            model_name,
            model_constraints,
        ) in self._constraint_manager.get_constraints_for_all_models().items():
            strs = []
            if model_constraints:
                for metric, constraint in model_constraints.items():
                    # Metric headers look like "p99 Latency (ms)"; split off
                    # the trailing "(unit)" to recombine name and unit below.
                    metric_header = MetricsManager.get_metric_types([metric])[0].header(
                        aggregation_tag=""
                    )
                    for constraint_type, constraint_val in constraint.items():
                        # String looks like 'Max p99 Latency: 99 ms'
                        metric_header_name = metric_header.rsplit(" ", 1)[0]
                        metric_unit = metric_header.rsplit(" ", 1)[1][1:-1]
                        strs.append(
                            f"{constraint_type.capitalize()} {metric_header_name}: {constraint_val} {metric_unit}"
                        )
            constraint_strs[model_name] = ", ".join(strs)
        return constraint_strs
+ def _cpu_metrics_were_gathered(self):
1352
+ if self._cpu_metrics_gathered_sticky is None:
1353
+ used_ram = None
1354
+ if self._detailed_report_data:
1355
+ key = list(self._detailed_report_data.keys())[0]
1356
+ _, measurements = self._detailed_report_data[key]
1357
+ used_ram = measurements[0].get_non_gpu_metric_value("cpu_used_ram")
1358
+ else:
1359
+ key = list(self._summary_data.keys())[0]
1360
+ _, measurement = self._summary_data[key][0]
1361
+ used_ram = measurement.get_non_gpu_metric_value("cpu_used_ram")
1362
+
1363
+ self._cpu_metrics_gathered_sticky = used_ram != 0
1364
+
1365
+ return self._cpu_metrics_gathered_sticky
1366
+
1367
+ def _was_measured_with_request_rate(
1368
+ self, measurement: RunConfigMeasurement
1369
+ ) -> bool:
1370
+ if (
1371
+ "request-rate-range" in measurement.model_specific_pa_params()[0]
1372
+ and measurement.model_specific_pa_params()[0]["request-rate-range"]
1373
+ ):
1374
+ return True
1375
+ else:
1376
+ return False