triton_model_analyzer-1.48.0-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1376 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from typing import TYPE_CHECKING, Any, DefaultDict, Dict, List, Tuple, Union
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from model_analyzer.result.run_config_measurement import RunConfigMeasurement
|
|
21
|
+
|
|
22
|
+
import logging
|
|
23
|
+
import os
|
|
24
|
+
from collections import defaultdict
|
|
25
|
+
|
|
26
|
+
from model_analyzer.config.generate.base_model_config_generator import (
|
|
27
|
+
BaseModelConfigGenerator,
|
|
28
|
+
)
|
|
29
|
+
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
|
|
30
|
+
from model_analyzer.config.input.config_command_report import ConfigCommandReport
|
|
31
|
+
from model_analyzer.config.run.run_config import RunConfig
|
|
32
|
+
from model_analyzer.constants import (
|
|
33
|
+
GLOBAL_CONSTRAINTS_KEY,
|
|
34
|
+
LOGGER_NAME,
|
|
35
|
+
TOP_MODELS_REPORT_KEY,
|
|
36
|
+
)
|
|
37
|
+
from model_analyzer.plots.plot_manager import PlotManager
|
|
38
|
+
from model_analyzer.record.metrics_manager import MetricsManager
|
|
39
|
+
from model_analyzer.reports.html_report import HTMLReport
|
|
40
|
+
from model_analyzer.reports.pdf_report import PDFReport
|
|
41
|
+
from model_analyzer.result.constraint_manager import ConstraintManager
|
|
42
|
+
from model_analyzer.result.result_manager import ResultManager
|
|
43
|
+
from model_analyzer.result.result_table import ResultTable
|
|
44
|
+
from model_analyzer.result.run_config_measurement import RunConfigMeasurement
|
|
45
|
+
|
|
46
|
+
from .report_factory import ReportFactory
|
|
47
|
+
|
|
48
|
+
logger = logging.getLogger(LOGGER_NAME)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class ReportManager:
|
|
52
|
+
"""
|
|
53
|
+
Manages the building and export of
|
|
54
|
+
various types of reports
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
def __init__(
|
|
58
|
+
self,
|
|
59
|
+
mode: str,
|
|
60
|
+
config: Union[ConfigCommandProfile, ConfigCommandReport],
|
|
61
|
+
gpu_info: Dict[str, Dict[str, Any]],
|
|
62
|
+
result_manager: ResultManager,
|
|
63
|
+
constraint_manager: ConstraintManager,
|
|
64
|
+
):
|
|
65
|
+
"""
|
|
66
|
+
Parameters
|
|
67
|
+
----------
|
|
68
|
+
mode: str
|
|
69
|
+
The mode in which Model Analyzer is operating
|
|
70
|
+
config :ConfigCommandProfile or ConfigCommandReport
|
|
71
|
+
The model analyzer's config containing information
|
|
72
|
+
about the kind of reports to generate
|
|
73
|
+
gpu_info: dict
|
|
74
|
+
containing information about the GPUs used
|
|
75
|
+
during profiling
|
|
76
|
+
result_manager : ResultManager
|
|
77
|
+
instance that manages the result tables and
|
|
78
|
+
adding results
|
|
79
|
+
constraint_manager: ConstraintManager
|
|
80
|
+
instance that manages constraints
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
self._mode = mode
|
|
84
|
+
self._config = config
|
|
85
|
+
self._gpu_info = gpu_info
|
|
86
|
+
self._result_manager = result_manager
|
|
87
|
+
self._constraint_manager = constraint_manager
|
|
88
|
+
|
|
89
|
+
# Create the plot manager
|
|
90
|
+
self._plot_manager = PlotManager(
|
|
91
|
+
config=self._config,
|
|
92
|
+
result_manager=self._result_manager,
|
|
93
|
+
constraint_manager=self._constraint_manager,
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
self._summary_data: DefaultDict[
|
|
97
|
+
str, List[Tuple[RunConfig, RunConfigMeasurement]]
|
|
98
|
+
] = defaultdict(list)
|
|
99
|
+
self._summaries: Dict[str, Union[PDFReport, HTMLReport]] = {}
|
|
100
|
+
|
|
101
|
+
self._detailed_report_data: Dict[
|
|
102
|
+
str, Tuple[RunConfig, RunConfigMeasurement]
|
|
103
|
+
] = {}
|
|
104
|
+
self._detailed_reports: Dict[str, Union[PDFReport, HTMLReport]] = {}
|
|
105
|
+
|
|
106
|
+
self._reports_export_directory = os.path.join(config.export_path, "reports")
|
|
107
|
+
os.makedirs(self._reports_export_directory, exist_ok=True)
|
|
108
|
+
|
|
109
|
+
self._cpu_metrics_gathered_sticky = None
|
|
110
|
+
|
|
111
|
+
def report_keys(self):
|
|
112
|
+
"""
|
|
113
|
+
Returns
|
|
114
|
+
-------
|
|
115
|
+
list of str
|
|
116
|
+
identifiers for all the
|
|
117
|
+
reports in this report manager
|
|
118
|
+
"""
|
|
119
|
+
|
|
120
|
+
return list(self._summary_data.keys())
|
|
121
|
+
|
|
122
|
+
def data(self, report_key):
|
|
123
|
+
"""
|
|
124
|
+
Parameters
|
|
125
|
+
----------
|
|
126
|
+
report_key: str
|
|
127
|
+
An identifier for a particular report
|
|
128
|
+
Returns
|
|
129
|
+
-------
|
|
130
|
+
dict
|
|
131
|
+
The data in the report corresponding with
|
|
132
|
+
the report key
|
|
133
|
+
"""
|
|
134
|
+
|
|
135
|
+
return self._summary_data[report_key]
|
|
136
|
+
|
|
137
|
+
def create_summaries(self):
|
|
138
|
+
"""
|
|
139
|
+
Add summary data and build summary report
|
|
140
|
+
"""
|
|
141
|
+
|
|
142
|
+
self._add_summary_data()
|
|
143
|
+
self._plot_manager.create_summary_plots()
|
|
144
|
+
self._plot_manager.export_summary_plots()
|
|
145
|
+
|
|
146
|
+
statistics = self._result_manager.get_result_statistics()
|
|
147
|
+
model_names = self._result_manager._profile_model_names
|
|
148
|
+
|
|
149
|
+
at_least_one_summary = False
|
|
150
|
+
for model_name in model_names:
|
|
151
|
+
if model_name in self._summary_data:
|
|
152
|
+
at_least_one_summary = True
|
|
153
|
+
self._summaries[model_name] = self._build_summary_report(
|
|
154
|
+
report_key=model_name,
|
|
155
|
+
num_configs=self._config.num_configs_per_model,
|
|
156
|
+
statistics=statistics,
|
|
157
|
+
)
|
|
158
|
+
else:
|
|
159
|
+
logger.warning(
|
|
160
|
+
f"No data found for model {model_name}, skipping export summary."
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
if self._config.num_top_model_configs and at_least_one_summary:
|
|
164
|
+
self._summaries[TOP_MODELS_REPORT_KEY] = self._build_summary_report(
|
|
165
|
+
report_key=TOP_MODELS_REPORT_KEY,
|
|
166
|
+
num_configs=self._config.num_top_model_configs,
|
|
167
|
+
statistics=statistics,
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
def export_summaries(self):
|
|
171
|
+
"""
|
|
172
|
+
Write a summary to disk
|
|
173
|
+
"""
|
|
174
|
+
|
|
175
|
+
for report_key, summary in self._summaries.items():
|
|
176
|
+
model_report_dir = os.path.join(
|
|
177
|
+
self._reports_export_directory, "summaries", report_key
|
|
178
|
+
)
|
|
179
|
+
os.makedirs(model_report_dir, exist_ok=True)
|
|
180
|
+
output_filename = os.path.join(
|
|
181
|
+
model_report_dir, f"result_summary.{summary.get_file_extension()}"
|
|
182
|
+
)
|
|
183
|
+
logger.info(f"Exporting Summary Report to {output_filename}")
|
|
184
|
+
summary.write_report(filename=output_filename)
|
|
185
|
+
|
|
186
|
+
def create_detailed_reports(self):
|
|
187
|
+
"""
|
|
188
|
+
Adds detailed report data and build detailed reports
|
|
189
|
+
"""
|
|
190
|
+
|
|
191
|
+
self._add_detailed_report_data()
|
|
192
|
+
self._plot_manager.create_detailed_plots()
|
|
193
|
+
self._plot_manager.export_detailed_plots()
|
|
194
|
+
|
|
195
|
+
for report_model_config in self._config.report_model_configs:
|
|
196
|
+
model_config_name = report_model_config.model_config_name()
|
|
197
|
+
self._detailed_reports[model_config_name] = self._build_detailed_report(
|
|
198
|
+
report_model_config
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
def export_detailed_reports(self):
|
|
202
|
+
"""
|
|
203
|
+
Write a detailed report to disk
|
|
204
|
+
"""
|
|
205
|
+
|
|
206
|
+
for report_key, report in self._detailed_reports.items():
|
|
207
|
+
model_report_dir = os.path.join(
|
|
208
|
+
self._reports_export_directory, "detailed", report_key
|
|
209
|
+
)
|
|
210
|
+
os.makedirs(model_report_dir, exist_ok=True)
|
|
211
|
+
output_filename = os.path.join(
|
|
212
|
+
model_report_dir, f"detailed_report.{report.get_file_extension()}"
|
|
213
|
+
)
|
|
214
|
+
logger.info(f"Exporting Detailed Report to {output_filename}")
|
|
215
|
+
report.write_report(filename=output_filename)
|
|
216
|
+
|
|
217
|
+
def _add_summary_data(self):
|
|
218
|
+
"""
|
|
219
|
+
Adds measurements on which the report manager
|
|
220
|
+
can do complex analyses or with which it can
|
|
221
|
+
build tables and add to reports
|
|
222
|
+
"""
|
|
223
|
+
|
|
224
|
+
model_names = self._result_manager._profile_model_names
|
|
225
|
+
|
|
226
|
+
for model_name in model_names:
|
|
227
|
+
top_results = self._result_manager.top_n_results(
|
|
228
|
+
model_name=model_name,
|
|
229
|
+
n=self._config.num_configs_per_model,
|
|
230
|
+
include_default=True,
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
for result in top_results:
|
|
234
|
+
for measurement in result.top_n_measurements(n=1):
|
|
235
|
+
self._summary_data[model_name].append(
|
|
236
|
+
(result.run_config(), measurement)
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
if self._config.num_top_model_configs:
|
|
240
|
+
for result in self._result_manager.top_n_results(
|
|
241
|
+
n=self._config.num_top_model_configs
|
|
242
|
+
):
|
|
243
|
+
for measurement in result.top_n_measurements(n=1):
|
|
244
|
+
self._summary_data[TOP_MODELS_REPORT_KEY].append(
|
|
245
|
+
(result.run_config(), measurement)
|
|
246
|
+
)
|
|
247
|
+
|
|
248
|
+
def _add_detailed_report_data(self):
|
|
249
|
+
"""
|
|
250
|
+
Adds data specific to the model configs
|
|
251
|
+
for which we want detailed reports
|
|
252
|
+
"""
|
|
253
|
+
|
|
254
|
+
model_config_names = [
|
|
255
|
+
model.model_config_name() for model in self._config.report_model_configs
|
|
256
|
+
]
|
|
257
|
+
|
|
258
|
+
# TODO-TMA-650 - this needs to be updated for multi-model
|
|
259
|
+
for model_config_name in model_config_names:
|
|
260
|
+
self._detailed_report_data[
|
|
261
|
+
model_config_name
|
|
262
|
+
] = self._result_manager.get_model_configs_run_config_measurements(
|
|
263
|
+
model_config_name
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
def _build_detailed_report(self, report_model_config):
|
|
267
|
+
"""
|
|
268
|
+
Builder method for a detailed report
|
|
269
|
+
"""
|
|
270
|
+
|
|
271
|
+
detailed_report = ReportFactory.create_report()
|
|
272
|
+
|
|
273
|
+
report_key = report_model_config.model_config_name()
|
|
274
|
+
model_config, _ = self._detailed_report_data[report_key]
|
|
275
|
+
|
|
276
|
+
detailed_report.add_title(title="Detailed Report")
|
|
277
|
+
detailed_report.add_subheading(subheading=f"Model Config: {report_key}")
|
|
278
|
+
|
|
279
|
+
if self._mode == "online":
|
|
280
|
+
# Add main latency breakdown image
|
|
281
|
+
detailed_plot = os.path.join(
|
|
282
|
+
self._config.export_path,
|
|
283
|
+
"plots",
|
|
284
|
+
"detailed",
|
|
285
|
+
report_key,
|
|
286
|
+
"latency_breakdown.png",
|
|
287
|
+
)
|
|
288
|
+
detailed_caption = (
|
|
289
|
+
f"Latency Breakdown for Online Performance of {report_key}"
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
# First add row of detailed
|
|
293
|
+
detailed_report.add_images([detailed_plot], [detailed_caption])
|
|
294
|
+
|
|
295
|
+
# Next add the SimplePlots created for this detailed report
|
|
296
|
+
plot_stack = []
|
|
297
|
+
caption_stack = []
|
|
298
|
+
plot_path = os.path.join(
|
|
299
|
+
self._config.export_path, "plots", "simple", report_key
|
|
300
|
+
)
|
|
301
|
+
for plot_config in report_model_config.plots():
|
|
302
|
+
if (
|
|
303
|
+
plot_config.title().startswith("RAM")
|
|
304
|
+
and not self._cpu_metrics_were_gathered()
|
|
305
|
+
):
|
|
306
|
+
continue
|
|
307
|
+
if model_config.cpu_only() and (
|
|
308
|
+
plot_config.y_axis().startswith("gpu_")
|
|
309
|
+
or plot_config.x_axis().startswith("gpu_")
|
|
310
|
+
):
|
|
311
|
+
continue
|
|
312
|
+
plot_stack.append(os.path.join(plot_path, f"{plot_config.name()}.png"))
|
|
313
|
+
caption_stack.append(
|
|
314
|
+
f"{plot_config.title()} curves for config {report_key}"
|
|
315
|
+
)
|
|
316
|
+
if len(plot_stack) == 2:
|
|
317
|
+
detailed_report.add_images(plot_stack, caption_stack, float="left")
|
|
318
|
+
plot_stack = []
|
|
319
|
+
caption_stack = []
|
|
320
|
+
|
|
321
|
+
# Odd number of plots
|
|
322
|
+
if plot_stack:
|
|
323
|
+
detailed_report.add_images(plot_stack, caption_stack, float="left")
|
|
324
|
+
|
|
325
|
+
# Next add table of measurements
|
|
326
|
+
detailed_table = self._build_detailed_table(report_key)
|
|
327
|
+
detailed_report.add_table(table=detailed_table)
|
|
328
|
+
|
|
329
|
+
# Add some details about the config
|
|
330
|
+
detailed_info = self._build_detailed_info(report_key)
|
|
331
|
+
detailed_report.add_line_breaks(num_breaks=2)
|
|
332
|
+
detailed_report.add_paragraph(detailed_info, font_size=18)
|
|
333
|
+
|
|
334
|
+
sort_by_tag = "latency" if self._mode == "online" else "throughput"
|
|
335
|
+
detailed_report.add_paragraph(
|
|
336
|
+
"The first plot above shows the breakdown of the latencies in "
|
|
337
|
+
"the latency throughput curve for this model config. Following that "
|
|
338
|
+
"are the requested configurable plots showing the relationship between "
|
|
339
|
+
"various metrics measured by the Model Analyzer. The above table contains "
|
|
340
|
+
"detailed data for each of the measurements taken for this model config in "
|
|
341
|
+
f"decreasing order of {sort_by_tag}.",
|
|
342
|
+
font_size=18,
|
|
343
|
+
)
|
|
344
|
+
return detailed_report
|
|
345
|
+
|
|
346
|
+
def _build_summary_report(self, report_key, num_configs, statistics):
|
|
347
|
+
"""
|
|
348
|
+
Builder method for a summary
|
|
349
|
+
report.
|
|
350
|
+
"""
|
|
351
|
+
|
|
352
|
+
summary = ReportFactory.create_report()
|
|
353
|
+
|
|
354
|
+
total_measurements = statistics.total_measurements(report_key)
|
|
355
|
+
total_configurations = statistics.total_configurations(report_key)
|
|
356
|
+
num_best_configs = min(num_configs, total_configurations)
|
|
357
|
+
|
|
358
|
+
# Get GPU names and memory
|
|
359
|
+
run_config = self._summary_data[report_key][0][0]
|
|
360
|
+
report_gpu_metrics = (
|
|
361
|
+
self._config.always_report_gpu_metrics or not run_config.cpu_only()
|
|
362
|
+
)
|
|
363
|
+
|
|
364
|
+
(gpu_names, max_memories) = (None, None)
|
|
365
|
+
if report_gpu_metrics:
|
|
366
|
+
(gpu_names, max_memories) = self._get_gpu_stats(
|
|
367
|
+
measurements=[v for _, v in self._summary_data[report_key]]
|
|
368
|
+
)
|
|
369
|
+
|
|
370
|
+
# Get constraints
|
|
371
|
+
constraint_str = self._create_constraint_string(report_key)
|
|
372
|
+
|
|
373
|
+
# Build summary table and info sentence
|
|
374
|
+
table, summary_sentence = self._build_summary_table(
|
|
375
|
+
report_key=report_key,
|
|
376
|
+
num_configurations=total_configurations,
|
|
377
|
+
num_measurements=total_measurements,
|
|
378
|
+
gpu_name=gpu_names,
|
|
379
|
+
report_gpu_metrics=report_gpu_metrics,
|
|
380
|
+
)
|
|
381
|
+
|
|
382
|
+
# Add summary sections
|
|
383
|
+
summary.add_title(title=f"{self._mode.title()} Result Summary")
|
|
384
|
+
summary.add_subheading(f"Model: {' and '.join(report_key.split(','))}")
|
|
385
|
+
if report_gpu_metrics:
|
|
386
|
+
summary.add_paragraph(f"GPU(s): {gpu_names}")
|
|
387
|
+
summary.add_paragraph(f"Total Available GPU Memory: {max_memories}")
|
|
388
|
+
summary.add_paragraph(f"Constraint targets: {constraint_str}")
|
|
389
|
+
summary.add_paragraph(summary_sentence)
|
|
390
|
+
summary.add_paragraph(
|
|
391
|
+
f"Curves corresponding to the {num_best_configs} best model "
|
|
392
|
+
f"configuration(s) out of a total of {total_configurations} are "
|
|
393
|
+
"shown in the plots."
|
|
394
|
+
)
|
|
395
|
+
|
|
396
|
+
throughput_plot_config = self._config.plots[0]
|
|
397
|
+
throughput_plot = os.path.join(
|
|
398
|
+
self._config.export_path,
|
|
399
|
+
"plots",
|
|
400
|
+
"simple",
|
|
401
|
+
report_key,
|
|
402
|
+
f"{throughput_plot_config.name()}.png",
|
|
403
|
+
)
|
|
404
|
+
|
|
405
|
+
caption_throughput = f"{throughput_plot_config.title()} curves for {num_best_configs} best configurations."
|
|
406
|
+
|
|
407
|
+
if report_gpu_metrics:
|
|
408
|
+
summary.add_images([throughput_plot], [caption_throughput], image_width=66)
|
|
409
|
+
if self._mode == "online":
|
|
410
|
+
memory_latency_plot = os.path.join(
|
|
411
|
+
self._config.export_path,
|
|
412
|
+
"plots",
|
|
413
|
+
"simple",
|
|
414
|
+
report_key,
|
|
415
|
+
"gpu_mem_v_latency.png",
|
|
416
|
+
)
|
|
417
|
+
caption_memory_latency = f"GPU Memory vs. Latency curves for {num_best_configs} best configurations."
|
|
418
|
+
summary.add_images(
|
|
419
|
+
[memory_latency_plot], [caption_memory_latency], image_width=66
|
|
420
|
+
)
|
|
421
|
+
else:
|
|
422
|
+
summary.add_images([throughput_plot], [caption_throughput], image_width=66)
|
|
423
|
+
if self._mode == "online" and self._cpu_metrics_were_gathered():
|
|
424
|
+
memory_latency_plot = os.path.join(
|
|
425
|
+
self._config.export_path,
|
|
426
|
+
"plots",
|
|
427
|
+
"simple",
|
|
428
|
+
report_key,
|
|
429
|
+
"cpu_mem_v_latency.png",
|
|
430
|
+
)
|
|
431
|
+
caption_memory_latency = f"CPU Memory vs. Latency curves for {num_best_configs} best configurations."
|
|
432
|
+
summary.add_images(
|
|
433
|
+
[memory_latency_plot], [caption_memory_latency], image_width=66
|
|
434
|
+
)
|
|
435
|
+
|
|
436
|
+
caption_results_table = (
|
|
437
|
+
'<div style = "display:block; clear:both; page-break-after:always;"></div>'
|
|
438
|
+
"The following table summarizes each configuration at the measurement"
|
|
439
|
+
" that optimizes the desired metrics under the given constraints."
|
|
440
|
+
)
|
|
441
|
+
|
|
442
|
+
if self._result_manager._profiling_models_concurrently():
|
|
443
|
+
caption_results_table = (
|
|
444
|
+
caption_results_table + " Per model values are parenthetical."
|
|
445
|
+
)
|
|
446
|
+
|
|
447
|
+
if run_config.is_ensemble_model():
|
|
448
|
+
caption_results_table = (
|
|
449
|
+
caption_results_table
|
|
450
|
+
+ " The ensemble's composing model values are listed in the following order: "
|
|
451
|
+
)
|
|
452
|
+
elif run_config.is_bls_model():
|
|
453
|
+
caption_results_table = (
|
|
454
|
+
caption_results_table
|
|
455
|
+
+ " The BLS composing model values are listed in the following order: "
|
|
456
|
+
)
|
|
457
|
+
|
|
458
|
+
if run_config.is_ensemble_model() or run_config.is_bls_model():
|
|
459
|
+
for composing_config_name in run_config.model_run_configs()[
|
|
460
|
+
0
|
|
461
|
+
].get_composing_config_names():
|
|
462
|
+
caption_results_table = (
|
|
463
|
+
caption_results_table
|
|
464
|
+
+ BaseModelConfigGenerator.extract_model_name_from_variant_name(
|
|
465
|
+
composing_config_name
|
|
466
|
+
)
|
|
467
|
+
+ ", "
|
|
468
|
+
)
|
|
469
|
+
caption_results_table = caption_results_table[:-2] # removes comma
|
|
470
|
+
|
|
471
|
+
summary.add_paragraph(caption_results_table)
|
|
472
|
+
summary.add_table(table=table)
|
|
473
|
+
|
|
474
|
+
return summary
|
|
475
|
+
|
|
476
|
+
def _build_summary_table(
|
|
477
|
+
self,
|
|
478
|
+
report_key,
|
|
479
|
+
num_configurations,
|
|
480
|
+
num_measurements,
|
|
481
|
+
gpu_name=None,
|
|
482
|
+
report_gpu_metrics=True,
|
|
483
|
+
):
|
|
484
|
+
"""
|
|
485
|
+
Creates a result table corresponding
|
|
486
|
+
to the best measurements for a particular
|
|
487
|
+
model
|
|
488
|
+
"""
|
|
489
|
+
|
|
490
|
+
(
|
|
491
|
+
best_run_config,
|
|
492
|
+
best_run_config_measurement,
|
|
493
|
+
sorted_measurements,
|
|
494
|
+
) = self._find_best_configs(report_key)
|
|
495
|
+
|
|
496
|
+
multi_model = len(best_run_config.model_run_configs()) > 1
|
|
497
|
+
is_ensemble = best_run_config.is_ensemble_model()
|
|
498
|
+
is_bls = best_run_config.is_bls_model()
|
|
499
|
+
has_composing_models = is_ensemble or is_bls
|
|
500
|
+
|
|
501
|
+
summary_sentence = self._create_summary_sentence(
|
|
502
|
+
report_key,
|
|
503
|
+
num_configurations,
|
|
504
|
+
num_measurements,
|
|
505
|
+
best_run_config,
|
|
506
|
+
best_run_config_measurement,
|
|
507
|
+
gpu_name,
|
|
508
|
+
report_gpu_metrics,
|
|
509
|
+
multi_model,
|
|
510
|
+
is_ensemble,
|
|
511
|
+
is_bls,
|
|
512
|
+
)
|
|
513
|
+
|
|
514
|
+
summary_table = self._construct_summary_result_table(
|
|
515
|
+
sorted_measurements, multi_model, has_composing_models, report_gpu_metrics
|
|
516
|
+
)
|
|
517
|
+
|
|
518
|
+
return summary_table, summary_sentence
|
|
519
|
+
|
|
520
|
+
def _find_best_configs(self, report_key):
|
|
521
|
+
sorted_measurements = sorted(
|
|
522
|
+
self._summary_data[report_key], key=lambda x: x[1], reverse=True
|
|
523
|
+
)
|
|
524
|
+
|
|
525
|
+
best_run_config = sorted_measurements[0][0]
|
|
526
|
+
best_run_config_measurement = sorted_measurements[0][1]
|
|
527
|
+
|
|
528
|
+
return best_run_config, best_run_config_measurement, sorted_measurements
|
|
529
|
+
|
|
530
|
+
def _create_constraint_string(self, report_key: str) -> str:
|
|
531
|
+
constraint_strs = self._build_constraint_strings()
|
|
532
|
+
|
|
533
|
+
constraint_str = "None"
|
|
534
|
+
if constraint_strs:
|
|
535
|
+
if report_key == TOP_MODELS_REPORT_KEY:
|
|
536
|
+
constraint_str = constraint_strs[GLOBAL_CONSTRAINTS_KEY]
|
|
537
|
+
elif "," in report_key: # indicates multi-model
|
|
538
|
+
constraint_str = self._create_multi_model_constraint_string(
|
|
539
|
+
report_key, constraint_strs
|
|
540
|
+
)
|
|
541
|
+
else: # single-model
|
|
542
|
+
if report_key in constraint_strs:
|
|
543
|
+
constraint_str = constraint_strs[report_key]
|
|
544
|
+
|
|
545
|
+
return constraint_str
|
|
546
|
+
|
|
547
|
+
def _create_multi_model_constraint_string(
|
|
548
|
+
self, report_key: str, constraint_strs: Dict[str, str]
|
|
549
|
+
) -> str:
|
|
550
|
+
constraint_str = ""
|
|
551
|
+
for model_name in report_key.split(","):
|
|
552
|
+
if model_name in constraint_strs:
|
|
553
|
+
if constraint_str:
|
|
554
|
+
constraint_str += "<br>"
|
|
555
|
+
for i in range(len("Constraint targets: ")):
|
|
556
|
+
constraint_str += " "
|
|
557
|
+
|
|
558
|
+
constraint_str += (
|
|
559
|
+
"<strong>"
|
|
560
|
+
+ model_name
|
|
561
|
+
+ "</strong>: "
|
|
562
|
+
+ constraint_strs[model_name]
|
|
563
|
+
)
|
|
564
|
+
|
|
565
|
+
return constraint_str
|
|
566
|
+
|
|
567
|
+
def _create_summary_sentence(
|
|
568
|
+
self,
|
|
569
|
+
report_key,
|
|
570
|
+
num_configurations,
|
|
571
|
+
num_measurements,
|
|
572
|
+
best_run_config,
|
|
573
|
+
best_run_config_measurement,
|
|
574
|
+
gpu_name,
|
|
575
|
+
report_gpu_metrics,
|
|
576
|
+
multi_model,
|
|
577
|
+
is_ensemble,
|
|
578
|
+
is_bls,
|
|
579
|
+
):
|
|
580
|
+
measurement_phrase = self._create_summary_measurement_phrase(num_measurements)
|
|
581
|
+
config_phrase = self._create_summary_config_phrase(
|
|
582
|
+
best_run_config, num_configurations
|
|
583
|
+
)
|
|
584
|
+
objective_phrase = self._create_summary_objective_phrase(
|
|
585
|
+
report_key, best_run_config_measurement
|
|
586
|
+
)
|
|
587
|
+
gpu_name_phrase = self._create_summary_gpu_name_phrase(
|
|
588
|
+
gpu_name, report_gpu_metrics
|
|
589
|
+
)
|
|
590
|
+
|
|
591
|
+
summary_sentence = (
|
|
592
|
+
f"In {measurement_phrase} across {config_phrase} "
|
|
593
|
+
f"{objective_phrase}, under the given constraints{gpu_name_phrase}."
|
|
594
|
+
)
|
|
595
|
+
|
|
596
|
+
if is_ensemble:
|
|
597
|
+
summary_sentence = (
|
|
598
|
+
summary_sentence
|
|
599
|
+
+ self._create_ensemble_summary_sentence(best_run_config)
|
|
600
|
+
)
|
|
601
|
+
elif is_bls:
|
|
602
|
+
summary_sentence = summary_sentence + self._create_bls_summary_sentence(
|
|
603
|
+
best_run_config
|
|
604
|
+
)
|
|
605
|
+
else:
|
|
606
|
+
summary_sentence = summary_sentence + self._create_model_summary_sentence(
|
|
607
|
+
best_run_config
|
|
608
|
+
)
|
|
609
|
+
|
|
610
|
+
summary_sentence = summary_sentence + " </UL>"
|
|
611
|
+
return summary_sentence
|
|
612
|
+
|
|
613
|
+
def _create_ensemble_summary_sentence(self, run_config: RunConfig) -> str:
|
|
614
|
+
summary_sentence = "<BR><BR>"
|
|
615
|
+
best_config_name = run_config.model_run_configs()[0].model_variant_name()
|
|
616
|
+
|
|
617
|
+
summary_sentence = (
|
|
618
|
+
summary_sentence
|
|
619
|
+
+ f"<strong>{best_config_name}</strong> is comprised of the following composing models: <UL> "
|
|
620
|
+
)
|
|
621
|
+
summary_sentence = (
|
|
622
|
+
summary_sentence + self._create_composing_model_summary_sentence(run_config)
|
|
623
|
+
)
|
|
624
|
+
|
|
625
|
+
return summary_sentence
|
|
626
|
+
|
|
627
|
+
def _create_bls_summary_sentence(self, run_config: RunConfig) -> str:
|
|
628
|
+
summary_sentence = self._create_model_summary_sentence(run_config)
|
|
629
|
+
summary_sentence = (
|
|
630
|
+
summary_sentence
|
|
631
|
+
+ f"<BR>Which is comprised of the following composing models: <UL>"
|
|
632
|
+
)
|
|
633
|
+
summary_sentence = (
|
|
634
|
+
summary_sentence + self._create_composing_model_summary_sentence(run_config)
|
|
635
|
+
)
|
|
636
|
+
|
|
637
|
+
return summary_sentence
|
|
638
|
+
|
|
639
|
+
def _create_model_summary_sentence(self, run_config: RunConfig) -> str:
|
|
640
|
+
summary_sentence = "<UL>"
|
|
641
|
+
for model_run_config in run_config.model_run_configs():
|
|
642
|
+
summary_sentence = (
|
|
643
|
+
summary_sentence
|
|
644
|
+
+ "<LI> "
|
|
645
|
+
+ self._create_summary_config_info(
|
|
646
|
+
model_run_config.model_config_variant()
|
|
647
|
+
)
|
|
648
|
+
+ " </LI>"
|
|
649
|
+
)
|
|
650
|
+
|
|
651
|
+
return summary_sentence
|
|
652
|
+
|
|
653
|
+
def _create_composing_model_summary_sentence(self, run_config: RunConfig) -> str:
|
|
654
|
+
summary_sentence = ""
|
|
655
|
+
for composing_config_variant in run_config.model_run_configs()[
|
|
656
|
+
0
|
|
657
|
+
].composing_config_variants():
|
|
658
|
+
summary_sentence = (
|
|
659
|
+
summary_sentence
|
|
660
|
+
+ "<LI> "
|
|
661
|
+
+ self._create_summary_config_info(composing_config_variant)
|
|
662
|
+
+ " </LI>"
|
|
663
|
+
)
|
|
664
|
+
|
|
665
|
+
return summary_sentence
|
|
666
|
+
|
|
667
|
+
def _create_summary_measurement_phrase(self, num_measurements):
|
|
668
|
+
assert num_measurements > 0, "Number of measurements must be greater than 0"
|
|
669
|
+
|
|
670
|
+
return (
|
|
671
|
+
f"{num_measurements} measurements"
|
|
672
|
+
if num_measurements > 1
|
|
673
|
+
else "1 measurement"
|
|
674
|
+
)
|
|
675
|
+
|
|
676
|
+
def _create_summary_config_phrase(self, best_run_config, num_configurations):
|
|
677
|
+
config_names = [
|
|
678
|
+
f"<strong>{model_run_config.model_config_variant().variant_name}</strong>"
|
|
679
|
+
for model_run_config in best_run_config.model_run_configs()
|
|
680
|
+
]
|
|
681
|
+
|
|
682
|
+
config_names_str = f"{' and '.join(config_names)}"
|
|
683
|
+
|
|
684
|
+
if len(config_names) > 1:
|
|
685
|
+
return f"{num_configurations} configurations, the combination of {config_names_str}"
|
|
686
|
+
else:
|
|
687
|
+
return f"{num_configurations} configurations, {config_names_str}"
|
|
688
|
+
|
|
689
|
+
def _create_summary_objective_phrase(
|
|
690
|
+
self, report_key: str, best_run_config_measurement: "RunConfigMeasurement"
|
|
691
|
+
) -> str:
|
|
692
|
+
default_run_config_measurement = self._find_default_run_config_measurement(
|
|
693
|
+
report_key
|
|
694
|
+
)
|
|
695
|
+
|
|
696
|
+
if default_run_config_measurement:
|
|
697
|
+
objective_gain = self._get_objective_gain(
|
|
698
|
+
best_run_config_measurement, default_run_config_measurement
|
|
699
|
+
)
|
|
700
|
+
else:
|
|
701
|
+
objective_gain = 0
|
|
702
|
+
|
|
703
|
+
if objective_gain > 0:
|
|
704
|
+
if self._config.get_config()["objectives"].is_set_by_user():
|
|
705
|
+
objective_phrase = f"is <strong>{objective_gain}%</strong> better than the default configuration at meeting the objectives"
|
|
706
|
+
else:
|
|
707
|
+
if self._mode == "online":
|
|
708
|
+
objective_phrase = f"is <strong>{objective_gain}%</strong> better than the default configuration at maximizing throughput"
|
|
709
|
+
else:
|
|
710
|
+
objective_phrase = f"is <strong>{objective_gain}%</strong> better than the default configuration at minimizing latency"
|
|
711
|
+
else:
|
|
712
|
+
objective_phrase = "provides no gain over the default configuration"
|
|
713
|
+
|
|
714
|
+
return objective_phrase
|
|
715
|
+
|
|
716
|
+
def _get_objective_gain(
|
|
717
|
+
self,
|
|
718
|
+
run_config_measurement: "RunConfigMeasurement",
|
|
719
|
+
default_run_config_measurement: "RunConfigMeasurement",
|
|
720
|
+
) -> float:
|
|
721
|
+
return round(
|
|
722
|
+
run_config_measurement.calculate_weighted_percentage_gain(
|
|
723
|
+
default_run_config_measurement
|
|
724
|
+
)
|
|
725
|
+
)
|
|
726
|
+
|
|
727
|
+
def _find_default_run_config_measurement(self, model_name):
|
|
728
|
+
# There is no single default config when comparing across
|
|
729
|
+
# multiple model runs
|
|
730
|
+
#
|
|
731
|
+
if model_name == TOP_MODELS_REPORT_KEY:
|
|
732
|
+
return None
|
|
733
|
+
|
|
734
|
+
sorted_results = self._result_manager.get_model_sorted_results(model_name)
|
|
735
|
+
|
|
736
|
+
for run_config_result in sorted_results.results():
|
|
737
|
+
run_config_measurements = run_config_result.passing_measurements()
|
|
738
|
+
if (
|
|
739
|
+
run_config_measurements
|
|
740
|
+
and "default" in run_config_measurements[0].model_variants_name()
|
|
741
|
+
):
|
|
742
|
+
best_rcm = run_config_measurements[0]
|
|
743
|
+
for run_config_measurement in run_config_measurements:
|
|
744
|
+
if run_config_measurement > best_rcm:
|
|
745
|
+
best_rcm = run_config_measurement
|
|
746
|
+
|
|
747
|
+
return best_rcm
|
|
748
|
+
|
|
749
|
+
return None
|
|
750
|
+
|
|
751
|
+
def _create_summary_platform_phrase(self, model_config):
|
|
752
|
+
if model_config.get_field("backend"):
|
|
753
|
+
platform = model_config.get_field("backend")
|
|
754
|
+
else:
|
|
755
|
+
platform = model_config.get_field("platform")
|
|
756
|
+
|
|
757
|
+
return f"platform {platform}"
|
|
758
|
+
|
|
759
|
+
def _create_summary_max_batch_size_phrase(self, model_config):
|
|
760
|
+
return f"max batch size of {model_config.max_batch_size()}"
|
|
761
|
+
|
|
762
|
+
def _create_instance_group_phrase(self, model_config):
|
|
763
|
+
instance_group_str = model_config.instance_group_string(self._get_gpu_count())
|
|
764
|
+
kind_counts = instance_group_str.split("+")
|
|
765
|
+
ret_str = ""
|
|
766
|
+
for kind_count in kind_counts:
|
|
767
|
+
kind_count = kind_count.strip()
|
|
768
|
+
count, kind = kind_count.split(":")
|
|
769
|
+
if ret_str != "":
|
|
770
|
+
ret_str += " and "
|
|
771
|
+
ret_str += f"{count} {kind} instance"
|
|
772
|
+
if int(count) > 1:
|
|
773
|
+
ret_str += "s"
|
|
774
|
+
return ret_str
|
|
775
|
+
|
|
776
|
+
def _create_summary_gpu_name_phrase(self, gpu_name, report_gpu_metrics):
|
|
777
|
+
return f", on GPU(s) {gpu_name}" if report_gpu_metrics else ""
|
|
778
|
+
|
|
779
|
+
def _construct_summary_result_table(
|
|
780
|
+
self, sorted_measurements, multi_model, has_composing_models, report_gpu_metrics
|
|
781
|
+
):
|
|
782
|
+
if report_gpu_metrics:
|
|
783
|
+
return self._construct_summary_result_table_with_gpu(
|
|
784
|
+
sorted_measurements, multi_model, has_composing_models
|
|
785
|
+
)
|
|
786
|
+
else:
|
|
787
|
+
return self._construct_summary_result_table_cpu_only(
|
|
788
|
+
sorted_measurements, multi_model, has_composing_models
|
|
789
|
+
)
|
|
790
|
+
|
|
791
|
+
def _construct_summary_result_table_cpu_only(
|
|
792
|
+
self, sorted_measurements, multi_model, has_composing_models
|
|
793
|
+
):
|
|
794
|
+
summary_table = self._create_summary_result_table_header_cpu_only(multi_model)
|
|
795
|
+
|
|
796
|
+
for run_config, run_config_measurement in sorted_measurements:
|
|
797
|
+
row = self._create_summary_row_cpu_only(
|
|
798
|
+
run_config, run_config_measurement, has_composing_models
|
|
799
|
+
)
|
|
800
|
+
summary_table.insert_row_by_index(row)
|
|
801
|
+
|
|
802
|
+
return summary_table
|
|
803
|
+
|
|
804
|
+
def _construct_summary_result_table_with_gpu(
|
|
805
|
+
self, sorted_measurements, multi_model, has_composing_models
|
|
806
|
+
):
|
|
807
|
+
summary_table = self._create_summary_result_table_header(multi_model)
|
|
808
|
+
|
|
809
|
+
for run_config, run_config_measurement in sorted_measurements:
|
|
810
|
+
row = self._create_summary_row(
|
|
811
|
+
run_config, run_config_measurement, has_composing_models
|
|
812
|
+
)
|
|
813
|
+
summary_table.insert_row_by_index(row)
|
|
814
|
+
|
|
815
|
+
return summary_table
|
|
816
|
+
|
|
817
|
+
def _create_summary_result_table_header_cpu_only(self, multi_model):
|
|
818
|
+
if multi_model:
|
|
819
|
+
header_values = [
|
|
820
|
+
"Model Config Name",
|
|
821
|
+
"Max Batch Size",
|
|
822
|
+
"Dynamic Batching",
|
|
823
|
+
"Total Instance Count",
|
|
824
|
+
"Average p99 Latency (ms)",
|
|
825
|
+
"Total Throughput (infer/sec)",
|
|
826
|
+
"Max CPU Memory Usage (MB)",
|
|
827
|
+
]
|
|
828
|
+
else:
|
|
829
|
+
header_values = [
|
|
830
|
+
"Model Config Name",
|
|
831
|
+
"Max Batch Size",
|
|
832
|
+
"Dynamic Batching",
|
|
833
|
+
"Total Instance Count",
|
|
834
|
+
"p99 Latency (ms)",
|
|
835
|
+
"Throughput (infer/sec)",
|
|
836
|
+
"Max CPU Memory Usage (MB)",
|
|
837
|
+
]
|
|
838
|
+
if not self._cpu_metrics_were_gathered():
|
|
839
|
+
header_values.remove("Max CPU Memory Usage (MB)")
|
|
840
|
+
|
|
841
|
+
return ResultTable(headers=header_values, title="Report Table")
|
|
842
|
+
|
|
843
|
+
def _create_summary_result_table_header(self, multi_model):
|
|
844
|
+
if multi_model:
|
|
845
|
+
header_values = [
|
|
846
|
+
"Model Config Name",
|
|
847
|
+
"Max Batch Size",
|
|
848
|
+
"Dynamic Batching",
|
|
849
|
+
"Total Instance Count",
|
|
850
|
+
"Average p99 Latency (ms)",
|
|
851
|
+
"Total Throughput (infer/sec)",
|
|
852
|
+
"Max CPU Memory Usage (MB)",
|
|
853
|
+
"Max GPU Memory Usage (MB)",
|
|
854
|
+
"Average GPU Utilization (%)",
|
|
855
|
+
]
|
|
856
|
+
else:
|
|
857
|
+
header_values = [
|
|
858
|
+
"Model Config Name",
|
|
859
|
+
"Max Batch Size",
|
|
860
|
+
"Dynamic Batching",
|
|
861
|
+
"Total Instance Count",
|
|
862
|
+
"p99 Latency (ms)",
|
|
863
|
+
"Throughput (infer/sec)",
|
|
864
|
+
"Max CPU Memory Usage (MB)",
|
|
865
|
+
"Max GPU Memory Usage (MB)",
|
|
866
|
+
"Average GPU Utilization (%)",
|
|
867
|
+
]
|
|
868
|
+
|
|
869
|
+
if not self._cpu_metrics_were_gathered():
|
|
870
|
+
header_values.remove("Max CPU Memory Usage (MB)")
|
|
871
|
+
|
|
872
|
+
return ResultTable(headers=header_values, title="Report Table")
|
|
873
|
+
|
|
874
|
+
def _create_summary_row_cpu_only(
|
|
875
|
+
self, run_config, run_config_measurement, has_composing_models
|
|
876
|
+
):
|
|
877
|
+
model_config_names = ", ".join(
|
|
878
|
+
[
|
|
879
|
+
model_run_config.model_config_variant().variant_name
|
|
880
|
+
for model_run_config in run_config.model_run_configs()
|
|
881
|
+
]
|
|
882
|
+
)
|
|
883
|
+
|
|
884
|
+
if has_composing_models:
|
|
885
|
+
dynamic_batching_string = self._create_summary_string(
|
|
886
|
+
[
|
|
887
|
+
model_config.dynamic_batching_string()
|
|
888
|
+
for model_config in run_config.composing_configs()
|
|
889
|
+
]
|
|
890
|
+
)
|
|
891
|
+
else:
|
|
892
|
+
dynamic_batching_string = self._create_summary_string(
|
|
893
|
+
[
|
|
894
|
+
model_run_config.model_config().dynamic_batching_string()
|
|
895
|
+
for model_run_config in run_config.model_run_configs()
|
|
896
|
+
]
|
|
897
|
+
)
|
|
898
|
+
|
|
899
|
+
if has_composing_models:
|
|
900
|
+
max_batch_sizes = ", ".join(
|
|
901
|
+
[
|
|
902
|
+
str(model_config.max_batch_size())
|
|
903
|
+
for model_config in run_config.composing_configs()
|
|
904
|
+
]
|
|
905
|
+
)
|
|
906
|
+
else:
|
|
907
|
+
max_batch_sizes = ", ".join(
|
|
908
|
+
[
|
|
909
|
+
str(model_run_config.model_config().max_batch_size())
|
|
910
|
+
for model_run_config in run_config.model_run_configs()
|
|
911
|
+
]
|
|
912
|
+
)
|
|
913
|
+
|
|
914
|
+
if has_composing_models:
|
|
915
|
+
instance_group_strings = ", ".join(
|
|
916
|
+
[
|
|
917
|
+
model_config.instance_group_string(self._get_gpu_count())
|
|
918
|
+
for model_config in run_config.model_run_configs()[
|
|
919
|
+
0
|
|
920
|
+
].composing_configs()
|
|
921
|
+
]
|
|
922
|
+
)
|
|
923
|
+
else:
|
|
924
|
+
instance_group_strings = ", ".join(
|
|
925
|
+
[
|
|
926
|
+
model_run_config.model_config().instance_group_string(
|
|
927
|
+
self._get_gpu_count()
|
|
928
|
+
)
|
|
929
|
+
for model_run_config in run_config.model_run_configs()
|
|
930
|
+
]
|
|
931
|
+
)
|
|
932
|
+
|
|
933
|
+
perf_latency_string = self._create_non_gpu_metric_string(
|
|
934
|
+
run_config_measurement=run_config_measurement,
|
|
935
|
+
non_gpu_metric="perf_latency_p99",
|
|
936
|
+
)
|
|
937
|
+
perf_throughput_string = self._create_non_gpu_metric_string(
|
|
938
|
+
run_config_measurement=run_config_measurement,
|
|
939
|
+
non_gpu_metric="perf_throughput",
|
|
940
|
+
)
|
|
941
|
+
|
|
942
|
+
row = [
|
|
943
|
+
model_config_names,
|
|
944
|
+
max_batch_sizes,
|
|
945
|
+
dynamic_batching_string,
|
|
946
|
+
instance_group_strings,
|
|
947
|
+
perf_latency_string,
|
|
948
|
+
perf_throughput_string,
|
|
949
|
+
]
|
|
950
|
+
|
|
951
|
+
if self._cpu_metrics_were_gathered():
|
|
952
|
+
cpu_used_ram_string = self._create_non_gpu_metric_string(
|
|
953
|
+
run_config_measurement=run_config_measurement,
|
|
954
|
+
non_gpu_metric="cpu_used_ram",
|
|
955
|
+
)
|
|
956
|
+
row.append(cpu_used_ram_string)
|
|
957
|
+
|
|
958
|
+
return row
|
|
959
|
+
|
|
960
|
+
def _create_summary_row(
|
|
961
|
+
self, run_config, run_config_measurement, has_composing_models
|
|
962
|
+
):
|
|
963
|
+
if has_composing_models:
|
|
964
|
+
dynamic_batching_string = self._create_summary_string(
|
|
965
|
+
[
|
|
966
|
+
model_config.dynamic_batching_string()
|
|
967
|
+
for model_config in run_config.composing_configs()
|
|
968
|
+
]
|
|
969
|
+
)
|
|
970
|
+
else:
|
|
971
|
+
dynamic_batching_string = self._create_summary_string(
|
|
972
|
+
[
|
|
973
|
+
model_run_config.model_config().dynamic_batching_string()
|
|
974
|
+
for model_run_config in run_config.model_run_configs()
|
|
975
|
+
]
|
|
976
|
+
)
|
|
977
|
+
|
|
978
|
+
if has_composing_models:
|
|
979
|
+
instance_group_string = self._create_summary_string(
|
|
980
|
+
[
|
|
981
|
+
model_config.instance_group_string(self._get_gpu_count())
|
|
982
|
+
for model_config in run_config.model_run_configs()[
|
|
983
|
+
0
|
|
984
|
+
].composing_configs()
|
|
985
|
+
]
|
|
986
|
+
)
|
|
987
|
+
else:
|
|
988
|
+
instance_group_string = self._create_summary_string(
|
|
989
|
+
[
|
|
990
|
+
model_run_config.model_config().instance_group_string(
|
|
991
|
+
self._get_gpu_count()
|
|
992
|
+
)
|
|
993
|
+
for model_run_config in run_config.model_run_configs()
|
|
994
|
+
]
|
|
995
|
+
)
|
|
996
|
+
|
|
997
|
+
if has_composing_models:
|
|
998
|
+
max_batch_sizes_string = self._create_summary_string(
|
|
999
|
+
[
|
|
1000
|
+
str(model_config.max_batch_size())
|
|
1001
|
+
for model_config in run_config.composing_configs()
|
|
1002
|
+
]
|
|
1003
|
+
)
|
|
1004
|
+
else:
|
|
1005
|
+
max_batch_sizes_string = self._create_summary_string(
|
|
1006
|
+
[
|
|
1007
|
+
str(model_run_config.model_config().max_batch_size())
|
|
1008
|
+
for model_run_config in run_config.model_run_configs()
|
|
1009
|
+
]
|
|
1010
|
+
)
|
|
1011
|
+
|
|
1012
|
+
model_config_names = "<br>".join(
|
|
1013
|
+
[
|
|
1014
|
+
model_run_config.model_config_variant().variant_name
|
|
1015
|
+
for model_run_config in run_config.model_run_configs()
|
|
1016
|
+
]
|
|
1017
|
+
)
|
|
1018
|
+
|
|
1019
|
+
perf_latency_string = self._create_non_gpu_metric_string(
|
|
1020
|
+
run_config_measurement=run_config_measurement,
|
|
1021
|
+
non_gpu_metric="perf_latency_p99",
|
|
1022
|
+
)
|
|
1023
|
+
perf_throughput_string = self._create_non_gpu_metric_string(
|
|
1024
|
+
run_config_measurement=run_config_measurement,
|
|
1025
|
+
non_gpu_metric="perf_throughput",
|
|
1026
|
+
)
|
|
1027
|
+
|
|
1028
|
+
if self._cpu_metrics_were_gathered():
|
|
1029
|
+
cpu_used_ram_string = self._create_non_gpu_metric_string(
|
|
1030
|
+
run_config_measurement=run_config_measurement,
|
|
1031
|
+
non_gpu_metric="cpu_used_ram",
|
|
1032
|
+
)
|
|
1033
|
+
|
|
1034
|
+
row = [
|
|
1035
|
+
model_config_names,
|
|
1036
|
+
max_batch_sizes_string,
|
|
1037
|
+
dynamic_batching_string,
|
|
1038
|
+
instance_group_string,
|
|
1039
|
+
perf_latency_string,
|
|
1040
|
+
perf_throughput_string,
|
|
1041
|
+
cpu_used_ram_string,
|
|
1042
|
+
int(run_config_measurement.get_gpu_metric_value("gpu_used_memory")),
|
|
1043
|
+
round(
|
|
1044
|
+
run_config_measurement.get_gpu_metric_value("gpu_utilization"), 1
|
|
1045
|
+
),
|
|
1046
|
+
]
|
|
1047
|
+
else:
|
|
1048
|
+
row = [
|
|
1049
|
+
model_config_names,
|
|
1050
|
+
max_batch_sizes_string,
|
|
1051
|
+
dynamic_batching_string,
|
|
1052
|
+
instance_group_string,
|
|
1053
|
+
perf_latency_string,
|
|
1054
|
+
perf_throughput_string,
|
|
1055
|
+
int(run_config_measurement.get_gpu_metric_value("gpu_used_memory")),
|
|
1056
|
+
round(
|
|
1057
|
+
run_config_measurement.get_gpu_metric_value("gpu_utilization"), 1
|
|
1058
|
+
),
|
|
1059
|
+
]
|
|
1060
|
+
|
|
1061
|
+
return row
|
|
1062
|
+
|
|
1063
|
+
def _create_summary_string(self, values):
|
|
1064
|
+
if len(values) > 1:
|
|
1065
|
+
return f"({', '.join(values)})"
|
|
1066
|
+
else:
|
|
1067
|
+
return f"{values[0]}"
|
|
1068
|
+
|
|
1069
|
+
def _create_non_gpu_metric_string(self, run_config_measurement, non_gpu_metric):
|
|
1070
|
+
non_gpu_metrics = run_config_measurement.get_non_gpu_metric(non_gpu_metric)
|
|
1071
|
+
|
|
1072
|
+
if non_gpu_metrics[0] is None:
|
|
1073
|
+
return "0"
|
|
1074
|
+
elif len(non_gpu_metrics) > 1:
|
|
1075
|
+
non_gpu_metric_config_string = ", ".join(
|
|
1076
|
+
[
|
|
1077
|
+
str(round(non_gpu_metric.value(), 1))
|
|
1078
|
+
for non_gpu_metric in non_gpu_metrics
|
|
1079
|
+
]
|
|
1080
|
+
)
|
|
1081
|
+
|
|
1082
|
+
return (
|
|
1083
|
+
f"<strong>{round(run_config_measurement.get_non_gpu_metric_value(non_gpu_metric), 1)}</strong> "
|
|
1084
|
+
f"({non_gpu_metric_config_string})"
|
|
1085
|
+
)
|
|
1086
|
+
else:
|
|
1087
|
+
return f"{non_gpu_metrics[0].value()}"
|
|
1088
|
+
|
|
1089
|
+
def _create_summary_config_info(self, model_config_variant):
|
|
1090
|
+
model_config = model_config_variant.model_config
|
|
1091
|
+
|
|
1092
|
+
config_info = f"<strong>{model_config_variant.variant_name}</strong>: "
|
|
1093
|
+
config_info = (
|
|
1094
|
+
config_info + f"{self._create_instance_group_phrase(model_config)} with a "
|
|
1095
|
+
)
|
|
1096
|
+
config_info = (
|
|
1097
|
+
config_info
|
|
1098
|
+
+ f"{self._create_summary_max_batch_size_phrase(model_config)} on "
|
|
1099
|
+
)
|
|
1100
|
+
config_info = (
|
|
1101
|
+
config_info + f"{self._create_summary_platform_phrase(model_config)}"
|
|
1102
|
+
)
|
|
1103
|
+
|
|
1104
|
+
return config_info
|
|
1105
|
+
|
|
1106
|
+
+    def _build_detailed_table(self, model_config_name):
+        """
+        Build the table used in the detailed report
+        """
+
+        model_config, measurements = self._detailed_report_data[model_config_name]
+        sort_by_tag = (
+            "perf_latency_p99" if self._mode == "online" else "perf_throughput"
+        )
+        measurements = sorted(
+            measurements,
+            key=lambda x: x.get_non_gpu_metric_value(sort_by_tag),
+            reverse=True,
+        )
+        report_gpu_metrics = (
+            self._config.always_report_gpu_metrics or not model_config.cpu_only()
+        )
+
+        if self._was_measured_with_request_rate(measurements[0]):
+            first_column_header = (
+                "Request Rate" if self._mode == "online" else "Client Batch Size"
+            )
+            first_column_tag = (
+                "request-rate-range" if self._mode == "online" else "batch-size"
+            )
+        else:
+            first_column_header = (
+                "Request Concurrency" if self._mode == "online" else "Client Batch Size"
+            )
+            first_column_tag = (
+                "concurrency-range" if self._mode == "online" else "batch-size"
+            )
+
+        if report_gpu_metrics:
+            headers = [
+                first_column_header,
+                "p99 Latency (ms)",
+                "Client Response Wait (ms)",
+                "Server Queue (ms)",
+                "Server Compute Input (ms)",
+                "Server Compute Infer (ms)",
+                "Throughput (infer/sec)",
+                "Max CPU Memory Usage (MB)",
+                "Max GPU Memory Usage (MB)",
+                "Average GPU Utilization (%)",
+            ]
+        else:
+            headers = [
+                first_column_header,
+                "p99 Latency (ms)",
+                "Client Response Wait (ms)",
+                "Server Queue (ms)",
+                "Server Compute Input (ms)",
+                "Server Compute Infer (ms)",
+                "Throughput (infer/sec)",
+                "Max CPU Memory Usage (MB)",
+            ]
+
+        if not self._cpu_metrics_were_gathered():
+            headers.remove("Max CPU Memory Usage (MB)")
+
+        detailed_table = ResultTable(headers, title="Detailed Table")
+
+        # Construct table
+        if report_gpu_metrics:
+            for measurement in measurements:
+                row = [
+                    # TODO-TMA-568: This needs to be updated because there will be multiple model configs
+                    measurement.model_specific_pa_params()[0][first_column_tag],
+                    measurement.get_non_gpu_metric_value("perf_latency_p99"),
+                    measurement.get_non_gpu_metric_value("perf_client_response_wait"),
+                    measurement.get_non_gpu_metric_value("perf_server_queue"),
+                    measurement.get_non_gpu_metric_value("perf_server_compute_input"),
+                    measurement.get_non_gpu_metric_value("perf_server_compute_infer"),
+                    measurement.get_non_gpu_metric_value("perf_throughput"),
+                ]
+                if self._cpu_metrics_were_gathered():
+                    row.append(measurement.get_non_gpu_metric_value("cpu_used_ram"))
+
+                row.append(measurement.get_gpu_metric_value("gpu_used_memory"))
+                row.append(
+                    round(measurement.get_gpu_metric_value("gpu_utilization"), 1)
+                )
+
+                detailed_table.insert_row_by_index(row)
+        else:
+            for measurement in measurements:
+                row = [
+                    # TODO-TMA-568: This needs to be updated because there will be multiple model configs
+                    measurement.model_specific_pa_params()[0][first_column_tag],
+                    measurement.get_non_gpu_metric_value("perf_latency_p99"),
+                    measurement.get_non_gpu_metric_value("perf_client_response_wait"),
+                    measurement.get_non_gpu_metric_value("perf_server_queue"),
+                    measurement.get_non_gpu_metric_value("perf_server_compute_input"),
+                    measurement.get_non_gpu_metric_value("perf_server_compute_infer"),
+                    measurement.get_non_gpu_metric_value("perf_throughput"),
+                ]
+                if self._cpu_metrics_were_gathered():
+                    row.append(measurement.get_non_gpu_metric_value("cpu_used_ram"))
+
+                detailed_table.insert_row_by_index(row)
+        return detailed_table
+
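Rows in the detailed table are ordered by a mode-dependent key, descending: p99 latency in online mode, throughput in offline mode. A minimal sketch of the same sort, with dicts standing in for RunConfigMeasurement objects:

mode = "online"
sort_by_tag = "perf_latency_p99" if mode == "online" else "perf_throughput"
measurements = [
    {"perf_latency_p99": 12.4, "perf_throughput": 800},
    {"perf_latency_p99": 35.1, "perf_throughput": 1450},
]
ordered = sorted(measurements, key=lambda m: m[sort_by_tag], reverse=True)
assert ordered[0]["perf_latency_p99"] == 35.1  # highest p99 latency first in online mode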
+    def _build_detailed_info(self, model_config_name):
+        """
+        Constructs important info sentence about the model config
+        specified
+        """
+
+        run_config, measurements = self._detailed_report_data[model_config_name]
+
+        # TODO-TMA-568 - add support for multi-model
+        model_config = run_config.model_run_configs()[0].model_config()
+        instance_group_string = self._create_instance_group_phrase(model_config)
+        dynamic_batching = model_config.dynamic_batching_string()
+        max_batch_size = model_config.max_batch_size()
+        platform = model_config.get_field("platform")
+
+        max_batch_size_string = f"a max batch size of {max_batch_size}"
+
+        if dynamic_batching == "Disabled":
+            dynamic_batching_string = "dynamic batching disabled"
+        else:
+            dynamic_batching_string = "dynamic batching enabled"
+
+        gpu_cpu_string = "CPU"
+
+        report_gpu_metrics = (
+            self._config.always_report_gpu_metrics or not run_config.cpu_only()
+        )
+
+        if report_gpu_metrics:
+            gpu_names, max_memories = self._get_gpu_stats(measurements)
+            gpu_cpu_string = f"GPU(s) {gpu_names} with total memory {max_memories}"
+
+        if run_config.is_ensemble_model():
+            sentence = f"<strong>{model_config_name}</strong> is comprised of the following composing models:"
+
+            for composing_config_variant in run_config.composing_config_variants():
+                sentence = (
+                    sentence
+                    + "<LI> "
+                    + self._create_summary_config_info(composing_config_variant)
+                    + " </LI>"
+                )
+
+            sentence = (
+                sentence
+                + f"<br>{len(measurements)} measurement(s) were obtained for the model config on {gpu_cpu_string}."
+            )
+        elif run_config.is_bls_model():
+            sentence = f"<strong>{model_config_name}</strong> is comprised of the following composing models:"
+
+            for composing_config_variant in run_config.composing_config_variants():
+                sentence = (
+                    sentence
+                    + "<LI> "
+                    + self._create_summary_config_info(composing_config_variant)
+                    + " </LI>"
+                )
+
+            sentence = (
+                sentence
+                + f"<br>{len(measurements)} measurement(s) were obtained for the model config on {gpu_cpu_string}."
+            )
+        else:
+            sentence = (
+                f"The model config <strong>{model_config_name}</strong> uses {instance_group_string} "
+                f"with {max_batch_size_string} and has {dynamic_batching_string}. "
+                f"{len(measurements)} measurement(s) were obtained for the model config on "
+                f"{gpu_cpu_string}. "
+                f"This model uses the platform {platform}."
+            )
+
+        return sentence
+
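For a plain (non-ensemble, non-BLS) config, the method renders a single sentence from the pieces gathered above. An invented example of its shape:

# All values below are hypothetical, chosen only to show the template.
sentence = (
    "The model config <strong>bert_config_1</strong> uses 1 GPU instance "
    "with a max batch size of 16 and has dynamic batching enabled. "
    "3 measurement(s) were obtained for the model config on "
    "GPU(s) 1 x NVIDIA A100 with total memory 40.0 GB. "
    "This model uses the platform tensorrt_plan."
)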
+    def _get_gpu_count(self):
+        return len(self._gpu_info)
+
+    def _get_gpu_stats(
+        self, measurements: List["RunConfigMeasurement"]
+    ) -> Tuple[str, str]:
+        """
+        Gets names and max total memory of GPUs used in measurements as a
+        tuple of strings
+
+        Returns
+        -------
+        (gpu_names_str, max_memory_str):
+            The GPU names as a string, and the total combined memory as a string
+        """
+
+        gpu_dict: Dict[str, Any] = {}
+        for gpu_uuid, gpu_info in self._gpu_info.items():
+            for measurement in measurements:
+                if gpu_uuid in measurement.gpus_used():
+                    gpu_name = gpu_info["name"]
+                    max_memory = round(gpu_info["total_memory"] / (2**30), 1)
+                    if gpu_name not in gpu_dict:
+                        gpu_dict[gpu_name] = {"memory": max_memory, "count": 1}
+                    else:
+                        gpu_dict[gpu_name]["count"] += 1
+                    break
+
+        gpu_names = ""
+        max_memory = 0
+        for name in gpu_dict.keys():
+            count = gpu_dict[name]["count"]
+            memory = gpu_dict[name]["memory"]
+            if gpu_names != "":
+                gpu_names += ", "
+            gpu_names += f"{count} x {name}"
+            max_memory += memory * count
+
+        max_mem_str = f"{max_memory} GB"
+        return (gpu_names, max_mem_str)
+
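The aggregation counts identical GPUs by name and converts each card's memory from bytes to GiB (2**30 bytes) before summing. A self-contained sketch with invented hardware:

# Hypothetical inventory: two 40 GiB A100s and one 15 GiB T4, memory in bytes.
gpus = [
    ("NVIDIA A100", 42949672960),
    ("NVIDIA A100", 42949672960),
    ("NVIDIA T4", 16106127360),
]
agg = {}
for name, total_bytes in gpus:
    entry = agg.setdefault(name, {"memory": round(total_bytes / (2**30), 1), "count": 0})
    entry["count"] += 1
names = ", ".join(f"{v['count']} x {k}" for k, v in agg.items())
total = sum(v["memory"] * v["count"] for v in agg.values())
assert (names, f"{total} GB") == ("2 x NVIDIA A100, 1 x NVIDIA T4", "95.0 GB")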
+    def _build_constraint_strings(self) -> Dict[str, str]:
+        """
+        Constructs constraint strings to show the constraints under which
+        each model is being run.
+        """
+
+        constraint_strs = {}
+
+        for (
+            model_name,
+            model_constraints,
+        ) in self._constraint_manager.get_constraints_for_all_models().items():
+            strs = []
+            if model_constraints:
+                for metric, constraint in model_constraints.items():
+                    metric_header = MetricsManager.get_metric_types([metric])[0].header(
+                        aggregation_tag=""
+                    )
+                    for constraint_type, constraint_val in constraint.items():
+                        # String looks like 'Max p99 Latency: 99 ms'
+                        metric_header_name = metric_header.rsplit(" ", 1)[0]
+                        metric_unit = metric_header.rsplit(" ", 1)[1][1:-1]
+                        strs.append(
+                            f"{constraint_type.capitalize()} {metric_header_name}: {constraint_val} {metric_unit}"
+                        )
+                constraint_strs[model_name] = ", ".join(strs)
+        return constraint_strs
+
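The parsing splits a metric header such as "p99 Latency (ms)" into its name and its parenthesized unit. A minimal sketch of that string surgery:

metric_header = "p99 Latency (ms)"
name, unit = metric_header.rsplit(" ", 1)
unit = unit[1:-1]  # strip the parentheses: "(ms)" -> "ms"
constraint_type, constraint_val = "max", 99
assert f"{constraint_type.capitalize()} {name}: {constraint_val} {unit}" == "Max p99 Latency: 99 ms"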
+    def _cpu_metrics_were_gathered(self):
+        if self._cpu_metrics_gathered_sticky is None:
+            used_ram = None
+            if self._detailed_report_data:
+                key = list(self._detailed_report_data.keys())[0]
+                _, measurements = self._detailed_report_data[key]
+                used_ram = measurements[0].get_non_gpu_metric_value("cpu_used_ram")
+            else:
+                key = list(self._summary_data.keys())[0]
+                _, measurement = self._summary_data[key][0]
+                used_ram = measurement.get_non_gpu_metric_value("cpu_used_ram")
+
+            self._cpu_metrics_gathered_sticky = used_ram != 0
+
+        return self._cpu_metrics_gathered_sticky
+
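The sticky attribute is a compute-once cache: it starts as None, is resolved from the first available measurement, and the answer is reused on every later call. The same pattern in isolation, assuming nothing about the surrounding class:

class StickyFlag:
    def __init__(self):
        self._cached = None  # tri-state: None means not yet computed

    def value(self):
        if self._cached is None:
            self._cached = self._probe()  # runs exactly once
        return self._cached

    def _probe(self):
        return True  # placeholder for the real cpu_used_ram != 0 check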
+    def _was_measured_with_request_rate(
+        self, measurement: RunConfigMeasurement
+    ) -> bool:
+        if (
+            "request-rate-range" in measurement.model_specific_pa_params()[0]
+            and measurement.model_specific_pa_params()[0]["request-rate-range"]
+        ):
+            return True
+        else:
+            return False
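The condition amounts to a truthiness test on the perf analyzer parameter dict. An equivalent check on a plain dict standing in for model_specific_pa_params()[0]:

pa_params = {"request-rate-range": "1000"}
assert bool(pa_params.get("request-rate-range")) is True
assert bool({}.get("request-rate-range")) is False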