triton-model-analyzer 1.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. The information is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
model_analyzer/result/result_table_manager.py
@@ -0,0 +1,646 @@
#!/usr/bin/env python3

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os

from model_analyzer.constants import LOGGER_NAME
from model_analyzer.output.file_writer import FileWriter

from .result_table import ResultTable
from .result_utils import format_for_csv

logger = logging.getLogger(LOGGER_NAME)


class ResultTableManager:
    """
    This class provides methods to create, and add to
    ResultTables. Each ResultTable holds results from
    multiple runs.
    """

    headers = {
        "model_name": "Model",
        "batch_size": "Batch",
        "concurrency": "Concurrency",
        "request_rate": "Request Rate",
        "model_config_path": "Model Config Path",
        "instance_group": "Instance Group",
        "max_batch_size": "Max Batch Size",
        "satisfies_constraints": "Satisfies Constraints",
        "gpu_uuid": "GPU UUID",
    }

    server_only_table_key = "server_gpu_metrics"
    model_gpu_table_key = "model_gpu_metrics"
    model_inference_table_key = "model_inference_metrics"
    backend_parameter_key_prefix = "backend_parameter/"

    def __init__(self, config, result_manager):
        self._config = config
        self._result_manager = result_manager

        # Headers Dictionary and result tables
        self._gpu_metrics_to_headers = {}
        self._non_gpu_metrics_to_headers = {}
        self._result_tables = {}

    def create_tables(self):
        """
        Creates the inference, gpu, and server tables
        """
        self._determine_table_headers()

        self._create_inference_table()
        self._create_gpu_table()
        self._create_server_table()

    def tabulate_results(self):
        """
        The function called at the end of all runs
        FOR ALL MODELs that compiles all results and
        dumps the data into tables for exporting.
        """
        self._add_server_data()

        # Fill rows in descending order
        for model in self._result_manager.get_model_names():
            for result in self._result_manager.get_model_sorted_results(
                model
            ).results():
                self._tabulate_measurements(result)

    def write_results(self):
        """
        Writes table to console
        """
        self._write_results(writer=FileWriter(), column_separator=" ")

    def export_results(self):
        """
        Makes calls to _write_results out to streams or files. If
        exporting results is requested, uses a FileWriter for specified output
        files.
        """

        results_export_directory = os.path.join(self._config.export_path, "results")
        os.makedirs(results_export_directory, exist_ok=True)

        self._export_results(
            name="server only",
            dir=results_export_directory,
            filename=self._config.filename_server_only,
            key=self.server_only_table_key,
        )

        self._export_results(
            name="inference",
            dir=results_export_directory,
            filename=self._config.filename_model_inference,
            key=self.model_inference_table_key,
        )

        self._export_results(
            name="GPU",
            dir=results_export_directory,
            filename=self._config.filename_model_gpu,
            key=self.model_gpu_table_key,
        )

    def _export_results(self, name, dir, filename, key):
        table = self._result_tables[key]
        if table.size():
            outfile = os.path.join(dir, filename)
            logger.info(f"Exporting {name} metrics to {outfile}")
            self._write_result(
                table=table,
                writer=FileWriter(filename=outfile),
                column_separator=",",
                ignore_widths=True,
                include_title=False,
            )

    def _determine_table_headers(self):
        # Finds which metric(s) are actually collected during profile phase.
        # Since a profile phase can be run twice with different metric(s)
        # being collected.
        gpu_metrics_from_measurements = {}
        non_gpu_metrics_from_measurements = {}

        # Server data
        data = self._result_manager.get_server_only_data()
        for gpu_metrics in data.values():
            for gpu_metric in gpu_metrics:
                if gpu_metric.tag not in gpu_metrics_from_measurements:
                    gpu_metrics_from_measurements[gpu_metric.tag] = gpu_metric

        # Measurements
        results = self._result_manager.get_results()

        for run_config_measurement in results.get_list_of_run_config_measurements():
            for gpu_metrics in run_config_measurement.gpu_data().values():
                for gpu_metric in gpu_metrics:
                    if gpu_metric.tag not in gpu_metrics_from_measurements:
                        gpu_metrics_from_measurements[gpu_metric.tag] = gpu_metric

            for non_gpu_metric_list in run_config_measurement.non_gpu_data():
                for non_gpu_metric in non_gpu_metric_list:
                    if non_gpu_metric.tag not in non_gpu_metrics_from_measurements:
                        non_gpu_metrics_from_measurements[
                            non_gpu_metric.tag
                        ] = non_gpu_metric

        gpu_specific_metrics = gpu_metrics_from_measurements.values()
        non_gpu_specific_metrics = non_gpu_metrics_from_measurements.values()

        # Add metric tags to header mappings
        for metric in gpu_specific_metrics:
            self._gpu_metrics_to_headers[metric.tag] = metric.header()
        for metric in non_gpu_specific_metrics:
            self._non_gpu_metrics_to_headers[metric.tag] = metric.header()

    def _create_server_table(self):
        # Server only
        server_output_headers = []
        server_output_fields = []
        for server_output_field in self._config.server_output_fields:
            if server_output_field in self.headers:
                server_output_headers.append(self.headers[server_output_field])
            elif server_output_field in self._gpu_metrics_to_headers:
                server_output_headers.append(
                    self._gpu_metrics_to_headers[server_output_field]
                )
            else:
                logger.warning(
                    f'Server output field "{server_output_field}", has no data'
                )
                continue
            server_output_fields.append(server_output_field)

        self._add_result_table(
            table_key=self.server_only_table_key,
            title="Server Only",
            headers=server_output_headers,
        )
        self._server_output_fields = server_output_fields

    def _create_inference_table(self):
        # Inference only
        inference_output_headers = []
        inference_output_fields = []
        for inference_output_field in self._config.inference_output_fields:
            if inference_output_field in self.headers:
                inference_output_headers.append(self.headers[inference_output_field])
            elif inference_output_field in self._non_gpu_metrics_to_headers:
                inference_output_headers.append(
                    self._non_gpu_metrics_to_headers[inference_output_field]
                )
            elif inference_output_field.startswith(self.backend_parameter_key_prefix):
                inference_output_headers.append(inference_output_field)
            else:
                logger.warning(
                    f'Inference output field "{inference_output_field}", has no data'
                )
                continue
            inference_output_fields.append(inference_output_field)

        self._inference_output_fields = inference_output_fields
        self._add_result_table(
            table_key=self.model_inference_table_key,
            title="Models (Inference)",
            headers=inference_output_headers,
        )

    def _create_gpu_table(self):
        gpu_output_headers = []
        gpu_output_fields = []
        for gpu_output_field in self._config.gpu_output_fields:
            if gpu_output_field in self.headers:
                gpu_output_headers.append(self.headers[gpu_output_field])
            elif gpu_output_field in self._gpu_metrics_to_headers:
                gpu_output_headers.append(
                    self._gpu_metrics_to_headers[gpu_output_field]
                )
            else:
                logger.warning(f'GPU output field "{gpu_output_field}", has no data')
                continue
            gpu_output_fields.append(gpu_output_field)

        self._gpu_output_fields = gpu_output_fields
        # Model GPU Metrics
        self._add_result_table(
            table_key=self.model_gpu_table_key,
            title="Models (GPU Metrics)",
            headers=gpu_output_headers,
        )

    def _find_index_for_field(self, fields, field_name):
        try:
            index = fields.index(field_name)
            return index
        except ValueError:
            return None

    def _write_results(self, writer, column_separator):
        """
        Writes the tables using the writer with the given column
        specifications.

        Parameters
        ----------
        writer : OutputWriter
            Used to write the result tables to an output stream
        column_separator : str
            The string that will be inserted between each column
            of the table

        Raises
        ------
        TritonModelAnalyzerException
        """

        for table in self._result_tables.values():
            self._write_result(table, writer, column_separator, ignore_widths=False)

    def _write_result(
        self, table, writer, column_separator, ignore_widths=False, include_title=True
    ):
        """
        Utility function that writes any table
        """

        if include_title:
            writer.write(
                "\n".join(
                    [
                        table.title() + ":",
                        table.to_formatted_string(
                            separator=column_separator, ignore_widths=ignore_widths
                        ),
                        "\n",
                    ]
                )
            )
        else:
            writer.write(
                table.to_formatted_string(
                    separator=column_separator, ignore_widths=ignore_widths
                )
                + "\n\n"
            )

    def _get_gpu_count(self):
        return self._result_tables[self.server_only_table_key].size()

    def _add_server_data(self):
        """
        Adds data directly to the server only table

        Parameters
        ----------
        data : dict
            keys are gpu ids and values are lists of metric values
        """

        server_fields = self._server_output_fields
        server_only_data = self._result_manager.get_server_only_data()

        for gpu_uuid, metrics in server_only_data.items():
            data_row = [None] * len(server_fields)

            model_name_index = self._find_index_for_field(server_fields, "model_name")
            if model_name_index is not None:
                data_row[model_name_index] = "triton-server"

            gpu_uuid_index = self._find_index_for_field(server_fields, "gpu_uuid")
            if gpu_uuid_index is not None:
                data_row[gpu_uuid_index] = gpu_uuid

            for metric in metrics:
                metric_tag_index = self._find_index_for_field(server_fields, metric.tag)

                if metric_tag_index is not None:
                    data_row[metric_tag_index] = round(metric.value(), 1)
            self._result_tables[self.server_only_table_key].insert_row_by_index(
                data_row
            )

    def _add_result_table(self, table_key, title, headers):
        """
        Utility function that creates a table with column
        headers corresponding to perf_analyzer arguments
        and requested metrics. Also sets the result
        comparator for that table.
        """

        self._result_tables[table_key] = ResultTable(headers=headers, title=title)

    def _tabulate_measurements(self, run_config_result):
        """
        checks RunConfigMeasurements against constraints,
        and puts them into the correct (passing or failing)
        table
        """

        model_name = run_config_result.model_name()
        (
            instance_groups,
            max_batch_sizes,
            dynamic_batchings,
            cpu_onlys,
            backend_parameters,
            composing_config_names,
        ) = self._tabulate_measurements_setup(run_config_result)

        passing_measurements = run_config_result.passing_measurements()
        failing_measurements = run_config_result.failing_measurements()

        for run_config_measurements, passes in [
            (passing_measurements, True),
            (failing_measurements, False),
        ]:
            for run_config_measurement in run_config_measurements:
                self._tabulate_measurement(
                    model_name=model_name,
                    instance_groups=instance_groups,
                    max_batch_sizes=max_batch_sizes,
                    dynamic_batchings=dynamic_batchings,
                    run_config_measurement=run_config_measurement,
                    passes=passes,
                    cpu_onlys=cpu_onlys,
                    backend_parameters=backend_parameters,
                    composing_config_names=composing_config_names,
                )

    def _tabulate_measurements_setup(self, run_config_result):
        if run_config_result.run_config().is_ensemble_model():
            model_config_variants = (
                run_config_result.run_config().composing_config_variants()
            )
            model_configs = run_config_result.run_config().composing_configs()
            composing_config_names = [mcv.variant_name for mcv in model_config_variants]
        else:
            model_configs = [
                model_run_configs.model_config()
                for model_run_configs in run_config_result.run_config().model_run_configs()
            ]

            composing_config_names = []

        instance_groups = [
            model_config.instance_group_string(self._get_gpu_count())
            for model_config in model_configs
        ]
        max_batch_sizes = [
            model_config.max_batch_size() for model_config in model_configs
        ]
        dynamic_batchings = [
            model_config.dynamic_batching_string() for model_config in model_configs
        ]
        cpu_onlys = [run_config_result.run_config().cpu_only() for _ in model_configs]
        backend_parameters = [
            model_config._model_config.parameters for model_config in model_configs
        ]

        return (
            instance_groups,
            max_batch_sizes,
            dynamic_batchings,
            cpu_onlys,
            backend_parameters,
            composing_config_names,
        )

    def _tabulate_measurement(
        self,
        model_name,
        instance_groups,
        max_batch_sizes,
        dynamic_batchings,
        run_config_measurement,
        passes,
        cpu_onlys,
        backend_parameters,
        composing_config_names,
    ):
        """
        Add a single RunConfigMeasurement to the specified
        table
        """

        model_config_name = run_config_measurement.model_variants_name()
        if composing_config_names:
            model_config_name = model_config_name + ": "
            for composing_config_name in composing_config_names:
                model_config_name = model_config_name + composing_config_name

                if composing_config_name != composing_config_names[-1]:
                    model_config_name = model_config_name + ", "

        (
            model_specific_pa_params,
            batch_sizes,
            concurrencies,
            request_rates,
        ) = self._tabulate_measurement_setup(run_config_measurement)

        satisfies = "Yes" if passes else "No"

        # Non GPU specific data
        inference_fields = self._inference_output_fields
        inference_row = self._get_common_row_items(
            inference_fields,
            batch_sizes,
            concurrencies,
            request_rates,
            satisfies,
            model_name,
            model_config_name,
            dynamic_batchings,
            instance_groups,
            max_batch_sizes,
            backend_parameters,
        )

        self._populate_inference_rows(
            run_config_measurement, inference_fields, inference_row
        )

        self._result_tables[self.model_inference_table_key].insert_row_by_index(
            inference_row
        )

        # GPU specific data (only put measurement if not cpu only)
        if not any(cpu_onlys):
            for gpu_uuid, metrics in run_config_measurement.gpu_data().items():
                gpu_fields = self._gpu_output_fields

                gpu_row = self._get_common_row_items(
                    gpu_fields,
                    batch_sizes,
                    concurrencies,
                    request_rates,
                    satisfies,
                    model_name,
                    model_config_name,
                    dynamic_batchings,
                    instance_groups,
                    max_batch_sizes,
                )

                self._add_uuid_to_gpu_row(gpu_row, gpu_uuid, gpu_fields)
                self._add_metrics_to_gpu_row(gpu_row, metrics, gpu_fields)

                self._result_tables[self.model_gpu_table_key].insert_row_by_index(
                    row=gpu_row
                )

    def _tabulate_measurement_setup(self, run_config_measurement):
        model_specific_pa_params = run_config_measurement.model_specific_pa_params()
        batch_sizes = [
            pa_params["batch-size"]
            for pa_params in model_specific_pa_params
            if "batch-size" in pa_params
        ]
        concurrencies = [
            pa_params["concurrency-range"]
            for pa_params in model_specific_pa_params
            if "concurrency-range" in pa_params
        ]
        request_rates = [
            pa_params["request-rate-range"]
            for pa_params in model_specific_pa_params
            if "request-rate-range" in pa_params
        ]

        return model_specific_pa_params, batch_sizes, concurrencies, request_rates

    def _populate_inference_rows(
        self, run_config_measurement, inference_fields, inference_row
    ):
        # FIXME: TMA-686 - Need to figure out what to do if models have different tags
        for metric in run_config_measurement.non_gpu_data()[0]:
            metric_tag_index = self._find_index_for_field(inference_fields, metric.tag)
            if metric_tag_index is not None:
                inference_row[metric_tag_index] = self._create_non_gpu_metric_row_entry(
                    run_config_measurement, metric
                )

    def _add_uuid_to_gpu_row(self, gpu_row, gpu_uuid, gpu_fields):
        gpu_uuid_index = self._find_index_for_field(gpu_fields, "gpu_uuid")

        if gpu_uuid_index is not None:
            gpu_row[gpu_uuid_index] = gpu_uuid

    def _add_metrics_to_gpu_row(self, gpu_row, metrics, gpu_fields):
        for metric in metrics:
            metric_tag_index = self._find_index_for_field(gpu_fields, metric.tag)

            if metric_tag_index is not None:
                gpu_row[metric_tag_index] = round(metric.value(), 1)

    def _create_non_gpu_metric_row_entry(self, run_config_measurement, metric):
        metric_value = run_config_measurement.get_non_gpu_metric_value(metric.tag)
        non_gpu_metrics = run_config_measurement.get_non_gpu_metric(metric.tag)

        if len(non_gpu_metrics) > 1:
            rounded_non_gpu_metrics = [
                round(metric.value(), 1)
                for metric in run_config_measurement.get_non_gpu_metric(metric.tag)
            ]

            return format_for_csv([round(metric_value, 1), rounded_non_gpu_metrics])

        else:
            return format_for_csv(round(metric_value, 1))

    def _get_common_row_items(
        self,
        fields,
        batch_sizes,
        concurrencies,
        request_rates,
        satisfies,
        model_name,
        model_config_path,
        dynamic_batchings,
        instance_groups,
        max_batch_sizes,
        backend_parameters=None,
    ):
        row = [None] * len(fields)

        # Model Name
        model_name_index = self._find_index_for_field(fields, "model_name")
        if model_name_index is not None:
            row[model_name_index] = format_for_csv(model_name)

        # Batch Size
        batch_size_index = self._find_index_for_field(fields, "batch_size")
        if batch_size_index is not None:
            row[batch_size_index] = format_for_csv(batch_sizes)

        # Concurrency
        concurrency_index = self._find_index_for_field(fields, "concurrency")
        if concurrency_index is not None:
            row[concurrency_index] = format_for_csv(concurrencies)

        # Request rate
        request_rate_index = self._find_index_for_field(fields, "request_rate")
        if request_rate_index is not None:
            row[request_rate_index] = format_for_csv(request_rates)

        # Satisfies
        satisfies_constraints_index = self._find_index_for_field(
            fields, "satisfies_constraints"
        )
        if satisfies_constraints_index is not None:
            row[satisfies_constraints_index] = format_for_csv(satisfies)

        # Model Config Path
        model_config_path_idx = self._find_index_for_field(fields, "model_config_path")
        if model_config_path_idx is not None:
            row[model_config_path_idx] = format_for_csv(model_config_path)

        # Instance Group
        instance_group_idx = self._find_index_for_field(fields, "instance_group")
        if instance_group_idx is not None:
            row[instance_group_idx] = format_for_csv(instance_groups)

        # Max Batch Size
        max_batch_size_idx = self._find_index_for_field(fields, "max_batch_size")
        if max_batch_size_idx is not None:
            row[max_batch_size_idx] = format_for_csv(max_batch_sizes)

        # Backend parameters
        if backend_parameters is not None:
            for key in fields:
                if key.startswith(self.backend_parameter_key_prefix):
                    backend_parameter_key = key.replace(
                        self.backend_parameter_key_prefix, ""
                    )
                    backend_parameter_idx = self._find_index_for_field(fields, key)

                    if (
                        backend_parameter_idx is not None
                        and backend_parameter_key in backend_parameters
                    ):
                        row[backend_parameter_idx] = backend_parameters[
                            backend_parameter_key
                        ].string_value

        return row
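
Read together, the public methods above imply a three-step lifecycle: create the tables, fill them from measurements, then write or export them. A minimal, hypothetical usage sketch follows; it assumes a profile `config` and a populated `result_manager` were built elsewhere in the package:

    # Hypothetical driver code; `config` and `result_manager` are assumed inputs.
    table_manager = ResultTableManager(config=config, result_manager=result_manager)
    table_manager.create_tables()     # determine headers; create server/inference/GPU tables
    table_manager.tabulate_results()  # add server rows, then one row per sorted measurement
    table_manager.write_results()     # print every table to the console
    table_manager.export_results()    # write CSV files under <export_path>/results
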
model_analyzer/result/result_utils.py
@@ -0,0 +1,42 @@
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
A module for various functions
needed across the result module
"""


def format_for_csv(obj, interior=False):
    """
    Takes an object, which could be a string, int, or list of either
    and formats it so it will be parsable in a csv
    """
    if type(obj) == list:
        if len(obj) > 1:
            if interior:
                return f" [{','.join([str(o) for o in obj])}]"
            else:
                return (
                    '"'
                    + ",".join([format_for_csv(o, interior=True) for o in obj])
                    + '"'
                )
        else:
            return str(obj[0])
    elif type(obj) == str and "," in obj:
        return '"' + obj + '"'
    else:
        return str(obj)
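
Tracing the branches above by hand gives the following behavior (expected outputs derived from reading the code, not from running the package):

    format_for_csv("model_a")         # -> 'model_a'
    format_for_csv("a,b")             # -> '"a,b"'   (an embedded comma forces quoting)
    format_for_csv([7])               # -> '7'       (single-element lists are unwrapped)
    format_for_csv([1, 2])            # -> '"1,2"'
    format_for_csv([[1, 2], [3, 4]])  # -> '" [1,2], [3,4]"' (interior lists render as " [...]")

As written, an empty list falls through to str(obj[0]) and would raise an IndexError, so callers are expected never to pass one.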