triton-model-analyzer 1.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from bisect import insort
|
|
19
|
+
from functools import total_ordering
|
|
20
|
+
from typing import List
|
|
21
|
+
|
|
22
|
+
from model_analyzer.config.run.run_config import RunConfig
|
|
23
|
+
from model_analyzer.constants import LOGGER_NAME
|
|
24
|
+
from model_analyzer.result.constraint_manager import ConstraintManager
|
|
25
|
+
from model_analyzer.result.run_config_measurement import RunConfigMeasurement
|
|
26
|
+
from model_analyzer.result.run_config_result_comparator import RunConfigResultComparator
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(LOGGER_NAME)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@total_ordering
class RunConfigResult:
    """
    Holds every RunConfigMeasurement gathered for a single RunConfig,
    split into constraint-passing and constraint-failing groups. Each
    group is kept sorted (ascending) so the best measurements can be
    pulled from the tail. This RunConfigResult belongs to a particular
    ResultTable.
    """

    def __init__(
        self,
        model_name: str,
        run_config: RunConfig,
        comparator: RunConfigResultComparator,
        constraint_manager: ConstraintManager,
    ):
        """
        Parameters
        ----------
        model_name: str
            The name of the model
        run_config : RunConfig
            The RunConfig these measurements belong to
        comparator : RunConfigResultComparator
            Object whose is_better_than() decides the ordering between
            two RunConfigResults
        constraint_manager: ConstraintManager
            Applies the configured constraints to each measurement
        """

        self._model_name = model_name
        self._run_config = run_config
        self._comparator = comparator
        self._constraint_manager = constraint_manager

        # Sorted lists maintained via bisect.insort ("heaps").
        self._measurements: List[RunConfigMeasurement] = []
        self._passing_measurements: List[RunConfigMeasurement] = []
        self._failing_measurements: List[RunConfigMeasurement] = []

    def model_name(self):
        """
        Returns
        -------
        str
            The name of the model corresponding to this RunConfigResult
        """

        return self._model_name

    def run_config(self):
        """
        Returns
        -------
        RunConfig
            The RunConfig associated with this result
        """
        return self._run_config

    def failing(self):
        """
        Returns
        -------
        bool
            True if no RunConfigMeasurement satisfied the constraints
        """

        return not self._passing_measurements

    def add_run_config_measurement(self, run_config_measurement):
        """
        Insert a profiled RunConfigMeasurement into the overall sorted
        list, and into either the passing or failing sorted list
        depending on the constraint check.

        Parameters
        ----------
        run_config_measurement : RunConfigMeasurement
            The profiled RunConfigMeasurement
        """

        insort(self._measurements, run_config_measurement)

        satisfies = self._constraint_manager.satisfies_constraints(
            run_config_measurement
        )
        bucket = self._passing_measurements if satisfies else self._failing_measurements
        insort(bucket, run_config_measurement)

    def run_config_measurements(self):
        """
        Returns
        -------
        list
            All RunConfigMeasurements in this RunConfigResult, best first
        """
        return list(reversed(self._measurements))

    def passing_measurements(self):
        """
        Returns
        -------
        list
            The constraint-passing measurements, best first
        """

        return list(reversed(self._passing_measurements))

    def failing_measurements(self):
        """
        Returns
        -------
        list
            The constraint-failing measurements, best first
        """

        return list(reversed(self._failing_measurements))

    def top_n_measurements(self, n):
        """
        Parameters
        ----------
        n : int
            The number of top RunConfigMeasurements to retrieve

        Returns
        -------
        list of RunConfigMeasurements
            The top n RunConfigMeasurements, best first. Falls back to
            constraint-failing measurements when nothing passed.
        """

        if not self._passing_measurements:
            logger.warning(
                f"Requested top {n} RunConfigMeasurements, but none satisfied constraints. "
                "Showing available constraint failing measurements for this config."
            )

            if n > len(self._failing_measurements):
                logger.warning(
                    f"Requested top {n} failing RunConfigMeasurements, "
                    f"but found only {len(self._failing_measurements)}. "
                    "Showing all available constraint failing measurements for this config."
                )

            # The lists are sorted ascending, so the best n live at the tail.
            return list(reversed(self._failing_measurements[-n:]))

        if n > len(self._passing_measurements):
            logger.warning(
                f"Requested top {n} RunConfigMeasurements, but "
                f"found only {len(self._passing_measurements)}. "
                "Showing all available measurements for this config."
            )

        return list(reversed(self._passing_measurements[-n:]))

    def __lt__(self, other):
        """
        True if this RunConfigResult is better than `other`.

        NOTE: intentionally inverted relative to the usual __lt__
        semantics, so that sorting a list of results places the best
        one first.
        """

        return self._comparator.is_better_than(self, other)
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from typing import Dict, List
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RunConfigResultComparator:
    """
    Stores the weighting information needed to compare two
    RunConfigResults: per-model normalized metric objectives and
    per-model weights.
    """

    def __init__(
        self, metric_objectives_list: List[Dict[str, int]], model_weights: List[int]
    ):
        """
        Parameters
        ----------
        metric_objectives_list : list of dict
            One dict per model. Keys are metric (RecordType) tags and
            values are the relative importance of each metric with
            respect to the others. Each dict's values are normalized so
            they sum to 1.
        model_weights : list of int
            Relative importance of each model, parallel to
            metric_objectives_list (must have at least as many entries).

        Raises
        ------
        ZeroDivisionError
            If any objectives dict's values sum to 0.
        """

        # Normalize metric weights
        self._metric_weights = []
        self._model_weights = []
        for i, metric_objectives in enumerate(metric_objectives_list):
            # Hoist the total out of the comprehension — previously
            # sum() was recomputed once per key.
            total = sum(metric_objectives.values())
            self._metric_weights.append(
                {key: (val / total) for key, val in metric_objectives.items()}
            )

            self._model_weights.append(model_weights[i])

    def get_metric_weights(self):
        """Return the normalized metric-weight dicts, one per model."""
        return self._metric_weights

    def get_model_weights(self):
        """Return the per-model weights."""
        return self._model_weights

    def is_better_than(self, run_config_result1, run_config_result2):
        """
        Aggregates each RunConfigResult into a single representative
        RunConfigMeasurement and compares the two.

        Parameters
        ----------
        run_config_result1 : RunConfigResult
            first result to be compared
        run_config_result2 : RunConfigResult
            second result to be compared

        Returns
        -------
        bool
            True: if result1 is better than result2
        """

        agg_run_config_measurement1 = self._aggregate_run_config_measurements(
            run_config_result1, aggregation_func=max
        )
        agg_run_config_measurement2 = self._aggregate_run_config_measurements(
            run_config_result2, aggregation_func=max
        )

        return agg_run_config_measurement1.is_better_than(agg_run_config_measurement2)

    def _aggregate_run_config_measurements(self, run_config_result, aggregation_func):
        """
        Reduce a RunConfigResult to one RunConfigMeasurement via
        aggregation_func, preferring constraint-passing measurements and
        falling back to all measurements when none pass.

        Returns
        -------
        RunConfigMeasurement
            The aggregated measurement, with the model and metric
            weightings attached so it can score itself.
        """

        # For the gpu_measurements we have a list of dicts of lists.
        # Assumption here is that it's okay to average over all GPUs over
        # all perf runs — this is done within the measurement itself.
        measurements = (
            run_config_result.passing_measurements()
            or run_config_result.run_config_measurements()
        )
        aggregated_run_config_measurement = aggregation_func(measurements)

        aggregated_run_config_measurement.set_model_config_weighting(
            self._model_weights
        )
        aggregated_run_config_measurement.set_metric_weightings(self._metric_weights)

        return aggregated_run_config_measurement
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from copy import deepcopy
|
|
19
|
+
from typing import List, Optional
|
|
20
|
+
|
|
21
|
+
from model_analyzer.constants import LOGGER_NAME
|
|
22
|
+
from model_analyzer.result.run_config_result import RunConfigResult
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(LOGGER_NAME)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SortedResults:
    """
    Container used by the result manager to accumulate
    RunConfigResults and hand them back in sorted (best-first) order.
    """

    # Sentinel for top_n_results(): return every result.
    GET_ALL_RESULTS = -1

    def __init__(self) -> None:
        self._run_config_results: List[RunConfigResult] = []

    def results(self) -> List[RunConfigResult]:
        """
        Returns
        -------
        All the results, sorted best first
        """

        self._run_config_results.sort()
        return self._run_config_results

    def add_result(self, run_config_result: RunConfigResult) -> None:
        """
        Adds a run_config_result to the result list. If a result for the
        same model variants already exists, the new measurements are
        merged into it; otherwise a deep copy of the result is stored.

        Parameters
        ----------
        run_config_result: RunConfigResult
            The result to be added
        """

        existing = self._find_existing_run_config_result(run_config_result)

        if existing is None:
            self._add_new_run_config_result(run_config_result)
        else:
            self._add_measurements_to_existing_run_config_result(
                existing, run_config_result
            )

    def top_n_results(self, n: int) -> List[RunConfigResult]:
        """
        Parameters
        ----------
        n : int
            The number of top results to retrieve

        Returns
        -------
        list of RunConfigResults
            The n best results for this model. Constraint-passing
            results are preferred; failing results are returned (with a
            warning) only when nothing passed.
        """

        passing_results, failing_results = self._create_passing_and_failing_lists()

        if passing_results:
            return self._get_top_n_results(passing_results, n)

        logger.warning(
            f"Requested top {n} configs, but none satisfied constraints. "
            "Showing available constraint failing configs for this model."
        )
        return self._get_top_n_results(failing_results, n)

    def _find_existing_run_config_result(
        self, run_config_result: RunConfigResult
    ) -> Optional[RunConfigResult]:
        """Return the stored result with the same model variants name, if any."""
        if not run_config_result.run_config():
            return None

        for candidate in self._run_config_results:
            if (
                candidate.run_config().model_variants_name()
                == run_config_result.run_config().model_variants_name()
            ):
                return candidate

        return None

    def _add_measurements_to_existing_run_config_result(
        self,
        existing_run_config_result: RunConfigResult,
        new_run_config_result: RunConfigResult,
    ) -> None:
        """Fold every measurement of the new result into the existing one."""
        for measurement in new_run_config_result.run_config_measurements():
            existing_run_config_result.add_run_config_measurement(measurement)

    def _add_new_run_config_result(self, run_config_result: RunConfigResult) -> None:
        """Store a deep copy so later mutations by the caller don't leak in."""
        self._run_config_results.append(deepcopy(run_config_result))

    def _create_passing_and_failing_lists(self):
        """Sort all results (best first), then split them by constraint status."""
        self._run_config_results.sort()

        passing: List[RunConfigResult] = []
        failing: List[RunConfigResult] = []
        for result in self._run_config_results:
            (failing if result.failing() else passing).append(result)

        return passing, failing

    def _get_top_n_results(
        self, results: List[RunConfigResult], n: int
    ) -> List[RunConfigResult]:
        """Return the first n results, or everything for GET_ALL_RESULTS."""
        if n == SortedResults.GET_ALL_RESULTS:
            return results

        if n > len(results):
            logger.warning(
                f"Requested top {n} configs, "
                f"but found only {len(results)}. "
                "Showing all available configs for this model."
            )

        return results[0 : min(n, len(results))]
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from model_analyzer.record.record import RecordType
|
|
18
|
+
from model_analyzer.result.results import Results
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class AnalyzerState:
    """
    Serializable snapshot of all the state information Model Analyzer
    needs, in one place.
    """

    def __init__(self):
        # Flat mapping of "<Component>.<field>" keys to state values.
        self._state_dict = {}

    def to_dict(self):
        """Return the raw state dictionary (used for serialization)."""
        return self._state_dict

    @classmethod
    def from_dict(cls, state_dict):
        """
        Rebuild an AnalyzerState from a previously serialized dict,
        reconstructing record objects from their (tag, dict) pairs.
        """
        state = AnalyzerState()

        # Model Variant mapping — stored as-is.
        state.set(
            "ModelManager.model_variant_name_manager",
            state_dict["ModelManager.model_variant_name_manager"],
        )

        # Fill results through their own deserializer.
        state.set(
            "ResultManager.results",
            Results.from_dict(state_dict["ResultManager.results"]),
        )

        # Server data: each GPU uuid maps to a list of [tag, record_dict]
        # pairs; turn every pair back into a concrete record instance.
        server_only_data = {}
        for gpu_uuid, gpu_data_list in state_dict[
            "ResultManager.server_only_data"
        ].items():
            server_only_data[gpu_uuid] = [
                RecordType.get(tag).from_dict(record_dict)
                for tag, record_dict in gpu_data_list
            ]
        state.set("ResultManager.server_only_data", server_only_data)

        # GPU data — stored as-is.
        state.set("MetricsManager.gpus", state_dict["MetricsManager.gpus"])

        # Optuna seed — default to 0 for checkpoints written before the
        # seed was recorded.
        state.set(
            "OptunaRunConfigGenerator.seed",
            state_dict.get("OptunaRunConfigGenerator.seed", 0),
        )

        return state

    def get(self, name):
        """Return the stored value for `name`, or None when absent."""
        return self._state_dict.get(name)

    def set(self, name, value):
        """Store `value` under `name`."""
        self._state_dict[name] = value
|