triton-model-analyzer 1.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from typing import Generator, List, Optional
|
|
19
|
+
|
|
20
|
+
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
|
|
21
|
+
from model_analyzer.constants import (
|
|
22
|
+
LOGGER_NAME,
|
|
23
|
+
THROUGHPUT_MINIMUM_CONSECUTIVE_BATCH_SIZE_TRIES,
|
|
24
|
+
THROUGHPUT_MINIMUM_CONSECUTIVE_PARAMETER_TRIES,
|
|
25
|
+
THROUGHPUT_MINIMUM_GAIN,
|
|
26
|
+
)
|
|
27
|
+
from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
|
|
28
|
+
from model_analyzer.result.run_config_measurement import RunConfigMeasurement
|
|
29
|
+
|
|
30
|
+
from .config_generator_interface import ConfigGeneratorInterface
|
|
31
|
+
from .generator_utils import GeneratorUtils as utils
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(LOGGER_NAME)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
    """
    Given Perf Analyzer configuration options, generates Perf Analyzer configs

    All combinations are pregenerated in __init__, but it may return is_done==true
    earlier depending on results that it receives
    """

    def __init__(
        self,
        cli_config: ConfigCommandProfile,
        model_name: str,
        model_perf_analyzer_flags: dict,
        model_parameters: dict,
        early_exit_enable: bool,
    ) -> None:
        """
        Parameters
        ----------
        cli_config: ConfigCommandProfile
            CLI Configuration Options

        model_name: string
            The model name to profile

        model_perf_analyzer_flags: Dict
            custom perf analyzer configuration

        model_parameters: Dict
            model constraints for batch_sizes, concurrency and/or request rate

        early_exit_enable: Bool
            If true, this class can early exit during search of concurrency/request rate
        """

        self._early_exit_enable = early_exit_enable

        # All configs are pregenerated in _configs[][]
        # Indexed as follows:
        #    _configs[_curr_batch_size_index][_curr_parameter_index]
        #
        self._curr_parameter_index = 0
        self._curr_batch_size_index = 0
        self._configs: List[List[PerfAnalyzerConfig]] = []
        self._parameter_warning_printed = False

        # Flag to indicate we have started to return results
        #
        self._generator_started = False

        self._last_results: List[RunConfigMeasurement] = []
        self._parameter_results: List[Optional[RunConfigMeasurement]] = []
        self._batch_size_results: List[Optional[RunConfigMeasurement]] = []

        self._model_name = model_name
        self._perf_analyzer_flags = model_perf_analyzer_flags

        self._batch_sizes = sorted(model_parameters["batch_sizes"])
        self._cli_config = cli_config

        self._model_parameters = model_parameters
        self._parameters = self._create_parameter_list()
        self._generate_perf_configs()

    @staticmethod
    def throughput_gain_valid_helper(
        throughputs: List[Optional[RunConfigMeasurement]],
        min_tries: int = THROUGHPUT_MINIMUM_CONSECUTIVE_PARAMETER_TRIES,
        min_gain: float = THROUGHPUT_MINIMUM_GAIN,
    ) -> bool:
        """
        Returns true if the most recent window of `min_tries` measurements
        still shows a relative throughput gain above `min_gain` (or if there
        are not enough measurements yet to make a decision)
        """
        if len(throughputs) < min_tries:
            return True

        throughputs_in_range = [
            PerfAnalyzerConfigGenerator.get_throughput(throughputs[x])
            for x in range(-min_tries, 0)
        ]

        first = throughputs_in_range[0]
        best = max(throughputs_in_range)

        # BUGFIX: erroneous (None) measurements map to 0.0 throughput, which
        # previously caused a ZeroDivisionError here. If the window starts at
        # zero, any nonzero best counts as (infinite) gain.
        if first == 0:
            return best > 0

        gain = (best - first) / first

        return gain > min_gain

    @staticmethod
    def get_throughput(measurement: Optional[RunConfigMeasurement]) -> float:
        """Returns the measured throughput, or 0.0 for an erroneous (None) measurement"""
        if measurement:
            return measurement.get_non_gpu_metric_value("perf_throughput")
        else:
            return 0.0

    def _is_done(self) -> bool:
        """Returns true if this generator is done generating configs"""
        return self._generator_started and self._done_walking()

    def get_configs(self) -> Generator[PerfAnalyzerConfig, None, None]:
        """Returns the next generated config"""
        while True:
            if self._is_done():
                break

            self._generator_started = True
            config = self._configs[self._curr_batch_size_index][
                self._curr_parameter_index
            ]
            yield config

            # An erroneous result aborts the whole walk
            if self._last_results_erroneous():
                break

            self._step()

    def set_last_results(
        self, measurements: List[Optional[RunConfigMeasurement]]
    ) -> None:
        """
        Given the results from the last PerfAnalyzerConfig, make decisions
        about future configurations to generate

        Parameters
        ----------
        measurements: List of Measurements from the last run(s)
        """

        # Remove 'NONE' cases, and find single max measurement from the list
        valid_measurements = [m for m in measurements if m]

        self._last_results = []
        if valid_measurements:
            measurement = [max(valid_measurements)]

            self._last_results = measurement
            self._parameter_results.extend(measurement)

    def _create_parameter_list(self) -> List[int]:
        # The two possible parameters are request rate or concurrency
        # Concurrency is the default and will be used unless the user specifies
        # request rate, either as a model parameter or a config option
        if self._cli_config.is_request_rate_specified(self._model_parameters):
            return self._create_request_rate_list()
        else:
            return self._create_concurrency_list()

    def _create_request_rate_list(self) -> List[int]:
        # Explicit user values win; a disabled search collapses to a single
        # value; otherwise sweep by doubling between the configured min/max
        if self._model_parameters["request_rate"]:
            return sorted(self._model_parameters["request_rate"])
        elif self._cli_config.run_config_search_disable:
            return [1]
        else:
            return utils.generate_doubled_list(
                self._cli_config.run_config_search_min_request_rate,
                self._cli_config.run_config_search_max_request_rate,
            )

    def _create_concurrency_list(self) -> List[int]:
        # Same precedence rules as _create_request_rate_list
        if self._model_parameters["concurrency"]:
            return sorted(self._model_parameters["concurrency"])
        elif self._cli_config.run_config_search_disable:
            return [1]
        else:
            return utils.generate_doubled_list(
                self._cli_config.run_config_search_min_concurrency,
                self._cli_config.run_config_search_max_concurrency,
            )

    def _generate_perf_configs(self) -> None:
        """Pregenerates all PerfAnalyzerConfigs into _configs[batch_size][parameter]"""
        perf_config_non_parameter_values = (
            self._create_non_parameter_perf_config_values()
        )

        for params in utils.generate_parameter_combinations(
            perf_config_non_parameter_values
        ):
            configs_with_concurrency = []
            for parameter in self._parameters:
                new_perf_config = PerfAnalyzerConfig()

                new_perf_config.update_config_from_profile_config(
                    self._model_name, self._cli_config
                )

                new_perf_config.update_config(params)

                if self._cli_config.is_request_rate_specified(self._model_parameters):
                    new_perf_config.update_config({"request-rate-range": parameter})
                else:
                    new_perf_config.update_config({"concurrency-range": parameter})

                # User provided flags can override the search parameters
                new_perf_config.update_config(self._perf_analyzer_flags)

                configs_with_concurrency.append(new_perf_config)
            self._configs.append(configs_with_concurrency)

    def _create_non_parameter_perf_config_values(self) -> dict:
        # Everything swept outside of concurrency/request-rate
        perf_config_values = {
            "batch-size": self._batch_sizes,
        }

        return perf_config_values

    def _step(self) -> None:
        """Advances to the next config: inner walk over parameters, outer over batch sizes"""
        self._step_parameter()

        if self._done_walking_parameters():
            self._add_best_throughput_to_batch_sizes()
            self._reset_parameters()
            self._step_batch_size()

    def _add_best_throughput_to_batch_sizes(self) -> None:
        if self._parameter_results:
            # type is List[Optional[RCM]]
            best = max(self._parameter_results)  # type: ignore
            self._batch_size_results.append(best)

    def _reset_parameters(self) -> None:
        self._curr_parameter_index = 0
        self._parameter_warning_printed = False
        self._parameter_results = []

    def _step_parameter(self) -> None:
        self._curr_parameter_index += 1

    def _step_batch_size(self) -> None:
        self._curr_batch_size_index += 1

    def _done_walking(self) -> bool:
        return self._done_walking_batch_sizes()

    def _done_walking_parameters(self) -> bool:
        """True when the parameter list is exhausted or throughput has plateaued"""
        if len(self._parameters) == self._curr_parameter_index:
            return True
        if self._early_exit_enable and not self._parameter_throughput_gain_valid():
            if not self._parameter_warning_printed:
                if self._cli_config.is_request_rate_specified(self._model_parameters):
                    logger.info(
                        "No longer increasing request rate as throughput has plateaued"
                    )
                else:
                    logger.info(
                        "No longer increasing concurrency as throughput has plateaued"
                    )
                self._parameter_warning_printed = True
            return True
        return False

    def _done_walking_batch_sizes(self) -> bool:
        """True when the batch size list is exhausted or throughput has plateaued"""
        if len(self._batch_sizes) == self._curr_batch_size_index:
            return True

        if self._early_exit_enable and not self._batch_size_throughput_gain_valid():
            logger.info(
                "No longer increasing client batch size as throughput has plateaued"
            )

            return True
        return False

    def _last_results_erroneous(self) -> bool:
        return not self._last_results or self._last_results[-1] is None

    def _parameter_throughput_gain_valid(self) -> bool:
        """Check if any of the last X parameter results resulted in valid gain"""
        return PerfAnalyzerConfigGenerator.throughput_gain_valid_helper(
            throughputs=self._parameter_results,
            min_tries=THROUGHPUT_MINIMUM_CONSECUTIVE_PARAMETER_TRIES,
            min_gain=THROUGHPUT_MINIMUM_GAIN,
        )

    def _batch_size_throughput_gain_valid(self) -> bool:
        """Check if any of the last X batch_size results resulted in valid gain"""
        return PerfAnalyzerConfigGenerator.throughput_gain_valid_helper(
            throughputs=self._batch_size_results,
            min_tries=THROUGHPUT_MINIMUM_CONSECUTIVE_BATCH_SIZE_TRIES,
            min_gain=THROUGHPUT_MINIMUM_GAIN,
        )
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from copy import deepcopy
|
|
19
|
+
from typing import Generator, List, Optional
|
|
20
|
+
|
|
21
|
+
from model_analyzer.config.generate.concurrency_sweeper import ConcurrencySweeper
|
|
22
|
+
from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
|
|
23
|
+
from model_analyzer.config.generate.model_variant_name_manager import (
|
|
24
|
+
ModelVariantNameManager,
|
|
25
|
+
)
|
|
26
|
+
from model_analyzer.config.generate.quick_run_config_generator import (
|
|
27
|
+
QuickRunConfigGenerator,
|
|
28
|
+
)
|
|
29
|
+
from model_analyzer.config.generate.search_config import SearchConfig
|
|
30
|
+
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
|
|
31
|
+
from model_analyzer.config.run.run_config import RunConfig
|
|
32
|
+
from model_analyzer.constants import LOGGER_NAME
|
|
33
|
+
from model_analyzer.result.parameter_search import ParameterSearch
|
|
34
|
+
from model_analyzer.result.result_manager import ResultManager
|
|
35
|
+
from model_analyzer.result.run_config_measurement import RunConfigMeasurement
|
|
36
|
+
|
|
37
|
+
from .config_generator_interface import ConfigGeneratorInterface
|
|
38
|
+
|
|
39
|
+
logger = logging.getLogger(LOGGER_NAME)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class QuickPlusConcurrencySweepRunConfigGenerator(ConfigGeneratorInterface):
    """
    Runs the quick (hill-climbing) search via QuickRunConfigGenerator, then,
    unless disabled, gathers concurrency sweep measurements (binary parameter
    search) for the default and Top N results for reporting
    """

    def __init__(
        self,
        search_config: SearchConfig,
        config: ConfigCommandProfile,
        gpu_count: int,
        models: List[ModelProfileSpec],
        composing_models: List[ModelProfileSpec],
        result_manager: ResultManager,
        model_variant_name_manager: ModelVariantNameManager,
    ):
        """
        Parameters
        ----------
        search_config: SearchConfig
            Defines parameters and dimensions for the search
        config: ConfigCommandProfile
            Profile configuration information
        gpu_count: Number of gpus in the system
        models: List of ModelProfileSpec
            List of models to profile
        composing_models: List of ModelProfileSpec
            List of composing models that exist inside of the supplied models
        result_manager: ResultManager
            The object that handles storing and sorting the results from the perf analyzer
        model_variant_name_manager: ModelVariantNameManager
            Maps model variants to config names
        """
        # All inputs are stored as-is; the actual search work happens
        # lazily inside get_configs()
        self._search_config = search_config
        self._config = config
        self._gpu_count = gpu_count
        self._models = models
        self._composing_models = composing_models
        self._result_manager = result_manager
        self._model_variant_name_manager = model_variant_name_manager

    def set_last_results(
        self, measurements: List[Optional[RunConfigMeasurement]]
    ) -> None:
        # Remember the newest measurement and forward the full list to the
        # generator currently driving the search
        self._last_measurement = measurements[-1]
        self._rcg.set_last_results(measurements)

    def get_configs(self) -> Generator[RunConfig, None, None]:
        """
        Returns
        -------
        RunConfig
            The next RunConfig generated by this class
        """

        logger.info("")
        logger.info("Starting quick mode search to find optimal configs")
        logger.info("")
        yield from self._execute_quick_search()
        logger.info("")
        if not self._config.concurrency_sweep_disable:
            logger.info(
                "Done with quick mode search. Gathering concurrency sweep measurements for reports"
            )
            logger.info("")
            sweeper = ConcurrencySweeper(
                config=self._config, result_manager=self._result_manager
            )
            yield from sweeper.get_configs()
            logger.info("")
            logger.info("Done gathering concurrency sweep measurements for reports")
            logger.info("")
        else:
            logger.info("Done with quick mode search.")

    def _execute_quick_search(self) -> Generator[RunConfig, None, None]:
        # The generator is kept on self so set_last_results() can forward
        # measurements to it while the search is running
        self._rcg: ConfigGeneratorInterface = self._create_quick_run_config_generator()

        yield from self._rcg.get_configs()

    def _create_quick_run_config_generator(self) -> QuickRunConfigGenerator:
        # Factory for the hill-climbing search generator
        return QuickRunConfigGenerator(
            search_config=self._search_config,
            config=self._config,
            gpu_count=self._gpu_count,
            models=self._models,
            composing_models=self._composing_models,
            model_variant_name_manager=self._model_variant_name_manager,
        )
|