triton-model-analyzer 1.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,753 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from sys import maxsize
|
|
19
|
+
from typing import Dict, Generator, List, Optional, Tuple, Union
|
|
20
|
+
|
|
21
|
+
from model_analyzer.config.generate.base_model_config_generator import (
|
|
22
|
+
BaseModelConfigGenerator,
|
|
23
|
+
)
|
|
24
|
+
from model_analyzer.config.generate.brute_run_config_generator import (
|
|
25
|
+
BruteRunConfigGenerator,
|
|
26
|
+
)
|
|
27
|
+
from model_analyzer.config.generate.coordinate import Coordinate
|
|
28
|
+
from model_analyzer.config.generate.coordinate_data import CoordinateData
|
|
29
|
+
from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
|
|
30
|
+
from model_analyzer.config.generate.model_variant_name_manager import (
|
|
31
|
+
ModelVariantNameManager,
|
|
32
|
+
)
|
|
33
|
+
from model_analyzer.config.generate.neighborhood import Neighborhood
|
|
34
|
+
from model_analyzer.config.generate.search_config import SearchConfig
|
|
35
|
+
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
|
|
36
|
+
from model_analyzer.config.input.config_defaults import DEFAULT_BATCH_SIZES
|
|
37
|
+
from model_analyzer.config.run.model_run_config import ModelRunConfig
|
|
38
|
+
from model_analyzer.config.run.run_config import RunConfig
|
|
39
|
+
from model_analyzer.constants import LOGGER_NAME
|
|
40
|
+
from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
|
|
41
|
+
from model_analyzer.result.run_config_measurement import RunConfigMeasurement
|
|
42
|
+
from model_analyzer.triton.model.model_config import ModelConfig
|
|
43
|
+
from model_analyzer.triton.model.model_config_variant import ModelConfigVariant
|
|
44
|
+
|
|
45
|
+
from .config_generator_interface import ConfigGeneratorInterface
|
|
46
|
+
from .generator_utils import GeneratorUtils
|
|
47
|
+
|
|
48
|
+
logger = logging.getLogger(LOGGER_NAME)
|
|
49
|
+
from copy import deepcopy
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class QuickRunConfigGenerator(ConfigGeneratorInterface):
|
|
53
|
+
"""
|
|
54
|
+
Hill climbing algorithm to create RunConfigs
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
def __init__(
    self,
    search_config: SearchConfig,
    config: ConfigCommandProfile,
    gpu_count: int,
    models: List[ModelProfileSpec],
    composing_models: List[ModelProfileSpec],
    model_variant_name_manager: ModelVariantNameManager,
):
    """
    Store the search inputs and set up the initial hill-climbing state.

    Parameters
    ----------
    search_config: SearchConfig
        Defines parameters and dimensions for the search
    config: ConfigCommandProfile
        Profile configuration information
    gpu_count: int
        Number of gpus in the system
    models: List of ModelProfileSpec
        List of models to profile
    composing_models: List of ModelProfileSpec
        List of composing model profiles
    model_variant_name_manager: ModelVariantNameManager
        Handed to the model config variant factories used by this class
    """
    # Inputs are kept exactly as supplied by the caller
    self._search_config = search_config
    self._config = config
    self._gpu_count = gpu_count
    self._models = models
    self._composing_models = composing_models
    self._model_variant_name_manager = model_variant_name_manager

    # Values derived from the inputs
    self._triton_env = BruteRunConfigGenerator.determine_triton_server_env(models)
    self._c_api_mode = config.triton_launch_mode == "c_api"

    # Measured results for every coordinate are tracked here
    self._coordinate_data = CoordinateData()

    # The starting coordinate initially plays three roles:
    #  - the home (center) that the neighborhood is built around, which is
    #    replaced whenever the neighborhood is recreated
    #  - the coordinate to be measured next, replaced on every step
    #  - the best coordinate seen so far, used during the back-off stage
    start = self._get_starting_coordinate()
    self._home_coordinate = start
    self._coordinate_to_measure: Coordinate = start
    self._best_coordinate = start
    self._best_measurement: Optional[RunConfigMeasurement] = None

    self._neighborhood = Neighborhood(
        self._search_config.get_neighborhood_config(),
        self._home_coordinate,
        self._coordinate_data,
    )

    # Sticky bit. Once true, we should never stay at a home that is failing or None
    self._home_has_passed = False

    self._done = False
|
|
118
|
+
|
|
119
|
+
def _is_done(self) -> bool:
|
|
120
|
+
return self._done
|
|
121
|
+
|
|
122
|
+
def get_configs(self) -> Generator[RunConfig, None, None]:
    """
    Yield the default RunConfig, then one RunConfig per search step
    until the generator reports that it is done.

    Returns
    -------
    RunConfig
        The next RunConfig generated by this class
    """
    yield self._create_default_run_config()

    # After each yielded config, advance the search before checking
    # whether another config is needed
    while not self._is_done():
        yield self._get_next_run_config()
        self._step()
|
|
139
|
+
|
|
140
|
+
def _step(self) -> None:
|
|
141
|
+
"""
|
|
142
|
+
Determine self._coordinate_to_measure, which is what is used to
|
|
143
|
+
create the next RunConfig
|
|
144
|
+
"""
|
|
145
|
+
if self._should_step_back():
|
|
146
|
+
self._take_step_back()
|
|
147
|
+
elif self._neighborhood.enough_coordinates_initialized():
|
|
148
|
+
self._take_step()
|
|
149
|
+
else:
|
|
150
|
+
self._pick_coordinate_to_initialize()
|
|
151
|
+
|
|
152
|
+
def set_last_results(
    self, measurements: List[Optional[RunConfigMeasurement]]
) -> None:
    """
    Record the outcome of the last RunConfig so that later steps can
    use it. Only the first entry of the list is consulted here.

    Parameters
    ----------
    measurements: List of Measurements from the last run(s)
    """
    latest = measurements[0]

    self._coordinate_data.set_measurement(
        coordinate=self._coordinate_to_measure, measurement=latest
    )

    if latest is not None:
        self._update_best_measurement(measurement=latest)

        # Latch the sticky bit the first time the home coordinate passes
        at_home = self._measuring_home_coordinate()
        if at_home and latest.is_passing_constraints():
            self._home_has_passed = True

    self._print_debug_logs(measurements)
|
|
177
|
+
|
|
178
|
+
def _update_best_measurement(self, measurement: RunConfigMeasurement) -> None:
|
|
179
|
+
"""Keep track of the best coordinate/measurement seen so far."""
|
|
180
|
+
if self._best_measurement is None:
|
|
181
|
+
self._best_coordinate = self._coordinate_to_measure
|
|
182
|
+
self._best_measurement = measurement
|
|
183
|
+
|
|
184
|
+
elif (
|
|
185
|
+
not self._best_measurement.is_passing_constraints()
|
|
186
|
+
and measurement.is_passing_constraints()
|
|
187
|
+
):
|
|
188
|
+
self._best_coordinate = self._coordinate_to_measure
|
|
189
|
+
self._best_measurement = measurement
|
|
190
|
+
|
|
191
|
+
elif (
|
|
192
|
+
not self._best_measurement.is_passing_constraints()
|
|
193
|
+
and not measurement.is_passing_constraints()
|
|
194
|
+
):
|
|
195
|
+
comparison = self._best_measurement.compare_constraints(other=measurement)
|
|
196
|
+
|
|
197
|
+
if comparison and comparison > 0:
|
|
198
|
+
self._best_coordinate = self._coordinate_to_measure
|
|
199
|
+
self._best_measurement = measurement
|
|
200
|
+
|
|
201
|
+
elif (
|
|
202
|
+
self._best_measurement.is_passing_constraints()
|
|
203
|
+
and measurement.is_passing_constraints()
|
|
204
|
+
):
|
|
205
|
+
comparison = self._best_measurement.compare_measurements(other=measurement)
|
|
206
|
+
|
|
207
|
+
if comparison and comparison > 0:
|
|
208
|
+
self._best_coordinate = self._coordinate_to_measure
|
|
209
|
+
self._best_measurement = measurement
|
|
210
|
+
|
|
211
|
+
def _get_last_results(self) -> Optional[RunConfigMeasurement]:
|
|
212
|
+
return self._coordinate_data.get_measurement(
|
|
213
|
+
coordinate=self._coordinate_to_measure
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
def _take_step(self) -> None:
    """
    Move home to the coordinate chosen by the neighborhood, make it the
    next coordinate to measure, and rebuild the neighborhood around it.
    """
    new_coordinate = self._neighborhood.determine_new_home()

    # Must run before home is overwritten: it compares against the old home
    self._determine_if_done(new_coordinate)

    logger.debug(f"Stepping {self._home_coordinate}->{new_coordinate}")

    self._home_coordinate = self._coordinate_to_measure = new_coordinate
    self._recreate_neighborhood(force_slow_mode=False)
|
|
224
|
+
|
|
225
|
+
def _take_step_back(self) -> None:
    """
    Move home to the neighbor nearest to the best coordinate seen so far
    and rebuild the neighborhood there with slow mode forced.
    """
    new_coordinate = self._neighborhood.get_nearest_neighbor(
        coordinate_in=self._best_coordinate
    )

    # TODO: TMA-871: handle back-off (and its termination) better.
    # Stepping back onto the current home ends the search.
    stuck_at_home = new_coordinate == self._home_coordinate
    if stuck_at_home:
        self._done = True

    logger.debug(f"Stepping back: {self._home_coordinate}->{new_coordinate}")

    self._home_coordinate = self._coordinate_to_measure = new_coordinate
    self._recreate_neighborhood(force_slow_mode=True)
|
|
238
|
+
|
|
239
|
+
def _should_step_back(self) -> bool:
|
|
240
|
+
"""
|
|
241
|
+
Step back if take any of the following steps:
|
|
242
|
+
- Step from a passing home to a failing home
|
|
243
|
+
- Step from any home to home with a None measurement
|
|
244
|
+
"""
|
|
245
|
+
if self._measuring_home_coordinate():
|
|
246
|
+
last_results = self._get_last_results()
|
|
247
|
+
if not last_results:
|
|
248
|
+
return True
|
|
249
|
+
last_results_passed = last_results.is_passing_constraints()
|
|
250
|
+
if not last_results_passed and self._home_has_passed:
|
|
251
|
+
return True
|
|
252
|
+
return False
|
|
253
|
+
|
|
254
|
+
def _measuring_home_coordinate(self) -> bool:
|
|
255
|
+
return self._coordinate_to_measure == self._home_coordinate
|
|
256
|
+
|
|
257
|
+
def _determine_if_done(self, new_coordinate: Coordinate) -> None:
|
|
258
|
+
"""
|
|
259
|
+
Based on the new coordinate picked, determine if the generator is done
|
|
260
|
+
and if so, update self._done
|
|
261
|
+
"""
|
|
262
|
+
if new_coordinate == self._home_coordinate:
|
|
263
|
+
self._done = True
|
|
264
|
+
if self._coordinate_data.get_visit_count(new_coordinate) >= 2:
|
|
265
|
+
self._done = True
|
|
266
|
+
|
|
267
|
+
def _recreate_neighborhood(self, force_slow_mode: bool) -> None:
    """
    Build a fresh Neighborhood around the current home coordinate and
    count this as one more visit to that home.

    Parameters
    ----------
    force_slow_mode: bool
        When true, force_slow_mode() is called on the new neighborhood
    """
    self._neighborhood = Neighborhood(
        self._search_config.get_neighborhood_config(),
        self._home_coordinate,
        self._coordinate_data,
    )

    self._coordinate_data.increment_visit_count(self._home_coordinate)

    if not force_slow_mode:
        return
    self._neighborhood.force_slow_mode()
|
|
278
|
+
|
|
279
|
+
def _pick_coordinate_to_initialize(self) -> None:
    """
    Ask the neighborhood for another coordinate to measure. If it has
    none to offer, mark the generator as done.
    """
    candidate = self._neighborhood.pick_coordinate_to_initialize()

    if not candidate:
        logger.info("No coordinate to measure. Exiting")
        self._done = True
        return

    self._coordinate_to_measure = candidate
    logger.debug(f"Need more data. Measuring {self._coordinate_to_measure}")
|
|
288
|
+
|
|
289
|
+
def _get_starting_coordinate(self) -> Coordinate:
    """Build the starting Coordinate from the search config's minimum indexes."""
    return Coordinate(self._search_config.get_min_indexes())
|
|
292
|
+
|
|
293
|
+
def _get_coordinate_values(
|
|
294
|
+
self, coordinate: Coordinate, key: int
|
|
295
|
+
) -> Dict[str, Union[int, float]]:
|
|
296
|
+
dims = self._search_config.get_dimensions()
|
|
297
|
+
values = dims.get_values_for_coordinate(coordinate)
|
|
298
|
+
return values[key]
|
|
299
|
+
|
|
300
|
+
def _get_next_run_config(self) -> RunConfig:
    """
    Assemble a RunConfig holding one ModelRunConfig per model, threading
    the running dimension index from each model to the next.
    """
    run_config = RunConfig(self._triton_env)

    next_dimension = 0
    for model_spec in self._models:
        model_run_config, next_dimension = self._get_next_model_run_config(
            model_spec, next_dimension
        )
        run_config.add_model_run_config(model_run_config)

    return run_config
|
|
309
|
+
|
|
310
|
+
def _get_next_model_run_config(
|
|
311
|
+
self, model: ModelProfileSpec, start_model_index: int
|
|
312
|
+
) -> Tuple[ModelRunConfig, int]:
|
|
313
|
+
"""
|
|
314
|
+
Returns the next ModelRunConfig, along with the starting dimension
|
|
315
|
+
of the next model
|
|
316
|
+
"""
|
|
317
|
+
# The ordering of dimensions is dependent on the type of composing model:
|
|
318
|
+
# Ensemble - The top level model has no search dimensions - all dimensions
|
|
319
|
+
# come from the composing models
|
|
320
|
+
# BLS - The top level model has one dimension (instance) - and the
|
|
321
|
+
# remaining dimensions come from composing models
|
|
322
|
+
#
|
|
323
|
+
# In addition, for Ensemble models, it is necessary to create the composing model configs
|
|
324
|
+
# first, as these are needed when creating the top-level model config - while all other
|
|
325
|
+
# models want to create the top-level first
|
|
326
|
+
(
|
|
327
|
+
model_config_variant,
|
|
328
|
+
model_index,
|
|
329
|
+
) = self._get_next_non_composing_model_config_variant(model, start_model_index)
|
|
330
|
+
|
|
331
|
+
(
|
|
332
|
+
composing_model_config_variants,
|
|
333
|
+
model_index,
|
|
334
|
+
) = self._get_next_composing_model_config_variants(model_index)
|
|
335
|
+
|
|
336
|
+
# This will overwrite the empty ModelConfigVariant created above
|
|
337
|
+
if model.is_ensemble():
|
|
338
|
+
model_config_variant = self._get_next_ensemble_top_level_config_variant(
|
|
339
|
+
model, composing_model_config_variants, model_index
|
|
340
|
+
)
|
|
341
|
+
|
|
342
|
+
model_run_config = self._create_next_model_run_config(
|
|
343
|
+
model,
|
|
344
|
+
start_model_index,
|
|
345
|
+
model_config_variant,
|
|
346
|
+
composing_model_config_variants,
|
|
347
|
+
)
|
|
348
|
+
|
|
349
|
+
return (model_run_config, model_index)
|
|
350
|
+
|
|
351
|
+
def _get_next_non_composing_model_config_variant(
|
|
352
|
+
self, model: ModelProfileSpec, model_index: int
|
|
353
|
+
) -> Tuple[ModelConfigVariant, int]:
|
|
354
|
+
if model.is_ensemble():
|
|
355
|
+
return (ModelConfigVariant(ModelConfig({}), ""), model_index)
|
|
356
|
+
else:
|
|
357
|
+
return (
|
|
358
|
+
self._get_next_model_config_variant(model, model_index),
|
|
359
|
+
model_index + 1,
|
|
360
|
+
)
|
|
361
|
+
|
|
362
|
+
def _get_next_composing_model_config_variants(
|
|
363
|
+
self, model_index: int
|
|
364
|
+
) -> Tuple[List[ModelConfigVariant], int]:
|
|
365
|
+
composing_model_config_variants = []
|
|
366
|
+
for composing_model in self._composing_models:
|
|
367
|
+
composing_model_config_variant = self._get_next_model_config_variant(
|
|
368
|
+
composing_model, model_index
|
|
369
|
+
)
|
|
370
|
+
model_index += 1
|
|
371
|
+
composing_model_config_variants.append(composing_model_config_variant)
|
|
372
|
+
|
|
373
|
+
return (composing_model_config_variants, model_index)
|
|
374
|
+
|
|
375
|
+
def _get_next_ensemble_top_level_config_variant(
|
|
376
|
+
self,
|
|
377
|
+
model: ModelProfileSpec,
|
|
378
|
+
composing_model_config_variants: List[ModelConfigVariant],
|
|
379
|
+
model_index: int,
|
|
380
|
+
) -> ModelConfigVariant:
|
|
381
|
+
param_combo = self._get_next_ensemble_param_combo(model_index)
|
|
382
|
+
|
|
383
|
+
model_config_variant = self._get_next_ensemble_model_config_variant(
|
|
384
|
+
model, composing_model_config_variants, param_combo
|
|
385
|
+
)
|
|
386
|
+
|
|
387
|
+
return model_config_variant
|
|
388
|
+
|
|
389
|
+
def _get_next_ensemble_param_combo(self, end_model_index: int) -> dict:
|
|
390
|
+
"""
|
|
391
|
+
For the ensemble model the only parameter we need to set
|
|
392
|
+
is the max batch size; which will be the minimum batch size
|
|
393
|
+
found in the composing_model max batch sizes
|
|
394
|
+
"""
|
|
395
|
+
min_val_of_max_batch_size = maxsize
|
|
396
|
+
for model_index in range(0, end_model_index):
|
|
397
|
+
dimension_values = self._get_coordinate_values(
|
|
398
|
+
self._coordinate_to_measure, model_index
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
min_val_of_max_batch_size = int(
|
|
402
|
+
min(
|
|
403
|
+
[
|
|
404
|
+
dimension_values.get("max_batch_size", 1),
|
|
405
|
+
min_val_of_max_batch_size,
|
|
406
|
+
]
|
|
407
|
+
)
|
|
408
|
+
)
|
|
409
|
+
|
|
410
|
+
param_combo = {"max_batch_size": min_val_of_max_batch_size}
|
|
411
|
+
|
|
412
|
+
return param_combo
|
|
413
|
+
|
|
414
|
+
def _get_next_ensemble_model_config_variant(
    self,
    model: ModelProfileSpec,
    composing_config_variants: List[ModelConfigVariant],
    param_combo: dict,
) -> ModelConfigVariant:
    """
    Delegate to BaseModelConfigGenerator to create the ensemble's config
    variant, supplying this generator's name manager and C-API mode flag.
    """
    return BaseModelConfigGenerator.make_ensemble_model_config_variant(
        model=model,
        ensemble_composing_model_config_variants=composing_config_variants,
        model_variant_name_manager=self._model_variant_name_manager,
        param_combo=param_combo,
        c_api_mode=self._c_api_mode,
    )
|
|
431
|
+
|
|
432
|
+
def _get_next_model_config_variant(
    self, model: ModelProfileSpec, dimension_index: int
) -> ModelConfigVariant:
    """
    Create the config variant for one model from the values of the
    coordinate being measured at `dimension_index`.

    The parameter combination starts from the model's own config
    parameters (minus any "max_batch_size") and is then given an instance
    group, optionally a max batch size, and optionally dynamic batching.
    """
    dimension_values = self._get_coordinate_values(
        self._coordinate_to_measure, dimension_index
    )

    # Work on a copy so the model's own parameters are never modified
    user_params = deepcopy(model.model_config_parameters())

    param_combo = {}
    if user_params:
        user_params.pop("max_batch_size", None)

        # This is guaranteed to only generate one combination (check is in config_command)
        combos = GeneratorUtils.generate_combinations(user_params)
        assert len(combos) == 1

        param_combo = combos[0]

    instance_kind = "KIND_CPU" if model.cpu_only() else "KIND_GPU"
    instance_group = {
        "count": self._calculate_instance_count(dimension_values),
        "kind": instance_kind,
    }
    param_combo["instance_group"] = [instance_group]

    if "max_batch_size" in dimension_values:
        param_combo["max_batch_size"] = self._calculate_model_batch_size(
            dimension_values
        )

    if model.supports_dynamic_batching():
        param_combo["dynamic_batching"] = {}

    return BaseModelConfigGenerator.make_model_config_variant(
        param_combo=param_combo,
        model=model,
        model_variant_name_manager=self._model_variant_name_manager,
        c_api_mode=self._c_api_mode,
    )
|
|
477
|
+
|
|
478
|
+
def _create_next_model_run_config(
    self,
    model: ModelProfileSpec,
    model_index: int,
    model_config_variant: ModelConfigVariant,
    composing_model_config_variants: List[ModelConfigVariant],
) -> ModelRunConfig:
    """
    Assemble the ModelRunConfig for one model at the coordinate being measured.

    A perf analyzer config is generated for the model and paired with the
    supplied model config variant. The composing variants are attached only
    when this generator has composing models.
    """
    name = model.model_name()
    next_perf_config = self._get_next_perf_analyzer_config(name, model, model_index)
    run_config = ModelRunConfig(name, model_config_variant, next_perf_config)

    if not self._composing_models:
        return run_config

    run_config.add_composing_model_config_variants(composing_model_config_variants)
    return run_config
|
|
498
|
+
|
|
499
|
+
def _get_next_perf_analyzer_config(
    self, model_name: str, model: ModelProfileSpec, model_index: int
) -> PerfAnalyzerConfig:
    """
    Create the perf analyzer config for a model at the coordinate being measured.

    Settings are layered in this order: values taken from the profile config,
    then the batch size and the concurrency calculated from the coordinate,
    and finally the model's own perf analyzer flags.
    """
    coordinate_values = self._get_coordinate_values(
        self._coordinate_to_measure, model_index
    )

    pa_config = PerfAnalyzerConfig()
    pa_config.update_config_from_profile_config(model_name, self._config)

    pa_config.update_config(
        {
            "batch-size": DEFAULT_BATCH_SIZES,
            "concurrency-range": self._calculate_concurrency(coordinate_values),
        }
    )

    # Applied last, after the calculated values above
    pa_config.update_config(model.perf_analyzer_flags())
    return pa_config
|
|
520
|
+
|
|
521
|
+
def _calculate_model_batch_size(
|
|
522
|
+
self, dimension_values: Dict[str, Union[int, float]]
|
|
523
|
+
) -> int:
|
|
524
|
+
batch_size = int(dimension_values.get("max_batch_size", 1))
|
|
525
|
+
|
|
526
|
+
min_batch_size_is_set_by_config = self._config.get_config()[
|
|
527
|
+
"run_config_search_min_model_batch_size"
|
|
528
|
+
].is_set_by_user()
|
|
529
|
+
|
|
530
|
+
max_batch_size_is_set_by_config = self._config.get_config()[
|
|
531
|
+
"run_config_search_max_model_batch_size"
|
|
532
|
+
].is_set_by_user()
|
|
533
|
+
|
|
534
|
+
if (
|
|
535
|
+
min_batch_size_is_set_by_config
|
|
536
|
+
and batch_size < self._config.run_config_search_min_model_batch_size
|
|
537
|
+
):
|
|
538
|
+
return self._config.run_config_search_min_model_batch_size
|
|
539
|
+
|
|
540
|
+
if (
|
|
541
|
+
max_batch_size_is_set_by_config
|
|
542
|
+
and batch_size > self._config.run_config_search_max_model_batch_size
|
|
543
|
+
):
|
|
544
|
+
return self._config.run_config_search_max_model_batch_size
|
|
545
|
+
|
|
546
|
+
return batch_size
|
|
547
|
+
|
|
548
|
+
def _calculate_instance_count(
|
|
549
|
+
self, dimension_values: Dict[str, Union[int, float]]
|
|
550
|
+
) -> int:
|
|
551
|
+
instance_count = int(dimension_values.get("instance_count", 1))
|
|
552
|
+
|
|
553
|
+
min_instance_count_is_set_by_config = self._config.get_config()[
|
|
554
|
+
"run_config_search_min_instance_count"
|
|
555
|
+
].is_set_by_user()
|
|
556
|
+
|
|
557
|
+
max_instance_count_is_set_by_config = self._config.get_config()[
|
|
558
|
+
"run_config_search_max_instance_count"
|
|
559
|
+
].is_set_by_user()
|
|
560
|
+
|
|
561
|
+
if (
|
|
562
|
+
min_instance_count_is_set_by_config
|
|
563
|
+
and instance_count < self._config.run_config_search_min_instance_count
|
|
564
|
+
):
|
|
565
|
+
return self._config.run_config_search_min_instance_count
|
|
566
|
+
|
|
567
|
+
if (
|
|
568
|
+
max_instance_count_is_set_by_config
|
|
569
|
+
and instance_count > self._config.run_config_search_max_instance_count
|
|
570
|
+
):
|
|
571
|
+
return self._config.run_config_search_max_instance_count
|
|
572
|
+
|
|
573
|
+
return instance_count
|
|
574
|
+
|
|
575
|
+
def _calculate_concurrency(
|
|
576
|
+
self, dimension_values: Dict[str, Union[int, float]]
|
|
577
|
+
) -> int:
|
|
578
|
+
model_batch_size = self._calculate_model_batch_size(dimension_values)
|
|
579
|
+
instance_count = self._calculate_instance_count(dimension_values)
|
|
580
|
+
concurrency = 2 * model_batch_size * instance_count
|
|
581
|
+
|
|
582
|
+
min_concurrency_is_set_by_config = self._config.get_config()[
|
|
583
|
+
"run_config_search_min_concurrency"
|
|
584
|
+
].is_set_by_user()
|
|
585
|
+
|
|
586
|
+
max_concurrency_is_set_by_config = self._config.get_config()[
|
|
587
|
+
"run_config_search_max_concurrency"
|
|
588
|
+
].is_set_by_user()
|
|
589
|
+
|
|
590
|
+
if (
|
|
591
|
+
min_concurrency_is_set_by_config
|
|
592
|
+
and concurrency < self._config.run_config_search_min_concurrency
|
|
593
|
+
):
|
|
594
|
+
return self._config.run_config_search_min_concurrency
|
|
595
|
+
|
|
596
|
+
if (
|
|
597
|
+
max_concurrency_is_set_by_config
|
|
598
|
+
and concurrency > self._config.run_config_search_max_concurrency
|
|
599
|
+
):
|
|
600
|
+
return self._config.run_config_search_max_concurrency
|
|
601
|
+
|
|
602
|
+
return concurrency
|
|
603
|
+
|
|
604
|
+
def _create_default_run_config(self) -> RunConfig:
    """
    Build the default RunConfig holding one default ModelRunConfig per model.

    Ensemble models go through the ensemble-specific builder; every other
    model uses the regular default builder.
    """
    run_config = RunConfig(self._triton_env)

    for model in self._models:
        build_default = (
            self._create_default_ensemble_model_run_config
            if model.is_ensemble()
            else self._create_default_model_run_config
        )
        run_config.add_model_run_config(build_default(model))

    return run_config
|
|
618
|
+
|
|
619
|
+
def _create_default_ensemble_model_run_config(
    self, model: ModelProfileSpec
) -> ModelRunConfig:
    """
    Build the default ModelRunConfig for an ensemble model.

    Default variants of the composing models are created first, because the
    ensemble variant is made from them. The same composing variants are then
    attached to the resulting run config.
    """
    composing_variants = self._create_default_composing_model_config_variants(
        model
    )

    ensemble_variant = BaseModelConfigGenerator.make_ensemble_model_config_variant(
        model=model,
        ensemble_composing_model_config_variants=composing_variants,
        model_variant_name_manager=self._model_variant_name_manager,
        c_api_mode=self._c_api_mode,
    )

    perf_config = self._create_default_perf_analyzer_config(
        model, ensemble_variant.model_config
    )

    run_config = ModelRunConfig(model.model_name(), ensemble_variant, perf_config)
    run_config.add_composing_model_config_variants(composing_variants)

    return run_config
|
|
648
|
+
|
|
649
|
+
def _create_default_composing_model_config_variants(
    self, model: ModelProfileSpec
) -> List[ModelConfigVariant]:
    """
    Return one default model config variant per composing model.

    Each variant is made with an empty parameter combination. The list is
    empty when the generator has no composing models. The ``model`` argument
    is not used here; the variants come from ``self._composing_models``.
    """
    variants: List[ModelConfigVariant] = [
        BaseModelConfigGenerator.make_model_config_variant(
            param_combo={},
            model=composing_model,
            model_variant_name_manager=self._model_variant_name_manager,
            c_api_mode=self._c_api_mode,
        )
        for composing_model in self._composing_models
    ]

    return variants
|
|
664
|
+
|
|
665
|
+
def _create_default_model_run_config(
    self, model: ModelProfileSpec
) -> ModelRunConfig:
    """
    Build the default ModelRunConfig for a non-ensemble model.

    The model config variant is made with an empty parameter combination and
    paired with the default perf analyzer config. Default composing model
    variants are attached only when there are any.
    """
    variant = BaseModelConfigGenerator.make_model_config_variant(
        param_combo={},
        model=model,
        model_variant_name_manager=self._model_variant_name_manager,
        c_api_mode=self._c_api_mode,
    )

    perf_config = self._create_default_perf_analyzer_config(
        model, variant.model_config
    )
    run_config = ModelRunConfig(model.model_name(), variant, perf_config)

    # Created after the model's own variant, in the same order as before
    composing_variants = self._create_default_composing_model_config_variants(
        model
    )
    if not composing_variants:
        return run_config

    run_config.add_composing_model_config_variants(composing_variants)
    return run_config
|
|
697
|
+
|
|
698
|
+
def _create_default_perf_analyzer_config(
    self, model: ModelProfileSpec, model_config: ModelConfig
) -> PerfAnalyzerConfig:
    """
    Create the perf analyzer config that goes with a default model config.

    Settings are layered in this order: values taken from the profile config
    (looked up by the model config's "name" field), then the batch size and
    the default concurrency, and finally the model's own perf analyzer flags.
    """
    pa_config = PerfAnalyzerConfig()
    pa_config.update_config_from_profile_config(
        model_config.get_field("name"), self._config
    )

    pa_config.update_config(
        {
            "batch-size": DEFAULT_BATCH_SIZES,
            "concurrency-range": self._calculate_default_concurrency(model_config),
        }
    )

    # Applied last, after the calculated values above
    pa_config.update_config(model.perf_analyzer_flags())

    return pa_config
|
|
717
|
+
|
|
718
|
+
def _calculate_default_concurrency(self, model_config: ModelConfig) -> int:
    """
    Return the default concurrency for a model config.

    Computed as 2 * max batch size * instance group count, where the instance
    group count is queried with this generator's GPU count.
    """
    max_batch_size = model_config.max_batch_size()
    instance_count = model_config.instance_group_count(
        system_gpu_count=self._gpu_count
    )

    return 2 * max_batch_size * instance_count
|
|
726
|
+
|
|
727
|
+
def _print_debug_logs(
    self, measurements: List[Union[RunConfigMeasurement, None]]
) -> None:
    """
    Log the latest measurement at debug level.

    When a measurement is present, its throughput and p99 latency are logged
    together with the same metrics of ``self._best_measurement`` (0 for both
    when there is no best measurement yet).

    A missing list, an empty list, or a first entry of ``None`` is logged as
    a measurement of "None". The empty list is checked explicitly so that
    indexing the first entry cannot raise IndexError.
    """
    if not measurements or measurements[0] is None:
        logger.debug(f"Measurement for {self._coordinate_to_measure}: None.")
        return

    assert len(measurements) == 1
    measurement = measurements[0]

    throughput = measurement.get_non_gpu_metric_value("perf_throughput")
    latency = measurement.get_non_gpu_metric_value("perf_latency_p99")

    if self._best_measurement:
        best_throughput = self._best_measurement.get_non_gpu_metric_value(
            "perf_throughput"
        )
        best_latency = self._best_measurement.get_non_gpu_metric_value(
            "perf_latency_p99"
        )
    else:
        best_throughput = 0
        best_latency = 0

    logger.debug(
        f"Measurement for {self._coordinate_to_measure}: "
        f"throughput = {throughput}, latency = {latency} "
        f"(best throughput: {best_throughput}, best_latency: {best_latency})"
    )
|