triton_model_analyzer-1.48.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
The 838-line diff body below corresponds to the new file model_analyzer/config/generate/optuna_run_config_generator.py (+838 -0 in the file list above):

@@ -0,0 +1,838 @@

```python
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from random import randint
from sys import maxsize
from typing import Any, Dict, Generator, List, Optional, TypeAlias, Union

import optuna

from model_analyzer.config.generate.base_model_config_generator import (
    BaseModelConfigGenerator,
)
from model_analyzer.config.generate.brute_run_config_generator import (
    BruteRunConfigGenerator,
)
from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
from model_analyzer.config.generate.model_variant_name_manager import (
    ModelVariantNameManager,
)
from model_analyzer.config.generate.search_parameter import (
    ParameterCategory,
    SearchParameter,
)
from model_analyzer.config.generate.search_parameters import SearchParameters
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
from model_analyzer.config.input.config_defaults import (
    DEFAULT_BATCH_SIZES,
    DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
)
from model_analyzer.config.run.model_run_config import ModelRunConfig
from model_analyzer.config.run.run_config import RunConfig
from model_analyzer.constants import LOGGER_NAME
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
from model_analyzer.result.run_config_measurement import RunConfigMeasurement
from model_analyzer.state.analyzer_state_manager import AnalyzerStateManager
from model_analyzer.triton.model.model_config import ModelConfig
from model_analyzer.triton.model.model_config_variant import ModelConfigVariant

from .config_generator_interface import ConfigGeneratorInterface

logger = logging.getLogger(LOGGER_NAME)

ModelName: TypeAlias = str
ParameterName: TypeAlias = str
ObjectiveName: TypeAlias = str

TrialObjective: TypeAlias = Union[str | int]
ModelTrialObjectives: TypeAlias = Dict[ParameterName, TrialObjective]
AllTrialObjectives: TypeAlias = Dict[ModelName, ModelTrialObjectives]
ComposingTrialObjectives: TypeAlias = AllTrialObjectives

ParameterCombo: TypeAlias = Dict[str, Any]
```
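The aliases above describe the nested dict the generator passes around: one entry per model, each mapping a searched parameter name to the value Optuna suggested for the current trial. A minimal sketch of the shape (the model name and values are hypothetical, chosen only for illustration):

```python
# A hypothetical AllTrialObjectives value for a single trial:
# {model name -> {parameter name -> suggested value}}. "add_sub" is illustrative.
trial_objectives = {
    "add_sub": {
        "max_batch_size": 16,   # suggested as 2**4 from an EXPONENTIAL range
        "instance_group": 3,    # suggested from an INTEGER range
        "concurrency": 32,      # suggested, or derived via the concurrency formula
    }
}
```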
```python
class OptunaRunConfigGenerator(ConfigGeneratorInterface):
    """
    Use Optuna algorithm to create RunConfigs
    """

    # This list represents all possible parameters Optuna can currently search for
    optuna_parameter_list = [
        "batch_sizes",
        "max_batch_size",
        "instance_group",
        "concurrency",
        "max_queue_delay_microseconds",
        "request_rate",
    ]

    # TODO: TMA-1927: Figure out the correct value for this
    NO_MEASUREMENT_SCORE = -1

    def __init__(
        self,
        config: ConfigCommandProfile,
        state_manager: AnalyzerStateManager,
        gpu_count: int,
        models: List[ModelProfileSpec],
        composing_models: List[ModelProfileSpec],
        model_variant_name_manager: ModelVariantNameManager,
        search_parameters: Dict[str, SearchParameters],
        composing_search_parameters: Dict[str, SearchParameters],
        user_seed: Optional[int] = None,
    ):
        """
        Parameters
        ----------
        config: ConfigCommandProfile
            Profile configuration information
        state_manager: AnalyzerStateManager
            The object that allows control and update of checkpoint state
        gpu_count: Number of gpus in the system
        models: List of ModelProfileSpec
            List of models to profile
        composing_models: List of ModelProfileSpec
            List of composing models
        model_variant_name_manager: ModelVariantNameManager
        search_parameters: SearchParameters
            The object that handles the users configuration search parameters
        composing_search_parameters: SearchParameters
            The object that handles the users configuration search parameters for composing models
        user_seed: int
            The seed to use. If not provided, one will be generated (fresh run) or read from checkpoint
        """
        self._config = config
        self._state_manager = state_manager
        self._gpu_count = gpu_count
        self._models = models
        self._composing_models = composing_models
        self._search_parameters = search_parameters

        self._composing_search_parameters = {}
        for composing_model in composing_models:
            self._composing_search_parameters[
                composing_model.model_name()
            ] = composing_search_parameters[composing_model.model_name()]

        self._model_variant_name_manager = model_variant_name_manager

        self._triton_env = BruteRunConfigGenerator.determine_triton_server_env(models)

        self._num_models = len(models)
        self._last_measurement: Optional[RunConfigMeasurement] = None
        self._best_config_name = ""
        self._best_config_score: Optional[float] = None
        self._best_trial_number: Optional[int] = None

        self._c_api_mode = config.triton_launch_mode == "c_api"

        self._done = False

        self._seed = self._create_seed(user_seed)

        self._sampler = optuna.samplers.TPESampler(seed=self._seed)

        self._study_name = ",".join([model.model_name() for model in self._models])

        self._study = optuna.create_study(
            study_name=self._study_name,
            direction="maximize",
            sampler=self._sampler,
        )

        self._init_state()

    def _get_seed(self) -> int:
        return self._state_manager.get_state_variable("OptunaRunConfigGenerator.seed")

    def _create_seed(self, user_seed: Optional[int]) -> int:
        if self._state_manager.starting_fresh_run():
            seed = randint(0, 10000) if user_seed is None else user_seed
        else:
            seed = self._get_seed() if user_seed is None else user_seed

        return seed

    def _init_state(self) -> None:
        self._state_manager.set_state_variable(
            "OptunaRunConfigGenerator.seed", self._seed
        )

    def _is_done(self) -> bool:
        return self._done

    def set_last_results(
        self, measurements: List[Optional[RunConfigMeasurement]]
    ) -> None:
        # TODO: TMA-1927: Add support for multi-model
        if measurements[0] is not None:
            self._last_measurement = measurements[0]
        else:
            self._last_measurement = None

    def get_configs(self) -> Generator[RunConfig, None, None]:
        """
        Returns
        -------
        RunConfig
            The next RunConfig generated by this class
        """
        logger.info(
            "Measuring default configuration to establish a baseline measurement"
        )
        default_run_config = self._create_default_run_config()
        yield default_run_config

        self._capture_default_measurement(default_run_config)
        self._set_best_measurement(default_run_config)

        if logging.DEBUG:
            self._print_debug_search_space_info()

        min_configs_to_search = self._determine_minimum_number_of_configs_to_search()
        max_configs_to_search = self._determine_maximum_number_of_configs_to_search()

        for trial_number in range(1, max_configs_to_search + 1):
            trial = self._study.ask()
            trial_objectives = self._create_trial_objectives(trial)
            composing_trial_objectives = self._create_composing_trial_objectives(trial)
            logger.debug(f"Trial {trial_number} of {max_configs_to_search}:")
            run_config = self._create_objective_based_run_config(
                trial_objectives, composing_trial_objectives
            )
            yield run_config

            score = self._calculate_score()
            self._set_best_measurement(run_config, score, trial_number)

            if logging.DEBUG:
                self._print_debug_score_info(run_config, score)

            if self._should_terminate_early(min_configs_to_search, trial_number):
                logger.debug("Early termination threshold reached")
                break
            self._study.tell(trial, score)
```
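get_configs() drives the study in Optuna's ask-and-tell mode rather than through study.optimize(), because each measurement happens outside this class: the caller consumes the yielded RunConfig, profiles it, and reports back through set_last_results(). A minimal standalone sketch of the same loop shape, with a toy objective standing in for a real profiling run:

```python
import optuna

study = optuna.create_study(
    direction="maximize", sampler=optuna.samplers.TPESampler(seed=42)
)

for trial_number in range(1, 11):
    trial = study.ask()                  # get the next parameter suggestion
    x = trial.suggest_int("x", 0, 6)     # analogous to _create_trial_objectives()
    score = -(x - 3) ** 2                # stand-in for profiling + _calculate_score()
    study.tell(trial, score)             # report the result back to the sampler

print(study.best_params)                 # best suggestion found so far
```

Note that in get_configs() the early-exit break happens before study.tell(), so the score of the final trial is never reported to the sampler.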
```python
    def _capture_default_measurement(self, default_run_config: RunConfig) -> None:
        if not self._last_measurement:
            raise TritonModelAnalyzerException(
                "Default configuration did not return a measurement. Please check PA/Tritonserver log files."
            )

        self._default_measurement = self._last_measurement

    def _set_best_measurement(
        self, run_config: RunConfig, score: float = 0, trial_number: int = 0
    ) -> None:
        if self._best_config_score is None or score > self._best_config_score:
            self._best_config_name = run_config.combined_model_variants_name()
            self._best_config_score = score
            self._best_trial_number = trial_number

    def _determine_maximum_number_of_configs_to_search(self) -> int:
        max_trials_based_on_percentage_of_search_space = (
            self._determine_trials_based_on_max_percentage_of_search_space()
        )

        max_configs_to_search = self._decide_max_between_percentage_and_trial_count(
            max_trials_based_on_percentage_of_search_space
        )

        return max_configs_to_search

    def _determine_trials_based_on_max_percentage_of_search_space(self) -> int:
        total_num_of_possible_configs = self._calculate_num_of_configs_in_search_space()
        max_trials_based_on_percentage_of_search_space = int(
            total_num_of_possible_configs
            * self._config.max_percentage_of_search_space
            / 100
        )

        return max_trials_based_on_percentage_of_search_space

    def _decide_max_between_percentage_and_trial_count(
        self, max_trials_based_on_percentage_of_search_space: int
    ) -> int:
        # By default we will search based on percentage of search space
        # If the user specifies a number of trials we will use that instead
        # If both are specified we will use the smaller number
        max_trials_set_by_user = self._config.get_config()[
            "optuna_max_trials"
        ].is_set_by_user()
        max_percentage_set_by_user = self._config.get_config()[
            "max_percentage_of_search_space"
        ].is_set_by_user()

        if max_trials_set_by_user and max_percentage_set_by_user:
            if (
                self._config.optuna_max_trials
                < max_trials_based_on_percentage_of_search_space
            ):
                logger.debug(
                    f"Maximum number of trials: {self._config.optuna_max_trials} (optuna_max_trials)"
                )
                max_configs_to_search = self._config.optuna_max_trials
            else:
                logger.debug(
                    f"Maximum number of trials: {max_trials_based_on_percentage_of_search_space} "
                    f"({self._config.max_percentage_of_search_space}% of search space)"
                )
                max_configs_to_search = max_trials_based_on_percentage_of_search_space
        elif max_trials_set_by_user:
            logger.debug(
                f"Maximum number of trials: {self._config.optuna_max_trials} (optuna_max_trials)"
            )
            max_configs_to_search = self._config.optuna_max_trials
        else:
            logger.debug(
                f"Maximum number of trials: {max_trials_based_on_percentage_of_search_space} "
                f"({self._config.max_percentage_of_search_space}% of search space)"
            )
            max_configs_to_search = max_trials_based_on_percentage_of_search_space

        if logging.DEBUG:
            logger.info("")
        return max_configs_to_search

    def _determine_minimum_number_of_configs_to_search(self) -> int:
        min_trials_based_on_percentage_of_search_space = (
            self._determine_trials_based_on_min_percentage_of_search_space()
        )

        min_configs_to_search = self._decide_min_between_percentage_and_trial_count(
            min_trials_based_on_percentage_of_search_space
        )

        return min_configs_to_search

    def _determine_trials_based_on_min_percentage_of_search_space(self) -> int:
        total_num_of_possible_configs = self._calculate_num_of_configs_in_search_space()
        min_trials_based_on_percentage_of_search_space = int(
            total_num_of_possible_configs
            * self._config.min_percentage_of_search_space
            / 100
        )

        return min_trials_based_on_percentage_of_search_space

    def _decide_min_between_percentage_and_trial_count(
        self, min_trials_based_on_percentage_of_search_space: int
    ) -> int:
        # By default we will search based on percentage of search space
        # If the user specifies a number of trials we will use that instead
        # If both are specified we will use the larger number
        min_trials_set_by_user = self._config.get_config()[
            "optuna_min_trials"
        ].is_set_by_user()
        min_percentage_set_by_user = self._config.get_config()[
            "min_percentage_of_search_space"
        ].is_set_by_user()

        if min_trials_set_by_user and min_percentage_set_by_user:
            if (
                self._config.optuna_min_trials
                > min_trials_based_on_percentage_of_search_space
            ):
                logger.debug(
                    f"Minimum number of trials: {self._config.optuna_min_trials} (optuna_min_trials)"
                )
                min_configs_to_search = self._config.optuna_min_trials
            else:
                logger.debug(
                    f"Minimum number of trials: {min_trials_based_on_percentage_of_search_space} "
                    f"({self._config.min_percentage_of_search_space}% of search space)"
                )
                min_configs_to_search = min_trials_based_on_percentage_of_search_space
        elif min_trials_set_by_user:
            logger.debug(
                f"Minimum number of trials: {self._config.optuna_min_trials} (optuna_min_trials)"
            )
            min_configs_to_search = self._config.optuna_min_trials
        else:
            logger.debug(
                f"Minimum number of trials: {min_trials_based_on_percentage_of_search_space} "
                f"({self._config.min_percentage_of_search_space}% of search space)"
            )
            min_configs_to_search = min_trials_based_on_percentage_of_search_space

        return min_configs_to_search
```
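The min/max trial budgets above reduce to simple arithmetic over the search-space size: a percentage of the total config count by default, overridden by explicit trial counts, with the smaller value winning for the max budget and the larger winning for the min budget. A worked sketch (all numbers are illustrative, not the package defaults):

```python
# Illustrative trial-budget arithmetic.
total_configs = 1000
max_percentage_of_search_space = 10
min_percentage_of_search_space = 2

max_trials = int(total_configs * max_percentage_of_search_space / 100)  # 100
min_trials = int(total_configs * min_percentage_of_search_space / 100)  # 20

# If the user also sets explicit trial counts, the smaller wins for max
# and the larger wins for min:
optuna_max_trials, optuna_min_trials = 60, 40
max_trials = min(optuna_max_trials, max_trials)  # 60
min_trials = max(optuna_min_trials, min_trials)  # 40
```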
```python
    def _create_trial_objectives(self, trial: optuna.Trial) -> AllTrialObjectives:
        trial_objectives: AllTrialObjectives = {}

        for model in self._models:
            model_name = model.model_name()
            trial_objectives[model_name] = {}

            for parameter_name in OptunaRunConfigGenerator.optuna_parameter_list:
                parameter = self._search_parameters[model_name].get_parameter(
                    parameter_name
                )
                if parameter:
                    objective_name = self._create_trial_objective_name(
                        model_name=model_name, parameter_name=parameter_name
                    )

                    trial_objectives[model_name][
                        parameter_name
                    ] = self._create_trial_objective(trial, objective_name, parameter)

            if self._config.use_concurrency_formula:
                trial_objectives[model_name][
                    "concurrency"
                ] = self._get_objective_concurrency(model_name, trial_objectives)

        return trial_objectives

    def _create_composing_trial_objectives(
        self, trial: optuna.Trial
    ) -> ComposingTrialObjectives:
        composing_trial_objectives: ComposingTrialObjectives = {}
        for composing_model in self._composing_models:
            composing_trial_objectives[composing_model.model_name()] = {}
            for parameter_name in OptunaRunConfigGenerator.optuna_parameter_list:
                parameter = self._composing_search_parameters[
                    composing_model.model_name()
                ].get_parameter(parameter_name)

                if parameter:
                    objective_name = self._create_trial_objective_name(
                        model_name=composing_model.model_name(),
                        parameter_name=parameter_name,
                    )
                    composing_trial_objectives[composing_model.model_name()][
                        parameter_name
                    ] = self._create_trial_objective(trial, objective_name, parameter)

        return composing_trial_objectives

    def _create_trial_objective_name(
        self, model_name: ModelName, parameter_name: ParameterName
    ) -> ObjectiveName:
        # This ensures that Optuna has a unique name
        # for each objective we are searching
        objective_name = f"{model_name}::{parameter_name}"

        return objective_name

    def _create_trial_objective(
        self, trial: optuna.Trial, name: ObjectiveName, parameter: SearchParameter
    ) -> TrialObjective:
        if parameter.category is ParameterCategory.INTEGER:
            objective = trial.suggest_int(
                name, parameter.min_range, parameter.max_range
            )
        elif parameter.category is ParameterCategory.EXPONENTIAL:
            objective = int(
                2 ** trial.suggest_int(name, parameter.min_range, parameter.max_range)
            )
        elif parameter.category is ParameterCategory.INT_LIST:
            objective = int(trial.suggest_categorical(name, parameter.enumerated_list))
        elif parameter.category is ParameterCategory.STR_LIST:
            objective = trial.suggest_categorical(name, parameter.enumerated_list)

        return objective
```
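_create_trial_objective() maps each ParameterCategory onto the matching optuna.Trial suggest call. The EXPONENTIAL category is the notable one: Optuna searches the exponent, and the generator reconstructs the power of two. A standalone sketch of that encoding (ranges and the toy score are illustrative):

```python
import optuna

def objective(trial: optuna.Trial) -> float:
    # EXPONENTIAL: search the exponent 0..6, use 2**exponent as the value,
    # so max_batch_size is drawn from {1, 2, 4, 8, 16, 32, 64}
    max_batch_size = 2 ** trial.suggest_int("max_batch_size", 0, 6)
    # INTEGER: search the value directly
    instance_count = trial.suggest_int("instance_group", 1, 5)
    return float(max_batch_size * instance_count)  # stand-in score

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=5)
```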
```python
    def _get_objective_concurrency(
        self, model_name: str, trial_objectives: AllTrialObjectives
    ) -> int:
        max_batch_size = trial_objectives[model_name].get("max_batch_size", 1)
        concurrency_formula = (
            2 * int(trial_objectives[model_name]["instance_group"]) * max_batch_size
        )
        concurrency = (
            self._config.run_config_search_max_concurrency
            if concurrency_formula > self._config.run_config_search_max_concurrency
            else concurrency_formula
        )
        concurrency = (
            self._config.run_config_search_min_concurrency
            if concurrency_formula < self._config.run_config_search_min_concurrency
            else concurrency_formula
        )

        return concurrency
```
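With use_concurrency_formula enabled, concurrency is not searched independently; it is derived as 2 * instance_count * max_batch_size, clamped to the configured bounds. Note that as transcribed above, the second conditional recomputes from concurrency_formula rather than from the already-clamped value, so the upper clamp's result appears to be overwritten whenever the formula exceeds the maximum. A worked sketch of the intended clamp (bounds are illustrative):

```python
# Worked example of the concurrency formula (bounds are illustrative values).
run_config_search_min_concurrency = 1
run_config_search_max_concurrency = 1024

instance_count, max_batch_size = 3, 16
concurrency_formula = 2 * instance_count * max_batch_size   # 96

concurrency = min(concurrency_formula, run_config_search_max_concurrency)
concurrency = max(concurrency, run_config_search_min_concurrency)
print(concurrency)  # 96 -- within bounds, so unchanged
```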
```python
    def _create_objective_based_run_config(
        self,
        trial_objectives: AllTrialObjectives,
        composing_trial_objectives: ComposingTrialObjectives,
    ) -> RunConfig:
        run_config = RunConfig(self._triton_env)

        composing_model_config_variants = self._create_composing_model_config_variants(
            composing_trial_objectives
        )

        for model in self._models:
            model_config_variant = self._create_model_config_variant(
                model=model,
                trial_objectives=trial_objectives[model.model_name()],
                composing_trial_objectives=composing_trial_objectives,
                composing_model_config_variants=composing_model_config_variants,
            )

            model_run_config = self._create_model_run_config(
                model=model,
                model_config_variant=model_config_variant,
                composing_model_config_variants=composing_model_config_variants,
                trial_objectives=trial_objectives[model.model_name()],
            )

            run_config.add_model_run_config(model_run_config=model_run_config)

        return run_config

    def _create_parameter_combo(
        self,
        model: ModelProfileSpec,
        trial_objectives: ModelTrialObjectives,
        composing_trial_objectives: ComposingTrialObjectives,
    ) -> ParameterCombo:
        if model.is_ensemble():
            param_combo = self._create_ensemble_parameter_combo(
                composing_trial_objectives
            )
        else:
            param_combo = self._create_non_ensemble_parameter_combo(
                model, trial_objectives
            )

        return param_combo

    def _create_ensemble_parameter_combo(
        self,
        composing_trial_objectives: ComposingTrialObjectives,
    ) -> ParameterCombo:
        """
        For the ensemble model the only parameter we need to set
        is the max batch size; which will be the minimum batch size
        found in the composing_model max batch sizes
        """

        min_val_of_max_batch_size = maxsize
        for composing_trial_objective in composing_trial_objectives.values():
            min_val_of_max_batch_size = int(
                min(
                    composing_trial_objective.get("max_batch_size", 1),
                    min_val_of_max_batch_size,
                )
            )

        param_combo = {"max_batch_size": min_val_of_max_batch_size}

        return param_combo

    def _create_non_ensemble_parameter_combo(
        self, model: ModelProfileSpec, trial_objectives: ModelTrialObjectives
    ) -> ParameterCombo:
        param_combo: ParameterCombo = {}

        if model.supports_dynamic_batching():
            param_combo["dynamic_batching"] = []

        if "instance_group" in trial_objectives:
            kind = "KIND_CPU" if model.cpu_only() else "KIND_GPU"
            param_combo["instance_group"] = [
                {
                    "count": trial_objectives["instance_group"],
                    "kind": kind,
                }
            ]

        if "max_batch_size" in trial_objectives:
            param_combo["max_batch_size"] = trial_objectives["max_batch_size"]

        if "max_queue_delay_microseconds" in trial_objectives:
            param_combo["dynamic_batching"] = {
                "max_queue_delay_microseconds": trial_objectives[
                    "max_queue_delay_microseconds"
                ]
            }

        return param_combo
```
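For a non-ensemble model, the resulting ParameterCombo is a plain dict that BaseModelConfigGenerator later merges into the model's Triton config. A hypothetical trial might produce:

```python
# Hypothetical ParameterCombo built from one trial's objectives, for a GPU
# model that supports dynamic batching (values are illustrative):
param_combo = {
    "dynamic_batching": {"max_queue_delay_microseconds": 200},
    "instance_group": [{"count": 3, "kind": "KIND_GPU"}],
    "max_batch_size": 16,
}
```

Note how dynamic_batching starts as an empty list (enable with defaults) and is overwritten with the delay dict only when max_queue_delay_microseconds is being searched.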
```python
    def _create_model_config_variant(
        self,
        model: ModelProfileSpec,
        trial_objectives: ModelTrialObjectives,
        composing_trial_objectives: ComposingTrialObjectives = {},
        composing_model_config_variants: List[ModelConfigVariant] = [],
    ) -> ModelConfigVariant:
        param_combo = self._create_parameter_combo(
            model, trial_objectives, composing_trial_objectives
        )

        if model.is_ensemble():
            model_config_variant = BaseModelConfigGenerator.make_ensemble_model_config_variant(
                model=model,
                ensemble_composing_model_config_variants=composing_model_config_variants,
                model_variant_name_manager=self._model_variant_name_manager,
                param_combo=param_combo,
                c_api_mode=self._c_api_mode,
            )
        else:
            model_config_variant = BaseModelConfigGenerator.make_model_config_variant(
                param_combo=param_combo,
                model=model,
                model_variant_name_manager=self._model_variant_name_manager,
                c_api_mode=self._c_api_mode,
            )

        return model_config_variant

    def _create_composing_model_config_variants(
        self, composing_trial_objectives: ComposingTrialObjectives
    ) -> List[ModelConfigVariant]:
        composing_model_config_variants = []
        for composing_model in self._composing_models:
            composing_model_config_variant = self._create_model_config_variant(
                model=composing_model,
                trial_objectives=composing_trial_objectives[
                    composing_model.model_name()
                ],
            )
            composing_model_config_variants.append(composing_model_config_variant)

        return composing_model_config_variants

    def _calculate_score(self) -> float:
        if self._last_measurement:
            score = self._default_measurement.compare_measurements(  # type: ignore
                self._last_measurement
            )
        else:
            score = OptunaRunConfigGenerator.NO_MEASUREMENT_SCORE

        return score
```
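Every trial is scored relative to the baseline measurement taken from the default configuration, and a failed measurement gets NO_MEASUREMENT_SCORE (-1) so the sampler steers away from that region. The debug output later multiplies the score by 100, which suggests the comparison yields a relative gain over the default. A stand-in sketch of that scoring shape (compare_measurements' exact semantics live in RunConfigMeasurement; the gain formula below is an assumption for illustration only):

```python
# Stand-in for RunConfigMeasurement.compare_measurements(): a relative-gain
# comparison against the default baseline (illustrative formula, not the
# package's actual implementation).
def compare_measurements(default_throughput: float, candidate_throughput: float) -> float:
    return (candidate_throughput - default_throughput) / default_throughput

score = compare_measurements(1000.0, 1250.0)
print(score)  # 0.25 -> logged as "25" by _print_debug_score_info()
```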
```python
    def _create_default_run_config(self) -> RunConfig:
        default_run_config = RunConfig(self._triton_env)
        for model in self._models:
            default_model_run_config = self._create_default_model_run_config(model)
            default_run_config.add_model_run_config(default_model_run_config)

        return default_run_config

    def _create_default_model_run_config(
        self, model: ModelProfileSpec
    ) -> ModelRunConfig:
        default_model_config_variant = (
            BaseModelConfigGenerator.make_model_config_variant(
                param_combo={},
                model=model,
                model_variant_name_manager=self._model_variant_name_manager,
                c_api_mode=self._c_api_mode,
            )
        )

        default_perf_analyzer_config = self._create_default_perf_analyzer_config(
            model, default_model_config_variant.model_config
        )

        default_model_run_config = ModelRunConfig(
            model.model_name(),
            default_model_config_variant,
            default_perf_analyzer_config,
        )

        default_composing_model_config_variants = (
            self._create_default_composing_model_config_variants(model)
        )

        if default_composing_model_config_variants:
            default_model_run_config.add_composing_model_config_variants(
                default_composing_model_config_variants
            )

        return default_model_run_config

    def _create_default_perf_analyzer_config(
        self, model: ModelProfileSpec, model_config: ModelConfig
    ) -> PerfAnalyzerConfig:
        default_perf_analyzer_config = PerfAnalyzerConfig()
        default_perf_analyzer_config.update_config_from_profile_config(
            model_config.get_field("name"), self._config
        )

        if self._search_parameters[model_config.get_field("name")].get_parameter(
            "request_rate"
        ):
            perf_config_params = {
                "batch-size": DEFAULT_BATCH_SIZES,
                "request-rate-range": DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
            }
            self._config.concurrency_sweep_disable = True
        else:
            default_concurrency = self._calculate_default_concurrency(model_config)
            perf_config_params = {
                "batch-size": DEFAULT_BATCH_SIZES,
                "concurrency-range": default_concurrency,
            }
        default_perf_analyzer_config.update_config(perf_config_params)
        default_perf_analyzer_config.update_config(model.perf_analyzer_flags())

        return default_perf_analyzer_config

    def _create_default_composing_model_config_variants(
        self, model: ModelProfileSpec
    ) -> List[ModelConfigVariant]:
        default_composing_model_config_variants: List[ModelConfigVariant] = []
        for composing_model in self._composing_models:
            default_composing_model_config_variants.append(
                BaseModelConfigGenerator.make_model_config_variant(
                    param_combo={},
                    model=composing_model,
                    model_variant_name_manager=self._model_variant_name_manager,
                    c_api_mode=self._c_api_mode,
                )
            )

        return default_composing_model_config_variants

    def _calculate_default_concurrency(self, model_config: ModelConfig) -> int:
        default_max_batch_size = model_config.max_batch_size()
        default_instance_count = model_config.instance_group_count(
            system_gpu_count=self._gpu_count
        )
        default_concurrency = 2 * default_max_batch_size * default_instance_count

        return default_concurrency

    def _create_model_run_config(
        self,
        model: ModelProfileSpec,
        model_config_variant: ModelConfigVariant,
        composing_model_config_variants: List[ModelConfigVariant],
        trial_objectives: ModelTrialObjectives,
    ) -> ModelRunConfig:
        perf_analyzer_config = self._create_perf_analyzer_config(
            model_name=model.model_name(),
            model=model,
            trial_objectives=trial_objectives,
        )
        model_run_config = ModelRunConfig(
            model.model_name(), model_config_variant, perf_analyzer_config
        )

        if self._composing_models:
            model_run_config.add_composing_model_config_variants(
                composing_model_config_variants
            )

        return model_run_config

    def _create_perf_analyzer_config(
        self,
        model_name: str,
        model: ModelProfileSpec,
        trial_objectives: ModelTrialObjectives,
    ) -> PerfAnalyzerConfig:
        perf_analyzer_config = PerfAnalyzerConfig()

        perf_analyzer_config.update_config_from_profile_config(model_name, self._config)

        batch_sizes = (
            int(trial_objectives["batch_sizes"])
            if "batch_sizes" in trial_objectives
            else DEFAULT_BATCH_SIZES
        )

        perf_config_params = {"batch-size": batch_sizes}

        if "concurrency" in trial_objectives:
            perf_config_params["concurrency-range"] = int(
                trial_objectives["concurrency"]
            )
        elif "request_rate" in trial_objectives:
            perf_config_params["request-rate-range"] = int(
                trial_objectives["request_rate"]
            )
            self._config.concurrency_sweep_disable = True

        perf_analyzer_config.update_config(perf_config_params)

        perf_analyzer_config.update_config(model.perf_analyzer_flags())
        return perf_analyzer_config
```
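The perf_config_params dict uses perf_analyzer's CLI-style option names as keys. A trial that searched batch_sizes and concurrency would translate roughly as below (values are hypothetical):

```python
# Hypothetical perf_config_params for one trial, and the perf_analyzer flags
# they roughly correspond to:
perf_config_params = {
    "batch-size": 4,           # perf_analyzer -b 4
    "concurrency-range": 32,   # perf_analyzer --concurrency-range 32
}
```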
```python
    def _should_terminate_early(
        self, min_configs_to_search: int, trial_number: int
    ) -> bool:
        number_of_trials_since_best = trial_number - self._best_trial_number  # type: ignore
        if trial_number < min_configs_to_search:
            should_terminate_early = False
        elif number_of_trials_since_best >= self._config.optuna_early_exit_threshold:
            should_terminate_early = True
        else:
            should_terminate_early = False

        return should_terminate_early
```
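Early exit is a simple patience rule: once the minimum trial budget has been met, stop when the best trial is at least optuna_early_exit_threshold trials in the past. A worked sketch (the threshold value is illustrative):

```python
# Patience-style early exit (threshold is an illustrative value).
min_configs_to_search = 10
optuna_early_exit_threshold = 8
best_trial_number = 12

for trial_number in (15, 19, 20, 21):
    since_best = trial_number - best_trial_number
    stop = (trial_number >= min_configs_to_search
            and since_best >= optuna_early_exit_threshold)
    print(trial_number, since_best, stop)
# 15 3 False / 19 7 False / 20 8 True -> the search would break at trial 20
```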
```python
    def _print_debug_search_space_info(self) -> None:
        logger.info("")
        num_of_configs_in_search_space = (
            self._calculate_num_of_configs_in_search_space()
        )
        logger.debug(
            f"Number of configs in search space: {num_of_configs_in_search_space}"
        )
        self._print_debug_model_search_space_info()
        logger.info("")

    def _calculate_num_of_configs_in_search_space(self) -> int:
        num_of_configs_in_search_space = 1
        for search_parameter in self._search_parameters.values():
            num_of_configs_in_search_space *= (
                search_parameter.number_of_total_possible_configurations()
            )

        for composing_search_parameter in self._composing_search_parameters.values():
            num_of_configs_in_search_space *= (
                composing_search_parameter.number_of_total_possible_configurations()
            )

        return num_of_configs_in_search_space
```
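The search-space size is the product of each searched parameter's possible values across all models and composing models; this is the quantity the percentage-based trial budgets are computed from. A quick illustration (parameter counts are hypothetical):

```python
# Example: one model searching max_batch_size over 8 exponential steps,
# instance_group over 5 counts, and concurrency over 11 exponential steps:
num_configs = 8 * 5 * 11
print(num_configs)  # 440 possible configurations
```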
```python
    def _print_debug_model_search_space_info(self) -> None:
        for model in self._models:
            model_name = model.model_name()
            logger.debug(f"Model - {model_name}:")
            for name in self._search_parameters[model_name].get_search_parameters():
                logger.debug(self._search_parameters[model_name].print_info(name))

        for (
            composing_model_name,
            composing_search_parameters,
        ) in self._composing_search_parameters.items():
            logger.debug(f"Composing model - {composing_model_name}:")
            for name in composing_search_parameters.get_search_parameters():
                logger.debug(composing_search_parameters.print_info(name))

    def _print_debug_score_info(
        self,
        run_config: RunConfig,
        score: float,
    ) -> None:
        if score != OptunaRunConfigGenerator.NO_MEASUREMENT_SCORE:
            logger.debug(
                f"Objective score for {run_config.combined_model_variants_name()}: {int(score * 100)} --- "  # type: ignore
                f"Best: {self._best_config_name} ({int(self._best_config_score * 100)})"  # type: ignore
            )

        logger.info("")
```