triton-model-analyzer 1.48.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,838 @@
+#!/usr/bin/env python3
+
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from random import randint
+from sys import maxsize
+from typing import Any, Dict, Generator, List, Optional, TypeAlias, Union
+
+import optuna
+
+from model_analyzer.config.generate.base_model_config_generator import (
+    BaseModelConfigGenerator,
+)
+from model_analyzer.config.generate.brute_run_config_generator import (
+    BruteRunConfigGenerator,
+)
+from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
+from model_analyzer.config.generate.model_variant_name_manager import (
+    ModelVariantNameManager,
+)
+from model_analyzer.config.generate.search_parameter import (
+    ParameterCategory,
+    SearchParameter,
+)
+from model_analyzer.config.generate.search_parameters import SearchParameters
+from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
+from model_analyzer.config.input.config_defaults import (
+    DEFAULT_BATCH_SIZES,
+    DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
+)
+from model_analyzer.config.run.model_run_config import ModelRunConfig
+from model_analyzer.config.run.run_config import RunConfig
+from model_analyzer.constants import LOGGER_NAME
+from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
+from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
+from model_analyzer.result.run_config_measurement import RunConfigMeasurement
+from model_analyzer.state.analyzer_state_manager import AnalyzerStateManager
+from model_analyzer.triton.model.model_config import ModelConfig
+from model_analyzer.triton.model.model_config_variant import ModelConfigVariant
+
+from .config_generator_interface import ConfigGeneratorInterface
+
+logger = logging.getLogger(LOGGER_NAME)
+
+ModelName: TypeAlias = str
+ParameterName: TypeAlias = str
+ObjectiveName: TypeAlias = str
+
+TrialObjective: TypeAlias = Union[str, int]
+ModelTrialObjectives: TypeAlias = Dict[ParameterName, TrialObjective]
+AllTrialObjectives: TypeAlias = Dict[ModelName, ModelTrialObjectives]
+ComposingTrialObjectives: TypeAlias = AllTrialObjectives
+
+ParameterCombo: TypeAlias = Dict[str, Any]
+
+
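+# Objectives are shaped as {model_name: {parameter_name: value}}; composing
+# (sub-model) objectives reuse the same nested mapping.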
+class OptunaRunConfigGenerator(ConfigGeneratorInterface):
+    """
+    Use Optuna algorithm to create RunConfigs
+    """
+
+    # This list represents all possible parameters Optuna can currently search for
+    optuna_parameter_list = [
+        "batch_sizes",
+        "max_batch_size",
+        "instance_group",
+        "concurrency",
+        "max_queue_delay_microseconds",
+        "request_rate",
+    ]
+
+    # TODO: TMA-1927: Figure out the correct value for this
+    NO_MEASUREMENT_SCORE = -1
+
+    def __init__(
+        self,
+        config: ConfigCommandProfile,
+        state_manager: AnalyzerStateManager,
+        gpu_count: int,
+        models: List[ModelProfileSpec],
+        composing_models: List[ModelProfileSpec],
+        model_variant_name_manager: ModelVariantNameManager,
+        search_parameters: Dict[str, SearchParameters],
+        composing_search_parameters: Dict[str, SearchParameters],
+        user_seed: Optional[int] = None,
+    ):
+        """
+        Parameters
+        ----------
+        config: ConfigCommandProfile
+            Profile configuration information
+        state_manager: AnalyzerStateManager
+            The object that allows control and update of checkpoint state
+        gpu_count: int
+            Number of GPUs in the system
+        models: List of ModelProfileSpec
+            List of models to profile
+        composing_models: List of ModelProfileSpec
+            List of composing models
+        model_variant_name_manager: ModelVariantNameManager
+        search_parameters: Dict[str, SearchParameters]
+            The objects that handle the user's configured search parameters
+        composing_search_parameters: Dict[str, SearchParameters]
+            The objects that handle the user's configured search parameters for composing models
+        user_seed: Optional[int]
+            The seed to use. If not provided, one will be generated (fresh run) or read from the checkpoint
+        """
+        self._config = config
+        self._state_manager = state_manager
+        self._gpu_count = gpu_count
+        self._models = models
+        self._composing_models = composing_models
+        self._search_parameters = search_parameters
+
+        self._composing_search_parameters = {}
+        for composing_model in composing_models:
+            self._composing_search_parameters[
+                composing_model.model_name()
+            ] = composing_search_parameters[composing_model.model_name()]
+
+        self._model_variant_name_manager = model_variant_name_manager
+
+        self._triton_env = BruteRunConfigGenerator.determine_triton_server_env(models)
+
+        self._num_models = len(models)
+        self._last_measurement: Optional[RunConfigMeasurement] = None
+        self._best_config_name = ""
+        self._best_config_score: Optional[float] = None
+        self._best_trial_number: Optional[int] = None
+
+        self._c_api_mode = config.triton_launch_mode == "c_api"
+
+        self._done = False
+
+        self._seed = self._create_seed(user_seed)
+
+        self._sampler = optuna.samplers.TPESampler(seed=self._seed)
+
+        self._study_name = ",".join([model.model_name() for model in self._models])
+
+        self._study = optuna.create_study(
+            study_name=self._study_name,
+            direction="maximize",
+            sampler=self._sampler,
+        )
+
+        self._init_state()
+
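+    # The seed is persisted in the checkpoint so that a resumed run recreates
+    # the same TPE sampling sequence; an explicit user_seed always takes
+    # precedence over a fresh random seed or the checkpointed value.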
+    def _get_seed(self) -> int:
+        return self._state_manager.get_state_variable("OptunaRunConfigGenerator.seed")
+
+    def _create_seed(self, user_seed: Optional[int]) -> int:
+        if self._state_manager.starting_fresh_run():
+            seed = randint(0, 10000) if user_seed is None else user_seed
+        else:
+            seed = self._get_seed() if user_seed is None else user_seed
+
+        return seed
+
+    def _init_state(self) -> None:
+        self._state_manager.set_state_variable(
+            "OptunaRunConfigGenerator.seed", self._seed
+        )
+
+    def _is_done(self) -> bool:
+        return self._done
+
+    def set_last_results(
+        self, measurements: List[Optional[RunConfigMeasurement]]
+    ) -> None:
+        # TODO: TMA-1927: Add support for multi-model
+        if measurements[0] is not None:
+            self._last_measurement = measurements[0]
+        else:
+            self._last_measurement = None
+
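+    # Ask/tell loop: measure the default configuration first to establish a
+    # baseline, then let each Optuna trial propose objectives, turn them into
+    # a RunConfig, yield it for measurement, score it against the baseline,
+    # and report the score back to the study.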
+    def get_configs(self) -> Generator[RunConfig, None, None]:
+        """
+        Returns
+        -------
+        RunConfig
+            The next RunConfig generated by this class
+        """
+        logger.info(
+            "Measuring default configuration to establish a baseline measurement"
+        )
+        default_run_config = self._create_default_run_config()
+        yield default_run_config
+
+        self._capture_default_measurement(default_run_config)
+        self._set_best_measurement(default_run_config)
+
+        if logging.DEBUG:
+            self._print_debug_search_space_info()
+
+        min_configs_to_search = self._determine_minimum_number_of_configs_to_search()
+        max_configs_to_search = self._determine_maximum_number_of_configs_to_search()
+
+        for trial_number in range(1, max_configs_to_search + 1):
+            trial = self._study.ask()
+            trial_objectives = self._create_trial_objectives(trial)
+            composing_trial_objectives = self._create_composing_trial_objectives(trial)
+            logger.debug(f"Trial {trial_number} of {max_configs_to_search}:")
+            run_config = self._create_objective_based_run_config(
+                trial_objectives, composing_trial_objectives
+            )
+            yield run_config
+
+            score = self._calculate_score()
+            self._set_best_measurement(run_config, score, trial_number)
+
+            if logging.DEBUG:
+                self._print_debug_score_info(run_config, score)
+
+            if self._should_terminate_early(min_configs_to_search, trial_number):
+                logger.debug("Early termination threshold reached")
+                break
+            self._study.tell(trial, score)
+
+    def _capture_default_measurement(self, default_run_config: RunConfig) -> None:
+        if not self._last_measurement:
+            raise TritonModelAnalyzerException(
+                "Default configuration did not return a measurement. Please check PA/Tritonserver log files."
+            )
+
+        self._default_measurement = self._last_measurement
+
+    def _set_best_measurement(
+        self, run_config: RunConfig, score: float = 0, trial_number: int = 0
+    ) -> None:
+        if self._best_config_score is None or score > self._best_config_score:
+            self._best_config_name = run_config.combined_model_variants_name()
+            self._best_config_score = score
+            self._best_trial_number = trial_number
+
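+    # Trial budget: by default the number of trials is a percentage of the
+    # full search space; optuna_max_trials / optuna_min_trials override it,
+    # and when both are set the smaller (max) or larger (min) value wins.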
+    def _determine_maximum_number_of_configs_to_search(self) -> int:
+        max_trials_based_on_percentage_of_search_space = (
+            self._determine_trials_based_on_max_percentage_of_search_space()
+        )
+
+        max_configs_to_search = self._decide_max_between_percentage_and_trial_count(
+            max_trials_based_on_percentage_of_search_space
+        )
+
+        return max_configs_to_search
+
+    def _determine_trials_based_on_max_percentage_of_search_space(self) -> int:
+        total_num_of_possible_configs = self._calculate_num_of_configs_in_search_space()
+        max_trials_based_on_percentage_of_search_space = int(
+            total_num_of_possible_configs
+            * self._config.max_percentage_of_search_space
+            / 100
+        )
+
+        return max_trials_based_on_percentage_of_search_space
+
+    def _decide_max_between_percentage_and_trial_count(
+        self, max_trials_based_on_percentage_of_search_space: int
+    ) -> int:
+        # By default we will search based on percentage of search space
+        # If the user specifies a number of trials we will use that instead
+        # If both are specified we will use the smaller number
+        max_trials_set_by_user = self._config.get_config()[
+            "optuna_max_trials"
+        ].is_set_by_user()
+        max_percentage_set_by_user = self._config.get_config()[
+            "max_percentage_of_search_space"
+        ].is_set_by_user()
+
+        if max_trials_set_by_user and max_percentage_set_by_user:
+            if (
+                self._config.optuna_max_trials
+                < max_trials_based_on_percentage_of_search_space
+            ):
+                logger.debug(
+                    f"Maximum number of trials: {self._config.optuna_max_trials} (optuna_max_trials)"
+                )
+                max_configs_to_search = self._config.optuna_max_trials
+            else:
+                logger.debug(
+                    f"Maximum number of trials: {max_trials_based_on_percentage_of_search_space} "
+                    f"({self._config.max_percentage_of_search_space}% of search space)"
+                )
+                max_configs_to_search = max_trials_based_on_percentage_of_search_space
+        elif max_trials_set_by_user:
+            logger.debug(
+                f"Maximum number of trials: {self._config.optuna_max_trials} (optuna_max_trials)"
+            )
+            max_configs_to_search = self._config.optuna_max_trials
+        else:
+            logger.debug(
+                f"Maximum number of trials: {max_trials_based_on_percentage_of_search_space} "
+                f"({self._config.max_percentage_of_search_space}% of search space)"
+            )
+            max_configs_to_search = max_trials_based_on_percentage_of_search_space
+
+        if logging.DEBUG:
+            logger.info("")
+        return max_configs_to_search
+
+    def _determine_minimum_number_of_configs_to_search(self) -> int:
+        min_trials_based_on_percentage_of_search_space = (
+            self._determine_trials_based_on_min_percentage_of_search_space()
+        )
+
+        min_configs_to_search = self._decide_min_between_percentage_and_trial_count(
+            min_trials_based_on_percentage_of_search_space
+        )
+
+        return min_configs_to_search
+
+    def _determine_trials_based_on_min_percentage_of_search_space(self) -> int:
+        total_num_of_possible_configs = self._calculate_num_of_configs_in_search_space()
+        min_trials_based_on_percentage_of_search_space = int(
+            total_num_of_possible_configs
+            * self._config.min_percentage_of_search_space
+            / 100
+        )
+
+        return min_trials_based_on_percentage_of_search_space
+
+    def _decide_min_between_percentage_and_trial_count(
+        self, min_trials_based_on_percentage_of_search_space: int
+    ) -> int:
+        # By default we will search based on percentage of search space
+        # If the user specifies a number of trials we will use that instead
+        # If both are specified we will use the larger number
+        min_trials_set_by_user = self._config.get_config()[
+            "optuna_min_trials"
+        ].is_set_by_user()
+        min_percentage_set_by_user = self._config.get_config()[
+            "min_percentage_of_search_space"
+        ].is_set_by_user()
+
+        if min_trials_set_by_user and min_percentage_set_by_user:
+            if (
+                self._config.optuna_min_trials
+                > min_trials_based_on_percentage_of_search_space
+            ):
+                logger.debug(
+                    f"Minimum number of trials: {self._config.optuna_min_trials} (optuna_min_trials)"
+                )
+                min_configs_to_search = self._config.optuna_min_trials
+            else:
+                logger.debug(
+                    f"Minimum number of trials: {min_trials_based_on_percentage_of_search_space} "
+                    f"({self._config.min_percentage_of_search_space}% of search space)"
+                )
+                min_configs_to_search = min_trials_based_on_percentage_of_search_space
+        elif min_trials_set_by_user:
+            logger.debug(
+                f"Minimum number of trials: {self._config.optuna_min_trials} (optuna_min_trials)"
+            )
+            min_configs_to_search = self._config.optuna_min_trials
+        else:
+            logger.debug(
+                f"Minimum number of trials: {min_trials_based_on_percentage_of_search_space} "
+                f"({self._config.min_percentage_of_search_space}% of search space)"
+            )
+            min_configs_to_search = min_trials_based_on_percentage_of_search_space
+
+        return min_configs_to_search
+
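+    # Each searchable parameter becomes a uniquely named Optuna objective
+    # ("model::parameter"); when use_concurrency_formula is set, concurrency
+    # is computed from instance count and max batch size rather than searched.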
+    def _create_trial_objectives(self, trial: optuna.Trial) -> AllTrialObjectives:
+        trial_objectives: AllTrialObjectives = {}
+
+        for model in self._models:
+            model_name = model.model_name()
+            trial_objectives[model_name] = {}
+
+            for parameter_name in OptunaRunConfigGenerator.optuna_parameter_list:
+                parameter = self._search_parameters[model_name].get_parameter(
+                    parameter_name
+                )
+                if parameter:
+                    objective_name = self._create_trial_objective_name(
+                        model_name=model_name, parameter_name=parameter_name
+                    )
+
+                    trial_objectives[model_name][
+                        parameter_name
+                    ] = self._create_trial_objective(trial, objective_name, parameter)
+
+            if self._config.use_concurrency_formula:
+                trial_objectives[model_name][
+                    "concurrency"
+                ] = self._get_objective_concurrency(model_name, trial_objectives)
+
+        return trial_objectives
+
+    def _create_composing_trial_objectives(
+        self, trial: optuna.Trial
+    ) -> ComposingTrialObjectives:
+        composing_trial_objectives: ComposingTrialObjectives = {}
+        for composing_model in self._composing_models:
+            composing_trial_objectives[composing_model.model_name()] = {}
+            for parameter_name in OptunaRunConfigGenerator.optuna_parameter_list:
+                parameter = self._composing_search_parameters[
+                    composing_model.model_name()
+                ].get_parameter(parameter_name)
+
+                if parameter:
+                    objective_name = self._create_trial_objective_name(
+                        model_name=composing_model.model_name(),
+                        parameter_name=parameter_name,
+                    )
+                    composing_trial_objectives[composing_model.model_name()][
+                        parameter_name
+                    ] = self._create_trial_objective(trial, objective_name, parameter)
+
+        return composing_trial_objectives
+
+    def _create_trial_objective_name(
+        self, model_name: ModelName, parameter_name: ParameterName
+    ) -> ObjectiveName:
+        # This ensures that Optuna has a unique name
+        # for each objective we are searching
+        objective_name = f"{model_name}::{parameter_name}"
+
+        return objective_name
+
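+    # EXPONENTIAL parameters are searched in log2 space: Optuna suggests the
+    # exponent and the objective becomes 2**exponent, which keeps powers of
+    # two (e.g. batch sizes) evenly represented in the search.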
+    def _create_trial_objective(
+        self, trial: optuna.Trial, name: ObjectiveName, parameter: SearchParameter
+    ) -> TrialObjective:
+        if parameter.category is ParameterCategory.INTEGER:
+            objective = trial.suggest_int(
+                name, parameter.min_range, parameter.max_range
+            )
+        elif parameter.category is ParameterCategory.EXPONENTIAL:
+            objective = int(
+                2 ** trial.suggest_int(name, parameter.min_range, parameter.max_range)
+            )
+        elif parameter.category is ParameterCategory.INT_LIST:
+            objective = int(trial.suggest_categorical(name, parameter.enumerated_list))
+        elif parameter.category is ParameterCategory.STR_LIST:
+            objective = trial.suggest_categorical(name, parameter.enumerated_list)
+
+        return objective
+
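+    # Heuristic concurrency: 2 * instance_count * max_batch_size, clamped to
+    # the run_config_search_min/max_concurrency range.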
+    def _get_objective_concurrency(
+        self, model_name: str, trial_objectives: AllTrialObjectives
+    ) -> int:
+        max_batch_size = trial_objectives[model_name].get("max_batch_size", 1)
+        concurrency_formula = (
+            2 * int(trial_objectives[model_name]["instance_group"]) * max_batch_size
+        )
+        concurrency = (
+            self._config.run_config_search_max_concurrency
+            if concurrency_formula > self._config.run_config_search_max_concurrency
+            else concurrency_formula
+        )
+        concurrency = (
+            self._config.run_config_search_min_concurrency
+            if concurrency_formula < self._config.run_config_search_min_concurrency
+            else concurrency
+        )
+
+        return concurrency
+
+    def _create_objective_based_run_config(
+        self,
+        trial_objectives: AllTrialObjectives,
+        composing_trial_objectives: ComposingTrialObjectives,
+    ) -> RunConfig:
+        run_config = RunConfig(self._triton_env)
+
+        composing_model_config_variants = self._create_composing_model_config_variants(
+            composing_trial_objectives
+        )
+
+        for model in self._models:
+            model_config_variant = self._create_model_config_variant(
+                model=model,
+                trial_objectives=trial_objectives[model.model_name()],
+                composing_trial_objectives=composing_trial_objectives,
+                composing_model_config_variants=composing_model_config_variants,
+            )
+
+            model_run_config = self._create_model_run_config(
+                model=model,
+                model_config_variant=model_config_variant,
+                composing_model_config_variants=composing_model_config_variants,
+                trial_objectives=trial_objectives[model.model_name()],
+            )
+
+            run_config.add_model_run_config(model_run_config=model_run_config)
+
+        return run_config
+
+    def _create_parameter_combo(
+        self,
+        model: ModelProfileSpec,
+        trial_objectives: ModelTrialObjectives,
+        composing_trial_objectives: ComposingTrialObjectives,
+    ) -> ParameterCombo:
+        if model.is_ensemble():
+            param_combo = self._create_ensemble_parameter_combo(
+                composing_trial_objectives
+            )
+        else:
+            param_combo = self._create_non_ensemble_parameter_combo(
+                model, trial_objectives
+            )
+
+        return param_combo
+
+    def _create_ensemble_parameter_combo(
+        self,
+        composing_trial_objectives: ComposingTrialObjectives,
+    ) -> ParameterCombo:
+        """
+        For the ensemble model the only parameter we need to set
+        is the max batch size, which will be the minimum of the
+        composing models' max batch sizes
+        """
+
+        min_val_of_max_batch_size = maxsize
+        for composing_trial_objective in composing_trial_objectives.values():
+            min_val_of_max_batch_size = int(
+                min(
+                    composing_trial_objective.get("max_batch_size", 1),
+                    min_val_of_max_batch_size,
+                )
+            )
+
+        param_combo = {"max_batch_size": min_val_of_max_batch_size}
+
+        return param_combo
+
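+    # For non-ensemble models, trial objectives map directly onto Triton
+    # model-config fields (instance_group, max_batch_size, dynamic_batching).
+    # Note that a trial's max_queue_delay_microseconds setting replaces the
+    # bare dynamic_batching entry with a configured one.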
+    def _create_non_ensemble_parameter_combo(
+        self, model: ModelProfileSpec, trial_objectives: ModelTrialObjectives
+    ) -> ParameterCombo:
+        param_combo: ParameterCombo = {}
+
+        if model.supports_dynamic_batching():
+            param_combo["dynamic_batching"] = []
+
+        if "instance_group" in trial_objectives:
+            kind = "KIND_CPU" if model.cpu_only() else "KIND_GPU"
+            param_combo["instance_group"] = [
+                {
+                    "count": trial_objectives["instance_group"],
+                    "kind": kind,
+                }
+            ]
+
+        if "max_batch_size" in trial_objectives:
+            param_combo["max_batch_size"] = trial_objectives["max_batch_size"]
+
+        if "max_queue_delay_microseconds" in trial_objectives:
+            param_combo["dynamic_batching"] = {
+                "max_queue_delay_microseconds": trial_objectives[
+                    "max_queue_delay_microseconds"
+                ]
+            }
+
+        return param_combo
+
+    def _create_model_config_variant(
+        self,
+        model: ModelProfileSpec,
+        trial_objectives: ModelTrialObjectives,
+        composing_trial_objectives: ComposingTrialObjectives = {},
+        composing_model_config_variants: List[ModelConfigVariant] = [],
+    ) -> ModelConfigVariant:
+        param_combo = self._create_parameter_combo(
+            model, trial_objectives, composing_trial_objectives
+        )
+
+        if model.is_ensemble():
+            model_config_variant = BaseModelConfigGenerator.make_ensemble_model_config_variant(
+                model=model,
+                ensemble_composing_model_config_variants=composing_model_config_variants,
+                model_variant_name_manager=self._model_variant_name_manager,
+                param_combo=param_combo,
+                c_api_mode=self._c_api_mode,
+            )
+        else:
+            model_config_variant = BaseModelConfigGenerator.make_model_config_variant(
+                param_combo=param_combo,
+                model=model,
+                model_variant_name_manager=self._model_variant_name_manager,
+                c_api_mode=self._c_api_mode,
+            )
+
+        return model_config_variant
+
+    def _create_composing_model_config_variants(
+        self, composing_trial_objectives: ComposingTrialObjectives
+    ) -> List[ModelConfigVariant]:
+        composing_model_config_variants = []
+        for composing_model in self._composing_models:
+            composing_model_config_variant = self._create_model_config_variant(
+                model=composing_model,
+                trial_objectives=composing_trial_objectives[
+                    composing_model.model_name()
+                ],
+            )
+            composing_model_config_variants.append(composing_model_config_variant)
+
+        return composing_model_config_variants
+
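+    # Scores are relative: each trial's measurement is compared against the
+    # baseline (default-config) measurement, and a failed measurement is
+    # penalized with NO_MEASUREMENT_SCORE.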
+    def _calculate_score(self) -> float:
+        if self._last_measurement:
+            score = self._default_measurement.compare_measurements(  # type: ignore
+                self._last_measurement
+            )
+        else:
+            score = OptunaRunConfigGenerator.NO_MEASUREMENT_SCORE
+
+        return score
+
+    def _create_default_run_config(self) -> RunConfig:
+        default_run_config = RunConfig(self._triton_env)
+        for model in self._models:
+            default_model_run_config = self._create_default_model_run_config(model)
+            default_run_config.add_model_run_config(default_model_run_config)
+
+        return default_run_config
+
+    def _create_default_model_run_config(
+        self, model: ModelProfileSpec
+    ) -> ModelRunConfig:
+        default_model_config_variant = (
+            BaseModelConfigGenerator.make_model_config_variant(
+                param_combo={},
+                model=model,
+                model_variant_name_manager=self._model_variant_name_manager,
+                c_api_mode=self._c_api_mode,
+            )
+        )
+
+        default_perf_analyzer_config = self._create_default_perf_analyzer_config(
+            model, default_model_config_variant.model_config
+        )
+
+        default_model_run_config = ModelRunConfig(
+            model.model_name(),
+            default_model_config_variant,
+            default_perf_analyzer_config,
+        )
+
+        default_composing_model_config_variants = (
+            self._create_default_composing_model_config_variants(model)
+        )
+
+        if default_composing_model_config_variants:
+            default_model_run_config.add_composing_model_config_variants(
+                default_composing_model_config_variants
+            )
+
+        return default_model_run_config
+
+    def _create_default_perf_analyzer_config(
+        self, model: ModelProfileSpec, model_config: ModelConfig
+    ) -> PerfAnalyzerConfig:
+        default_perf_analyzer_config = PerfAnalyzerConfig()
+        default_perf_analyzer_config.update_config_from_profile_config(
+            model_config.get_field("name"), self._config
+        )
+
+        if self._search_parameters[model_config.get_field("name")].get_parameter(
+            "request_rate"
+        ):
+            perf_config_params = {
+                "batch-size": DEFAULT_BATCH_SIZES,
+                "request-rate-range": DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
+            }
+            self._config.concurrency_sweep_disable = True
+        else:
+            default_concurrency = self._calculate_default_concurrency(model_config)
+            perf_config_params = {
+                "batch-size": DEFAULT_BATCH_SIZES,
+                "concurrency-range": default_concurrency,
+            }
+        default_perf_analyzer_config.update_config(perf_config_params)
+        default_perf_analyzer_config.update_config(model.perf_analyzer_flags())
+
+        return default_perf_analyzer_config
+
+    def _create_default_composing_model_config_variants(
+        self, model: ModelProfileSpec
+    ) -> List[ModelConfigVariant]:
+        default_composing_model_config_variants: List[ModelConfigVariant] = []
+        for composing_model in self._composing_models:
+            default_composing_model_config_variants.append(
+                BaseModelConfigGenerator.make_model_config_variant(
+                    param_combo={},
+                    model=composing_model,
+                    model_variant_name_manager=self._model_variant_name_manager,
+                    c_api_mode=self._c_api_mode,
+                )
+            )
+
+        return default_composing_model_config_variants
+
+    def _calculate_default_concurrency(self, model_config: ModelConfig) -> int:
+        default_max_batch_size = model_config.max_batch_size()
+        default_instance_count = model_config.instance_group_count(
+            system_gpu_count=self._gpu_count
+        )
+        default_concurrency = 2 * default_max_batch_size * default_instance_count
+
+        return default_concurrency
+
+    def _create_model_run_config(
+        self,
+        model: ModelProfileSpec,
+        model_config_variant: ModelConfigVariant,
+        composing_model_config_variants: List[ModelConfigVariant],
+        trial_objectives: ModelTrialObjectives,
+    ) -> ModelRunConfig:
+        perf_analyzer_config = self._create_perf_analyzer_config(
+            model_name=model.model_name(),
+            model=model,
+            trial_objectives=trial_objectives,
+        )
+        model_run_config = ModelRunConfig(
+            model.model_name(), model_config_variant, perf_analyzer_config
+        )
+
+        if self._composing_models:
+            model_run_config.add_composing_model_config_variants(
+                composing_model_config_variants
+            )
+
+        return model_run_config
+
+    def _create_perf_analyzer_config(
+        self,
+        model_name: str,
+        model: ModelProfileSpec,
+        trial_objectives: ModelTrialObjectives,
+    ) -> PerfAnalyzerConfig:
+        perf_analyzer_config = PerfAnalyzerConfig()
+
+        perf_analyzer_config.update_config_from_profile_config(model_name, self._config)
+
+        batch_sizes = (
+            int(trial_objectives["batch_sizes"])
+            if "batch_sizes" in trial_objectives
+            else DEFAULT_BATCH_SIZES
+        )
+
+        perf_config_params = {"batch-size": batch_sizes}
+
+        if "concurrency" in trial_objectives:
+            perf_config_params["concurrency-range"] = int(
+                trial_objectives["concurrency"]
+            )
+        elif "request_rate" in trial_objectives:
+            perf_config_params["request-rate-range"] = int(
+                trial_objectives["request_rate"]
+            )
+            self._config.concurrency_sweep_disable = True
+
+        perf_analyzer_config.update_config(perf_config_params)
+
+        perf_analyzer_config.update_config(model.perf_analyzer_flags())
+        return perf_analyzer_config
+
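+    # Early exit: once the minimum trial count has been met, stop searching
+    # if no new best configuration has been found within the last
+    # optuna_early_exit_threshold trials.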
+    def _should_terminate_early(
+        self, min_configs_to_search: int, trial_number: int
+    ) -> bool:
+        number_of_trials_since_best = trial_number - self._best_trial_number  # type: ignore
+        if trial_number < min_configs_to_search:
+            should_terminate_early = False
+        elif number_of_trials_since_best >= self._config.optuna_early_exit_threshold:
+            should_terminate_early = True
+        else:
+            should_terminate_early = False
+
+        return should_terminate_early
+
+    def _print_debug_search_space_info(self) -> None:
+        logger.info("")
+        num_of_configs_in_search_space = (
+            self._calculate_num_of_configs_in_search_space()
+        )
+        logger.debug(
+            f"Number of configs in search space: {num_of_configs_in_search_space}"
+        )
+        self._print_debug_model_search_space_info()
+        logger.info("")
+
+    def _calculate_num_of_configs_in_search_space(self) -> int:
+        num_of_configs_in_search_space = 1
+        for search_parameter in self._search_parameters.values():
+            num_of_configs_in_search_space *= (
+                search_parameter.number_of_total_possible_configurations()
+            )
+
+        for composing_search_parameter in self._composing_search_parameters.values():
+            num_of_configs_in_search_space *= (
+                composing_search_parameter.number_of_total_possible_configurations()
+            )
+
+        return num_of_configs_in_search_space
+
+    def _print_debug_model_search_space_info(self) -> None:
+        for model in self._models:
+            model_name = model.model_name()
+            logger.debug(f"Model - {model_name}:")
+            for name in self._search_parameters[model_name].get_search_parameters():
+                logger.debug(self._search_parameters[model_name].print_info(name))
+
+        for (
+            composing_model_name,
+            composing_search_parameters,
+        ) in self._composing_search_parameters.items():
+            logger.debug(f"Composing model - {composing_model_name}:")
+            for name in composing_search_parameters.get_search_parameters():
+                logger.debug(composing_search_parameters.print_info(name))
+
+    def _print_debug_score_info(
+        self,
+        run_config: RunConfig,
+        score: float,
+    ) -> None:
+        if score != OptunaRunConfigGenerator.NO_MEASUREMENT_SCORE:
+            logger.debug(
+                f"Objective score for {run_config.combined_model_variants_name()}: {int(score * 100)} --- "  # type: ignore
+                f"Best: {self._best_config_name} ({int(self._best_config_score * 100)})"  # type: ignore
+            )
+
+        logger.info("")