triton_model_analyzer-1.48.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
model_analyzer/config/generate/quick_run_config_generator.py
@@ -0,0 +1,753 @@
+#!/usr/bin/env python3
+
+# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from sys import maxsize
+from typing import Dict, Generator, List, Optional, Tuple, Union
+
+from model_analyzer.config.generate.base_model_config_generator import (
+    BaseModelConfigGenerator,
+)
+from model_analyzer.config.generate.brute_run_config_generator import (
+    BruteRunConfigGenerator,
+)
+from model_analyzer.config.generate.coordinate import Coordinate
+from model_analyzer.config.generate.coordinate_data import CoordinateData
+from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
+from model_analyzer.config.generate.model_variant_name_manager import (
+    ModelVariantNameManager,
+)
+from model_analyzer.config.generate.neighborhood import Neighborhood
+from model_analyzer.config.generate.search_config import SearchConfig
+from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
+from model_analyzer.config.input.config_defaults import DEFAULT_BATCH_SIZES
+from model_analyzer.config.run.model_run_config import ModelRunConfig
+from model_analyzer.config.run.run_config import RunConfig
+from model_analyzer.constants import LOGGER_NAME
+from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
+from model_analyzer.result.run_config_measurement import RunConfigMeasurement
+from model_analyzer.triton.model.model_config import ModelConfig
+from model_analyzer.triton.model.model_config_variant import ModelConfigVariant
+
+from .config_generator_interface import ConfigGeneratorInterface
+from .generator_utils import GeneratorUtils
+
+logger = logging.getLogger(LOGGER_NAME)
+from copy import deepcopy
+
+
+class QuickRunConfigGenerator(ConfigGeneratorInterface):
+    """
+    Hill climbing algorithm to create RunConfigs
+    """
+
+    def __init__(
+        self,
+        search_config: SearchConfig,
+        config: ConfigCommandProfile,
+        gpu_count: int,
+        models: List[ModelProfileSpec],
+        composing_models: List[ModelProfileSpec],
+        model_variant_name_manager: ModelVariantNameManager,
+    ):
+        """
+        Parameters
+        ----------
+        search_config: SearchConfig
+            Defines parameters and dimensions for the search
+        config: ConfigCommandProfile
+            Profile configuration information
+        gpu_count: Number of gpus in the system
+        models: List of ModelProfileSpec
+            List of models to profile
+        composing_models: List of ModelProfileSpec
+            List of composing model profiles
+        model_variant_name_manager: ModelVariantNameManager
+        """
+        self._search_config = search_config
+        self._config = config
+        self._gpu_count = gpu_count
+        self._models = models
+        self._composing_models = composing_models
+
+        self._model_variant_name_manager = model_variant_name_manager
+
+        self._triton_env = BruteRunConfigGenerator.determine_triton_server_env(models)
+
+        self._c_api_mode = config.triton_launch_mode == "c_api"
+
+        # This tracks measured results for all coordinates
+        self._coordinate_data = CoordinateData()
+
+        # This is an initial center that the neighborhood is built around.
+        # It is updated every new creation of the neighborhood.
+        self._home_coordinate = self._get_starting_coordinate()
+
+        # This is the coordinate that we want to measure next. It is
+        # updated every step of this generator
+        self._coordinate_to_measure: Coordinate = self._home_coordinate
+
+        # Track the best coordinate seen so far that can be used during
+        # the back-off stage.
+        self._best_coordinate = self._home_coordinate
+        self._best_measurement: Optional[RunConfigMeasurement] = None
+
+        self._neighborhood = Neighborhood(
+            self._search_config.get_neighborhood_config(),
+            self._home_coordinate,
+            self._coordinate_data,
+        )
+
+        # Sticky bit. Once true, we should never stay at a home that is failing or None
+        self._home_has_passed = False
+
+        self._done = False
+
+    def _is_done(self) -> bool:
+        return self._done
+
+    def get_configs(self) -> Generator[RunConfig, None, None]:
+        """
+        Returns
+        -------
+        RunConfig
+            The next RunConfig generated by this class
+        """
+        config = self._create_default_run_config()
+        yield (config)
+
+        while True:
+            if self._is_done():
+                break
+
+            config = self._get_next_run_config()
+            yield (config)
+            self._step()
+
+    def _step(self) -> None:
+        """
+        Determine self._coordinate_to_measure, which is what is used to
+        create the next RunConfig
+        """
+        if self._should_step_back():
+            self._take_step_back()
+        elif self._neighborhood.enough_coordinates_initialized():
+            self._take_step()
+        else:
+            self._pick_coordinate_to_initialize()
+
+    def set_last_results(
+        self, measurements: List[Optional[RunConfigMeasurement]]
+    ) -> None:
+        """
+        Given the results from the last RunConfig, make decisions
+        about future configurations to generate
+
+        Parameters
+        ----------
+        measurements: List of Measurements from the last run(s)
+        """
+        self._coordinate_data.set_measurement(
+            coordinate=self._coordinate_to_measure, measurement=measurements[0]
+        )
+
+        if measurements[0] is not None:
+            self._update_best_measurement(measurement=measurements[0])
+
+            if (
+                self._measuring_home_coordinate()
+                and measurements[0].is_passing_constraints()
+            ):
+                self._home_has_passed = True
+
+        self._print_debug_logs(measurements)
+
+    def _update_best_measurement(self, measurement: RunConfigMeasurement) -> None:
+        """Keep track of the best coordinate/measurement seen so far."""
+        if self._best_measurement is None:
+            self._best_coordinate = self._coordinate_to_measure
+            self._best_measurement = measurement
+
+        elif (
+            not self._best_measurement.is_passing_constraints()
+            and measurement.is_passing_constraints()
+        ):
+            self._best_coordinate = self._coordinate_to_measure
+            self._best_measurement = measurement
+
+        elif (
+            not self._best_measurement.is_passing_constraints()
+            and not measurement.is_passing_constraints()
+        ):
+            comparison = self._best_measurement.compare_constraints(other=measurement)
+
+            if comparison and comparison > 0:
+                self._best_coordinate = self._coordinate_to_measure
+                self._best_measurement = measurement
+
+        elif (
+            self._best_measurement.is_passing_constraints()
+            and measurement.is_passing_constraints()
+        ):
+            comparison = self._best_measurement.compare_measurements(other=measurement)
+
+            if comparison and comparison > 0:
+                self._best_coordinate = self._coordinate_to_measure
+                self._best_measurement = measurement
+
+    def _get_last_results(self) -> Optional[RunConfigMeasurement]:
+        return self._coordinate_data.get_measurement(
+            coordinate=self._coordinate_to_measure
+        )
+
+    def _take_step(self) -> None:
+        new_coordinate = self._neighborhood.determine_new_home()
+        self._determine_if_done(new_coordinate)
+
+        logger.debug(f"Stepping {self._home_coordinate}->{new_coordinate}")
+        self._home_coordinate = new_coordinate
+        self._coordinate_to_measure = new_coordinate
+        self._recreate_neighborhood(force_slow_mode=False)
+
+    def _take_step_back(self) -> None:
+        new_coordinate = self._neighborhood.get_nearest_neighbor(
+            coordinate_in=self._best_coordinate
+        )
+
+        # TODO: TMA-871: handle back-off (and its termination) better.
+        if new_coordinate == self._home_coordinate:
+            self._done = True
+
+        logger.debug(f"Stepping back: {self._home_coordinate}->{new_coordinate}")
+        self._home_coordinate = new_coordinate
+        self._coordinate_to_measure = new_coordinate
+        self._recreate_neighborhood(force_slow_mode=True)
+
+    def _should_step_back(self) -> bool:
+        """
+        Step back if take any of the following steps:
+          - Step from a passing home to a failing home
+          - Step from any home to home with a None measurement
+        """
+        if self._measuring_home_coordinate():
+            last_results = self._get_last_results()
+            if not last_results:
+                return True
+            last_results_passed = last_results.is_passing_constraints()
+            if not last_results_passed and self._home_has_passed:
+                return True
+        return False
+
+    def _measuring_home_coordinate(self) -> bool:
+        return self._coordinate_to_measure == self._home_coordinate
+
+    def _determine_if_done(self, new_coordinate: Coordinate) -> None:
+        """
+        Based on the new coordinate picked, determine if the generator is done
+        and if so, update self._done
+        """
+        if new_coordinate == self._home_coordinate:
+            self._done = True
+        if self._coordinate_data.get_visit_count(new_coordinate) >= 2:
+            self._done = True
+
+    def _recreate_neighborhood(self, force_slow_mode: bool) -> None:
+        neighborhood_config = self._search_config.get_neighborhood_config()
+
+        self._neighborhood = Neighborhood(
+            neighborhood_config, self._home_coordinate, self._coordinate_data
+        )
+
+        self._coordinate_data.increment_visit_count(self._home_coordinate)
+
+        if force_slow_mode:
+            self._neighborhood.force_slow_mode()
+
+    def _pick_coordinate_to_initialize(self) -> None:
+        next_coordinate = self._neighborhood.pick_coordinate_to_initialize()
+
+        if next_coordinate:
+            self._coordinate_to_measure = next_coordinate
+            logger.debug(f"Need more data. Measuring {self._coordinate_to_measure}")
+        else:
+            logger.info("No coordinate to measure. Exiting")
+            self._done = True
+
+    def _get_starting_coordinate(self) -> Coordinate:
+        min_indexes = self._search_config.get_min_indexes()
+        return Coordinate(min_indexes)
+
+    def _get_coordinate_values(
+        self, coordinate: Coordinate, key: int
+    ) -> Dict[str, Union[int, float]]:
+        dims = self._search_config.get_dimensions()
+        values = dims.get_values_for_coordinate(coordinate)
+        return values[key]
+
+    def _get_next_run_config(self) -> RunConfig:
+        run_config = RunConfig(self._triton_env)
+
+        model_index = 0
+        for model in self._models:
+            mrc, model_index = self._get_next_model_run_config(model, model_index)
+            run_config.add_model_run_config(mrc)
+
+        return run_config
+
+    def _get_next_model_run_config(
+        self, model: ModelProfileSpec, start_model_index: int
+    ) -> Tuple[ModelRunConfig, int]:
+        """
+        Returns the next ModelRunConfig, along with the starting dimension
+        of the next model
+        """
+        # The ordering of dimensions is dependent on the type of composing model:
+        #   Ensemble - The top level model has no search dimensions - all dimensions
+        #              come from the composing models
+        #   BLS - The top level model has one dimension (instance) - and the
+        #         remaining dimensions come from composing models
+        #
+        # In addition, for Ensemble models, it is necessary to create the composing model configs
+        # first, as these are needed when creating the top-level model config - while all other
+        # models want to create the top-level first
+        (
+            model_config_variant,
+            model_index,
+        ) = self._get_next_non_composing_model_config_variant(model, start_model_index)
+
+        (
+            composing_model_config_variants,
+            model_index,
+        ) = self._get_next_composing_model_config_variants(model_index)
+
+        # This will overwrite the empty ModelConfigVariant created above
+        if model.is_ensemble():
+            model_config_variant = self._get_next_ensemble_top_level_config_variant(
+                model, composing_model_config_variants, model_index
+            )
+
+        model_run_config = self._create_next_model_run_config(
+            model,
+            start_model_index,
+            model_config_variant,
+            composing_model_config_variants,
+        )
+
+        return (model_run_config, model_index)
+
+    def _get_next_non_composing_model_config_variant(
+        self, model: ModelProfileSpec, model_index: int
+    ) -> Tuple[ModelConfigVariant, int]:
+        if model.is_ensemble():
+            return (ModelConfigVariant(ModelConfig({}), ""), model_index)
+        else:
+            return (
+                self._get_next_model_config_variant(model, model_index),
+                model_index + 1,
+            )
+
+    def _get_next_composing_model_config_variants(
+        self, model_index: int
+    ) -> Tuple[List[ModelConfigVariant], int]:
+        composing_model_config_variants = []
+        for composing_model in self._composing_models:
+            composing_model_config_variant = self._get_next_model_config_variant(
+                composing_model, model_index
+            )
+            model_index += 1
+            composing_model_config_variants.append(composing_model_config_variant)
+
+        return (composing_model_config_variants, model_index)
+
+    def _get_next_ensemble_top_level_config_variant(
+        self,
+        model: ModelProfileSpec,
+        composing_model_config_variants: List[ModelConfigVariant],
+        model_index: int,
+    ) -> ModelConfigVariant:
+        param_combo = self._get_next_ensemble_param_combo(model_index)
+
+        model_config_variant = self._get_next_ensemble_model_config_variant(
+            model, composing_model_config_variants, param_combo
+        )
+
+        return model_config_variant
+
+    def _get_next_ensemble_param_combo(self, end_model_index: int) -> dict:
+        """
+        For the ensemble model the only parameter we need to set
+        is the max batch size; which will be the minimum batch size
+        found in the composing_model max batch sizes
+        """
+        min_val_of_max_batch_size = maxsize
+        for model_index in range(0, end_model_index):
+            dimension_values = self._get_coordinate_values(
+                self._coordinate_to_measure, model_index
+            )
+
+            min_val_of_max_batch_size = int(
+                min(
+                    [
+                        dimension_values.get("max_batch_size", 1),
+                        min_val_of_max_batch_size,
+                    ]
+                )
+            )
+
+        param_combo = {"max_batch_size": min_val_of_max_batch_size}
+
+        return param_combo
+
+    def _get_next_ensemble_model_config_variant(
+        self,
+        model: ModelProfileSpec,
+        composing_config_variants: List[ModelConfigVariant],
+        param_combo: dict,
+    ) -> ModelConfigVariant:
+        model_config_variant = (
+            BaseModelConfigGenerator.make_ensemble_model_config_variant(
+                model=model,
+                ensemble_composing_model_config_variants=composing_config_variants,
+                model_variant_name_manager=self._model_variant_name_manager,
+                param_combo=param_combo,
+                c_api_mode=self._c_api_mode,
+            )
+        )
+
+        return model_config_variant
+
+    def _get_next_model_config_variant(
+        self, model: ModelProfileSpec, dimension_index: int
+    ) -> ModelConfigVariant:
+        dimension_values = self._get_coordinate_values(
+            self._coordinate_to_measure, dimension_index
+        )
+
+        model_config_params = deepcopy(model.model_config_parameters())
+        if model_config_params:
+            model_config_params.pop("max_batch_size", None)
+
+            # This is guaranteed to only generate one combination (check is in config_command)
+            param_combos = GeneratorUtils.generate_combinations(model_config_params)
+            assert len(param_combos) == 1
+
+            param_combo = param_combos[0]
+        else:
+            param_combo = {}
+
+        kind = "KIND_CPU" if model.cpu_only() else "KIND_GPU"
+        instance_count = self._calculate_instance_count(dimension_values)
+
+        param_combo["instance_group"] = [
+            {
+                "count": instance_count,
+                "kind": kind,
+            }
+        ]
+
+        if "max_batch_size" in dimension_values:
+            param_combo["max_batch_size"] = self._calculate_model_batch_size(
+                dimension_values
+            )
+
+        if model.supports_dynamic_batching():
+            param_combo["dynamic_batching"] = {}
+
+        model_config_variant = BaseModelConfigGenerator.make_model_config_variant(
+            param_combo=param_combo,
+            model=model,
+            model_variant_name_manager=self._model_variant_name_manager,
+            c_api_mode=self._c_api_mode,
+        )
+
+        return model_config_variant
+
+    def _create_next_model_run_config(
+        self,
+        model: ModelProfileSpec,
+        model_index: int,
+        model_config_variant: ModelConfigVariant,
+        composing_model_config_variants: List[ModelConfigVariant],
+    ) -> ModelRunConfig:
+        perf_analyzer_config = self._get_next_perf_analyzer_config(
+            model.model_name(), model, model_index
+        )
+        model_run_config = ModelRunConfig(
+            model.model_name(), model_config_variant, perf_analyzer_config
+        )
+
+        if self._composing_models:
+            model_run_config.add_composing_model_config_variants(
+                composing_model_config_variants
+            )
+
+        return model_run_config
+
+    def _get_next_perf_analyzer_config(
+        self, model_name: str, model: ModelProfileSpec, model_index: int
+    ) -> PerfAnalyzerConfig:
+        dimension_values = self._get_coordinate_values(
+            self._coordinate_to_measure, model_index
+        )
+
+        perf_analyzer_config = PerfAnalyzerConfig()
+
+        perf_analyzer_config.update_config_from_profile_config(model_name, self._config)
+
+        concurrency = self._calculate_concurrency(dimension_values)
+
+        perf_config_params = {
+            "batch-size": DEFAULT_BATCH_SIZES,
+            "concurrency-range": concurrency,
+        }
+        perf_analyzer_config.update_config(perf_config_params)
+
+        perf_analyzer_config.update_config(model.perf_analyzer_flags())
+        return perf_analyzer_config
+
+    def _calculate_model_batch_size(
+        self, dimension_values: Dict[str, Union[int, float]]
+    ) -> int:
+        batch_size = int(dimension_values.get("max_batch_size", 1))
+
+        min_batch_size_is_set_by_config = self._config.get_config()[
+            "run_config_search_min_model_batch_size"
+        ].is_set_by_user()
+
+        max_batch_size_is_set_by_config = self._config.get_config()[
+            "run_config_search_max_model_batch_size"
+        ].is_set_by_user()
+
+        if (
+            min_batch_size_is_set_by_config
+            and batch_size < self._config.run_config_search_min_model_batch_size
+        ):
+            return self._config.run_config_search_min_model_batch_size
+
+        if (
+            max_batch_size_is_set_by_config
+            and batch_size > self._config.run_config_search_max_model_batch_size
+        ):
+            return self._config.run_config_search_max_model_batch_size
+
+        return batch_size
+
+    def _calculate_instance_count(
+        self, dimension_values: Dict[str, Union[int, float]]
+    ) -> int:
+        instance_count = int(dimension_values.get("instance_count", 1))
+
+        min_instance_count_is_set_by_config = self._config.get_config()[
+            "run_config_search_min_instance_count"
+        ].is_set_by_user()
+
+        max_instance_count_is_set_by_config = self._config.get_config()[
+            "run_config_search_max_instance_count"
+        ].is_set_by_user()
+
+        if (
+            min_instance_count_is_set_by_config
+            and instance_count < self._config.run_config_search_min_instance_count
+        ):
+            return self._config.run_config_search_min_instance_count
+
+        if (
+            max_instance_count_is_set_by_config
+            and instance_count > self._config.run_config_search_max_instance_count
+        ):
+            return self._config.run_config_search_max_instance_count
+
+        return instance_count
+
+    def _calculate_concurrency(
+        self, dimension_values: Dict[str, Union[int, float]]
+    ) -> int:
+        model_batch_size = self._calculate_model_batch_size(dimension_values)
+        instance_count = self._calculate_instance_count(dimension_values)
+        concurrency = 2 * model_batch_size * instance_count
+
+        min_concurrency_is_set_by_config = self._config.get_config()[
+            "run_config_search_min_concurrency"
+        ].is_set_by_user()
+
+        max_concurrency_is_set_by_config = self._config.get_config()[
+            "run_config_search_max_concurrency"
+        ].is_set_by_user()
+
+        if (
+            min_concurrency_is_set_by_config
+            and concurrency < self._config.run_config_search_min_concurrency
+        ):
+            return self._config.run_config_search_min_concurrency
+
+        if (
+            max_concurrency_is_set_by_config
+            and concurrency > self._config.run_config_search_max_concurrency
+        ):
+            return self._config.run_config_search_max_concurrency
+
+        return concurrency
+
+    def _create_default_run_config(self) -> RunConfig:
+        default_run_config = RunConfig(self._triton_env)
+
+        for model in self._models:
+            if model.is_ensemble():
+                default_run_config.add_model_run_config(
+                    self._create_default_ensemble_model_run_config(model)
+                )
+            else:
+                default_run_config.add_model_run_config(
+                    self._create_default_model_run_config(model)
+                )
+
+        return default_run_config
+
+    def _create_default_ensemble_model_run_config(
+        self, model: ModelProfileSpec
+    ) -> ModelRunConfig:
+        default_composing_model_config_variants = (
+            self._create_default_composing_model_config_variants(model)
+        )
+
+        default_ensemble_model_config_variant = BaseModelConfigGenerator.make_ensemble_model_config_variant(
+            model=model,
+            ensemble_composing_model_config_variants=default_composing_model_config_variants,
+            model_variant_name_manager=self._model_variant_name_manager,
+            c_api_mode=self._c_api_mode,
+        )
+
+        default_perf_analyzer_config = self._create_default_perf_analyzer_config(
+            model, default_ensemble_model_config_variant.model_config
+        )
+
+        default_model_run_config = ModelRunConfig(
+            model.model_name(),
+            default_ensemble_model_config_variant,
+            default_perf_analyzer_config,
+        )
+
+        default_model_run_config.add_composing_model_config_variants(
+            default_composing_model_config_variants
+        )
+
+        return default_model_run_config
+
+    def _create_default_composing_model_config_variants(
+        self, model: ModelProfileSpec
+    ) -> List[ModelConfigVariant]:
+        default_composing_model_config_variants: List[ModelConfigVariant] = []
+        for composing_model in self._composing_models:
+            default_composing_model_config_variants.append(
+                BaseModelConfigGenerator.make_model_config_variant(
+                    param_combo={},
+                    model=composing_model,
+                    model_variant_name_manager=self._model_variant_name_manager,
+                    c_api_mode=self._c_api_mode,
+                )
+            )
+
+        return default_composing_model_config_variants
+
+    def _create_default_model_run_config(
+        self, model: ModelProfileSpec
+    ) -> ModelRunConfig:
+        default_model_config_variant = (
+            BaseModelConfigGenerator.make_model_config_variant(
+                param_combo={},
+                model=model,
+                model_variant_name_manager=self._model_variant_name_manager,
+                c_api_mode=self._c_api_mode,
+            )
+        )
+
+        default_perf_analyzer_config = self._create_default_perf_analyzer_config(
+            model, default_model_config_variant.model_config
+        )
+
+        default_model_run_config = ModelRunConfig(
+            model.model_name(),
+            default_model_config_variant,
+            default_perf_analyzer_config,
+        )
+
+        default_composing_model_config_variants = (
+            self._create_default_composing_model_config_variants(model)
+        )
+
+        if default_composing_model_config_variants:
+            default_model_run_config.add_composing_model_config_variants(
+                default_composing_model_config_variants
+            )
+
+        return default_model_run_config
+
+    def _create_default_perf_analyzer_config(
+        self, model: ModelProfileSpec, model_config: ModelConfig
+    ) -> PerfAnalyzerConfig:
+        default_perf_analyzer_config = PerfAnalyzerConfig()
+        default_perf_analyzer_config.update_config_from_profile_config(
+            model_config.get_field("name"), self._config
+        )
+
+        default_concurrency = self._calculate_default_concurrency(model_config)
+
+        perf_config_params = {
+            "batch-size": DEFAULT_BATCH_SIZES,
+            "concurrency-range": default_concurrency,
+        }
+        default_perf_analyzer_config.update_config(perf_config_params)
+
+        default_perf_analyzer_config.update_config(model.perf_analyzer_flags())
+
+        return default_perf_analyzer_config
+
+    def _calculate_default_concurrency(self, model_config: ModelConfig) -> int:
+        default_max_batch_size = model_config.max_batch_size()
+        default_instance_count = model_config.instance_group_count(
+            system_gpu_count=self._gpu_count
+        )
+        default_concurrency = 2 * default_max_batch_size * default_instance_count
+
+        return default_concurrency
+
+    def _print_debug_logs(
+        self, measurements: List[Union[RunConfigMeasurement, None]]
+    ) -> None:
+        if measurements is not None and measurements[0] is not None:
+            assert len(measurements) == 1
+
+            throughput = measurements[0].get_non_gpu_metric_value("perf_throughput")
+            latency = measurements[0].get_non_gpu_metric_value("perf_latency_p99")
+
+            if self._best_measurement:
+                best_throughput = self._best_measurement.get_non_gpu_metric_value(
+                    "perf_throughput"
+                )
+                best_latency = self._best_measurement.get_non_gpu_metric_value(
+                    "perf_latency_p99"
+                )
+            else:
+                best_throughput = 0
+                best_latency = 0
+
+            logger.debug(
+                f"Measurement for {self._coordinate_to_measure}: "
+                f"throughput = {throughput}, latency = {latency} "
+                f"(best throughput: {best_throughput}, best_latency: {best_latency})"
+            )
+        else:
+            logger.debug(f"Measurement for {self._coordinate_to_measure}: None.")