triton-model-analyzer 1.48.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
model_analyzer/config/generate/perf_analyzer_config_generator.py
@@ -0,0 +1,312 @@
+#!/usr/bin/env python3
+
+# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Generator, List, Optional
+
+from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
+from model_analyzer.constants import (
+    LOGGER_NAME,
+    THROUGHPUT_MINIMUM_CONSECUTIVE_BATCH_SIZE_TRIES,
+    THROUGHPUT_MINIMUM_CONSECUTIVE_PARAMETER_TRIES,
+    THROUGHPUT_MINIMUM_GAIN,
+)
+from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
+from model_analyzer.result.run_config_measurement import RunConfigMeasurement
+
+from .config_generator_interface import ConfigGeneratorInterface
+from .generator_utils import GeneratorUtils as utils
+
+logger = logging.getLogger(LOGGER_NAME)
+
+
+class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
+    """
+    Given Perf Analyzer configuration options, generates Perf Analyzer configs
+
+    All combinations are pregenerated in __init__, but it may return is_done==true
+    earlier depending on results that it receives
+    """
+
+    def __init__(
+        self,
+        cli_config: ConfigCommandProfile,
+        model_name: str,
+        model_perf_analyzer_flags: dict,
+        model_parameters: dict,
+        early_exit_enable: bool,
+    ) -> None:
+        """
+        Parameters
+        ----------
+        cli_config: ConfigCommandProfile
+            CLI Configuration Options
+
+        model_name: string
+            The model name to profile
+
+        model_perf_analyzer_flags: Dict
+            custom perf analyzer configuration
+
+        model_parameters: Dict
+            model constraints for batch_sizes, concurrency and/or request rate
+
+        early_exit_enable: Bool
+            If true, this class can early exit during search of concurrency/request rate
+        """
+
+        self._early_exit_enable = early_exit_enable
+
+        # All configs are pregenerated in _configs[][]
+        # Indexed as follows:
+        #   _configs[_curr_batch_size_index][_curr_parameter_index]
+        #
+        self._curr_parameter_index = 0
+        self._curr_batch_size_index = 0
+        self._configs: List[List[PerfAnalyzerConfig]] = []
+        self._parameter_warning_printed = False
+
+        # Flag to indicate we have started to return results
+        #
+        self._generator_started = False
+
+        self._last_results: List[RunConfigMeasurement] = []
+        self._parameter_results: List[Optional[RunConfigMeasurement]] = []
+        self._batch_size_results: List[Optional[RunConfigMeasurement]] = []
+
+        self._model_name = model_name
+        self._perf_analyzer_flags = model_perf_analyzer_flags
+
+        self._batch_sizes = sorted(model_parameters["batch_sizes"])
+        self._cli_config = cli_config
+
+        self._model_parameters = model_parameters
+        self._parameters = self._create_parameter_list()
+        self._generate_perf_configs()
+
+    @staticmethod
+    def throughput_gain_valid_helper(
+        throughputs: List[Optional[RunConfigMeasurement]],
+        min_tries: int = THROUGHPUT_MINIMUM_CONSECUTIVE_PARAMETER_TRIES,
+        min_gain: float = THROUGHPUT_MINIMUM_GAIN,
+    ) -> bool:
+        if len(throughputs) < min_tries:
+            return True
+
+        throughputs_in_range = [
+            PerfAnalyzerConfigGenerator.get_throughput(throughputs[x])
+            for x in range(-min_tries, 0)
+        ]
+
+        first = throughputs_in_range[0]
+        best = max(throughputs_in_range)
+
+        gain = (best - first) / first
+
+        return gain > min_gain
+
+    @staticmethod
+    def get_throughput(measurement: Optional[RunConfigMeasurement]) -> float:
+        if measurement:
+            return measurement.get_non_gpu_metric_value("perf_throughput")
+        else:
+            return 0.0
+
+    def _is_done(self) -> bool:
+        """Returns true if this generator is done generating configs"""
+        return self._generator_started and self._done_walking()
+
+    def get_configs(self) -> Generator[PerfAnalyzerConfig, None, None]:
+        """Returns the next generated config"""
+        while True:
+            if self._is_done():
+                break
+
+            self._generator_started = True
+            config = self._configs[self._curr_batch_size_index][
+                self._curr_parameter_index
+            ]
+            yield (config)
+
+            if self._last_results_erroneous():
+                break
+
+            self._step()
+
+    def set_last_results(
+        self, measurements: List[Optional[RunConfigMeasurement]]
+    ) -> None:
+        """
+        Given the results from the last PerfAnalyzerConfig, make decisions
+        about future configurations to generate
+
+        Parameters
+        ----------
+        measurements: List of Measurements from the last run(s)
+        """
+
+        # Remove 'NONE' cases, and find single max measurement from the list
+        valid_measurements = [m for m in measurements if m]
+
+        self._last_results = []
+        if valid_measurements:
+            measurement = [max(valid_measurements)]
+
+            self._last_results = measurement
+            self._parameter_results.extend(measurement)
+
+    def _create_parameter_list(self) -> List[int]:
+        # The two possible parameters are request rate or concurrency
+        # Concurrency is the default and will be used unless the user specifies
+        # request rate, either as a model parameter or a config option
+        if self._cli_config.is_request_rate_specified(self._model_parameters):
+            return self._create_request_rate_list()
+        else:
+            return self._create_concurrency_list()
+
+    def _create_request_rate_list(self) -> List[int]:
+        if self._model_parameters["request_rate"]:
+            return sorted(self._model_parameters["request_rate"])
+        elif self._cli_config.run_config_search_disable:
+            return [1]
+        else:
+            return utils.generate_doubled_list(
+                self._cli_config.run_config_search_min_request_rate,
+                self._cli_config.run_config_search_max_request_rate,
+            )
+
+    def _create_concurrency_list(self) -> List[int]:
+        if self._model_parameters["concurrency"]:
+            return sorted(self._model_parameters["concurrency"])
+        elif self._cli_config.run_config_search_disable:
+            return [1]
+        else:
+            return utils.generate_doubled_list(
+                self._cli_config.run_config_search_min_concurrency,
+                self._cli_config.run_config_search_max_concurrency,
+            )
+
+    def _generate_perf_configs(self) -> None:
+        perf_config_non_parameter_values = (
+            self._create_non_parameter_perf_config_values()
+        )
+
+        for params in utils.generate_parameter_combinations(
+            perf_config_non_parameter_values
+        ):
+            configs_with_concurrency = []
+            for parameter in self._parameters:
+                new_perf_config = PerfAnalyzerConfig()
+
+                new_perf_config.update_config_from_profile_config(
+                    self._model_name, self._cli_config
+                )
+
+                new_perf_config.update_config(params)
+
+                if self._cli_config.is_request_rate_specified(self._model_parameters):
+                    new_perf_config.update_config({"request-rate-range": parameter})
+                else:
+                    new_perf_config.update_config({"concurrency-range": parameter})
+
+                # User provided flags can override the search parameters
+                new_perf_config.update_config(self._perf_analyzer_flags)
+
+                configs_with_concurrency.append(new_perf_config)
+            self._configs.append(configs_with_concurrency)
+
+    def _create_non_parameter_perf_config_values(self) -> dict:
+        perf_config_values = {
+            "batch-size": self._batch_sizes,
+        }
+
+        return perf_config_values
+
+    def _step(self) -> None:
+        self._step_parameter()
+
+        if self._done_walking_parameters():
+            self._add_best_throughput_to_batch_sizes()
+            self._reset_parameters()
+            self._step_batch_size()
+
+    def _add_best_throughput_to_batch_sizes(self) -> None:
+        if self._parameter_results:
+            # type is List[Optional[RCM]]
+            best = max(self._parameter_results)  # type: ignore
+            self._batch_size_results.append(best)
+
+    def _reset_parameters(self) -> None:
+        self._curr_parameter_index = 0
+        self._parameter_warning_printed = False
+        self._parameter_results = []
+
+    def _step_parameter(self) -> None:
+        self._curr_parameter_index += 1
+
+    def _step_batch_size(self) -> None:
+        self._curr_batch_size_index += 1
+
+    def _done_walking(self) -> bool:
+        return self._done_walking_batch_sizes()
+
+    def _done_walking_parameters(self) -> bool:
+        if len(self._parameters) == self._curr_parameter_index:
+            return True
+        if self._early_exit_enable and not self._parameter_throughput_gain_valid():
+            if not self._parameter_warning_printed:
+                if self._cli_config.is_request_rate_specified(self._model_parameters):
+                    logger.info(
+                        "No longer increasing request rate as throughput has plateaued"
+                    )
+                else:
+                    logger.info(
+                        "No longer increasing concurrency as throughput has plateaued"
+                    )
+                self._parameter_warning_printed = True
+            return True
+        return False
+
+    def _done_walking_batch_sizes(self) -> bool:
+        if len(self._batch_sizes) == self._curr_batch_size_index:
+            return True
+
+        if self._early_exit_enable and not self._batch_size_throughput_gain_valid():
+            logger.info(
+                "No longer increasing client batch size as throughput has plateaued"
+            )
+
+            return True
+        return False
+
+    def _last_results_erroneous(self) -> bool:
+        return not self._last_results or self._last_results[-1] is None
+
+    def _parameter_throughput_gain_valid(self) -> bool:
+        """Check if any of the last X parameter results resulted in valid gain"""
+        return PerfAnalyzerConfigGenerator.throughput_gain_valid_helper(
+            throughputs=self._parameter_results,
+            min_tries=THROUGHPUT_MINIMUM_CONSECUTIVE_PARAMETER_TRIES,
+            min_gain=THROUGHPUT_MINIMUM_GAIN,
+        )
+
+    def _batch_size_throughput_gain_valid(self) -> bool:
+        """Check if any of the last X batch_size results resulted in valid gain"""
+        return PerfAnalyzerConfigGenerator.throughput_gain_valid_helper(
+            throughputs=self._batch_size_results,
+            min_tries=THROUGHPUT_MINIMUM_CONSECUTIVE_BATCH_SIZE_TRIES,
+            min_gain=THROUGHPUT_MINIMUM_GAIN,
+        )
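
The generator above is driven from the caller's side in a yield/measure loop: get_configs() yields one PerfAnalyzerConfig at a time, the caller runs Perf Analyzer with it, and set_last_results() feeds the measurements back so the early-exit checks can stop a sweep once throughput plateaus (gain below THROUGHPUT_MINIMUM_GAIN over the last few consecutive tries). A minimal sketch of that loop, where run_perf_analyzer() is a hypothetical stand-in for model_analyzer's metrics machinery and the argument values are illustrative only:

    # Hypothetical driver loop; run_perf_analyzer() is a placeholder, not a
    # model_analyzer API, and is assumed to return List[Optional[RunConfigMeasurement]].
    generator = PerfAnalyzerConfigGenerator(
        cli_config=cli_config,             # a ConfigCommandProfile built elsewhere
        model_name="example_model",        # illustrative model name
        model_perf_analyzer_flags={},      # user flags override generated search values
        model_parameters={
            "batch_sizes": [1, 2, 4],      # swept in the outer loop
            "concurrency": [],             # empty -> doubled list between search min/max
            "request_rate": [],            # empty -> concurrency search is used
        },
        early_exit_enable=True,
    )

    for perf_config in generator.get_configs():
        measurements = run_perf_analyzer(perf_config)   # run one Perf Analyzer pass
        generator.set_last_results(measurements)        # feed results back for early exit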
model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+
+# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from copy import deepcopy
+from typing import Generator, List, Optional
+
+from model_analyzer.config.generate.concurrency_sweeper import ConcurrencySweeper
+from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
+from model_analyzer.config.generate.model_variant_name_manager import (
+    ModelVariantNameManager,
+)
+from model_analyzer.config.generate.quick_run_config_generator import (
+    QuickRunConfigGenerator,
+)
+from model_analyzer.config.generate.search_config import SearchConfig
+from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
+from model_analyzer.config.run.run_config import RunConfig
+from model_analyzer.constants import LOGGER_NAME
+from model_analyzer.result.parameter_search import ParameterSearch
+from model_analyzer.result.result_manager import ResultManager
+from model_analyzer.result.run_config_measurement import RunConfigMeasurement
+
+from .config_generator_interface import ConfigGeneratorInterface
+
+logger = logging.getLogger(LOGGER_NAME)
+
+
+class QuickPlusConcurrencySweepRunConfigGenerator(ConfigGeneratorInterface):
+    """
+    First run QuickRunConfigGenerator for a hill climbing search, then use
+    ParameterSearch for a concurrency sweep + binary search of the default
+    and Top N results
+    """
+
+    def __init__(
+        self,
+        search_config: SearchConfig,
+        config: ConfigCommandProfile,
+        gpu_count: int,
+        models: List[ModelProfileSpec],
+        composing_models: List[ModelProfileSpec],
+        result_manager: ResultManager,
+        model_variant_name_manager: ModelVariantNameManager,
+    ):
+        """
+        Parameters
+        ----------
+        search_config: SearchConfig
+            Defines parameters and dimensions for the search
+        config: ConfigCommandProfile
+            Profile configuration information
+        gpu_count: Number of gpus in the system
+        models: List of ModelProfileSpec
+            List of models to profile
+        composing_models: List of ModelProfileSpec
+            List of composing models that exist inside of the supplied models
+        result_manager: ResultManager
+            The object that handles storing and sorting the results from the perf analyzer
+        model_variant_name_manager: ModelVariantNameManager
+            Maps model variants to config names
+        """
+        self._search_config = search_config
+        self._config = config
+        self._gpu_count = gpu_count
+        self._models = models
+        self._composing_models = composing_models
+        self._result_manager = result_manager
+        self._model_variant_name_manager = model_variant_name_manager
+
+    def set_last_results(
+        self, measurements: List[Optional[RunConfigMeasurement]]
+    ) -> None:
+        self._last_measurement = measurements[-1]
+        self._rcg.set_last_results(measurements)
+
+    def get_configs(self) -> Generator[RunConfig, None, None]:
+        """
+        Returns
+        -------
+        RunConfig
+            The next RunConfig generated by this class
+        """
+
+        logger.info("")
+        logger.info("Starting quick mode search to find optimal configs")
+        logger.info("")
+        yield from self._execute_quick_search()
+        logger.info("")
+        if self._config.concurrency_sweep_disable:
+            logger.info("Done with quick mode search.")
+        else:
+            logger.info(
+                "Done with quick mode search. Gathering concurrency sweep measurements for reports"
+            )
+            logger.info("")
+            yield from ConcurrencySweeper(
+                config=self._config, result_manager=self._result_manager
+            ).get_configs()
+            logger.info("")
+            logger.info("Done gathering concurrency sweep measurements for reports")
+        logger.info("")
+
+    def _execute_quick_search(self) -> Generator[RunConfig, None, None]:
+        self._rcg: ConfigGeneratorInterface = self._create_quick_run_config_generator()
+
+        yield from self._rcg.get_configs()
+
+    def _create_quick_run_config_generator(self) -> QuickRunConfigGenerator:
+        return QuickRunConfigGenerator(
+            search_config=self._search_config,
+            config=self._config,
+            gpu_count=self._gpu_count,
+            models=self._models,
+            composing_models=self._composing_models,
+            model_variant_name_manager=self._model_variant_name_manager,
+        )
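
QuickPlusConcurrencySweepRunConfigGenerator is consumed through the same generator interface: it first yields the RunConfigs proposed by the quick (hill climbing) search and then, unless concurrency_sweep_disable is set, the configs produced by ConcurrencySweeper for the report measurements. A minimal driver sketch, assuming the search_config, profile config, models, result manager, and variant name manager have already been built by model-analyzer, and with execute_run_config() as a hypothetical stand-in for deploying the model variant and running Perf Analyzer:

    # Hypothetical driver; execute_run_config() is a placeholder and is assumed
    # to return List[Optional[RunConfigMeasurement]] for each RunConfig.
    rcg = QuickPlusConcurrencySweepRunConfigGenerator(
        search_config=search_config,
        config=profile_config,
        gpu_count=1,
        models=models,
        composing_models=[],              # no ensemble/BLS composing models in this sketch
        result_manager=result_manager,
        model_variant_name_manager=model_variant_name_manager,
    )

    for run_config in rcg.get_configs():
        measurements = execute_run_config(run_config)   # deploy variant, run Perf Analyzer
        rcg.set_last_results(measurements)              # forwarded to the active sub-generator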