triton-model-analyzer 1.48.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,388 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from math import log2
18
+ from typing import Any, Dict, List, Optional, Tuple, Union
19
+
20
+ from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
21
+ from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
22
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
23
+
24
+ from .search_parameter import ParameterCategory, ParameterUsage, SearchParameter
25
+
26
+
27
class SearchParameters:
    """
    Contains information about all configuration parameters the user wants to search
    """

    # These map to the run-config-search fields
    # See github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md
    #
    # Parameters searched on a power-of-two (exponential) scale.
    exponential_rcs_parameters = [
        "max_batch_size",
        "batch_sizes",
        "concurrency",
        "request_rate",
    ]
    # Parameters searched on a linear scale.
    linear_rcs_parameters = ["instance_group"]

    # Parameters that modify the model configuration itself.
    model_parameters = [
        "max_batch_size",
        "instance_group",
        "max_queue_delay_microseconds",
    ]
    # Parameters applied at runtime (client-side load settings).
    runtime_parameters = ["batch_sizes", "concurrency", "request_rate"]

    def __init__(
        self,
        model: ModelProfileSpec,
        config: Optional[ConfigCommandProfile] = None,
        is_bls_model: bool = False,
        is_ensemble_model: bool = False,
        is_composing_model: bool = False,
    ):
        """
        Parameters
        ----------
        model : ModelProfileSpec
            The model whose user-specified parameters are collected.
        config : ConfigCommandProfile, optional
            Profile command config; a fresh default instance is created when
            omitted. (The original signature used `ConfigCommandProfile()` as
            a default value, which is evaluated once at class-definition time
            and shared by every call — a mutable-default bug.)
        is_bls_model / is_ensemble_model / is_composing_model : bool
            Flags that suppress searching of parameters that do not apply to
            those model kinds.
        """
        self._config = ConfigCommandProfile() if config is None else config
        self._parameters = model.parameters()
        self._model_config_parameters = model.model_config_parameters()
        self._supports_max_batch_size = model.supports_batching()
        self._search_parameters: Dict[str, SearchParameter] = {}
        self._is_ensemble_model = is_ensemble_model
        self._is_bls_model = is_bls_model
        self._is_composing_model = is_composing_model

        self._populate_search_parameters()

    def get_search_parameters(self) -> Dict[str, SearchParameter]:
        """Return all populated search parameters, keyed by name."""
        return self._search_parameters

    def get_parameter(self, name: str) -> Optional[SearchParameter]:
        """Return the named parameter, or None if it was not populated."""
        return self._search_parameters.get(name)

    def get_type(self, name: str) -> ParameterUsage:
        """Return the usage (MODEL vs RUNTIME) of the named parameter."""
        return self._search_parameters[name].usage

    def get_category(self, name: str) -> ParameterCategory:
        """Return the category (range vs list) of the named parameter."""
        return self._search_parameters[name].category

    def get_range(self, name: str) -> Tuple[Optional[int], Optional[int]]:
        """Return (min_range, max_range) for a range-based parameter."""
        return (
            self._search_parameters[name].min_range,
            self._search_parameters[name].max_range,
        )

    def get_list(self, name: str) -> Optional[List[Any]]:
        """Return the enumerated list for a list-based parameter."""
        return self._search_parameters[name].enumerated_list

    def number_of_total_possible_configurations(self) -> int:
        """Return the product of every parameter's configuration count."""
        total_number_of_configs = 1
        for parameter in self._search_parameters.values():
            total_number_of_configs *= self._number_of_configurations_for_parameter(
                parameter
            )

        return total_number_of_configs

    def print_info(self, name: str) -> str:
        """Return a human-readable summary line for the named parameter."""
        info_string = f" {name}: "

        parameter = self._search_parameters[name]
        if parameter.category is ParameterCategory.INTEGER:
            info_string += f"{parameter.min_range} to {parameter.max_range}"
        elif parameter.category is ParameterCategory.EXPONENTIAL:
            # Exponential ranges are stored as log2 values; display the
            # actual (2**x) endpoints.
            info_string += f"{2**parameter.min_range} to {2**parameter.max_range}"  # type: ignore
        elif (
            parameter.category is ParameterCategory.INT_LIST
            or parameter.category is ParameterCategory.STR_LIST
        ):
            info_string += f"{parameter.enumerated_list}"

        info_string += f" ({self._number_of_configurations_for_parameter(parameter)})"

        return info_string

    def _number_of_configurations_for_parameter(
        self, parameter: SearchParameter
    ) -> int:
        """Count the discrete values a single parameter can take."""
        if (
            parameter.category is ParameterCategory.INTEGER
            or parameter.category is ParameterCategory.EXPONENTIAL
        ):
            number_of_parameter_configs = parameter.max_range - parameter.min_range + 1  # type: ignore
        else:
            number_of_parameter_configs = len(parameter.enumerated_list)  # type: ignore

        return number_of_parameter_configs

    def _populate_search_parameters(self) -> None:
        self._populate_parameters()
        self._populate_model_config_parameters()

    def _populate_parameters(self) -> None:
        """Populate runtime (client-side) parameters."""
        self._populate_batch_sizes()

        # Composing models are exercised through their parent model, so they
        # get no client-side load parameter of their own.
        if not self._is_composing_model:
            if self._config.is_request_rate_specified(self._parameters):
                self._populate_request_rate()
            else:
                self._populate_concurrency()

    def _populate_model_config_parameters(self) -> None:
        """Populate model-config (server-side) parameters."""
        self._populate_max_batch_size()
        self._populate_instance_group()
        self._populate_max_queue_delay_microseconds()

    def _populate_batch_sizes(self) -> None:
        # batch_sizes is only searched when the user enumerates values;
        # there is no RCS-range fallback here.
        if self._parameters and self._parameters["batch_sizes"]:
            self._populate_list_parameter(
                parameter_name="batch_sizes",
                parameter_list=self._parameters["batch_sizes"],
                parameter_category=ParameterCategory.INT_LIST,
            )

    def _populate_concurrency(self) -> None:
        if self._parameters and self._parameters["concurrency"]:
            self._populate_list_parameter(
                parameter_name="concurrency",
                parameter_list=self._parameters["concurrency"],
                parameter_category=ParameterCategory.INT_LIST,
            )
        elif self._config.use_concurrency_formula:
            # Concurrency will be derived by formula; nothing to search.
            return
        else:
            self._populate_rcs_parameter(
                parameter_name="concurrency",
                rcs_parameter_min_value=self._config.run_config_search_min_concurrency,
                rcs_parameter_max_value=self._config.run_config_search_max_concurrency,
            )

    def _populate_request_rate(self) -> None:
        if self._parameters and self._parameters["request_rate"]:
            self._populate_list_parameter(
                parameter_name="request_rate",
                parameter_list=self._parameters["request_rate"],
                parameter_category=ParameterCategory.INT_LIST,
            )
        else:
            self._populate_rcs_parameter(
                parameter_name="request_rate",
                rcs_parameter_min_value=self._config.run_config_search_min_request_rate,
                rcs_parameter_max_value=self._config.run_config_search_max_request_rate,
            )

    def _populate_max_batch_size(self) -> None:
        # Example config format:
        # model_config_parameters:
        #   max_batch_size: [1, 4, 16]
        if self._is_key_in_model_config_parameters("max_batch_size"):
            parameter_list = self._model_config_parameters["max_batch_size"]
            self._populate_list_parameter(
                parameter_name="max_batch_size",
                parameter_list=parameter_list,
                parameter_category=ParameterCategory.INT_LIST,
            )
        elif self._supports_max_batch_size and not self._is_bls_model:
            # Need to populate max_batch_size based on RCS min/max values
            # when no model config parameters are present
            self._populate_rcs_parameter(
                parameter_name="max_batch_size",
                rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size,
                rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size,
            )

    def _populate_instance_group(self) -> None:
        # Example config format:
        #
        # model_config_parameters:
        #   instance_group:
        #   - kind: KIND_GPU
        #     count: [1, 2, 3, 4]
        if self._is_key_in_model_config_parameters("instance_group"):
            # Only the first instance-group entry's "count" list is searched.
            parameter_list = self._model_config_parameters["instance_group"][0][0][
                "count"
            ]

            self._populate_list_parameter(
                parameter_name="instance_group",
                parameter_list=parameter_list,
                parameter_category=ParameterCategory.INT_LIST,
            )
        elif not self._is_ensemble_model:
            # Need to populate instance_group based on RCS min/max values
            # when no model config parameters are present
            self._populate_rcs_parameter(
                parameter_name="instance_group",
                rcs_parameter_min_value=self._config.run_config_search_min_instance_count,
                rcs_parameter_max_value=self._config.run_config_search_max_instance_count,
            )

    def _is_key_in_model_config_parameters(self, key: str) -> bool:
        key_found = bool(
            self._model_config_parameters and key in self._model_config_parameters
        )

        return key_found

    def _populate_max_queue_delay_microseconds(self) -> None:
        # Example format
        #
        # model_config_parameters:
        #   dynamic_batching:
        #     max_queue_delay_microseconds: [100, 200, 300]

        # There is no RCS field for max_queue_delay_microseconds
        if self._is_max_queue_delay_in_model_config_parameters():
            self._populate_list_parameter(
                parameter_name="max_queue_delay_microseconds",
                parameter_list=self._model_config_parameters["dynamic_batching"][0][
                    "max_queue_delay_microseconds"
                ],
                parameter_category=ParameterCategory.INT_LIST,
            )

    def _is_max_queue_delay_in_model_config_parameters(self) -> bool:
        if self._model_config_parameters:
            max_queue_delay_present = (
                "dynamic_batching" in self._model_config_parameters.keys()
                and (
                    "max_queue_delay_microseconds"
                    in self._model_config_parameters["dynamic_batching"][0]
                )
            )
        else:
            max_queue_delay_present = False

        return max_queue_delay_present

    def _populate_list_parameter(
        self,
        parameter_name: str,
        parameter_list: List[Union[int, str]],
        parameter_category: ParameterCategory,
    ) -> None:
        """Register a parameter whose values are user-enumerated."""
        usage = self._determine_parameter_usage(parameter_name)

        self._add_search_parameter(
            name=parameter_name,
            usage=usage,
            category=parameter_category,
            enumerated_list=parameter_list,
        )

    def _populate_rcs_parameter(
        self,
        parameter_name: str,
        rcs_parameter_min_value: int,
        rcs_parameter_max_value: int,
    ) -> None:
        """Register a parameter searched over a run-config-search range."""
        usage = self._determine_parameter_usage(parameter_name)
        category = self._determine_parameter_category(parameter_name)

        if category == ParameterCategory.EXPONENTIAL:
            # Exponential ranges are stored as log2 exponents.
            min_range = int(log2(rcs_parameter_min_value))  # type: ignore
            max_range = int(log2(rcs_parameter_max_value))  # type: ignore
        else:
            min_range = rcs_parameter_min_value  # type: ignore
            max_range = rcs_parameter_max_value  # type: ignore

        self._add_search_parameter(
            name=parameter_name,
            usage=usage,
            category=category,
            min_range=min_range,
            max_range=max_range,
        )

    def _determine_parameter_category(self, name: str) -> ParameterCategory:
        if name in SearchParameters.exponential_rcs_parameters:
            category = ParameterCategory.EXPONENTIAL
        elif name in SearchParameters.linear_rcs_parameters:
            category = ParameterCategory.INTEGER
        else:
            # FIX: the original constructed this exception without raising it,
            # which then fell through to an UnboundLocalError on `category`.
            raise TritonModelAnalyzerException(f"ParameterCategory not found for {name}")

        return category

    def _determine_parameter_usage(self, name: str) -> ParameterUsage:
        if name in SearchParameters.model_parameters:
            usage = ParameterUsage.MODEL
        elif name in SearchParameters.runtime_parameters:
            usage = ParameterUsage.RUNTIME
        else:
            # FIX: the original constructed this exception without raising it,
            # which then fell through to an UnboundLocalError on `usage`.
            raise TritonModelAnalyzerException(f"ParameterUsage not found for {name}")

        return usage

    def _add_search_parameter(
        self,
        name: str,
        usage: ParameterUsage,
        category: ParameterCategory,
        min_range: Optional[int] = None,
        max_range: Optional[int] = None,
        enumerated_list: Optional[List[Any]] = None,
    ) -> None:
        """Validate inputs and store a new SearchParameter under `name`."""
        # FIX: the original default was a shared mutable list ([]).
        if enumerated_list is None:
            enumerated_list = []

        self._check_for_illegal_input(category, min_range, max_range, enumerated_list)

        self._search_parameters[name] = SearchParameter(
            usage=usage,
            category=category,
            enumerated_list=enumerated_list,
            min_range=min_range,
            max_range=max_range,
        )

    def _check_for_illegal_input(
        self,
        category: ParameterCategory,
        min_range: Optional[int],
        max_range: Optional[int],
        enumerated_list: List[Any],
    ) -> None:
        """Raise TritonModelAnalyzerException on inconsistent range/list input."""
        if (
            category is ParameterCategory.INT_LIST
            or category is ParameterCategory.STR_LIST
        ):
            self._check_for_illegal_list_input(min_range, max_range, enumerated_list)
        else:
            if min_range is None or max_range is None:
                raise TritonModelAnalyzerException(
                    f"Both min_range and max_range must be specified"
                )

            # FIX: the original guarded this with `if min_range and max_range:`,
            # silently skipping validation when either bound was 0.
            if min_range > max_range:
                raise TritonModelAnalyzerException(
                    f"min_range cannot be larger than max_range"
                )

    def _check_for_illegal_list_input(
        self,
        min_range: Optional[int],
        max_range: Optional[int],
        enumerated_list: List[Any],
    ) -> None:
        """Raise if a list-category parameter mixes list and range inputs."""
        if not enumerated_list:
            raise TritonModelAnalyzerException(
                f"enumerated_list must be specified for a ParameterCategory.LIST"
            )
        elif min_range is not None:
            raise TritonModelAnalyzerException(
                f"min_range cannot be specified for a ParameterCategory.LIST"
            )
        elif max_range is not None:
            raise TritonModelAnalyzerException(
                f"max_range cannot be specified for a ParameterCategory.LIST"
            )
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.