triton-model-analyzer 1.48.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,313 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import logging
18
+ from typing import Dict, List, Optional
19
+
20
+ from model_analyzer.constants import LOGGER_NAME
21
+ from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
22
+ from model_analyzer.triton.model.model_config import ModelConfig
23
+ from model_analyzer.triton.model.model_config_variant import ModelConfigVariant
24
+
25
+ logger = logging.getLogger(LOGGER_NAME)
26
+
27
+
28
class ModelRunConfig:
    """
    Encapsulates all the information (ModelConfigVariant + PerfConfig) needed to run
    a model in Perf Analyzer
    """

    # Fallbacks used when the model config / perf config do not specify a value
    DEFAULT_MAX_BATCH_SIZE = 1
    DEFAULT_PERF_BATCH_SIZE = 1

    def __init__(
        self,
        model_name: str,
        model_config_variant: ModelConfigVariant,
        perf_config: PerfAnalyzerConfig,
    ) -> None:
        """
        Parameters
        ----------
        model_name: str
            The name of the model
        model_config_variant : ModelConfigVariant
            model config variant corresponding to this run
        perf_config : PerfAnalyzerConfig
            List of possible run parameters to pass
            to Perf Analyzer
        """

        self._model_name = model_name
        self._model_config_variant = model_config_variant
        self._perf_config = perf_config
        self._composing_config_variants: List[ModelConfigVariant] = []

    def model_name(self) -> str:
        """
        Get the original model name for this run config.

        Returns
        -------
        str
            Original model name
        """

        return self._model_name

    def model_variant_name(self) -> str:
        """
        Get the model config variant name for this config.

        Returns
        -------
        str
            Model variant name, or an empty string when no
            model config variant has been set
        """

        if not self._model_config_variant:
            return ""

        return self._model_config_variant.variant_name

    def model_config_variant(self) -> ModelConfigVariant:
        """
        Returns
        -------
        ModelConfigVariant
            The ModelConfigVariant corresponding to this run
        """

        return self._model_config_variant

    def model_config(self) -> Optional[ModelConfig]:
        """
        Returns
        -------
        ModelConfig
            The ModelConfig corresponding to this run, or None
            when no model config variant has been set
        """

        if not self._model_config_variant:
            return None

        return self._model_config_variant.model_config

    def perf_config(self) -> PerfAnalyzerConfig:
        """
        Returns
        -------
        PerfAnalyzerConfig
            run parameters corresponding to this run of
            the perf analyzer
        """

        return self._perf_config

    def composing_config_variants(self) -> List[ModelConfigVariant]:
        """
        Returns the list of composing model config variants
        """

        return self._composing_config_variants

    def composing_configs(self) -> List[ModelConfig]:
        """
        Returns the list of composing model configs
        (an empty list when there are no composing variants)
        """

        return [
            composing_config_variant.model_config
            for composing_config_variant in self._composing_config_variants
        ]

    def representation(self) -> str:
        """
        Returns a representation string for the ModelRunConfig that can be used
        as a key to uniquely identify it

        The string is the variant name, the perf config representation and,
        when composing variants exist, their comma-joined names, separated
        by single spaces.
        """
        parts = [self.model_variant_name(), self.perf_config().representation()]

        if self._composing_config_variants:
            parts.append(",".join(self.get_composing_config_names()))

        return " ".join(parts)

    def _check_for_client_vs_model_batch_size(self) -> bool:
        """
        Returns false if client batch size is greater than model batch size. Else true
        """
        model_config = self._model_config_variant.model_config.get_config()

        max_batch_size = model_config.get(
            "max_batch_size", self.DEFAULT_MAX_BATCH_SIZE
        )
        perf_batch_size = (
            self._perf_config["batch-size"]
            if "batch-size" in self._perf_config
            else self.DEFAULT_PERF_BATCH_SIZE
        )

        legal = max_batch_size >= perf_batch_size
        if not legal:
            logger.debug(
                f"Illegal model run config because client batch size {perf_batch_size} is greater than model max batch size {max_batch_size}"
            )

        return legal

    def _check_for_preferred_vs_model_batch_size(self) -> bool:
        """
        Returns false if maximum of preferred batch size is greater than model batch size. Else true

        Every config returned by _create_model_config_dicts() is checked; the
        first violation found is logged and ends the check.
        """
        for model_config in self._create_model_config_dicts():
            max_batch_size = model_config.get(
                "max_batch_size", self.DEFAULT_MAX_BATCH_SIZE
            )

            dynamic_batching = model_config.get("dynamic_batching") or {}
            preferred_batch_sizes = dynamic_batching.get("preferred_batch_size")

            # A missing or empty preferred batch size list imposes no
            # restriction (and max() would raise ValueError on an empty list)
            if not preferred_batch_sizes:
                continue

            max_preferred_batch_size = max(preferred_batch_sizes)

            if max_batch_size < max_preferred_batch_size:
                logger.debug(
                    f"Illegal model run config because maximum of {model_config['name']}'s preferred batch size {max_preferred_batch_size} is greater than model max batch size {max_batch_size}"
                )
                return False

        return True

    def _create_model_config_dicts(self) -> List[Dict]:
        """
        Create a list of model config dictionaries for
        the given model + composing models

        For an ensemble the top-level model's own config is left out and only
        the composing models' configs are returned.
        """
        model_configs = (
            []
            if self.is_ensemble_model()
            else [self._model_config_variant.model_config.get_config()]
        )

        model_configs.extend(
            composing_config_variant.model_config.get_config()
            for composing_config_variant in self._composing_config_variants
        )

        return model_configs

    def is_legal_combination(self) -> bool:
        """
        Returns true if the run_config is valid and should be run. Else false
        """

        # The preferred batch size check is skipped when the client batch
        # size check already failed
        return (
            self._check_for_client_vs_model_batch_size()
            and self._check_for_preferred_vs_model_batch_size()
        )

    def is_ensemble_model(self) -> bool:
        """
        Returns true if the model config is an ensemble model
        """
        return self._model_config_variant.model_config.is_ensemble()

    def is_bls_model(self) -> bool:
        """
        Returns true if the model config is a BLS model
        """
        # If composing configs are present and it's not an ensemble it must be a BLS
        # Note: this will need to change if we allow ensembles to contain BLS models
        return not self.is_ensemble_model() and bool(self._composing_config_variants)

    def get_composing_config_names(self) -> List[str]:
        """
        Returns list of composing config names
        (an empty list when there are no composing variants)
        """
        return [
            composing_config_variant.variant_name
            for composing_config_variant in self._composing_config_variants
        ]

    def add_composing_model_config_variants(
        self, composing_model_config_variants: List[ModelConfigVariant]
    ) -> None:
        """
        Adds a list of composing model config variants
        """
        self._composing_config_variants.extend(composing_model_config_variants)

    @classmethod
    def from_dict(cls, model_run_config_dict):
        """
        Rebuild a ModelRunConfig from its dictionary form

        Parameters
        ----------
        model_run_config_dict : dict
            Must contain "_model_name" and "_perf_config". May contain
            "_model_config_variant" (a dict with "model_config" and
            "variant_name") and "_composing_config_variants" (a list of
            such dicts).

        Returns
        -------
        ModelRunConfig
            An instance of the class this method was called on
        """
        # Build through cls so that subclasses get an instance of their own type
        model_run_config = cls(None, None, None)
        model_run_config._model_name = model_run_config_dict["_model_name"]

        if "_model_config_variant" in model_run_config_dict:
            variant_dict = model_run_config_dict["_model_config_variant"]
            model_run_config._model_config_variant = ModelConfigVariant(
                ModelConfig.from_dict(variant_dict["model_config"]),
                variant_dict["variant_name"],
            )

        model_run_config._perf_config = PerfAnalyzerConfig.from_dict(
            model_run_config_dict["_perf_config"]
        )

        if "_composing_config_variants" in model_run_config_dict:
            model_run_config._composing_config_variants = [
                ModelConfigVariant(
                    ModelConfig.from_dict(
                        composing_config_variant_dict["model_config"]
                    ),
                    composing_config_variant_dict["variant_name"],
                )
                for composing_config_variant_dict in model_run_config_dict[
                    "_composing_config_variants"
                ]
            ]

        return model_run_config
@@ -0,0 +1,168 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from typing import List
18
+
19
+ from model_analyzer.config.run.model_run_config import ModelRunConfig
20
+ from model_analyzer.perf_analyzer.genai_perf_config import GenaiPerfConfig
21
+
22
+
23
class RunConfig:
    """
    Encapsulates all the information needed to run one or more models
    at the same time in Perf Analyzer
    """

    def __init__(self, triton_env, genai_perf_flags=None):
        """
        Parameters
        ----------
        triton_env : dict
            A dictionary of environment variables to set
            when launching tritonserver

        genai_perf_flags: dict
            The set of flags used when calling genai_perf for LLM models
        """

        self._triton_env = triton_env
        self._genai_perf_config = GenaiPerfConfig()
        self._genai_perf_config.update_config(genai_perf_flags)
        self._model_run_configs: List[ModelRunConfig] = []

    def add_model_run_config(self, model_run_config):
        """
        Add a ModelRunConfig to this RunConfig
        """
        self._model_run_configs.append(model_run_config)

    def model_run_configs(self) -> List[ModelRunConfig]:
        """
        Returns the list of ModelRunConfigs to run concurrently
        """
        return self._model_run_configs

    def representation(self):
        """
        Returns a representation string for the RunConfig that can be used
        as a key to uniquely identify it

        The per-model representations are concatenated with no separator.
        """
        return "".join(mrc.representation() for mrc in self.model_run_configs())

    def is_legal_combination(self):
        """
        Returns true if all model_run_configs are valid

        Evaluation stops at the first invalid model run config.
        """
        return all(
            model_run_config.is_legal_combination()
            for model_run_config in self._model_run_configs
        )

    def is_ensemble_model(self) -> bool:
        """
        Returns true if the first model config is an ensemble
        (an ensemble cannot be part of a multi-model)

        Raises IndexError if no model run config has been added.
        """
        return self._model_run_configs[0].is_ensemble_model()

    def is_bls_model(self) -> bool:
        """
        Returns true if the first model config is a BLS model
        (a BLS cannot be part of a multi-model)

        Raises IndexError if no model run config has been added.
        """
        return self._model_run_configs[0].is_bls_model()

    def cpu_only(self):
        """
        Returns true if all model_run_configs only operate on the CPU
        """
        return all(
            model_run_config.model_config_variant().cpu_only
            for model_run_config in self._model_run_configs
        )

    def triton_environment(self):
        """
        Returns
        -------
        dict
            The environment that tritonserver
            was run with for this RunConfig
        """

        return self._triton_env

    def genai_perf_config(self):
        """
        Returns the GenaiPerfConfig created for this RunConfig
        """
        return self._genai_perf_config

    def models_name(self):
        """Returns a single comma-joined name of the original model names"""
        return ",".join(mrc.model_name() for mrc in self.model_run_configs())

    def model_variants_name(self):
        """Returns a single comma-joined name of the model variant names"""
        return ",".join(mrc.model_variant_name() for mrc in self.model_run_configs())

    def composing_config_variants(self):
        """
        Returns a list of composing model config variants from the first model run config
        (an ensemble/BLS cannot be part of a multi-model profile)

        Raises IndexError if no model run config has been added.
        """
        return self._model_run_configs[0].composing_config_variants()

    def composing_model_variants_name(self):
        """
        Returns a single comma-joined name of the composing model variant names
        (an ensemble/BLS cannot be part of a multi-model profile)

        Raises IndexError if no model run config has been added.
        """
        return ",".join(
            cvc.variant_name
            for cvc in self.model_run_configs()[0].composing_config_variants()
        )

    def composing_configs(self):
        """
        Returns a list of composing model configs from the first model run config
        (an ensemble/BLS cannot be part of a multi-model profile)

        Raises IndexError if no model run config has been added.
        """
        return self._model_run_configs[0].composing_configs()

    def combined_model_variants_name(self):
        """
        Combines the model + composing model's variant names (joined with a '::')

        When there are no composing variants only the model variants name
        is returned.
        """
        # Compute the composing name once rather than once per use
        composing_name = self.composing_model_variants_name()

        if composing_name:
            return f"{self.model_variants_name()}::{composing_name}"

        return self.model_variants_name()

    @classmethod
    def from_dict(cls, run_config_dict):
        """
        Rebuild a RunConfig from its dictionary form

        Parameters
        ----------
        run_config_dict : dict
            Must contain "_triton_env" and "_model_run_configs" (a list of
            dictionaries accepted by ModelRunConfig.from_dict).

        Returns
        -------
        RunConfig
            An instance of the class this method was called on. Only the
            triton environment and the model run configs are read from the
            dictionary; the GenAI-Perf config is the one the constructor
            creates when no flags are passed.
        """
        # Build through cls so that subclasses get an instance of their own type
        run_config = cls({})

        run_config._triton_env = run_config_dict["_triton_env"]
        run_config._model_run_configs.extend(
            ModelRunConfig.from_dict(mrc_dict)
            for mrc_dict in run_config_dict["_model_run_configs"]
        )

        return run_config
@@ -0,0 +1,76 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from typing import Any, Dict
18
+
19
# Config parser status codes
CONFIG_PARSER_SUCCESS = 1
CONFIG_PARSER_FAILURE = 0

# Result Table constants
RESULT_TABLE_COLUMN_PADDING = 2

# Result Comparator Constants
COMPARISON_SCORE_THRESHOLD = 0

# Dict of parameters to apply on top of the default
# config to result in the default config (empty dict)
DEFAULT_CONFIG_PARAMS: Dict[str, Any] = {}

# Run Search
THROUGHPUT_MINIMUM_GAIN = 0.05
THROUGHPUT_MINIMUM_CONSECUTIVE_PARAMETER_TRIES = 4
THROUGHPUT_MINIMUM_CONSECUTIVE_BATCH_SIZE_TRIES = 4

# Quick search algorithm constants
RADIUS = 3
MIN_INITIALIZED = 3

# Reports
TOP_MODELS_REPORT_KEY = "Best Configs Across All Models"

# State Management
MAX_NUMBER_OF_INTERRUPTS = 3

# Perf Analyzer
# NOTE(review): units are not stated here; the window values are presumably
# milliseconds and the sleep time seconds - confirm against the callers
MEASUREMENT_WINDOW_STEP = 1000
MEASUREMENT_REQUEST_COUNT_STEP = 50
INTERVAL_SLEEP_TIME = 1
PERF_ANALYZER_MEASUREMENT_WINDOW = 5000
PERF_ANALYZER_MINIMUM_REQUEST_COUNT = 50
SECONDS_TO_MILLISECONDS_MULTIPLIER = 1000

# Triton Server
SERVER_OUTPUT_TIMEOUT_SECS = 5

# Logging: name passed to logging.getLogger() by the package's modules
LOGGER_NAME = "model_analyzer_logger"

# PA Error Log Filename
PA_ERROR_LOG_FILENAME = "perf_analyzer_error.log"

# Constraints
GLOBAL_CONSTRAINTS_KEY = "__default__"

# Measurement constants
INVALID_MEASUREMENT_THRESHOLD = 2

# Model analyzer package name
PACKAGE_NAME = "triton-model-analyzer"

# GENAI-PERF output file names
GENAI_PERF_CSV = "profile_export_genai_perf.csv"
GENAI_PERF_COLLATERAL = ["llm_inputs.json", "profile_export.json"]
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+
18
class Device:
    """
    Generic base class for the devices that are being monitored
    """

    def __init__(self):
        """
        The generic device keeps no state, so there is nothing to set up
        """
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from model_analyzer.device.device import Device
18
+
19
+
20
class GPUDevice(Device):
    """
    Representing a GPU device
    """

    def __init__(self, device_name, device_id, pci_bus_id, device_uuid):
        """
        Parameters
        ----------
        device_name: str
            Human readable name of the device
        device_id : int
            Device id according to the `nvidia-smi` output
        pci_bus_id : str
            PCI bus id
        device_uuid : str
            Device UUID

        Raises
        ------
        AssertionError
            If an argument is not exactly of the type listed above
            (subclasses of those types are rejected as well)
        """

        # Run the base class initializer so any state it sets up in the
        # future is also present on GPU devices
        super().__init__()

        assert type(device_name) is str
        assert type(device_id) is int
        assert type(pci_bus_id) is str
        assert type(device_uuid) is str

        self._device_name = device_name
        self._device_id = device_id
        self._pci_bus_id = pci_bus_id
        self._device_uuid = device_uuid

    def device_name(self):
        """
        Returns
        -------
        str
            device name
        """

        return self._device_name

    def device_id(self):
        """
        Returns
        -------
        int
            device id of this GPU
        """

        return self._device_id

    def pci_bus_id(self):
        """
        Returns
        -------
        str
            PCI bus id of this GPU
        """

        return self._pci_bus_id

    def device_uuid(self):
        """
        Returns
        -------
        str
            UUID of this GPU
        """

        return self._device_uuid