triton-model-analyzer 1.48.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1747 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import argparse
18
+ import logging
19
+ import os
20
+ from copy import deepcopy
21
+
22
+ import numba.cuda
23
+ import psutil
24
+ from google.protobuf.descriptor import FieldDescriptor
25
+ from tritonclient.grpc.model_config_pb2 import ModelConfig
26
+
27
+ from model_analyzer.config.input.config_utils import (
28
+ binary_path_validator,
29
+ file_path_validator,
30
+ objective_list_output_mapper,
31
+ parent_path_validator,
32
+ )
33
+ from model_analyzer.constants import LOGGER_NAME
34
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
35
+ from model_analyzer.perf_analyzer.genai_perf_config import GenaiPerfConfig
36
+ from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
37
+ from model_analyzer.record.record import RecordType
38
+ from model_analyzer.triton.server.server_config import TritonServerConfig
39
+
40
+ from .config_command import ConfigCommand
41
+ from .config_defaults import (
42
+ DEFAULT_ALWAYS_REPORT_GPU_METRICS,
43
+ DEFAULT_BATCH_SIZES,
44
+ DEFAULT_CHECKPOINT_DIRECTORY,
45
+ DEFAULT_CLIENT_PROTOCOL,
46
+ DEFAULT_COLLECT_CPU_METRICS,
47
+ DEFAULT_CONCURRENCY_SWEEP_DISABLE,
48
+ DEFAULT_DCGM_DISABLE,
49
+ DEFAULT_DURATION_SECONDS,
50
+ DEFAULT_EXPORT_PATH,
51
+ DEFAULT_FILENAME_MODEL_GPU,
52
+ DEFAULT_FILENAME_MODEL_INFERENCE,
53
+ DEFAULT_FILENAME_SERVER_ONLY,
54
+ DEFAULT_GPU_OUTPUT_FIELDS,
55
+ DEFAULT_GPUS,
56
+ DEFAULT_INFERENCE_OUTPUT_FIELDS,
57
+ DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS,
58
+ DEFAULT_MAX_RETRIES,
59
+ DEFAULT_MODEL_TYPE,
60
+ DEFAULT_MODEL_WEIGHTING,
61
+ DEFAULT_MONITORING_INTERVAL,
62
+ DEFAULT_NUM_CONFIGS_PER_MODEL,
63
+ DEFAULT_NUM_TOP_MODEL_CONFIGS,
64
+ DEFAULT_OFFLINE_OBJECTIVES,
65
+ DEFAULT_OFFLINE_PLOTS,
66
+ DEFAULT_ONLINE_OBJECTIVES,
67
+ DEFAULT_ONLINE_PLOTS,
68
+ DEFAULT_OPTUNA_EARLY_EXIT_THRESHOLD,
69
+ DEFAULT_OPTUNA_MAX_PERCENTAGE_OF_SEARCH_SPACE,
70
+ DEFAULT_OPTUNA_MAX_TRIALS,
71
+ DEFAULT_OPTUNA_MIN_PERCENTAGE_OF_SEARCH_SPACE,
72
+ DEFAULT_OPTUNA_MIN_TRIALS,
73
+ DEFAULT_OUTPUT_MODEL_REPOSITORY,
74
+ DEFAULT_OVERRIDE_OUTPUT_REPOSITORY_FLAG,
75
+ DEFAULT_PERF_ANALYZER_CPU_UTIL,
76
+ DEFAULT_PERF_ANALYZER_PATH,
77
+ DEFAULT_PERF_ANALYZER_TIMEOUT,
78
+ DEFAULT_PERF_MAX_AUTO_ADJUSTS,
79
+ DEFAULT_PERF_OUTPUT_FLAG,
80
+ DEFAULT_REQUEST_RATE_GPU_OUTPUT_FIELDS,
81
+ DEFAULT_REQUEST_RATE_INFERENCE_OUTPUT_FIELDS,
82
+ DEFAULT_REQUEST_RATE_SEARCH_ENABLE,
83
+ DEFAULT_RUN_CONFIG_MAX_BINARY_SEARCH_STEPS,
84
+ DEFAULT_RUN_CONFIG_MAX_CONCURRENCY,
85
+ DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT,
86
+ DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE,
87
+ DEFAULT_RUN_CONFIG_MAX_REQUEST_RATE,
88
+ DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
89
+ DEFAULT_RUN_CONFIG_MIN_INSTANCE_COUNT,
90
+ DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE,
91
+ DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
92
+ DEFAULT_RUN_CONFIG_PROFILE_MODELS_CONCURRENTLY_ENABLE,
93
+ DEFAULT_RUN_CONFIG_SEARCH_DISABLE,
94
+ DEFAULT_RUN_CONFIG_SEARCH_MODE,
95
+ DEFAULT_SERVER_OUTPUT_FIELDS,
96
+ DEFAULT_SKIP_DETAILED_REPORTS,
97
+ DEFAULT_SKIP_SUMMARY_REPORTS,
98
+ DEFAULT_TRITON_DOCKER_IMAGE,
99
+ DEFAULT_TRITON_GRPC_ENDPOINT,
100
+ DEFAULT_TRITON_HTTP_ENDPOINT,
101
+ DEFAULT_TRITON_INSTALL_PATH,
102
+ DEFAULT_TRITON_LAUNCH_MODE,
103
+ DEFAULT_TRITON_METRICS_URL,
104
+ DEFAULT_TRITON_SERVER_PATH,
105
+ DEFAULT_USE_CONCURRENCY_FORMULA,
106
+ )
107
+ from .config_enum import ConfigEnum
108
+ from .config_field import ConfigField
109
+ from .config_list_generic import ConfigListGeneric
110
+ from .config_list_numeric import ConfigListNumeric
111
+ from .config_list_string import ConfigListString
112
+ from .config_none import ConfigNone
113
+ from .config_object import ConfigObject
114
+ from .config_primitive import ConfigPrimitive
115
+ from .config_sweep import ConfigSweep
116
+ from .config_union import ConfigUnion
117
+ from .objects.config_model_profile_spec import ConfigModelProfileSpec
118
+ from .objects.config_plot import ConfigPlot
119
+ from .objects.config_protobuf_utils import (
120
+ is_protobuf_type_primitive,
121
+ protobuf_to_config_type,
122
+ )
123
+
124
+ logger = logging.getLogger(LOGGER_NAME)
125
+
126
+
127
+ class ConfigCommandProfile(ConfigCommand):
128
+ """
129
+ Model Analyzer config object.
130
+ """
131
+
132
def __init__(self):
    """Construct the profile-subcommand config and register every option."""
    super().__init__()
    # Populate all supported config fields immediately so the object is
    # usable as soon as it is constructed.
    self._fill_config()
135
+
136
def _resolve_protobuf_field(self, field: FieldDescriptor) -> ConfigSweep:
    """
    Recursively translate a protobuf field descriptor into the
    equivalent Model Analyzer config type, wrapped in a sweepable union
    that also accepts None.

    Parameters
    ----------
    field : google.protobuf.pyext._message.FieldDescriptor
        Descriptor of the protobuf field to translate.

    Returns
    -------
    ConfigSweep
        A sweep over (resolved config type | None).

    Raises
    ------
    TritonModelAnalyzerException
        If the field's protobuf type has no config-type equivalent.
    """

    if is_protobuf_type_primitive(field.type):
        # Scalar protobuf type: map straight to a ConfigPrimitive,
        # wrapping in a generic list when the field is repeated.
        primitive = ConfigPrimitive(protobuf_to_config_type(field.type))
        if field.label == FieldDescriptor.LABEL_REPEATED:
            resolved = ConfigListGeneric(primitive)
        else:
            resolved = primitive

    elif field.type == FieldDescriptor.TYPE_MESSAGE:
        # Nested message: build a ConfigObject whose schema mirrors the
        # message's own fields.
        nested_schema = {}

        # Map fields are represented in protobuf as a repeated message
        # with key/value entries; expose them as a wildcard-keyed object.
        # TODO: Add support for types in the keys
        is_map_entry = (
            field.message_type.has_options
            and field.message_type.GetOptions().map_entry
        )
        if is_map_entry:
            nested_schema["*"] = self._resolve_protobuf_field(
                field.message_type.fields_by_name["value"]
            )
            resolved = ConfigObject(schema=nested_schema)
        else:
            for sub_field in field.message_type.fields:
                nested_schema[sub_field.name] = self._resolve_protobuf_field(
                    sub_field
                )
            message_object = ConfigObject(schema=nested_schema)
            if field.label == FieldDescriptor.LABEL_REPEATED:
                resolved = ConfigListGeneric(message_object)
            else:
                resolved = message_object

    elif field.type == FieldDescriptor.TYPE_ENUM:
        # Enum: the legal values are exactly the enum member names.
        resolved = ConfigEnum([value.name for value in field.enum_type.values])

    else:
        raise TritonModelAnalyzerException(
            "The current version of Model Config is not supported by Model Analyzer."
        )

    return ConfigSweep(ConfigUnion([resolved, ConfigNone()]))
206
+
207
def _get_model_config_fields(self):
    """
    Build a ConfigObject whose schema mirrors every top-level field of
    the Triton ModelConfig protobuf.
    """

    # Instantiate an empty ModelConfig purely to reach its descriptor.
    descriptor_fields = ModelConfig().DESCRIPTOR.fields
    schema = {
        proto_field.name: self._resolve_protobuf_field(proto_field)
        for proto_field in descriptor_fields
    }
    return ConfigObject(schema)
220
+
221
def _fill_config(self):
    """
    Register every profile-subcommand option: first the general
    top-level fields, then each themed group via its dedicated helper.
    """

    # General options shared across the profile subcommand. Registration
    # order is preserved exactly as each field appears in the list.
    general_fields = [
        ConfigField(
            "config_file",
            field_type=ConfigPrimitive(str),
            flags=["-f", "--config-file"],
            description="Path to Config File for subcommand 'profile'.",
        ),
        ConfigField(
            "checkpoint_directory",
            flags=["-s", "--checkpoint-directory"],
            default_value=DEFAULT_CHECKPOINT_DIRECTORY,
            field_type=ConfigPrimitive(str, validator=parent_path_validator),
            description="Full path to directory to which to read and write checkpoints and profile data.",
        ),
        ConfigField(
            "monitoring_interval",
            flags=["-i", "--monitoring-interval"],
            field_type=ConfigPrimitive(float),
            default_value=DEFAULT_MONITORING_INTERVAL,
            description="Interval of time between metrics measurements in seconds",
        ),
        ConfigField(
            "duration_seconds",
            field_type=ConfigPrimitive(int),
            flags=["-d", "--duration-seconds"],
            default_value=DEFAULT_DURATION_SECONDS,
            description="Specifies how long (seconds) to gather server-only metrics",
        ),
        ConfigField(
            "collect_cpu_metrics",
            field_type=ConfigPrimitive(bool),
            flags=["--collect-cpu-metrics"],
            parser_args={"action": "store_true"},
            default_value=DEFAULT_COLLECT_CPU_METRICS,
            description="Specify whether CPU metrics are collected or not",
        ),
        ConfigField(
            "gpus",
            flags=["--gpus"],
            field_type=ConfigListString(),
            default_value=DEFAULT_GPUS,
            description="List of GPU UUIDs to be used for the profiling. "
            "Use 'all' to profile all the GPUs visible by CUDA.",
        ),
        ConfigField(
            "always_report_gpu_metrics",
            flags=["--always-report-gpu-metrics"],
            field_type=ConfigPrimitive(bool),
            parser_args={"action": "store_true"},
            default_value=DEFAULT_ALWAYS_REPORT_GPU_METRICS,
            description="Report GPU metrics, even when the model is `cpu_only`.",
        ),
        ConfigField(
            "dcgm_disable",
            field_type=ConfigPrimitive(bool),
            flags=["--dcgm-disable"],
            parser_args={"action": "store_true"},
            default_value=DEFAULT_DCGM_DISABLE,
            description="Disables DCGM, which prevents obtaining information about GPUs",
        ),
        ConfigField(
            "skip_summary_reports",
            flags=["--skip-summary-reports"],
            field_type=ConfigPrimitive(bool),
            parser_args={"action": "store_true"},
            default_value=DEFAULT_SKIP_SUMMARY_REPORTS,
            description="Skips the generation of analysis summary reports and tables.",
        ),
        ConfigField(
            "skip_detailed_reports",
            flags=["--skip-detailed-reports"],
            field_type=ConfigPrimitive(bool),
            parser_args={"action": "store_true"},
            default_value=DEFAULT_SKIP_DETAILED_REPORTS,
            description="Skips the generation of detailed summary reports and tables.",
        ),
        ConfigField(
            "model_type",
            flags=["--model-type"],
            field_type=ConfigPrimitive(str),
            default_value=DEFAULT_MODEL_TYPE,
            description="Type of model being profiled: generic or LLM",
        ),
    ]
    for general_field in general_fields:
        self._add_config(general_field)

    # Themed option groups, each registered by its own builder.
    self._add_repository_configs()
    self._add_client_configs()
    self._add_profile_models_configs()
    self._add_perf_analyzer_configs()
    self._add_triton_configs()
    self._add_run_search_configs()
    self._add_export_configs()
    self._add_report_configs()
    self._add_table_configs()
    self._add_shorthand_configs()
342
+
343
def _add_repository_configs(self):
    """
    Register the options that control the input and output model
    repositories.
    """

    repository_fields = [
        ConfigField(
            "model_repository",
            flags=["-m", "--model-repository"],
            field_type=ConfigPrimitive(str, validator=file_path_validator),
            description="Triton Model repository location",
        ),
        ConfigField(
            "output_model_repository_path",
            field_type=ConfigPrimitive(str),
            default_value=DEFAULT_OUTPUT_MODEL_REPOSITORY,
            flags=["--output-model-repository-path"],
            description="Output model repository path used by Model Analyzer."
            " This is the directory that will contain all the generated model configurations",
        ),
        ConfigField(
            "override_output_model_repository",
            field_type=ConfigPrimitive(bool),
            parser_args={"action": "store_true"},
            default_value=DEFAULT_OVERRIDE_OUTPUT_REPOSITORY_FLAG,
            flags=["--override-output-model-repository"],
            description="Will override the contents of the output model repository"
            " and replace it with the new results.",
        ),
    ]
    for repository_field in repository_fields:
        self._add_config(repository_field)
376
+
377
+ def _add_profile_models_configs(self):
378
+ """
379
+ Adds configs specific to model specifications
380
+ """
381
+ triton_server_flags_scheme = ConfigObject(
382
+ schema={k: ConfigPrimitive(str) for k in TritonServerConfig.allowed_keys()}
383
+ )
384
+ perf_analyzer_additive_keys = {
385
+ k: None for k in PerfAnalyzerConfig.additive_keys()
386
+ }
387
+ perf_analyzer_flags_scheme = ConfigObject(
388
+ schema={
389
+ k: (
390
+ (ConfigUnion([ConfigPrimitive(type_=str), ConfigListString()]))
391
+ if (k in perf_analyzer_additive_keys)
392
+ else ConfigPrimitive(type_=str)
393
+ )
394
+ for k in PerfAnalyzerConfig.allowed_keys()
395
+ }
396
+ )
397
+
398
+ genai_perf_flags_scheme = ConfigObject(
399
+ schema={k: ConfigPrimitive(str) for k in GenaiPerfConfig.allowed_keys()}
400
+ )
401
+
402
+ triton_server_environment_scheme = ConfigObject(
403
+ schema={"*": ConfigPrimitive(str)}
404
+ )
405
+
406
+ # This comes from the installed python package:
407
+ # <install_path>/lib/python3.8/dist-packages/docker/models/containers.py
408
+ # Only supporting values that are bool, int, string, or lists of strings
409
+ triton_docker_args_scheme = ConfigObject(
410
+ schema={
411
+ "image": ConfigPrimitive(str),
412
+ "command": ConfigPrimitive(str),
413
+ "auto_remove": ConfigPrimitive(bool),
414
+ "blkio_weight_device": ConfigListString(),
415
+ "blkio_weight": ConfigPrimitive(int),
416
+ "cap_add": ConfigListString(),
417
+ "cap_drop": ConfigListString(),
418
+ "cgroup_parent": ConfigPrimitive(str),
419
+ "cgroupns": ConfigPrimitive(str),
420
+ "cpu_count": ConfigPrimitive(int),
421
+ "cpu_percent": ConfigPrimitive(int),
422
+ "cpu_period": ConfigPrimitive(int),
423
+ "cpu_quota": ConfigPrimitive(int),
424
+ "cpu_rt_period": ConfigPrimitive(int),
425
+ "cpu_shares": ConfigPrimitive(int),
426
+ "cpuset_cpus": ConfigPrimitive(str),
427
+ "cpuset_mems": ConfigPrimitive(str),
428
+ "detach": ConfigPrimitive(bool),
429
+ "domainname": ConfigPrimitive(str),
430
+ "entrypoint": ConfigPrimitive(str),
431
+ "environment": ConfigListString(),
432
+ "hostname": ConfigPrimitive(str),
433
+ "init": ConfigPrimitive(bool),
434
+ "init_path": ConfigPrimitive(str),
435
+ "ipc_mode": ConfigPrimitive(str),
436
+ "isolation": ConfigPrimitive(str),
437
+ "kernel_memory": ConfigPrimitive(str),
438
+ "labels": ConfigListString(),
439
+ "mac_address": ConfigPrimitive(str),
440
+ "mem_limit": ConfigPrimitive(str),
441
+ "mem_reservation": ConfigPrimitive(str),
442
+ "memswap_limit": ConfigPrimitive(str),
443
+ "name": ConfigPrimitive(str),
444
+ "nano_cpus": ConfigPrimitive(int),
445
+ "network": ConfigPrimitive(str),
446
+ "network_disabled": ConfigPrimitive(bool),
447
+ "network_mode": ConfigPrimitive(str),
448
+ "oom_kill_disable": ConfigPrimitive(bool),
449
+ "oom_score_adj": ConfigPrimitive(int),
450
+ "pid_mode": ConfigPrimitive(str),
451
+ "pids_limit": ConfigPrimitive(int),
452
+ "platform": ConfigPrimitive(str),
453
+ "privileged": ConfigPrimitive(bool),
454
+ "publish_all_ports": ConfigPrimitive(bool),
455
+ "remove": ConfigPrimitive(bool),
456
+ "runtime": ConfigPrimitive(str),
457
+ "shm_size": ConfigPrimitive(str),
458
+ "stdin_open": ConfigPrimitive(bool),
459
+ "stdout": ConfigPrimitive(bool),
460
+ "stderr": ConfigPrimitive(bool),
461
+ "stop_signal": ConfigPrimitive(str),
462
+ "stream": ConfigPrimitive(bool),
463
+ "tty": ConfigPrimitive(bool),
464
+ "use_config_proxy": ConfigPrimitive(bool),
465
+ "user": ConfigPrimitive(str),
466
+ "userns_mode": ConfigPrimitive(str),
467
+ "uts_mode": ConfigPrimitive(str),
468
+ "version": ConfigPrimitive(str),
469
+ "volume_driver": ConfigPrimitive(str),
470
+ "volumes": ConfigListString(),
471
+ "working_dir": ConfigPrimitive(str),
472
+ }
473
+ )
474
+
475
+ self._add_config(
476
+ ConfigField(
477
+ "perf_analyzer_flags",
478
+ field_type=perf_analyzer_flags_scheme,
479
+ description="Allows custom configuration of the perf analyzer instances used by model analyzer.",
480
+ )
481
+ )
482
+ self._add_config(
483
+ ConfigField(
484
+ "genai_perf_flags",
485
+ field_type=genai_perf_flags_scheme,
486
+ description="Allows custom configuration of the GenAI Perf instances used by model analyzer.",
487
+ )
488
+ )
489
+ self._add_config(
490
+ ConfigField(
491
+ "triton_server_flags",
492
+ field_type=triton_server_flags_scheme,
493
+ description="Allows custom configuration of the triton instances used by model analyzer.",
494
+ )
495
+ )
496
+ self._add_config(
497
+ ConfigField(
498
+ "triton_server_environment",
499
+ field_type=triton_server_environment_scheme,
500
+ description="Allows setting environment variables for tritonserver server instances launched by Model Analyzer",
501
+ )
502
+ )
503
+ self._add_config(
504
+ ConfigField(
505
+ "triton_docker_args",
506
+ field_type=triton_docker_args_scheme,
507
+ description="Allows setting docker variables for tritonserver server instances launched by Model Analyzer",
508
+ )
509
+ )
510
+
511
+ objectives_scheme = ConfigUnion(
512
+ [
513
+ ConfigObject(
514
+ schema={
515
+ tag: ConfigPrimitive(type_=int)
516
+ for tag in RecordType.get_all_record_types().keys()
517
+ }
518
+ ),
519
+ ConfigListString(output_mapper=objective_list_output_mapper),
520
+ ]
521
+ )
522
+ constraints_scheme = ConfigObject(
523
+ schema={
524
+ "perf_throughput": ConfigObject(
525
+ schema={
526
+ "min": ConfigPrimitive(int),
527
+ }
528
+ ),
529
+ "output_token_throughput": ConfigObject(
530
+ schema={
531
+ "min": ConfigPrimitive(int),
532
+ }
533
+ ),
534
+ "perf_latency_avg": ConfigObject(
535
+ schema={
536
+ "max": ConfigPrimitive(int),
537
+ }
538
+ ),
539
+ "perf_latency_p90": ConfigObject(
540
+ schema={
541
+ "max": ConfigPrimitive(int),
542
+ }
543
+ ),
544
+ "perf_latency_p95": ConfigObject(
545
+ schema={
546
+ "max": ConfigPrimitive(int),
547
+ }
548
+ ),
549
+ "perf_latency_p99": ConfigObject(
550
+ schema={
551
+ "max": ConfigPrimitive(int),
552
+ }
553
+ ),
554
+ "perf_latency": ConfigObject(
555
+ schema={
556
+ "max": ConfigPrimitive(int),
557
+ }
558
+ ),
559
+ "gpu_used_memory": ConfigObject(
560
+ schema={
561
+ "max": ConfigPrimitive(int),
562
+ }
563
+ ),
564
+ "inter_token_latency_p99": ConfigObject(
565
+ schema={
566
+ "max": ConfigPrimitive(int),
567
+ }
568
+ ),
569
+ "inter_token_latency_p95": ConfigObject(
570
+ schema={
571
+ "max": ConfigPrimitive(int),
572
+ }
573
+ ),
574
+ "inter_token_latency_p90": ConfigObject(
575
+ schema={
576
+ "max": ConfigPrimitive(int),
577
+ }
578
+ ),
579
+ "inter_token_latency_p75": ConfigObject(
580
+ schema={
581
+ "max": ConfigPrimitive(int),
582
+ }
583
+ ),
584
+ "inter_token_latency_p50": ConfigObject(
585
+ schema={
586
+ "max": ConfigPrimitive(int),
587
+ }
588
+ ),
589
+ "inter_token_latency_p25": ConfigObject(
590
+ schema={
591
+ "max": ConfigPrimitive(int),
592
+ }
593
+ ),
594
+ "inter_token_latency_min": ConfigObject(
595
+ schema={
596
+ "max": ConfigPrimitive(int),
597
+ }
598
+ ),
599
+ "inter_token_latency_max": ConfigObject(
600
+ schema={
601
+ "max": ConfigPrimitive(int),
602
+ }
603
+ ),
604
+ "inter_token_latency_avg": ConfigObject(
605
+ schema={
606
+ "max": ConfigPrimitive(int),
607
+ }
608
+ ),
609
+ "time_to_first_token_p99": ConfigObject(
610
+ schema={
611
+ "max": ConfigPrimitive(int),
612
+ }
613
+ ),
614
+ "time_to_first_token_p95": ConfigObject(
615
+ schema={
616
+ "max": ConfigPrimitive(int),
617
+ }
618
+ ),
619
+ "time_to_first_token_p90": ConfigObject(
620
+ schema={
621
+ "max": ConfigPrimitive(int),
622
+ }
623
+ ),
624
+ "time_to_first_token_p75": ConfigObject(
625
+ schema={
626
+ "max": ConfigPrimitive(int),
627
+ }
628
+ ),
629
+ "time_to_first_token_p50": ConfigObject(
630
+ schema={
631
+ "max": ConfigPrimitive(int),
632
+ }
633
+ ),
634
+ "time_to_first_token_p25": ConfigObject(
635
+ schema={
636
+ "max": ConfigPrimitive(int),
637
+ }
638
+ ),
639
+ "time_to_first_token_min": ConfigObject(
640
+ schema={
641
+ "max": ConfigPrimitive(int),
642
+ }
643
+ ),
644
+ "time_to_first_token_max": ConfigObject(
645
+ schema={
646
+ "max": ConfigPrimitive(int),
647
+ }
648
+ ),
649
+ "time_to_first_token_avg": ConfigObject(
650
+ schema={
651
+ "max": ConfigPrimitive(int),
652
+ }
653
+ ),
654
+ }
655
+ )
656
+ self._add_config(
657
+ ConfigField(
658
+ "objectives",
659
+ field_type=objectives_scheme,
660
+ default_value=DEFAULT_OFFLINE_OBJECTIVES,
661
+ description="Model Analyzer uses the objectives described here to find the best configuration for each model.",
662
+ )
663
+ )
664
+ self._add_config(
665
+ ConfigField(
666
+ "constraints",
667
+ field_type=constraints_scheme,
668
+ description='Constraints on the objectives specified in the "objectives" field of the config.',
669
+ )
670
+ )
671
+ self._add_config(
672
+ ConfigField(
673
+ "weighting",
674
+ field_type=ConfigPrimitive(int),
675
+ description="A weighting used to bias the model when determining the best configuration",
676
+ )
677
+ )
678
+
679
+ model_config_fields = self._get_model_config_fields()
680
+ profile_model_scheme = ConfigObject(
681
+ required=True,
682
+ schema={
683
+ # Any key is allowed, but the keys must follow the pattern
684
+ # below
685
+ "*": ConfigObject(
686
+ schema={
687
+ "cpu_only": ConfigPrimitive(bool),
688
+ "parameters": ConfigObject(
689
+ schema={
690
+ "batch_sizes": ConfigListNumeric(type_=int),
691
+ "concurrency": ConfigListNumeric(type_=int),
692
+ "request_rate": ConfigListNumeric(type_=int),
693
+ }
694
+ ),
695
+ "objectives": objectives_scheme,
696
+ "constraints": constraints_scheme,
697
+ "weighting": ConfigPrimitive(type_=int),
698
+ "model_config_parameters": model_config_fields,
699
+ "perf_analyzer_flags": perf_analyzer_flags_scheme,
700
+ "genai_perf_flags": genai_perf_flags_scheme,
701
+ "triton_server_flags": triton_server_flags_scheme,
702
+ "triton_server_environment": triton_server_environment_scheme,
703
+ "triton_docker_args": triton_docker_args_scheme,
704
+ }
705
+ )
706
+ },
707
+ output_mapper=ConfigModelProfileSpec.model_object_to_config_model_profile_spec,
708
+ )
709
+ self._add_config(
710
+ ConfigField(
711
+ "profile_models",
712
+ flags=["--profile-models"],
713
+ field_type=ConfigUnion(
714
+ [
715
+ profile_model_scheme,
716
+ ConfigListGeneric(
717
+ ConfigUnion(
718
+ [
719
+ profile_model_scheme,
720
+ ConfigPrimitive(
721
+ str,
722
+ output_mapper=ConfigModelProfileSpec.model_str_to_config_model_profile_spec,
723
+ ),
724
+ ]
725
+ ),
726
+ required=True,
727
+ output_mapper=ConfigModelProfileSpec.model_mixed_to_config_model_profile_spec,
728
+ ),
729
+ ConfigListString(
730
+ output_mapper=ConfigModelProfileSpec.model_list_to_config_model_profile_spec
731
+ ),
732
+ ],
733
+ required=True,
734
+ ),
735
+ description="List of the models to be profiled",
736
+ )
737
+ )
738
+ self._add_config(
739
+ ConfigField(
740
+ "batch_sizes",
741
+ flags=["-b", "--batch-sizes"],
742
+ field_type=ConfigListNumeric(int),
743
+ default_value=DEFAULT_BATCH_SIZES,
744
+ description="Comma-delimited list of batch sizes to use for the profiling",
745
+ )
746
+ )
747
+ self._add_config(
748
+ ConfigField(
749
+ "concurrency",
750
+ flags=["-c", "--concurrency"],
751
+ field_type=ConfigListNumeric(int),
752
+ description="Comma-delimited list of concurrency values or ranges <start:end:step>"
753
+ " to be used during profiling",
754
+ )
755
+ )
756
+ self._add_config(
757
+ ConfigField(
758
+ "request_rate",
759
+ flags=["--request-rate"],
760
+ field_type=ConfigListNumeric(int),
761
+ description="Comma-delimited list of request rate values or ranges <start:end:step>"
762
+ " to be used during profiling",
763
+ )
764
+ )
765
+ self._add_config(
766
+ ConfigField(
767
+ "reload_model_disable",
768
+ field_type=ConfigPrimitive(bool),
769
+ parser_args={"action": "store_true"},
770
+ default_value=False,
771
+ flags=["--reload-model-disable"],
772
+ description="Flag to indicate whether or not to disable model "
773
+ "loading and unloading in remote mode.",
774
+ )
775
+ )
776
+ self._add_config(
777
+ ConfigField(
778
+ "bls_composing_models",
779
+ flags=["--bls-composing-models"],
780
+ field_type=ConfigUnion(
781
+ [
782
+ profile_model_scheme,
783
+ ConfigListGeneric(
784
+ ConfigUnion(
785
+ [
786
+ profile_model_scheme,
787
+ ConfigPrimitive(
788
+ str,
789
+ output_mapper=ConfigModelProfileSpec.model_str_to_config_model_profile_spec,
790
+ ),
791
+ ]
792
+ ),
793
+ required=True,
794
+ output_mapper=ConfigModelProfileSpec.model_mixed_to_config_model_profile_spec,
795
+ ),
796
+ ConfigListString(
797
+ output_mapper=ConfigModelProfileSpec.model_list_to_config_model_profile_spec
798
+ ),
799
+ ],
800
+ required=True,
801
+ ),
802
+ default_value=[],
803
+ description="List of the models to be profiled",
804
+ )
805
+ )
806
+ self._add_config(
807
+ ConfigField(
808
+ "cpu_only_composing_models",
809
+ field_type=ConfigListString(),
810
+ flags=["--cpu-only-composing-models"],
811
+ description=(
812
+ "A list of strings representing composing models that should be profiled using CPU instances only. "
813
+ ),
814
+ )
815
+ )
816
+
817
+ def _add_client_configs(self):
818
+ """
819
+ Adds configs specific to tritonclient
820
+ """
821
+ self._add_config(
822
+ ConfigField(
823
+ "client_max_retries",
824
+ flags=["-r", "--client-max-retries"],
825
+ field_type=ConfigPrimitive(int),
826
+ default_value=DEFAULT_MAX_RETRIES,
827
+ description="Specifies the max number of retries for any requests to Triton server.",
828
+ )
829
+ )
830
+ self._add_config(
831
+ ConfigField(
832
+ "client_protocol",
833
+ flags=["--client-protocol"],
834
+ choices=["http", "grpc"],
835
+ field_type=ConfigPrimitive(str),
836
+ default_value=DEFAULT_CLIENT_PROTOCOL,
837
+ description="The protocol used to communicate with the Triton Inference Server",
838
+ )
839
+ )
840
+
841
+ def _add_run_search_configs(self):
842
+ """
843
+ Add the config options related
844
+ to the run search
845
+ """
846
+
847
+ self._add_config(
848
+ ConfigField(
849
+ "early_exit_enable",
850
+ field_type=ConfigPrimitive(bool),
851
+ parser_args={"action": "store_true"},
852
+ default_value=False,
853
+ flags=["--early-exit-enable"],
854
+ description="Flag to indicate if Model Analyzer can skip some configurations when manually searching concurrency/request rate, or max_batch_size",
855
+ )
856
+ )
857
+ self._add_config(
858
+ ConfigField(
859
+ "run_config_search_max_concurrency",
860
+ flags=["--run-config-search-max-concurrency"],
861
+ field_type=ConfigPrimitive(int),
862
+ default_value=DEFAULT_RUN_CONFIG_MAX_CONCURRENCY,
863
+ description="Max concurrency value that run config search should not go beyond that.",
864
+ )
865
+ )
866
+ self._add_config(
867
+ ConfigField(
868
+ "run_config_search_min_concurrency",
869
+ flags=["--run-config-search-min-concurrency"],
870
+ field_type=ConfigPrimitive(int),
871
+ default_value=DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
872
+ description="Min concurrency value that run config search should start with.",
873
+ )
874
+ )
875
+ self._add_config(
876
+ ConfigField(
877
+ "run_config_search_max_request_rate",
878
+ flags=["--run-config-search-max-request-rate"],
879
+ field_type=ConfigPrimitive(int),
880
+ default_value=DEFAULT_RUN_CONFIG_MAX_REQUEST_RATE,
881
+ description="Max request rate value that run config search should not go beyond that.",
882
+ )
883
+ )
884
+ self._add_config(
885
+ ConfigField(
886
+ "run_config_search_min_request_rate",
887
+ flags=["--run-config-search-min-request-rate"],
888
+ field_type=ConfigPrimitive(int),
889
+ default_value=DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
890
+ description="Min request rate value that run config search should start with.",
891
+ )
892
+ )
893
+ self._add_config(
894
+ ConfigField(
895
+ "run_config_search_max_instance_count",
896
+ flags=["--run-config-search-max-instance-count"],
897
+ field_type=ConfigPrimitive(int),
898
+ default_value=DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT,
899
+ description="Max instance count value that run config search should not go beyond that.",
900
+ )
901
+ )
902
+ self._add_config(
903
+ ConfigField(
904
+ "run_config_search_min_instance_count",
905
+ flags=["--run-config-search-min-instance-count"],
906
+ field_type=ConfigPrimitive(int),
907
+ default_value=DEFAULT_RUN_CONFIG_MIN_INSTANCE_COUNT,
908
+ description="Min instance count value that run config search should start with.",
909
+ )
910
+ )
911
+ self._add_config(
912
+ ConfigField(
913
+ "run_config_search_max_model_batch_size",
914
+ flags=["--run-config-search-max-model-batch-size"],
915
+ field_type=ConfigPrimitive(int),
916
+ default_value=DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE,
917
+ description="Value for the model's max_batch_size that run config search will not go beyond.",
918
+ )
919
+ )
920
+ self._add_config(
921
+ ConfigField(
922
+ "run_config_search_min_model_batch_size",
923
+ flags=["--run-config-search-min-model-batch-size"],
924
+ field_type=ConfigPrimitive(int),
925
+ default_value=DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE,
926
+ description="Value for the model's max_batch_size that run config search will start from.",
927
+ )
928
+ )
929
+ self._add_config(
930
+ ConfigField(
931
+ "run_config_search_max_binary_search_steps",
932
+ flags=["--run-config-search-max-binary-search-steps"],
933
+ field_type=ConfigPrimitive(int),
934
+ default_value=DEFAULT_RUN_CONFIG_MAX_BINARY_SEARCH_STEPS,
935
+ description="Maximum number of steps take during the binary concurrency search.",
936
+ )
937
+ )
938
+ self._add_config(
939
+ ConfigField(
940
+ "min_percentage_of_search_space",
941
+ flags=["--min-percentage-of-search-space"],
942
+ field_type=ConfigPrimitive(int),
943
+ default_value=DEFAULT_OPTUNA_MIN_PERCENTAGE_OF_SEARCH_SPACE,
944
+ description="Minimum percentage of the search space to profile when using Optuna",
945
+ )
946
+ )
947
+ self._add_config(
948
+ ConfigField(
949
+ "max_percentage_of_search_space",
950
+ flags=["--max-percentage-of-search-space"],
951
+ field_type=ConfigPrimitive(int),
952
+ default_value=DEFAULT_OPTUNA_MAX_PERCENTAGE_OF_SEARCH_SPACE,
953
+ description="Maximum percentage of the search space to profile when using Optuna",
954
+ )
955
+ )
956
+ self._add_config(
957
+ ConfigField(
958
+ "optuna_min_trials",
959
+ flags=["--optuna-min-trials"],
960
+ field_type=ConfigPrimitive(int),
961
+ default_value=DEFAULT_OPTUNA_MIN_TRIALS,
962
+ description="Minimum number of trials to profile when using Optuna",
963
+ )
964
+ )
965
+ self._add_config(
966
+ ConfigField(
967
+ "optuna_max_trials",
968
+ flags=["--optuna-max-trials"],
969
+ field_type=ConfigPrimitive(int),
970
+ default_value=DEFAULT_OPTUNA_MAX_TRIALS,
971
+ description="Maximum number of trials to profile when using Optuna",
972
+ )
973
+ )
974
+ self._add_config(
975
+ ConfigField(
976
+ "optuna_early_exit_threshold",
977
+ flags=["--optuna-early-exit-threshold"],
978
+ field_type=ConfigPrimitive(int),
979
+ default_value=DEFAULT_OPTUNA_EARLY_EXIT_THRESHOLD,
980
+ description="Number of trials without improvement before triggering early exit when using Optuna",
981
+ )
982
+ )
983
+ self._add_config(
984
+ ConfigField(
985
+ "use_concurrency_formula",
986
+ flags=["--use-concurrency-formula"],
987
+ field_type=ConfigPrimitive(bool),
988
+ parser_args={"action": "store_true"},
989
+ default_value=DEFAULT_USE_CONCURRENCY_FORMULA,
990
+ description="Use the concurrency formula instead of searching the concurrency space in Optuna search mode",
991
+ )
992
+ )
993
+ self._add_config(
994
+ ConfigField(
995
+ "run_config_search_mode",
996
+ flags=["--run-config-search-mode"],
997
+ choices=["brute", "quick", "optuna"],
998
+ field_type=ConfigPrimitive(str),
999
+ default_value=DEFAULT_RUN_CONFIG_SEARCH_MODE,
1000
+ description="The search mode for Model Analyzer to find and evaluate"
1001
+ " model configurations. 'brute' will brute force all combinations of"
1002
+ " configuration options. 'quick' will attempt to find a near-optimal"
1003
+ " configuration as fast as possible, but isn't guaranteed to find the"
1004
+ " best. 'optuna' is a more generalized search algorithm allowing "
1005
+ " the user to quickly search over any set of parameters.",
1006
+ )
1007
+ )
1008
+ self._add_config(
1009
+ ConfigField(
1010
+ "run_config_search_disable",
1011
+ flags=["--run-config-search-disable"],
1012
+ field_type=ConfigPrimitive(bool),
1013
+ parser_args={"action": "store_true"},
1014
+ default_value=DEFAULT_RUN_CONFIG_SEARCH_DISABLE,
1015
+ description="Disable run config search.",
1016
+ )
1017
+ )
1018
+ self._add_config(
1019
+ ConfigField(
1020
+ "run_config_profile_models_concurrently_enable",
1021
+ flags=["--run-config-profile-models-concurrently-enable"],
1022
+ field_type=ConfigPrimitive(bool),
1023
+ parser_args={"action": "store_true"},
1024
+ default_value=DEFAULT_RUN_CONFIG_PROFILE_MODELS_CONCURRENTLY_ENABLE,
1025
+ description="Enable the profiling of all supplied models concurrently.",
1026
+ )
1027
+ )
1028
+ self._add_config(
1029
+ ConfigField(
1030
+ "request_rate_search_enable",
1031
+ flags=["--request-rate-search-enable"],
1032
+ field_type=ConfigPrimitive(bool),
1033
+ parser_args={"action": "store_true"},
1034
+ default_value=DEFAULT_REQUEST_RATE_SEARCH_ENABLE,
1035
+ description="Enables the searching of request rate (instead of concurrency).",
1036
+ )
1037
+ )
1038
+ self._add_config(
1039
+ ConfigField(
1040
+ "concurrency_sweep_disable",
1041
+ flags=["--concurrency-sweep-disable"],
1042
+ field_type=ConfigPrimitive(bool),
1043
+ parser_args={"action": "store_true"},
1044
+ default_value=DEFAULT_CONCURRENCY_SWEEP_DISABLE,
1045
+ description="Disables the sweeping of concurrencies for the top-N models after quick/optuna search completion.",
1046
+ )
1047
+ )
1048
+
1049
+ def _add_triton_configs(self):
1050
+ """
1051
+ Adds the triton related flags
1052
+ and config options
1053
+ """
1054
+
1055
+ self._add_config(
1056
+ ConfigField(
1057
+ "triton_launch_mode",
1058
+ field_type=ConfigPrimitive(str),
1059
+ flags=["--triton-launch-mode"],
1060
+ default_value=DEFAULT_TRITON_LAUNCH_MODE,
1061
+ choices=["local", "docker", "remote", "c_api"],
1062
+ description="The method by which to launch Triton Server. "
1063
+ "'local' assumes tritonserver binary is available locally. "
1064
+ "'docker' pulls and launches a triton docker container with "
1065
+ "the specified version. 'remote' connects to a running "
1066
+ "server using given http, grpc and metrics endpoints. "
1067
+ "'c_api' allows direct benchmarking of Triton locally"
1068
+ "without the use of endpoints.",
1069
+ )
1070
+ )
1071
+ self._add_config(
1072
+ ConfigField(
1073
+ "triton_docker_image",
1074
+ flags=["--triton-docker-image"],
1075
+ field_type=ConfigPrimitive(str),
1076
+ default_value=DEFAULT_TRITON_DOCKER_IMAGE,
1077
+ description="Triton Server Docker image tag",
1078
+ )
1079
+ )
1080
+ self._add_config(
1081
+ ConfigField(
1082
+ "triton_http_endpoint",
1083
+ flags=["--triton-http-endpoint"],
1084
+ field_type=ConfigPrimitive(str),
1085
+ default_value=DEFAULT_TRITON_HTTP_ENDPOINT,
1086
+ description="Triton Server HTTP endpoint url used by Model Analyzer client.",
1087
+ )
1088
+ )
1089
+ self._add_config(
1090
+ ConfigField(
1091
+ "triton_grpc_endpoint",
1092
+ flags=["--triton-grpc-endpoint"],
1093
+ field_type=ConfigPrimitive(str),
1094
+ default_value=DEFAULT_TRITON_GRPC_ENDPOINT,
1095
+ description="Triton Server HTTP endpoint url used by Model Analyzer client.",
1096
+ )
1097
+ )
1098
+ self._add_config(
1099
+ ConfigField(
1100
+ "triton_metrics_url",
1101
+ field_type=ConfigPrimitive(str),
1102
+ flags=["--triton-metrics-url"],
1103
+ default_value=DEFAULT_TRITON_METRICS_URL,
1104
+ description="Triton Server Metrics endpoint url. ",
1105
+ )
1106
+ )
1107
+ self._add_config(
1108
+ ConfigField(
1109
+ "triton_server_path",
1110
+ field_type=ConfigPrimitive(str),
1111
+ flags=["--triton-server-path"],
1112
+ default_value=DEFAULT_TRITON_SERVER_PATH,
1113
+ description="The full path to the tritonserver binary executable",
1114
+ )
1115
+ )
1116
+ self._add_config(
1117
+ ConfigField(
1118
+ "triton_output_path",
1119
+ field_type=ConfigPrimitive(str),
1120
+ flags=["--triton-output-path"],
1121
+ description=(
1122
+ "The full path to the file to which Triton server instance will "
1123
+ "append their log output. If not specified, they are not written."
1124
+ ),
1125
+ )
1126
+ )
1127
+ self._add_config(
1128
+ ConfigField(
1129
+ "triton_docker_mounts",
1130
+ field_type=ConfigListString(),
1131
+ flags=["--triton-docker-mounts"],
1132
+ description=(
1133
+ "A list of strings representing volumes to be mounted. "
1134
+ "The strings should have the format '<host path>:<container path>:<access mode>'."
1135
+ ),
1136
+ )
1137
+ )
1138
+ self._add_config(
1139
+ ConfigField(
1140
+ "triton_docker_labels",
1141
+ field_type=ConfigObject(schema={"*": ConfigPrimitive(str)}),
1142
+ description="A dictionary of name-value labels to set metadata for the Triton "
1143
+ "server docker container in docker launch mode",
1144
+ )
1145
+ )
1146
+ self._add_config(
1147
+ ConfigField(
1148
+ "triton_docker_shm_size",
1149
+ field_type=ConfigPrimitive(str),
1150
+ flags=["--triton-docker-shm-size"],
1151
+ description=(
1152
+ "The size of the /dev/shm for the triton docker container"
1153
+ ),
1154
+ )
1155
+ )
1156
+ self._add_config(
1157
+ ConfigField(
1158
+ "triton_install_path",
1159
+ field_type=ConfigPrimitive(str),
1160
+ default_value=DEFAULT_TRITON_INSTALL_PATH,
1161
+ flags=["--triton-install-path"],
1162
+ description=(
1163
+ "Path to Triton install directory i.e. the parent directory of 'lib/libtritonserver.so'."
1164
+ "Required only when using triton_launch_mode=c_api."
1165
+ ),
1166
+ )
1167
+ )
1168
+
1169
+ def _add_perf_analyzer_configs(self):
1170
+ """
1171
+ Add the perf_analyzer related config
1172
+ options
1173
+ """
1174
+
1175
+ self._add_config(
1176
+ ConfigField(
1177
+ "perf_analyzer_timeout",
1178
+ flags=["--perf-analyzer-timeout"],
1179
+ field_type=ConfigPrimitive(int),
1180
+ default_value=DEFAULT_PERF_ANALYZER_TIMEOUT,
1181
+ description="Perf analyzer timeout value in seconds.",
1182
+ )
1183
+ )
1184
+ self._add_config(
1185
+ ConfigField(
1186
+ "perf_analyzer_cpu_util",
1187
+ flags=["--perf-analyzer-cpu-util"],
1188
+ field_type=ConfigPrimitive(float),
1189
+ default_value=psutil.cpu_count() * DEFAULT_PERF_ANALYZER_CPU_UTIL,
1190
+ description="Maximum CPU utilization value allowed for the perf_analyzer.",
1191
+ )
1192
+ )
1193
+ self._add_config(
1194
+ ConfigField(
1195
+ "perf_analyzer_path",
1196
+ flags=["--perf-analyzer-path"],
1197
+ field_type=ConfigPrimitive(str, validator=binary_path_validator),
1198
+ default_value=DEFAULT_PERF_ANALYZER_PATH,
1199
+ description="The full path to the perf_analyzer binary executable",
1200
+ )
1201
+ )
1202
+ self._add_config(
1203
+ ConfigField(
1204
+ "perf_output",
1205
+ flags=["--perf-output"],
1206
+ parser_args={"action": "store_true"},
1207
+ field_type=ConfigPrimitive(bool),
1208
+ default_value=DEFAULT_PERF_OUTPUT_FLAG,
1209
+ description="Enables the output from the perf_analyzer to a file specified by"
1210
+ " perf_output_path. If perf_output_path is None, output will be"
1211
+ " written to stdout.",
1212
+ )
1213
+ )
1214
+ self._add_config(
1215
+ ConfigField(
1216
+ "perf_output_path",
1217
+ flags=["--perf-output-path"],
1218
+ field_type=ConfigPrimitive(str),
1219
+ description="Path to the file to which write perf_analyzer output, if enabled.",
1220
+ )
1221
+ )
1222
+ self._add_config(
1223
+ ConfigField(
1224
+ "perf_analyzer_max_auto_adjusts",
1225
+ flags=["--perf-analyzer-max-auto-adjusts"],
1226
+ field_type=ConfigPrimitive(int),
1227
+ default_value=DEFAULT_PERF_MAX_AUTO_ADJUSTS,
1228
+ description="Maximum number of times perf_analyzer is "
1229
+ "launched with auto adjusted parameters in an attempt to profile a model. ",
1230
+ )
1231
+ )
1232
+
1233
+ def _add_export_configs(self):
1234
+ """
1235
+ Add configs related to exporting data
1236
+ """
1237
+ self._add_config(
1238
+ ConfigField(
1239
+ "export_path",
1240
+ flags=["-e", "--export-path"],
1241
+ default_value=DEFAULT_EXPORT_PATH,
1242
+ field_type=ConfigPrimitive(str, validator=parent_path_validator),
1243
+ description="Full path to directory in which to store the results",
1244
+ )
1245
+ )
1246
+ self._add_config(
1247
+ ConfigField(
1248
+ "filename_model_inference",
1249
+ flags=["--filename-model-inference"],
1250
+ default_value=DEFAULT_FILENAME_MODEL_INFERENCE,
1251
+ field_type=ConfigPrimitive(str),
1252
+ description="Specifies filename for storing model inference metrics",
1253
+ )
1254
+ )
1255
+ self._add_config(
1256
+ ConfigField(
1257
+ "filename_model_gpu",
1258
+ flags=["--filename-model-gpu"],
1259
+ field_type=ConfigPrimitive(str),
1260
+ default_value=DEFAULT_FILENAME_MODEL_GPU,
1261
+ description="Specifies filename for storing model GPU metrics",
1262
+ )
1263
+ )
1264
+ self._add_config(
1265
+ ConfigField(
1266
+ "filename_server_only",
1267
+ flags=["--filename-server-only"],
1268
+ field_type=ConfigPrimitive(str),
1269
+ default_value=DEFAULT_FILENAME_SERVER_ONLY,
1270
+ description="Specifies filename for server-only metrics",
1271
+ )
1272
+ )
1273
+
1274
+ def _add_report_configs(self):
1275
+ """
1276
+ Adds report related configs
1277
+ """
1278
+ self._add_config(
1279
+ ConfigField(
1280
+ "num_configs_per_model",
1281
+ flags=["--num-configs-per-model"],
1282
+ field_type=ConfigPrimitive(int),
1283
+ default_value=DEFAULT_NUM_CONFIGS_PER_MODEL,
1284
+ description="The number of configurations to plot per model in the summary.",
1285
+ )
1286
+ )
1287
+ self._add_config(
1288
+ ConfigField(
1289
+ "num_top_model_configs",
1290
+ flags=["--num-top-model-configs"],
1291
+ field_type=ConfigPrimitive(int),
1292
+ default_value=DEFAULT_NUM_TOP_MODEL_CONFIGS,
1293
+ description="Model Analyzer will compare this many of the top models configs across all models.",
1294
+ )
1295
+ )
1296
+
1297
+ def _add_table_configs(self):
1298
+ """
1299
+ Adds result table related
1300
+ configs
1301
+ """
1302
+ self._add_config(
1303
+ ConfigField(
1304
+ "inference_output_fields",
1305
+ flags=["--inference-output-fields"],
1306
+ field_type=ConfigListString(),
1307
+ default_value=DEFAULT_INFERENCE_OUTPUT_FIELDS,
1308
+ description="Specifies column keys for model inference metrics table",
1309
+ )
1310
+ )
1311
+ self._add_config(
1312
+ ConfigField(
1313
+ "gpu_output_fields",
1314
+ flags=["--gpu-output-fields"],
1315
+ field_type=ConfigListString(),
1316
+ default_value=DEFAULT_GPU_OUTPUT_FIELDS,
1317
+ description="Specifies column keys for model gpu metrics table",
1318
+ )
1319
+ )
1320
+ self._add_config(
1321
+ ConfigField(
1322
+ "server_output_fields",
1323
+ flags=["--server-output-fields"],
1324
+ field_type=ConfigListString(),
1325
+ default_value=DEFAULT_SERVER_OUTPUT_FIELDS,
1326
+ description="Specifies column keys for server-only metrics table",
1327
+ )
1328
+ )
1329
+
1330
+ def _add_shorthand_configs(self):
1331
+ """
1332
+ Adds configs for various shorthands
1333
+ """
1334
+ self._add_config(
1335
+ ConfigField(
1336
+ "latency_budget",
1337
+ flags=["--latency-budget"],
1338
+ field_type=ConfigPrimitive(int),
1339
+ description="Shorthand flag for specifying a maximum latency in ms.",
1340
+ )
1341
+ )
1342
+
1343
+ self._add_config(
1344
+ ConfigField(
1345
+ "min_throughput",
1346
+ flags=["--min-throughput"],
1347
+ field_type=ConfigPrimitive(int),
1348
+ description="Shorthand flag for specifying a minimum throughput.",
1349
+ )
1350
+ )
1351
+
1352
+ def set_config_values(self, args: argparse.Namespace) -> None:
1353
+ """
1354
+ Set the config values. This function sets all the values for the
1355
+ config. CLI arguments have the highest priority, then YAML config
1356
+ values and then default values.
1357
+
1358
+ Parameters
1359
+ ----------
1360
+ args : argparse.Namespace
1361
+ Parsed arguments from the CLI
1362
+
1363
+ Raises
1364
+ ------
1365
+ TritonModelAnalyzerException
1366
+ If the required fields are not specified, it will raise
1367
+ this exception
1368
+ """
1369
+ if args.mode == "online" and "latency_budget" not in args:
1370
+ self._fields["objectives"].set_default_value(DEFAULT_ONLINE_OBJECTIVES)
1371
+
1372
+ super().set_config_values(args)
1373
+
1374
+ # Add plot configs and after config parse. Users should not be
1375
+ # able to edit these plots.
1376
+ self._add_plot_configs()
1377
+ if args.mode == "online":
1378
+ self._fields["plots"].set_value(DEFAULT_ONLINE_PLOTS)
1379
+ elif args.mode == "offline":
1380
+ self._fields["plots"].set_value(DEFAULT_OFFLINE_PLOTS)
1381
+
1382
+ def _add_plot_configs(self):
1383
+ """
1384
+ Add plots to the config
1385
+ """
1386
+ plots_scheme = ConfigObject(
1387
+ schema={
1388
+ "*": ConfigObject(
1389
+ schema={
1390
+ "title": ConfigPrimitive(type_=str),
1391
+ "x_axis": ConfigPrimitive(type_=str),
1392
+ "y_axis": ConfigPrimitive(type_=str),
1393
+ "monotonic": ConfigPrimitive(type_=bool),
1394
+ }
1395
+ )
1396
+ },
1397
+ output_mapper=ConfigPlot.from_object,
1398
+ )
1399
+ self._add_config(
1400
+ ConfigField(
1401
+ "plots",
1402
+ field_type=ConfigUnion(
1403
+ [
1404
+ plots_scheme,
1405
+ ConfigListGeneric(
1406
+ type_=plots_scheme, output_mapper=ConfigPlot.from_list
1407
+ ),
1408
+ ]
1409
+ ),
1410
+ description="Model analyzer uses the information in this section to construct plots of the results.",
1411
+ )
1412
+ )
1413
+
1414
+ def _preprocess_and_verify_arguments(self):
1415
+ """
1416
+ Enforces some rules on the config.
1417
+
1418
+ Raises
1419
+ ------
1420
+ TritonModelAnalyzerException
1421
+ If there is a problem with arguments or config.
1422
+ """
1423
+
1424
+ if self.triton_launch_mode == "remote":
1425
+ if self.client_protocol == "http" and not self.triton_http_endpoint:
1426
+ raise TritonModelAnalyzerException(
1427
+ "client-protocol is 'http'. Must specify triton-http-endpoint "
1428
+ "if connecting to already running server or change protocol using "
1429
+ "--client-protocol."
1430
+ )
1431
+ if self.client_protocol == "grpc" and not self.triton_grpc_endpoint:
1432
+ raise TritonModelAnalyzerException(
1433
+ "client-protocol is 'grpc'. Must specify triton-grpc-endpoint "
1434
+ "if connecting to already running server or change protocol using "
1435
+ "--client-protocol."
1436
+ )
1437
+ elif self.triton_docker_mounts or self.triton_docker_labels:
1438
+ if self.triton_launch_mode == "docker":
1439
+ # Verify format
1440
+ if self.triton_docker_mounts:
1441
+ for volume_str in self.triton_docker_mounts:
1442
+ if volume_str.count(":") != 2:
1443
+ raise TritonModelAnalyzerException(
1444
+ "triton_docker_mounts needs to be a list of strings. Each string "
1445
+ " should be of the format <host path>:<container dest>:<access mode>"
1446
+ )
1447
+ else:
1448
+ logger.warning(
1449
+ f"Triton launch mode is set to {self.triton_launch_mode}. "
1450
+ "Ignoring triton_docker_mounts and triton_docker_labels."
1451
+ )
1452
+
1453
+ if self.triton_launch_mode == "docker":
1454
+ if not self.triton_docker_image or self.triton_docker_image.isspace():
1455
+ raise TritonModelAnalyzerException(
1456
+ "triton_docker_image provided but is empty."
1457
+ )
1458
+
1459
+ if self.triton_launch_mode == "c_api":
1460
+ if self.triton_server_flags:
1461
+ logger.warning(
1462
+ "Triton launch mode is set to C_API. Model Analyzer cannot set "
1463
+ "triton_server_flags."
1464
+ )
1465
+ if self.triton_output_path:
1466
+ logger.warning(
1467
+ "Triton launch mode is set to C_API, triton logs are not supported. "
1468
+ "Triton server error output can be obtained by setting perf_output_path."
1469
+ )
1470
+
1471
+ if self.triton_launch_mode != "docker":
1472
+ if self.triton_docker_args:
1473
+ logger.warning(
1474
+ "Triton launch mode is not set to docker. Model Analyzer cannot set "
1475
+ "triton_docker_args."
1476
+ )
1477
+ # If run config search is disabled and no concurrency or request rate is provided,
1478
+ # set the default value.
1479
+ if self.run_config_search_disable:
1480
+ if len(self.concurrency) == 0 and len(self.request_rate) == 0:
1481
+ self.concurrency = [1]
1482
+
1483
+ if not self.export_path:
1484
+ logger.warning(
1485
+ f"--export-path not specified. Using {self._fields['export_path'].default_value()}"
1486
+ )
1487
+ elif os.path.exists(self.export_path) and not os.path.isdir(self.export_path):
1488
+ raise TritonModelAnalyzerException(
1489
+ f"Export path {self.export_path} is not a directory."
1490
+ )
1491
+ elif not os.path.exists(self.export_path):
1492
+ os.makedirs(self.export_path)
1493
+
1494
+ if self.num_top_model_configs > 0 and not self.constraints:
1495
+ raise TritonModelAnalyzerException(
1496
+ "If setting num_top_model_configs > 0, comparison across models is requested. "
1497
+ "This requires that global constraints be specified in the config to be used as default."
1498
+ )
1499
+
1500
    def _autofill_values(self):
        """
        Fill in the implied or default config values.

        Resolves the effective settings for every profiled model by merging
        model-level values with global defaults (objectives, constraints,
        weighting, run parameters, perf-analyzer / GenAI-Perf / Triton server
        flags, server environment, and docker args), applies the
        latency-budget and min-throughput shorthands as constraints, switches
        the default output fields for request-rate or LLM profiling, and
        writes the resulting per-model dicts back into ``profile_models``.

        Raises
        ------
        TritonModelAnalyzerException
            If ``weighting`` is given as a global parameter, or if a model
            ends up with both ``concurrency`` and ``request_rate`` set.
        """
        # Models fall back to CPU-only profiling when we are launching Triton
        # locally and either no GPUs were configured or CUDA is unavailable.
        cpu_only = False
        if self.triton_launch_mode != "remote" and (
            len(self.gpus) == 0 or not numba.cuda.is_available()
        ):
            cpu_only = True

        # Set global constraints if latency budget is specified
        if self.latency_budget:
            if self.constraints:
                # Merge into the existing global constraints dict.
                constraints = self.constraints
                constraints["perf_latency_p99"] = {"max": self.latency_budget}
                if "perf_latency" in constraints:
                    # In case a tighter perf_latency is provided
                    constraints["perf_latency"] = constraints["perf_latency_p99"]
                self._fields["constraints"].set_value(constraints)
            else:
                self._fields["constraints"].set_value(
                    {"perf_latency_p99": {"max": self.latency_budget}}
                )

        # Set global constraints if minimum throughput is specified
        if self.min_throughput:
            if self.constraints:
                constraints = self.constraints
                constraints["perf_throughput"] = {"min": self.min_throughput}
                self._fields["constraints"].set_value(constraints)
            else:
                self._fields["constraints"].set_value(
                    {"perf_throughput": {"min": self.min_throughput}}
                )

        # Switch default output fields if request rate is being used
        # and the user didn't specify a custom output field
        if self._using_request_rate():
            if not self._fields["inference_output_fields"].is_set_by_user():
                self.inference_output_fields = (
                    DEFAULT_REQUEST_RATE_INFERENCE_OUTPUT_FIELDS
                )

            if not self._fields["gpu_output_fields"].is_set_by_user():
                self.gpu_output_fields = DEFAULT_REQUEST_RATE_GPU_OUTPUT_FIELDS

        # Switch default output fields if user specifies model type of LLM
        # and the user didn't specify a custom output field
        if self.model_type == "LLM":
            if not self._fields["inference_output_fields"].is_set_by_user():
                self.inference_output_fields = DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS

        # Build the fully-resolved settings dict for each profiled model.
        new_profile_models = {}
        for i, model in enumerate(self.profile_models):
            # A model is CPU-only if it asked for it or the environment forces it.
            new_model = {"cpu_only": (model.cpu_only() or cpu_only)}

            # Objectives: model-level value wins over the global default.
            if not model.objectives():
                new_model["objectives"] = self.objectives
            else:
                new_model["objectives"] = model.objectives()

            # Constraints: model-level value wins; otherwise inherit the
            # global constraints when they exist and are non-empty.
            if not model.constraints():
                if (
                    "constraints" in self._fields
                    and self._fields["constraints"].value()
                ):
                    new_model["constraints"] = self.constraints
            else:
                new_model["constraints"] = model.constraints().to_dict()

            # Weighting: only valid as a per-model parameter, never global.
            if not model.weighting():
                if "weighting" in self._fields and self.weighting:
                    raise TritonModelAnalyzerException(
                        "Weighting can not be specified as a global parameter. Please make this a model parameter."
                    )
                else:
                    new_model["weighting"] = DEFAULT_MODEL_WEIGHTING
            else:
                new_model["weighting"] = model.weighting()

            # Shorthands: apply latency-budget / min-throughput to the
            # per-model constraints as well (mirrors the global handling above).
            if self.latency_budget:
                if "constraints" in new_model:
                    new_model["constraints"]["perf_latency_p99"] = {
                        "max": self.latency_budget
                    }
                    if "perf_latency" in new_model["constraints"]:
                        # In case a tighter perf_latency is provided
                        new_model["constraints"]["perf_latency"] = new_model[
                            "constraints"
                        ]["perf_latency_p99"]
                else:
                    new_model["constraints"] = {
                        "perf_latency_p99": {"max": self.latency_budget}
                    }

            if self.min_throughput:
                if "constraints" in new_model:
                    new_model["constraints"]["perf_throughput"] = {
                        "min": self.min_throughput
                    }
                else:
                    new_model["constraints"] = {
                        "perf_throughput": {"min": self.min_throughput}
                    }

            # Run parameters: for optuna search mode, batch_sizes is only
            # honored when explicitly set by the user; otherwise it is left
            # empty so the search can choose.
            if not model.parameters():
                if self.run_config_search_mode != "optuna":
                    new_model["parameters"] = {
                        "batch_sizes": self.batch_sizes,
                        "concurrency": self.concurrency,
                        "request_rate": self.request_rate,
                    }
                else:
                    if self._fields["batch_sizes"].is_set_by_user():
                        new_model["parameters"] = {"batch_sizes": self.batch_sizes}
                    else:
                        new_model["parameters"] = {"batch_sizes": []}

                    new_model["parameters"]["concurrency"] = self.concurrency
                    new_model["parameters"]["request_rate"] = self.request_rate

            else:
                # Merge model-level parameters with global fallbacks key by key.
                new_model["parameters"] = {}
                if "batch_sizes" in model.parameters():
                    new_model["parameters"].update(
                        {"batch_sizes": model.parameters()["batch_sizes"]}
                    )
                else:
                    if self.run_config_search_mode != "optuna":
                        new_model["parameters"].update(
                            {"batch_sizes": self.batch_sizes}
                        )
                    else:
                        new_model["parameters"].update({"batch_sizes": []})

                if "concurrency" in model.parameters():
                    new_model["parameters"].update(
                        {"concurrency": model.parameters()["concurrency"]}
                    )
                elif not "request_rate" in model.parameters():
                    # Neither specified: fall back to the global concurrency.
                    new_model["parameters"].update({"concurrency": self.concurrency})
                else:
                    # request_rate present but concurrency absent: keep
                    # concurrency empty so the modes don't conflict.
                    new_model["parameters"].update({"concurrency": []})

                if "request_rate" in model.parameters():
                    new_model["parameters"].update(
                        {"request_rate": model.parameters()["request_rate"]}
                    )
                else:
                    new_model["parameters"].update({"request_rate": self.request_rate})

                # Concurrency and request rate are mutually exclusive sweeps.
                if (
                    new_model["parameters"]["request_rate"]
                    and new_model["parameters"]["concurrency"]
                ):
                    raise TritonModelAnalyzerException(
                        "Cannot specify both concurrency and request rate as model parameters."
                    )

            # Perf analyzer flags
            if not model.perf_analyzer_flags():
                new_model["perf_analyzer_flags"] = self.perf_analyzer_flags
            else:
                new_model["perf_analyzer_flags"] = model.perf_analyzer_flags()

            # GenAI Perf flags
            if not model.genai_perf_flags():
                new_model["genai_perf_flags"] = self.genai_perf_flags
            else:
                new_model["genai_perf_flags"] = model.genai_perf_flags()

            # triton server flags
            if not model.triton_server_flags():
                new_model["triton_server_flags"] = self.triton_server_flags
            else:
                new_model["triton_server_flags"] = model.triton_server_flags()

            # triton server env
            if not model.triton_server_environment():
                new_model["triton_server_environment"] = self.triton_server_environment
            else:
                new_model["triton_server_environment"] = model.triton_server_environment()

            # triton docker args
            if not model.triton_docker_args():
                new_model["triton_docker_args"] = self.triton_docker_args
            else:
                new_model["triton_docker_args"] = model.triton_docker_args()

            # Transfer model config parameters directly
            if model.model_config_parameters():
                new_model["model_config_parameters"] = model.model_config_parameters()

            new_profile_models[model.model_name()] = new_model

        # deepcopy is necessary, else it gets overwritten when updating profile_models
        self._fields["bls_composing_models"] = deepcopy(
            self._fields["bls_composing_models"]
        )
        self._fields["profile_models"].set_value(new_profile_models)
1709
+ def _using_request_rate(self) -> bool:
1710
+ if self.request_rate or self.request_rate_search_enable:
1711
+ return True
1712
+ elif (
1713
+ self._fields["run_config_search_max_request_rate"].is_set_by_user()
1714
+ or self._fields["run_config_search_min_request_rate"].is_set_by_user()
1715
+ ):
1716
+ return True
1717
+ else:
1718
+ return self._are_models_using_request_rate()
1719
+
1720
+ def _are_models_using_request_rate(self) -> bool:
1721
+ model_using_request_rate = False
1722
+ model_using_concurrency = False
1723
+ for i, model in enumerate(self.profile_models):
1724
+ if model.parameters() and "request_rate" in model.parameters():
1725
+ model_using_request_rate = True
1726
+ else:
1727
+ model_using_concurrency = True
1728
+
1729
+ if model_using_request_rate and model_using_concurrency:
1730
+ raise TritonModelAnalyzerException(
1731
+ "Parameters in all profiled models must use request-rate-range. "
1732
+ "Model Analyzer does not support mixing concurrency-range and request-rate-range."
1733
+ )
1734
+ else:
1735
+ return model_using_request_rate
1736
+
1737
+ def is_request_rate_specified(self, model_parameters: dict) -> bool:
1738
+ """
1739
+ Returns true if either the model or the config specified request rate
1740
+ """
1741
+ return (
1742
+ "request_rate" in model_parameters
1743
+ and model_parameters["request_rate"]
1744
+ or self.request_rate_search_enable
1745
+ or self.get_config()["run_config_search_min_request_rate"].is_set_by_user()
1746
+ or self.get_config()["run_config_search_max_request_rate"].is_set_by_user()
1747
+ )