triton-model-analyzer 1.48.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,267 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import logging
18
+ import os
19
+
20
+ from model_analyzer.config.input.config_utils import (
21
+ file_path_validator,
22
+ parent_path_validator,
23
+ )
24
+ from model_analyzer.constants import LOGGER_NAME
25
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
26
+
27
+ from .config_command import ConfigCommand
28
+ from .config_defaults import (
29
+ DEFAULT_ALWAYS_REPORT_GPU_METRICS,
30
+ DEFAULT_CHECKPOINT_DIRECTORY,
31
+ DEFAULT_EXPORT_PATH,
32
+ DEFAULT_OFFLINE_REPORT_PLOTS,
33
+ DEFAULT_ONLINE_REPORT_PLOTS,
34
+ DEFAULT_REPORT_FORMAT,
35
+ )
36
+ from .config_enum import ConfigEnum
37
+ from .config_field import ConfigField
38
+ from .config_list_generic import ConfigListGeneric
39
+ from .config_list_string import ConfigListString
40
+ from .config_object import ConfigObject
41
+ from .config_primitive import ConfigPrimitive
42
+ from .config_union import ConfigUnion
43
+ from .objects.config_model_report_spec import ConfigModelReportSpec
44
+ from .objects.config_plot import ConfigPlot
45
+
46
+ logger = logging.getLogger(LOGGER_NAME)
47
+
48
+
49
class ConfigCommandReport(ConfigCommand):
    """
    Model Analyzer config object for the 'report' subcommand.

    On construction it registers the fields config_file,
    checkpoint_directory, export_path, plots, report_model_configs,
    output_formats and always_report_gpu_metrics.
    """

    def __init__(self):
        super().__init__()
        self._fill_config()

    def _fill_config(self):
        """
        Builder function makes calls to add config to
        fill the config with options
        """

        self._add_config(
            ConfigField(
                "config_file",
                flags=["-f", "--config-file"],
                field_type=ConfigPrimitive(str),
                description="Path to Config File for subcommand 'report'.",
            )
        )
        self._add_config(
            ConfigField(
                "checkpoint_directory",
                flags=["-s", "--checkpoint-directory"],
                default_value=DEFAULT_CHECKPOINT_DIRECTORY,
                field_type=ConfigPrimitive(str, validator=parent_path_validator),
                description="Full path to directory to which to read and write checkpoints and profile data.",
            )
        )
        self._add_config(
            ConfigField(
                "export_path",
                flags=["-e", "--export-path"],
                default_value=DEFAULT_EXPORT_PATH,
                field_type=ConfigPrimitive(str, validator=file_path_validator),
                description="Full path to directory in which to store the results",
            )
        )

        # Schema of a plot mapping: any plot name maps to an object holding
        # its title, axes and monotonic setting.
        plots_scheme = ConfigObject(
            schema={
                "*": ConfigObject(
                    schema={
                        "title": ConfigPrimitive(type_=str),
                        "x_axis": ConfigPrimitive(type_=str),
                        "y_axis": ConfigPrimitive(type_=str),
                        "monotonic": ConfigPrimitive(type_=bool),
                    }
                )
            },
            output_mapper=ConfigPlot.from_object,
        )
        self._add_config(
            ConfigField(
                "plots",
                field_type=ConfigUnion(
                    [
                        plots_scheme,
                        ConfigListGeneric(
                            type_=plots_scheme, output_mapper=ConfigPlot.from_list
                        ),
                    ]
                ),
                # Replaced in set_config_values() once the mode is known.
                default_value=DEFAULT_ONLINE_REPORT_PLOTS,
                description="Model analyzer uses the information in this section to construct plots of the results.",
            )
        )

        report_model_scheme = ConfigObject(
            required=True,
            schema={
                # Any key is allowed, but the keys must follow the pattern
                # below
                "*": ConfigObject(schema={"plots": plots_scheme})
            },
            output_mapper=ConfigModelReportSpec.model_object_to_config_model_report_spec,
        )
        self._add_config(
            ConfigField(
                "report_model_configs",
                flags=["-n", "--report-model-configs"],
                field_type=ConfigUnion(
                    [
                        report_model_scheme,
                        ConfigListGeneric(
                            ConfigUnion(
                                [
                                    report_model_scheme,
                                    ConfigPrimitive(
                                        str,
                                        output_mapper=ConfigModelReportSpec.model_str_to_config_model_report_spec,
                                    ),
                                ]
                            ),
                            required=True,
                            output_mapper=ConfigModelReportSpec.model_mixed_to_config_model_report_spec,
                        ),
                        ConfigListString(
                            output_mapper=ConfigModelReportSpec.model_list_to_config_model_report_spec
                        ),
                    ],
                    required=True,
                ),
                description=(
                    "Comma delimited list of the names of model configs"
                    " for which to generate detailed reports."
                ),
            )
        )
        self._add_config(
            ConfigField(
                "output_formats",
                flags=["-o", "--output-formats"],
                default_value=DEFAULT_REPORT_FORMAT,
                field_type=ConfigUnion(
                    [
                        ConfigListGeneric(
                            type_=ConfigEnum(choices=["pdf", "csv", "png"])
                        ),
                        ConfigListString(),
                    ]
                ),
                description="Output file format for detailed report.",
            )
        )
        self._add_config(
            ConfigField(
                "always_report_gpu_metrics",
                # The first entry is the original spelling, which mixes "_"
                # and "-". It stays first so existing command lines keep
                # working; the second entry adds the fully hyphenated
                # spelling used by every other flag in this class.
                flags=[
                    "--always_report-gpu-metrics",
                    "--always-report-gpu-metrics",
                ],
                field_type=ConfigPrimitive(bool),
                parser_args={"action": "store_true"},
                default_value=DEFAULT_ALWAYS_REPORT_GPU_METRICS,
                description="Report GPU metrics, even when the model is `cpu_only`.",
            )
        )

    def set_config_values(self, args):
        """
        Set the config values. This function sets all the values for the
        config. CLI arguments have the highest priority, then YAML config
        values and then default values.

        Before delegating to the parent class, the default of the 'plots'
        field is switched to the online or offline report plots according
        to args.mode. Any other mode leaves the default untouched.

        Parameters
        ----------
        args : argparse.Namespace
            Parsed arguments from the CLI

        Raises
        ------
        TritonModelAnalyzerException
            If the required fields are not specified, it will raise
            this exception
        """

        if args.mode == "online":
            self._fields["plots"].set_default_value(DEFAULT_ONLINE_REPORT_PLOTS)
        elif args.mode == "offline":
            self._fields["plots"].set_default_value(DEFAULT_OFFLINE_REPORT_PLOTS)

        super().set_config_values(args)

    def _preprocess_and_verify_arguments(self):
        """
        Enforces some rules on the config.

        Warns when no export path is set, and rejects an export path
        that is not an existing directory.

        Raises
        ------
        TritonModelAnalyzerException
            If there is a problem with arguments or config.
        """

        if not self.export_path:
            logger.warning(
                f"--export-path not specified. Using {self._fields['export_path'].default_value()}"
            )
        elif not os.path.isdir(self.export_path):
            # Reaching this branch already implies export_path is truthy.
            raise TritonModelAnalyzerException(
                f"Export path {self.export_path} is not a directory."
            )

    def _autofill_values(self):
        """
        Fill in the implied or default
        config values.

        Rebuilds 'report_model_configs' as a dict keyed by model config
        name. Each entry carries a 'plots' mapping taken from the model's
        own plots, or from the top-level 'plots' field when the model
        has none.
        """

        new_report_model_configs = {}
        for model in self.report_model_configs:
            # Plots: prefer the model's own, fall back to the global ones.
            plots = model.plots() or self.plots

            new_report_model_configs[model.model_config_name()] = {
                "plots": {
                    plot.name(): {
                        "title": plot.title(),
                        "x_axis": plot.x_axis(),
                        "y_axis": plot.y_axis(),
                        "monotonic": plot.monotonic(),
                    }
                    for plot in plots
                }
            }

        self._fields["report_model_configs"].set_value(new_report_model_configs)
@@ -0,0 +1,236 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import os
18
+
19
+ #
20
+ # Common defaults
21
+ #
22
+
23
# Working directory at import time; both directory defaults below are
# located inside it.
_CWD = os.getcwd()

DEFAULT_CHECKPOINT_DIRECTORY = os.path.join(_CWD, "checkpoints")
# Two separate dict objects on purpose: they are equal but not shared.
DEFAULT_ONLINE_OBJECTIVES = dict(perf_throughput=10)
DEFAULT_OFFLINE_OBJECTIVES = dict(perf_throughput=10)
DEFAULT_MODEL_WEIGHTING = 1

#
# Profile Config defaults
#

# Monitoring, logging and reporting switches
DEFAULT_MONITORING_INTERVAL = 1.0
DEFAULT_DURATION_SECONDS = 3
DEFAULT_COLLECT_CPU_METRICS = False
DEFAULT_LOG_LEVEL = "INFO"
DEFAULT_GPUS = "all"
DEFAULT_ALWAYS_REPORT_GPU_METRICS = False
DEFAULT_SKIP_SUMMARY_REPORTS = False
DEFAULT_SKIP_DETAILED_REPORTS = False
DEFAULT_DCGM_DISABLE = False

# Output model repository
DEFAULT_OUTPUT_MODEL_REPOSITORY = os.path.join(_CWD, "output_model_repository")
DEFAULT_OVERRIDE_OUTPUT_REPOSITORY_FLAG = False

# Client
DEFAULT_BATCH_SIZES = 1
DEFAULT_MAX_RETRIES = 50
DEFAULT_CLIENT_PROTOCOL = "grpc"

# Run config search
DEFAULT_RUN_CONFIG_MIN_CONCURRENCY = 1
DEFAULT_RUN_CONFIG_MAX_CONCURRENCY = 1024
DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE = 16
DEFAULT_RUN_CONFIG_MAX_REQUEST_RATE = 8192
DEFAULT_RUN_CONFIG_MIN_INSTANCE_COUNT = 1
DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT = 5
DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE = 1
DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE = 128
DEFAULT_RUN_CONFIG_MAX_BINARY_SEARCH_STEPS = 5
DEFAULT_RUN_CONFIG_SEARCH_DISABLE = False
DEFAULT_RUN_CONFIG_SEARCH_MODE = "brute"
DEFAULT_RUN_CONFIG_PROFILE_MODELS_CONCURRENTLY_ENABLE = False
DEFAULT_USE_CONCURRENCY_FORMULA = False
DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False
DEFAULT_CONCURRENCY_SWEEP_DISABLE = False

# Optuna
DEFAULT_OPTUNA_MIN_PERCENTAGE_OF_SEARCH_SPACE = 5
DEFAULT_OPTUNA_MAX_PERCENTAGE_OF_SEARCH_SPACE = 10
DEFAULT_OPTUNA_MIN_TRIALS = 20
DEFAULT_OPTUNA_MAX_TRIALS = 200
DEFAULT_OPTUNA_EARLY_EXIT_THRESHOLD = 10

# Triton server
DEFAULT_TRITON_LAUNCH_MODE = "local"
DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:25.11-py3"
DEFAULT_TRITON_HTTP_ENDPOINT = "localhost:8000"
DEFAULT_TRITON_GRPC_ENDPOINT = "localhost:8001"
DEFAULT_TRITON_METRICS_URL = "http://localhost:8002/metrics"
DEFAULT_TRITON_SERVER_PATH = "tritonserver"
DEFAULT_TRITON_INSTALL_PATH = "/opt/tritonserver"

# perf_analyzer
DEFAULT_PERF_ANALYZER_TIMEOUT = 600
DEFAULT_PERF_ANALYZER_CPU_UTIL = 80.0
DEFAULT_PERF_ANALYZER_PATH = "perf_analyzer"
DEFAULT_PERF_OUTPUT_FLAG = False
DEFAULT_PERF_MAX_AUTO_ADJUSTS = 10
DEFAULT_MEASUREMENT_MODE = "count_windows"
DEFAULT_MODEL_TYPE = "generic"
80
+
81
# Each plot set maps a plot name to its title, the metric on each axis and
# its "monotonic" setting.
DEFAULT_ONLINE_PLOTS = dict(
    throughput_v_latency=dict(
        title="Throughput vs. Latency",
        x_axis="perf_latency_p99",
        y_axis="perf_throughput",
        monotonic=True,
    ),
    gpu_mem_v_latency=dict(
        title="GPU Memory vs. Latency",
        x_axis="perf_latency_p99",
        y_axis="gpu_used_memory",
        monotonic=False,
    ),
)

# NOTE(review): the key below is spelled "through_v_batch_size", while the
# offline report plots in this file use "throughput_v_batch_size". The key
# is runtime data, so it is kept unchanged here -- confirm whether the
# difference is intentional.
DEFAULT_OFFLINE_PLOTS = dict(
    through_v_batch_size=dict(
        title="Throughput vs. Batch Size",
        x_axis="batch_size",
        y_axis="perf_throughput",
        monotonic=False,
    ),
)

DEFAULT_CPU_MEM_PLOT = dict(
    cpu_mem_v_latency=dict(
        title="CPU Memory vs. Latency",
        x_axis="perf_latency_p99",
        y_axis="cpu_used_ram",
        monotonic=False,
    ),
)
113
+
114
# Export location (working directory at import time) and CSV file names.
DEFAULT_EXPORT_PATH = os.getcwd()
DEFAULT_FILENAME_MODEL_INFERENCE = "metrics-model-inference.csv"
DEFAULT_FILENAME_MODEL_GPU = "metrics-model-gpu.csv"
DEFAULT_FILENAME_SERVER_ONLY = "metrics-server-only.csv"

# Inference output fields. The LLM and request-rate variants are derived
# from the base list; each derivation builds a new, independent list.
DEFAULT_INFERENCE_OUTPUT_FIELDS = [
    "model_name",
    "batch_size",
    "concurrency",
    "model_config_path",
    "instance_group",
    "max_batch_size",
    "satisfies_constraints",
    "perf_throughput",
    "perf_latency_p99",
]
# Base fields followed by the three token-level metrics.
DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS = [
    *DEFAULT_INFERENCE_OUTPUT_FIELDS,
    "inter_token_latency_p99",
    "time_to_first_token_p99",
    "output_token_throughput",
]
# Base fields with "concurrency" swapped for "request_rate".
DEFAULT_REQUEST_RATE_INFERENCE_OUTPUT_FIELDS = [
    "request_rate" if field == "concurrency" else field
    for field in DEFAULT_INFERENCE_OUTPUT_FIELDS
]

# GPU output fields, plus the same "request_rate" substitution.
DEFAULT_GPU_OUTPUT_FIELDS = [
    "model_name",
    "gpu_uuid",
    "batch_size",
    "concurrency",
    "model_config_path",
    "instance_group",
    "satisfies_constraints",
    "gpu_used_memory",
    "gpu_utilization",
    "gpu_power_usage",
]
DEFAULT_REQUEST_RATE_GPU_OUTPUT_FIELDS = [
    "request_rate" if field == "concurrency" else field
    for field in DEFAULT_GPU_OUTPUT_FIELDS
]

DEFAULT_SERVER_OUTPUT_FIELDS = [
    "model_name",
    "gpu_uuid",
    "gpu_used_memory",
    "gpu_utilization",
    "gpu_power_usage",
]

DEFAULT_NUM_CONFIGS_PER_MODEL = 3
DEFAULT_NUM_TOP_MODEL_CONFIGS = 0
189
+
190
+ #
191
+ # Report Config defaults
192
+ #
193
+
194
def _report_plot(title, x_axis, y_axis):
    """Return a plot description with "monotonic" set to False."""
    return {
        "title": title,
        "x_axis": x_axis,
        "y_axis": y_axis,
        "monotonic": False,
    }


DEFAULT_REPORT_FORMAT = "pdf"

# Every online report plot uses p99 latency on the x axis.
DEFAULT_ONLINE_REPORT_PLOTS = {
    "gpu_mem_v_latency": _report_plot(
        "GPU Memory vs. Latency", "perf_latency_p99", "gpu_used_memory"
    ),
    "gpu_util_v_latency": _report_plot(
        "GPU Utilization vs. Latency", "perf_latency_p99", "gpu_utilization"
    ),
    "cpu_mem_v_latency": _report_plot(
        "RAM Usage vs. Latency", "perf_latency_p99", "cpu_used_ram"
    ),
    "gpu_power_v_latency": _report_plot(
        "GPU Power vs. Latency", "perf_latency_p99", "gpu_power_usage"
    ),
}

# Every offline report plot uses batch size on the x axis.
DEFAULT_OFFLINE_REPORT_PLOTS = {
    "throughput_v_batch_size": _report_plot(
        "Throughput vs. Batch Size", "batch_size", "perf_throughput"
    ),
    "latency_v_batch_size": _report_plot(
        "p99 Latency vs. Batch Size", "batch_size", "perf_latency_p99"
    ),
}
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from model_analyzer.constants import CONFIG_PARSER_FAILURE
18
+
19
+ from .config_status import ConfigStatus
20
+ from .config_value import ConfigValue
21
+
22
+
23
class ConfigEnum(ConfigValue):
    """
    Config value that only accepts one of a fixed set of choices.
    """

    def __init__(
        self,
        choices,
        preprocess=None,
        required=False,
        validator=None,
        output_mapper=None,
        name=None,
    ):
        """
        Create a new enum config field.

        Parameters
        ----------
        choices : container
            The values this field accepts; membership is tested
            with `in`.
        preprocess : callable
            Function to be called before setting new values.
        required : bool
            Whether a given config is required or not.
        validator : callable or None
            A validator for the final value of the field.
        output_mapper : callable
            This callable unifies the output value of this field.
        name : str
            Fully qualified name for this field.
        """

        # NOTE(review): the field records itself as its own type;
        # presumably the base class reads `_type` -- confirm against
        # ConfigValue.
        self._type = self
        self._choices = choices
        super().__init__(preprocess, required, validator, output_mapper, name)

    def set_value(self, value):
        """
        Set the value of this field if it is an allowed choice.

        Parameters
        ----------
        value : object
            The candidate value.

        Returns
        -------
        ConfigStatus
            A CONFIG_PARSER_FAILURE status when the value is not one
            of the choices; otherwise whatever the parent set_value
            returns.
        """

        choices = self._choices

        # Accepted values go straight to the parent implementation.
        if value in choices:
            return super().set_value(value)

        return ConfigStatus(
            CONFIG_PARSER_FAILURE,
            f'Value "{value}" for field "{self.name()}" is not acceptable.'
            f' Value should be one of the following values: "{choices}".',
            self,
        )

    def cli_type(self):
        """
        Get the type of this field for CLI.

        Returns
        -------
        type
            Always str.
        """

        return str