triton_model_analyzer-1.48.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,306 @@
+ #!/usr/bin/env python3
+
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import logging
+ import os
+ from urllib.parse import urlparse
+
+ from model_analyzer.config.input.config_utils import binary_path_validator
+ from model_analyzer.constants import CONFIG_PARSER_FAILURE, LOGGER_NAME
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
+
+ from .server_config import TritonServerConfig
+ from .server_docker import TritonServerDocker
+ from .server_local import TritonServerLocal
+
+ logger = logging.getLogger(LOGGER_NAME)
+
+
+ class TritonServerFactory:
+     """
+     A factory for creating TritonServer instances
+     """
+
+     @staticmethod
+     def create_server_docker(
+         image,
+         config,
+         gpus,
+         log_path=None,
+         mounts=None,
+         labels=None,
+         shm_size=None,
+         args=None,
+     ):
+         """
+         Parameters
+         ----------
+         image : str
+             The tritonserver docker image to pull and run
+         config : TritonServerConfig
+             the config object containing arguments for this server instance
+         gpus : list of str
+             List of GPU UUIDs to be mounted and used in the container
+             Use ["all"] to include all GPUs
+         log_path: str
+             Absolute path to the triton log file
+         mounts: list of str
+             The volumes to be mounted to the tritonserver container
+         labels: dict
+             name-value pairs of labels used to set metadata for the triton
+             docker container. (Not the same as environment variables)
+         shm_size: str
+             The size of /dev/shm for the triton docker container.
+         args: dict
+             name-value pairs of triton docker args
+         Returns
+         -------
+         TritonServerDocker
+         """
+
+         return TritonServerDocker(
+             image=image,
+             config=config,
+             gpus=gpus,
+             log_path=log_path,
+             mounts=mounts,
+             labels=labels,
+             shm_size=shm_size,
+             args=args,
+         )
+
+     @staticmethod
+     def create_server_local(path, config, gpus, log_path=None):
+         """
+         Parameters
+         ----------
+         path : str
+             The absolute path to the tritonserver executable
+         config : TritonServerConfig
+             the config object containing arguments for this server instance
+         gpus: list of str
+             List of GPU UUIDs to be made visible to Triton
+             Use ["all"] to include all GPUs
+         log_path: str
+             Absolute path to the triton log file
+
+         Returns
+         -------
+         TritonServerLocal
+         """
+
+         return TritonServerLocal(path=path, config=config, gpus=gpus, log_path=log_path)
+
+     @staticmethod
+     def get_server_handle(config, gpus, use_model_repository=False):
+         """
+         Creates and returns a TritonServer
+         with specified arguments
+
+         Parameters
+         ----------
+         config : namespace
+             Arguments parsed from the CLI
+         gpus : list of str
+             Available, supported, visible requested GPU UUIDs
+         use_model_repository : bool
+             Optional flag to use the model directory instead
+             of the output model directory
+         Returns
+         -------
+         TritonServer
+             Handle to the Triton Server
+         """
+
+         if config.triton_launch_mode == "remote":
+             server = TritonServerFactory._get_remote_server_handle(
+                 config, print_warning_message=use_model_repository
+             )
+         elif config.triton_launch_mode == "local":
+             server = TritonServerFactory._get_local_server_handle(
+                 config, gpus, use_model_repository=True
+             )
+         elif config.triton_launch_mode == "docker":
+             server = TritonServerFactory._get_docker_server_handle(
+                 config, gpus, use_model_repository=True
+             )
+         elif config.triton_launch_mode == "c_api":
+             server = TritonServerFactory._get_c_api_server_handle(
+                 config, use_model_repository
+             )
+         else:
+             raise TritonModelAnalyzerException(
+                 f"Unrecognized triton-launch-mode : {config.triton_launch_mode}"
+             )
+
+         return server
+
+     @staticmethod
+     def _get_remote_server_handle(config, print_warning_message=True):
+         triton_config = TritonServerConfig()
+         triton_config.update_config(config.triton_server_flags)
+         triton_config["model-repository"] = "remote-model-repository"
+         logger.info("Using remote Triton Server")
+         server = TritonServerFactory.create_server_local(
+             path=None, config=triton_config, gpus=[], log_path=""
+         )
+         if print_warning_message:
+             logger.warning(
+                 "GPU memory metrics reported in the remote mode are not"
+                 " accurate. Model Analyzer uses Triton explicit model control to"
+                 " load/unload models. Some frameworks do not release the GPU"
+                 " memory even when the memory is not being used. Consider"
+                 ' using the "local" or "docker" mode if you want to accurately'
+                 " monitor the GPU memory usage for different models."
+             )
+
+         return server
+
+     @staticmethod
+     def _get_local_server_handle(config, gpus, use_model_repository):
+         TritonServerFactory._validate_triton_server_path(config)
+
+         triton_config = TritonServerConfig()
+         triton_config.update_config(config.triton_server_flags)
+
+         triton_config["model-repository"] = (
+             config.model_repository
+             if use_model_repository and config.model_repository
+             else config.output_model_repository_path
+         )
+
+         if use_model_repository and config.model_repository:
+             triton_config["strict-model-config"] = "false"
+
+         triton_config["http-port"] = config.triton_http_endpoint.split(":")[-1]
+         triton_config["grpc-port"] = config.triton_grpc_endpoint.split(":")[-1]
+         triton_config["metrics-port"] = urlparse(config.triton_metrics_url).port
+         triton_config["model-control-mode"] = "explicit"
+         triton_config["metrics-interval-ms"] = int(config.monitoring_interval * 1e3)
+         logger.info("Starting a local Triton Server")
+         server = TritonServerFactory.create_server_local(
+             path=config.triton_server_path,
+             config=triton_config,
+             gpus=gpus,
+             log_path=config.triton_output_path,
+         )
+
+         return server
+
+     @staticmethod
+     def _get_docker_server_handle(config, gpus, use_model_repository):
+         triton_config = TritonServerConfig()
+         triton_config.update_config(config.triton_server_flags)
+
+         if use_model_repository:
+             triton_config["model-repository"] = os.path.abspath(config.model_repository)
+         else:
+             triton_config["model-repository"] = os.path.abspath(
+                 config.output_model_repository_path
+             )
+
+         if use_model_repository:
+             triton_config["strict-model-config"] = "false"
+
+         triton_config["http-port"] = config.triton_http_endpoint.split(":")[-1]
+         triton_config["grpc-port"] = config.triton_grpc_endpoint.split(":")[-1]
+         triton_config["metrics-port"] = urlparse(config.triton_metrics_url).port
+         triton_config["model-control-mode"] = "explicit"
+         triton_config["metrics-interval-ms"] = int(config.monitoring_interval * 1e3)
+         logger.info("Starting a Triton Server using docker")
+         server = TritonServerFactory.create_server_docker(
+             image=config.triton_docker_image,
+             config=triton_config,
+             gpus=gpus,
+             log_path=config.triton_output_path,
+             mounts=config.triton_docker_mounts,
+             labels=config.triton_docker_labels,
+             shm_size=config.triton_docker_shm_size,
+             args=config.triton_docker_args,
+         )
+
+         return server
+
+     @staticmethod
+     def _get_c_api_server_handle(config, use_model_repository):
+         TritonServerFactory._validate_triton_install_path(config)
+
+         triton_config = TritonServerConfig()
+
+         if use_model_repository:
+             triton_config["model-repository"] = os.path.abspath(config.model_repository)
+         else:
+             triton_config["model-repository"] = os.path.abspath(
+                 config.output_model_repository_path
+             )
+
+         if use_model_repository:
+             triton_config["strict-model-config"] = "false"
+
+         logger.info("Starting a Triton Server using perf_analyzer's C_API")
+         server = TritonServerFactory.create_server_local(
+             path=None, config=triton_config, gpus=[], log_path=""
+         )
+         logger.warning(
+             "When profiling with perf_analyzer's C_API, some metrics may be "
+             "affected. Triton is not launched with explicit model control "
+             "mode, and as a result, loads all model config variants as they "
+             "are created in the output_model_repository."
+         )
+
+         return server
+
+     @staticmethod
+     def _validate_triton_server_path(config):
+         """
+         Raises an exception if 'triton_server_path' doesn't exist
+
+         Parameters
+         ----------
+         config : namespace
+             Arguments parsed from the CLI
+         """
+         path = config.get_config()["triton_server_path"].value()
+         config_status = binary_path_validator(path)
+         if config_status.status() == CONFIG_PARSER_FAILURE:
+             raise TritonModelAnalyzerException(config_status.message())
+
+     @staticmethod
+     def _validate_triton_install_path(config):
+         """
+         Raises an exception in the following cases:
+         - 'triton_install_path' doesn't exist
+         - 'triton_install_path' exists, but contains no files
+
+         Parameters
+         ----------
+         config : namespace
+             Arguments parsed from the CLI
+         """
+         path = config.get_config()["triton_install_path"].value()
+
+         # Check the file system
+         if not path or not os.path.exists(path) or not os.path.isdir(path):
+             raise TritonModelAnalyzerException(
+                 f"triton_install_path {path} is not specified, does not exist, "
+                 "or is not a directory."
+             )
+
+         # Make sure that files exist in the install directory
+         if len(os.listdir(path)) == 0:
+             raise TritonModelAnalyzerException(
+                 f"triton_install_path {path} should not be empty."
+             )
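
For context: the factory above only assembles a `TritonServerConfig` and dispatches on `config.triton_launch_mode`. A minimal sketch of driving it directly, outside the CLI, could look like the following; the two filesystem paths are placeholders, and everything else is the API shown in this hunk.

```python
# Hedged sketch, not part of the package: exercises the factory API from the
# hunk above. The repository and binary paths are placeholders, not defaults.
from model_analyzer.triton.server.server_config import TritonServerConfig
from model_analyzer.triton.server.server_factory import TritonServerFactory

triton_config = TritonServerConfig()
triton_config["model-repository"] = "/models"  # required by TritonServerLocal
triton_config["model-control-mode"] = "explicit"

server = TritonServerFactory.create_server_local(
    path="/opt/tritonserver/bin/tritonserver",  # absolute path to the binary
    config=triton_config,
    gpus=[],      # nothing made visible; CUDA_VISIBLE_DEVICES will be empty
    log_path="",  # falsy, so logs go to a temporary file (see server_local.py)
)
```
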
@@ -0,0 +1,158 @@
+ #!/usr/bin/env python3
+
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import logging
+ import os
+ import tempfile
+ from io import TextIOWrapper
+ from subprocess import DEVNULL, STDOUT, Popen, TimeoutExpired
+
+ import psutil
+
+ from model_analyzer.constants import LOGGER_NAME, SERVER_OUTPUT_TIMEOUT_SECS
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
+
+ from .server import TritonServer
+
+ logger = logging.getLogger(LOGGER_NAME)
+
+
+ class TritonServerLocal(TritonServer):
+     """
+     Concrete implementation of TritonServer interface that runs
+     tritonserver locally as a subprocess.
+     """
+
+     def __init__(self, path, config, gpus, log_path):
+         """
+         Parameters
+         ----------
+         path : str
+             The absolute path to the tritonserver executable
+         config : TritonServerConfig
+             the config object containing arguments for this server instance
+         gpus: list of str
+             List of GPU UUIDs to be made visible to Triton
+         log_path: str
+             Absolute path to the triton log file
+         """
+
+         self._tritonserver_process = None
+         self._server_config = config
+         self._server_path = path
+         self._gpus = gpus
+         self._log_path = log_path
+         self._log_file = DEVNULL
+         self._is_first_time_starting_server = True
+
+         assert self._server_config[
+             "model-repository"
+         ], "Triton Server requires --model-repository argument to be set."
+
+     def start(self, env=None):
+         """
+         Starts tritonserver locally as a subprocess
+         """
+
+         if self._server_path:
+             # Create command list and run subprocess
+             cmd = [self._server_path]
+             cmd += self._server_config.to_args_list()
+
+             # Set environment, update with user config env
+             triton_env = os.environ.copy()
+
+             if env:
+                 # Filter env variables that use env lookups
+                 for variable, value in env.items():
+                     if value.find("$") == -1:
+                         triton_env[variable] = value
+                     else:
+                         # Collect the ones that need lookups to give to the shell
+                         triton_env[variable] = os.path.expandvars(value)
+
+             # List GPUs to be used by tritonserver
+             triton_env["CUDA_VISIBLE_DEVICES"] = ",".join(
+                 [gpu.device_uuid() for gpu in self._gpus]
+             )
+
+             if self._log_path:
+                 try:
+                     if self._is_first_time_starting_server:
+                         if os.path.exists(self._log_path):
+                             os.remove(self._log_path)
+                     self._log_file = open(self._log_path, "a+")
+                 except OSError as e:
+                     raise TritonModelAnalyzerException(e)
+             else:
+                 self._log_file = tempfile.NamedTemporaryFile()
+
+             self._is_first_time_starting_server = False
+
+             # Construct Popen command
+             try:
+                 self._tritonserver_process = Popen(
+                     cmd,
+                     stdout=self._log_file,
+                     stderr=STDOUT,
+                     start_new_session=True,
+                     universal_newlines=True,
+                     env=triton_env,
+                 )
+
+                 logger.debug("Triton Server started.")
+             except Exception as e:
+                 raise TritonModelAnalyzerException(e)
+
+     def stop(self):
+         """
+         Stops the running tritonserver
+         """
+
+         # Terminate process, capture output
+         if self._tritonserver_process is not None:
+             self._tritonserver_process.terminate()
+             try:
+                 self._tritonserver_process.communicate(
+                     timeout=SERVER_OUTPUT_TIMEOUT_SECS
+                 )
+             except TimeoutExpired:
+                 self._tritonserver_process.kill()
+                 self._tritonserver_process.communicate()
+             self._tritonserver_process = None
+             if self._log_path:
+                 self._log_file.close()
+             logger.debug("Stopped Triton Server.")
+
+     def cpu_stats(self):
+         """
+         Returns the CPU memory usage and CPU available memory in MB
+         """
+
+         if self._tritonserver_process:
+             server_process = psutil.Process(self._tritonserver_process.pid)
+             process_memory_info = server_process.memory_full_info()
+             system_memory_info = psutil.virtual_memory()
+
+             # Divide by 1.0e6 to convert from bytes to MB
+             return (process_memory_info.uss // 1.0e6), (
+                 system_memory_info.available // 1.0e6
+             )
+         else:
+             return 0.0, 0.0
+
+     def log_file(self) -> TextIOWrapper:
+         return self._log_file
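
The lifecycle surface for the instance created in the earlier sketch is `start()`, `cpu_stats()`, and `stop()`: `start()` launches tritonserver with `Popen` (expanding any `$`-containing env values via `os.path.expandvars`), `cpu_stats()` samples memory through psutil, and `stop()` terminates the process, escalating to `kill()` after `SERVER_OUTPUT_TIMEOUT_SECS`. A hedged continuation of that sketch:

```python
# Continues the earlier sketch; env value is illustrative, not a default.
server.start(env={"LD_PRELOAD": "$LD_PRELOAD"})  # "$" values get expandvars()
used_mb, available_mb = server.cpu_stats()  # (process USS, system available) in MB
print(f"tritonserver: {used_mb} MB used, {available_mb} MB available")
server.stop()  # terminate, then kill after SERVER_OUTPUT_TIMEOUT_SECS
```
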
@@ -0,0 +1,52 @@
+ Metadata-Version: 2.4
+ Name: triton-model-analyzer
+ Version: 1.48.0
+ Summary: Triton Model Analyzer is a tool to profile and analyze the runtime performance of one or more models on the Triton Inference Server
+ Author-email: "NVIDIA Inc." <sw-dl-triton@nvidia.com>
+ License-Expression: Apache-2.0
+ Project-URL: Homepage, https://developer.nvidia.com/nvidia-triton-inference-server
+ Project-URL: Repository, https://github.com/triton-inference-server/model_analyzer
+ Keywords: triton,tensorrt,inference,server,service,analyzer,nvidia
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: Intended Audience :: Information Technology
+ Classifier: Topic :: Scientific/Engineering
+ Classifier: Topic :: Scientific/Engineering :: Image Recognition
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: Topic :: Software Development :: Libraries
+ Classifier: Topic :: Utilities
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Environment :: Console
+ Classifier: Natural Language :: English
+ Classifier: Operating System :: POSIX :: Linux
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: cryptography>=3.3.2
+ Requires-Dist: distro>=1.5.0
+ Requires-Dist: docker>=4.3.1
+ Requires-Dist: gevent>=22.08.0
+ Requires-Dist: grpcio<1.68,>=1.63.0
+ Requires-Dist: httplib2>=0.19.0
+ Requires-Dist: importlib_metadata>=7.1.0
+ Requires-Dist: matplotlib>=3.3.4
+ Requires-Dist: numba>=0.51.2
+ Requires-Dist: optuna==3.6.1
+ Requires-Dist: pdfkit>=0.6.1
+ Requires-Dist: prometheus_client>=0.9.0
+ Requires-Dist: protobuf
+ Requires-Dist: psutil>=5.8.0
+ Requires-Dist: pyyaml>=5.3.1
+ Requires-Dist: requests>=2.24.0
+ Requires-Dist: tritonclient[all]>=2.4.0
+ Requires-Dist: urllib3>=2.0.7
+ Provides-Extra: perf-analyzer
+ Requires-Dist: perf-analyzer; extra == "perf-analyzer"
+ Dynamic: license-file
+
+ See the Model Analyzer's [installation documentation](https://github.com/triton-inference-server/model_analyzer/blob/main/docs/install.md#using-pip3) for package details. The [quick start](https://github.com/triton-inference-server/model_analyzer/blob/main/docs/quick_start.md) documentation describes how to get started with profiling and analysis using Triton Model Analyzer.
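
For orientation, a typical first run after installing the wheel follows the linked quick-start; the `profile` subcommand and flags below are taken from those docs and may vary between releases, and the repository paths are placeholders.

```
pip3 install triton-model-analyzer

model-analyzer profile \
    --model-repository /path/to/model_repository \
    --profile-models <model-name> \
    --output-model-repository-path /path/to/output_repository
```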