triton-model-analyzer 1.48.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. model_analyzer/__init__.py +15 -0
  2. model_analyzer/analyzer.py +448 -0
  3. model_analyzer/cli/__init__.py +15 -0
  4. model_analyzer/cli/cli.py +193 -0
  5. model_analyzer/config/__init__.py +15 -0
  6. model_analyzer/config/generate/__init__.py +15 -0
  7. model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
  8. model_analyzer/config/generate/base_model_config_generator.py +352 -0
  9. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
  10. model_analyzer/config/generate/brute_run_config_generator.py +154 -0
  11. model_analyzer/config/generate/concurrency_sweeper.py +75 -0
  12. model_analyzer/config/generate/config_generator_interface.py +52 -0
  13. model_analyzer/config/generate/coordinate.py +143 -0
  14. model_analyzer/config/generate/coordinate_data.py +86 -0
  15. model_analyzer/config/generate/generator_utils.py +116 -0
  16. model_analyzer/config/generate/manual_model_config_generator.py +187 -0
  17. model_analyzer/config/generate/model_config_generator_factory.py +92 -0
  18. model_analyzer/config/generate/model_profile_spec.py +74 -0
  19. model_analyzer/config/generate/model_run_config_generator.py +154 -0
  20. model_analyzer/config/generate/model_variant_name_manager.py +150 -0
  21. model_analyzer/config/generate/neighborhood.py +536 -0
  22. model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
  23. model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
  24. model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
  25. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
  26. model_analyzer/config/generate/quick_run_config_generator.py +753 -0
  27. model_analyzer/config/generate/run_config_generator_factory.py +329 -0
  28. model_analyzer/config/generate/search_config.py +112 -0
  29. model_analyzer/config/generate/search_dimension.py +73 -0
  30. model_analyzer/config/generate/search_dimensions.py +85 -0
  31. model_analyzer/config/generate/search_parameter.py +49 -0
  32. model_analyzer/config/generate/search_parameters.py +388 -0
  33. model_analyzer/config/input/__init__.py +15 -0
  34. model_analyzer/config/input/config_command.py +483 -0
  35. model_analyzer/config/input/config_command_profile.py +1747 -0
  36. model_analyzer/config/input/config_command_report.py +267 -0
  37. model_analyzer/config/input/config_defaults.py +236 -0
  38. model_analyzer/config/input/config_enum.py +83 -0
  39. model_analyzer/config/input/config_field.py +216 -0
  40. model_analyzer/config/input/config_list_generic.py +112 -0
  41. model_analyzer/config/input/config_list_numeric.py +151 -0
  42. model_analyzer/config/input/config_list_string.py +111 -0
  43. model_analyzer/config/input/config_none.py +71 -0
  44. model_analyzer/config/input/config_object.py +129 -0
  45. model_analyzer/config/input/config_primitive.py +81 -0
  46. model_analyzer/config/input/config_status.py +75 -0
  47. model_analyzer/config/input/config_sweep.py +83 -0
  48. model_analyzer/config/input/config_union.py +113 -0
  49. model_analyzer/config/input/config_utils.py +128 -0
  50. model_analyzer/config/input/config_value.py +243 -0
  51. model_analyzer/config/input/objects/__init__.py +15 -0
  52. model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
  53. model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
  54. model_analyzer/config/input/objects/config_plot.py +198 -0
  55. model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
  56. model_analyzer/config/input/yaml_config_validator.py +82 -0
  57. model_analyzer/config/run/__init__.py +15 -0
  58. model_analyzer/config/run/model_run_config.py +313 -0
  59. model_analyzer/config/run/run_config.py +168 -0
  60. model_analyzer/constants.py +76 -0
  61. model_analyzer/device/__init__.py +15 -0
  62. model_analyzer/device/device.py +24 -0
  63. model_analyzer/device/gpu_device.py +87 -0
  64. model_analyzer/device/gpu_device_factory.py +248 -0
  65. model_analyzer/entrypoint.py +307 -0
  66. model_analyzer/log_formatter.py +65 -0
  67. model_analyzer/model_analyzer_exceptions.py +24 -0
  68. model_analyzer/model_manager.py +255 -0
  69. model_analyzer/monitor/__init__.py +15 -0
  70. model_analyzer/monitor/cpu_monitor.py +69 -0
  71. model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
  72. model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
  73. model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
  74. model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
  75. model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
  76. model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
  77. model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
  78. model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
  79. model_analyzer/monitor/dcgm/__init__.py +15 -0
  80. model_analyzer/monitor/dcgm/common/__init__.py +13 -0
  81. model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
  82. model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
  83. model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
  84. model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
  85. model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
  86. model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
  87. model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
  88. model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
  89. model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
  90. model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
  91. model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
  92. model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
  93. model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
  94. model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
  95. model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
  96. model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
  97. model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
  98. model_analyzer/monitor/dcgm/pydcgm.py +47 -0
  99. model_analyzer/monitor/monitor.py +143 -0
  100. model_analyzer/monitor/remote_monitor.py +137 -0
  101. model_analyzer/output/__init__.py +15 -0
  102. model_analyzer/output/file_writer.py +63 -0
  103. model_analyzer/output/output_writer.py +42 -0
  104. model_analyzer/perf_analyzer/__init__.py +15 -0
  105. model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
  106. model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
  107. model_analyzer/perf_analyzer/perf_config.py +479 -0
  108. model_analyzer/plots/__init__.py +15 -0
  109. model_analyzer/plots/detailed_plot.py +266 -0
  110. model_analyzer/plots/plot_manager.py +224 -0
  111. model_analyzer/plots/simple_plot.py +213 -0
  112. model_analyzer/record/__init__.py +15 -0
  113. model_analyzer/record/gpu_record.py +68 -0
  114. model_analyzer/record/metrics_manager.py +887 -0
  115. model_analyzer/record/record.py +280 -0
  116. model_analyzer/record/record_aggregator.py +256 -0
  117. model_analyzer/record/types/__init__.py +15 -0
  118. model_analyzer/record/types/cpu_available_ram.py +93 -0
  119. model_analyzer/record/types/cpu_used_ram.py +93 -0
  120. model_analyzer/record/types/gpu_free_memory.py +96 -0
  121. model_analyzer/record/types/gpu_power_usage.py +107 -0
  122. model_analyzer/record/types/gpu_total_memory.py +96 -0
  123. model_analyzer/record/types/gpu_used_memory.py +96 -0
  124. model_analyzer/record/types/gpu_utilization.py +108 -0
  125. model_analyzer/record/types/inter_token_latency_avg.py +60 -0
  126. model_analyzer/record/types/inter_token_latency_base.py +74 -0
  127. model_analyzer/record/types/inter_token_latency_max.py +60 -0
  128. model_analyzer/record/types/inter_token_latency_min.py +60 -0
  129. model_analyzer/record/types/inter_token_latency_p25.py +60 -0
  130. model_analyzer/record/types/inter_token_latency_p50.py +60 -0
  131. model_analyzer/record/types/inter_token_latency_p75.py +60 -0
  132. model_analyzer/record/types/inter_token_latency_p90.py +60 -0
  133. model_analyzer/record/types/inter_token_latency_p95.py +60 -0
  134. model_analyzer/record/types/inter_token_latency_p99.py +60 -0
  135. model_analyzer/record/types/output_token_throughput.py +105 -0
  136. model_analyzer/record/types/perf_client_response_wait.py +97 -0
  137. model_analyzer/record/types/perf_client_send_recv.py +97 -0
  138. model_analyzer/record/types/perf_latency.py +111 -0
  139. model_analyzer/record/types/perf_latency_avg.py +60 -0
  140. model_analyzer/record/types/perf_latency_base.py +74 -0
  141. model_analyzer/record/types/perf_latency_p90.py +60 -0
  142. model_analyzer/record/types/perf_latency_p95.py +60 -0
  143. model_analyzer/record/types/perf_latency_p99.py +60 -0
  144. model_analyzer/record/types/perf_server_compute_infer.py +97 -0
  145. model_analyzer/record/types/perf_server_compute_input.py +97 -0
  146. model_analyzer/record/types/perf_server_compute_output.py +97 -0
  147. model_analyzer/record/types/perf_server_queue.py +97 -0
  148. model_analyzer/record/types/perf_throughput.py +105 -0
  149. model_analyzer/record/types/time_to_first_token_avg.py +60 -0
  150. model_analyzer/record/types/time_to_first_token_base.py +74 -0
  151. model_analyzer/record/types/time_to_first_token_max.py +60 -0
  152. model_analyzer/record/types/time_to_first_token_min.py +60 -0
  153. model_analyzer/record/types/time_to_first_token_p25.py +60 -0
  154. model_analyzer/record/types/time_to_first_token_p50.py +60 -0
  155. model_analyzer/record/types/time_to_first_token_p75.py +60 -0
  156. model_analyzer/record/types/time_to_first_token_p90.py +60 -0
  157. model_analyzer/record/types/time_to_first_token_p95.py +60 -0
  158. model_analyzer/record/types/time_to_first_token_p99.py +60 -0
  159. model_analyzer/reports/__init__.py +15 -0
  160. model_analyzer/reports/html_report.py +195 -0
  161. model_analyzer/reports/pdf_report.py +50 -0
  162. model_analyzer/reports/report.py +86 -0
  163. model_analyzer/reports/report_factory.py +62 -0
  164. model_analyzer/reports/report_manager.py +1376 -0
  165. model_analyzer/reports/report_utils.py +42 -0
  166. model_analyzer/result/__init__.py +15 -0
  167. model_analyzer/result/constraint_manager.py +150 -0
  168. model_analyzer/result/model_config_measurement.py +354 -0
  169. model_analyzer/result/model_constraints.py +105 -0
  170. model_analyzer/result/parameter_search.py +246 -0
  171. model_analyzer/result/result_manager.py +430 -0
  172. model_analyzer/result/result_statistics.py +159 -0
  173. model_analyzer/result/result_table.py +217 -0
  174. model_analyzer/result/result_table_manager.py +646 -0
  175. model_analyzer/result/result_utils.py +42 -0
  176. model_analyzer/result/results.py +277 -0
  177. model_analyzer/result/run_config_measurement.py +658 -0
  178. model_analyzer/result/run_config_result.py +210 -0
  179. model_analyzer/result/run_config_result_comparator.py +110 -0
  180. model_analyzer/result/sorted_results.py +151 -0
  181. model_analyzer/state/__init__.py +15 -0
  182. model_analyzer/state/analyzer_state.py +76 -0
  183. model_analyzer/state/analyzer_state_manager.py +215 -0
  184. model_analyzer/triton/__init__.py +15 -0
  185. model_analyzer/triton/client/__init__.py +15 -0
  186. model_analyzer/triton/client/client.py +234 -0
  187. model_analyzer/triton/client/client_factory.py +57 -0
  188. model_analyzer/triton/client/grpc_client.py +104 -0
  189. model_analyzer/triton/client/http_client.py +107 -0
  190. model_analyzer/triton/model/__init__.py +15 -0
  191. model_analyzer/triton/model/model_config.py +556 -0
  192. model_analyzer/triton/model/model_config_variant.py +29 -0
  193. model_analyzer/triton/server/__init__.py +15 -0
  194. model_analyzer/triton/server/server.py +76 -0
  195. model_analyzer/triton/server/server_config.py +269 -0
  196. model_analyzer/triton/server/server_docker.py +229 -0
  197. model_analyzer/triton/server/server_factory.py +306 -0
  198. model_analyzer/triton/server/server_local.py +158 -0
  199. triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
  200. triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
  201. triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
  202. triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
  203. triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
  204. triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,248 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import logging
18
+
19
+ import numba.cuda
20
+
21
+ import model_analyzer.monitor.dcgm.dcgm_agent as dcgm_agent
22
+ import model_analyzer.monitor.dcgm.dcgm_structs as structs
23
+ from model_analyzer.constants import LOGGER_NAME
24
+ from model_analyzer.device.gpu_device import GPUDevice
25
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
26
+
27
+ logger = logging.getLogger(LOGGER_NAME)
28
+
29
+
30
class GPUDeviceFactory:
    """
    Factory class for creating GPUDevices
    """

    def __init__(self, dcgmPath=None):
        """
        Parameters
        ----------
        dcgmPath : str
            Optional absolute path to the dcgm shared library, forwarded
            to init_all_devices(). Defaults to None (previously the path
            could not be supplied at construction time even though
            init_all_devices accepts it).
        """

        self._devices = []
        # Lookup tables for O(1) resolution of a device by PCI bus id or UUID.
        self._devices_by_bus_id = {}
        self._devices_by_uuid = {}
        self.init_all_devices(dcgmPath)

    def init_all_devices(self, dcgmPath=None):
        """
        Create GPUDevice objects for all DCGM visible
        devices.

        Parameters
        ----------
        dcgmPath : str
            Absolute path to dcgm shared library
        """

        if numba.cuda.is_available():
            logger.info("Initializing GPUDevice handles")
            structs._dcgmInit(dcgmPath)
            dcgm_agent.dcgmInit()

            # Start DCGM in the embedded mode to use the shared library
            dcgm_handle = dcgm_agent.dcgmStartEmbedded(
                structs.DCGM_OPERATION_MODE_MANUAL
            )

            # Create a GPU device for every supported DCGM device
            dcgm_device_ids = dcgm_agent.dcgmGetAllSupportedDevices(dcgm_handle)

            for device_id in dcgm_device_ids:
                # Fixed typo: was 'device_atrributes'
                device_attributes = dcgm_agent.dcgmGetDeviceAttributes(
                    dcgm_handle, device_id
                ).identifiers
                pci_bus_id = device_attributes.pciBusId
                device_uuid = device_attributes.uuid
                device_name = device_attributes.deviceName

                gpu_device = GPUDevice(device_name, device_id, pci_bus_id, device_uuid)

                self._devices.append(gpu_device)
                self._devices_by_bus_id[pci_bus_id] = gpu_device
                self._devices_by_uuid[device_uuid] = gpu_device

            dcgm_agent.dcgmShutdown()

    def get_device_by_bus_id(self, bus_id, dcgmPath=None):
        """
        Get a GPU device by using its bus ID.

        Parameters
        ----------
        bus_id : bytes
            Bus id corresponding to the GPU. The bus id should be created by
            converting the colon separated hex notation into a bytes type
            using ascii encoding. The bus id before conversion to bytes
            should look like "00:65:00".

        Returns
        -------
        Device
            The device associated with this bus id.

        Raises
        ------
        TritonModelAnalyzerException
            If the bus id is not known to DCGM.
        """

        if bus_id in self._devices_by_bus_id:
            return self._devices_by_bus_id[bus_id]
        else:
            raise TritonModelAnalyzerException(
                f"GPU with {bus_id} bus id is either not supported by DCGM or not present."
            )

    def get_device_by_cuda_index(self, index):
        """
        Get a GPU device using the CUDA index. This includes the index
        provided by CUDA visible devices.

        Parameters
        ----------
        index : int
            index of the device in the list of visible CUDA devices.

        Returns
        -------
        Device
            The device associated with the index provided.

        Raises
        ------
        IndexError
            If the index is out of bound.
        """

        devices = numba.cuda.list_devices()
        # Reject negative indices explicitly: Python's negative indexing
        # would otherwise silently return a device from the end of the list.
        if index < 0 or index > len(devices) - 1:
            raise IndexError

        cuda_device = devices[index]
        device_identity = cuda_device.get_device_identity()
        pci_domain_id = device_identity["pci_domain_id"]
        pci_device_id = device_identity["pci_device_id"]
        pci_bus_id = device_identity["pci_bus_id"]
        # PCI "domain:bus:device.function" form, zero-padded hex.
        device_bus_id = f"{pci_domain_id:08X}:{pci_bus_id:02X}:{pci_device_id:02X}.0"

        return self.get_device_by_bus_id(device_bus_id)

    def get_device_by_uuid(self, uuid, dcgmPath=None):
        """
        Get a GPU device using the GPU uuid.

        Parameters
        ----------
        uuid : str
            index of the device in the list of visible CUDA devices.

        Returns
        -------
        Device
            The device associated with the uuid.

        Raises
        ------
        TritonModelAnalyzerException
            If the uuid does not exist this exception will be raised.
        """

        if uuid in self._devices_by_uuid:
            return self._devices_by_uuid[uuid]
        else:
            raise TritonModelAnalyzerException(f"GPU UUID {uuid} was not found.")

    def verify_requested_gpus(self, requested_gpus):
        """
        Creates a list of GPU UUIDs corresponding to the GPUs visible to
        numba.cuda among the requested gpus

        Parameters
        ----------
        requested_gpus : list of str or list of ints
            Can either be GPU UUIDs or GPU device ids

        Returns
        -------
        List of GPUDevices
            list of GPUDevices corresponding to visible GPUs among requested

        Raises
        ------
        TritonModelAnalyzerException
        """

        cuda_visible_gpus = self.get_cuda_visible_gpus()

        # Special single-element forms: "all" selects every visible GPU,
        # "[]" selects none.
        if len(requested_gpus) == 1:
            if requested_gpus[0] == "all":
                self._log_gpus_used(cuda_visible_gpus)
                return cuda_visible_gpus
            elif requested_gpus[0] == "[]":
                logger.info("No GPUs requested")
                return []

        try:
            # Check if each string in the list can be parsed as an int
            requested_cuda_indices = list(map(int, requested_gpus))
            requested_gpus = []

            for idx in requested_cuda_indices:
                try:
                    requested_gpus.append(self.get_device_by_cuda_index(idx))
                except TritonModelAnalyzerException:
                    raise TritonModelAnalyzerException(
                        f"Requested GPU with device id : {idx}. This GPU is not supported by DCGM."
                    )
        except ValueError:
            # requested_gpus are assumed to be UUIDs
            requested_gpus = [self.get_device_by_uuid(uuid) for uuid in requested_gpus]

        # Return the intersection of CUDA visible UUIDs and requested/supported UUIDs.
        available_gpus = list(set(cuda_visible_gpus) & set(requested_gpus))
        self._log_gpus_used(available_gpus)

        return available_gpus

    def get_cuda_visible_gpus(self):
        """
        Returns
        -------
        list of GPUDevice
            UUIDs of the DCGM supported devices visible to CUDA
        """

        cuda_visible_gpus = []
        if numba.cuda.is_available():
            for cuda_device in numba.cuda.list_devices():
                try:
                    cuda_visible_gpus.append(
                        self.get_device_by_cuda_index(cuda_device.id)
                    )
                except TritonModelAnalyzerException:
                    # Device not supported by DCGM, log warning
                    logger.warning(
                        f"Device '{str(cuda_device.name, encoding='ascii')}' with "
                        f"cuda device id {cuda_device.id} is not supported by DCGM."
                    )
        return cuda_visible_gpus

    def _log_gpus_used(self, gpus):
        """
        Log the info for the GPUDevices in use
        """

        for gpu in gpus:
            logger.info(
                f"Using GPU {gpu.device_id()} {gpu.device_name()} with UUID {gpu.device_uuid()}"
            )
@@ -0,0 +1,307 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import logging
18
+ import os
19
+ import shutil
20
+ import sys
21
+ from pprint import pformat
22
+
23
+ from model_analyzer.constants import LOGGER_NAME
24
+ from model_analyzer.device.gpu_device_factory import GPUDeviceFactory
25
+
26
+ from .analyzer import Analyzer
27
+ from .cli.cli import CLI
28
+ from .config.input.config_command_profile import ConfigCommandProfile
29
+ from .config.input.config_command_report import ConfigCommandReport
30
+ from .log_formatter import setup_logging
31
+ from .model_analyzer_exceptions import TritonModelAnalyzerException
32
+ from .state.analyzer_state_manager import AnalyzerStateManager
33
+ from .triton.client.client_factory import TritonClientFactory
34
+ from .triton.server.server_factory import TritonServerFactory
35
+
36
+ logger = logging.getLogger(LOGGER_NAME)
37
+
38
+
39
def get_client_handle(config):
    """
    Create and return a TritonClient built from the parsed CLI options.

    Parameters
    ----------
    config : namespace
        Arguments parsed from the CLI

    Raises
    ------
    TritonModelAnalyzerException
        If config.client_protocol is neither "http" nor "grpc".
    """

    protocol = config.client_protocol
    if protocol == "http":
        return TritonClientFactory.create_http_client(
            server_url=config.triton_http_endpoint,
            ssl_options=get_http_ssl_options(config),
        )
    if protocol == "grpc":
        return TritonClientFactory.create_grpc_client(
            server_url=config.triton_grpc_endpoint,
            ssl_options=get_grpc_ssl_options(config),
        )
    raise TritonModelAnalyzerException(
        f"Unrecognized client-protocol : {config.client_protocol}"
    )
66
+
67
+
68
def get_http_ssl_options(config):
    """
    Build the HTTP SSL options dictionary from the perf_analyzer flags.

    Parameters
    ----------
    config : namespace
        Arguments parsed from the CLI

    Returns
    -------
    dict
        The subset of config.perf_analyzer_flags restricted to the
        HTTP SSL-related keys that were actually provided.
    """

    flags = config.perf_analyzer_flags
    wanted_keys = (
        "ssl-https-verify-peer",
        "ssl-https-verify-host",
        "ssl-https-ca-certificates-file",
        "ssl-https-client-certificate-file",
        "ssl-https-client-certificate-type",
        "ssl-https-private-key-file",
        "ssl-https-private-key-type",
    )
    return {key: flags[key] for key in wanted_keys if key in flags}
93
+
94
+
95
def get_grpc_ssl_options(config):
    """
    Build the gRPC SSL options dictionary from the perf_analyzer flags.

    Parameters
    ----------
    config : namespace
        Arguments parsed from the CLI

    Returns
    -------
    dict
        The subset of config.perf_analyzer_flags restricted to the
        gRPC SSL-related keys that were actually provided.
    """

    flags = config.perf_analyzer_flags
    wanted_keys = (
        "ssl-grpc-use-ssl",
        "ssl-grpc-root-certifications-file",
        "ssl-grpc-private-key-file",
        "ssl-grpc-certificate-chain-file",
    )
    return {key: flags[key] for key in wanted_keys if key in flags}
117
+
118
+
119
def get_triton_handles(config, gpus):
    """
    Create the Triton client/server pair for this run.

    The client is created first and used to verify that no other server
    already occupies the target port before the server handle is created.

    Parameters
    ----------
    config : namespace
        The arguments passed into the CLI
    gpus : list of str
        Available, supported, visible requested GPU UUIDs

    Returns
    -------
    TritonClient, TritonServer
        Handles for triton client/server pair.
    """

    triton_client = get_client_handle(config)
    fail_if_server_already_running(triton_client, config)
    triton_server = TritonServerFactory.get_server_handle(config, gpus)
    return triton_client, triton_server
141
+
142
+
143
def get_cli_and_config_options():
    """
    Parse CLI / YAML config file options into Namespace and Config
    objects for the correct subcommand.

    Returns
    -------
    args : Namespace
        Object that contains the parsed CLI commands
        Used for the global options
    config : CommandConfig
        The config corresponding to the command being run,
        already filled in with values from CLI or YAML.
    """

    try:
        profile_config = ConfigCommandProfile()
        report_config = ConfigCommandReport()

        # (subcommand, help text, backing config) — "analyze" shares the
        # profile config since it is only an alias.
        subcommands = (
            (
                "profile",
                "Run model inference profiling based on specified CLI or config options.",
                profile_config,
            ),
            (
                "analyze",
                "DEPRECATED: Aliased to profile - please use profile subcommand.",
                profile_config,
            ),
            (
                "report",
                "Generate detailed reports for a single config",
                report_config,
            ),
        )

        cli = CLI()
        for cmd, help_text, cmd_config in subcommands:
            cli.add_subcommand(cmd=cmd, help=help_text, config=cmd_config)
        return cli.parse()

    except TritonModelAnalyzerException as e:
        logger.error(f"Model Analyzer encountered an error: {e}")
        sys.exit(1)
185
+
186
+
187
def create_output_model_repository(config):
    """
    Create the output model repository directory.

    Parameters
    ----------
    config : ConfigCommandProfile
        The config containing the output_model_repository_path

    Raises
    ------
    TritonModelAnalyzerException
        If the directory already exists and overriding was not enabled.
    """

    repo_path = config.output_model_repository_path
    try:
        os.mkdir(repo_path)
        return
    except FileExistsError:
        pass

    if not config.override_output_model_repository:
        raise TritonModelAnalyzerException(
            f'Path "{config.output_model_repository_path}" already exists. '
            'Please set or modify "--output-model-repository-path" flag or remove this directory.'
            " You can also allow overriding of the output directory using"
            ' the "--override-output-model-repository" flag.'
        )

    # Overriding allowed: wipe the stale repository and recreate it empty.
    shutil.rmtree(repo_path)
    logger.warning(
        "Overriding the output model repo path "
        f'"{config.output_model_repository_path}"'
    )
    os.mkdir(repo_path)
214
+
215
+
216
def fail_if_server_already_running(client, config):
    """
    Raise if a Triton server is already listening on the desired port.

    In 'remote' and 'c_api' launch modes an externally managed server is
    expected, so the check is skipped entirely. Otherwise, any successful
    response to is_server_ready() (regardless of its value) means the
    port is occupied and an exception is raised.
    """
    if config.triton_launch_mode in ("remote", "c_api"):
        return

    try:
        client.is_server_ready()
    except Exception:
        # No response at all: nothing is listening, safe to launch our own.
        return

    raise TritonModelAnalyzerException(
        f"Another application (likely a Triton Server) is already using the desired port. In '{config.triton_launch_mode}' mode, Model Analyzer will launch a Triton Server and requires that the HTTP/GRPC port is not occupied by another application. Please kill the other application or specify a different port."
    )
235
+
236
+
237
def main():
    """
    Main entrypoint of model_analyzer
    """

    # Minimal format for messages emitted before the real logger is
    # configured (we don't know quiet/verbose yet).
    logging.basicConfig(format="[Model Analyzer] %(message)s")

    args, config = get_cli_and_config_options()
    setup_logging(quiet=args.quiet, verbose=args.verbose)
    logger.debug("\n%s", pformat(config.get_all_config()))

    # Dispatch to the selected subcommand; the server (if any) is always
    # stopped on the way out.
    server = None
    try:
        if args.subcommand in ("profile", "analyze"):
            if _is_a_model_repository_required(args, config):
                raise TritonModelAnalyzerException(
                    "No model repository specified. Please specify it using the YAML config file or using the --model-repository flag in CLI."
                )

            # GPU discovery is skipped entirely when DCGM is disabled.
            if config.dcgm_disable:
                gpus = []
            else:
                gpus = GPUDeviceFactory().verify_requested_gpus(config.gpus)

            create_output_model_repository(config)

            client, server = get_triton_handles(config, gpus)
            state_manager = AnalyzerStateManager(config=config, server=server)

            # Only check for exit after the events that take a long time.
            if state_manager.exiting():
                return

            profile_analyzer = Analyzer(
                config, server, state_manager, checkpoint_required=False
            )
            profile_analyzer.profile(
                client=client, gpus=gpus, mode=args.mode, verbose=args.verbose
            )
        elif args.subcommand == "report":
            report_analyzer = Analyzer(
                config,
                server,
                AnalyzerStateManager(config=config, server=server),
                checkpoint_required=True,
            )
            report_analyzer.report(mode=args.mode)
    finally:
        if server is not None:
            server.stop()
295
+
296
+
297
+ def _is_a_model_repository_required(args, config):
298
+ model_repository_required = (
299
+ args.subcommand == "profile"
300
+ and not config.model_repository
301
+ and not config.triton_launch_mode == "remote"
302
+ )
303
+ return model_repository_required
304
+
305
+
306
# Allow invoking this module directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,65 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import logging
18
+ import sys
19
+
20
+ from model_analyzer.constants import LOGGER_NAME
21
+
22
+
23
class LogFormatter(logging.Formatter):
    """Formatter for Model Analyzer log output.

    INFO records are printed as bare messages; all other levels also show
    the level name. When the logger's effective level is DEBUG at
    construction time, a timestamp is prepended to every record.
    """

    def __init__(self):
        # Snapshot the effective level once; the timestamp prefix is
        # decided from this value for the formatter's lifetime.
        logger = logging.getLogger(LOGGER_NAME)
        self._log_level = logger.getEffectiveLevel()
        super().__init__(datefmt="%H:%M:%S")

    def format(self, record):
        # BUGFIX: compare log-level ints with '==', not 'is'. Identity
        # comparison on ints only works by accident of CPython's
        # small-int caching and is not a language guarantee.
        front = "%(asctime)s " if self._log_level == logging.DEBUG else ""
        if record.levelno == logging.INFO:
            self._style._fmt = f"{front}[Model Analyzer] %(message)s"
        else:
            self._style._fmt = f"{front}[Model Analyzer] %(levelname)s: %(message)s"
        return super().format(record)
38
+
39
+
40
def setup_logging(quiet, verbose):
    """
    Configure the Model Analyzer logger's level, handler, and formatter.

    Parameters
    ----------
    quiet : bool
        If true, don't print anything other than errors
    verbose : bool
        If true and quiet is not true, print debug information
    """

    # quiet takes precedence over verbose when both are set.
    if quiet:
        level = logging.ERROR
    elif verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO

    ma_logger = logging.getLogger(LOGGER_NAME)
    ma_logger.setLevel(level=level)

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(LogFormatter())
    ma_logger.addHandler(stdout_handler)
    # Keep Model Analyzer output off the root logger's handlers.
    ma_logger.propagate = False
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+
18
class TritonModelAnalyzerException(Exception):
    """Exception type raised for errors specific to the Triton Model Analyzer."""