triton-model-analyzer 1.48.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
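The dist-info metadata suggests how the package is launched: `entry_points.txt` is two lines, which is consistent with a single console-script entry mapping a `model-analyzer` command to `model_analyzer.entrypoint:main`. That mapping is an inference from the file sizes and the `main()` function shown below, not copied from the wheel; a programmatic equivalent would be:

```python
# Sketch (not part of the wheel): invoke the analyzer the way the
# presumed console script would, by calling entrypoint.main(), which
# parses argv for the profile/analyze/report subcommands.
from model_analyzer.entrypoint import main

if __name__ == "__main__":
    main()
```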
model_analyzer/device/gpu_device_factory.py (new file)

@@ -0,0 +1,248 @@

```python
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

import numba.cuda

import model_analyzer.monitor.dcgm.dcgm_agent as dcgm_agent
import model_analyzer.monitor.dcgm.dcgm_structs as structs
from model_analyzer.constants import LOGGER_NAME
from model_analyzer.device.gpu_device import GPUDevice
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException

logger = logging.getLogger(LOGGER_NAME)


class GPUDeviceFactory:
    """
    Factory class for creating GPUDevices
    """

    def __init__(self):
        self._devices = []
        self._devices_by_bus_id = {}
        self._devices_by_uuid = {}
        self.init_all_devices()

    def init_all_devices(self, dcgmPath=None):
        """
        Create GPUDevice objects for all DCGM-visible devices.

        Parameters
        ----------
        dcgmPath : str
            Absolute path to the DCGM shared library
        """

        if numba.cuda.is_available():
            logger.info("Initializing GPUDevice handles")
            structs._dcgmInit(dcgmPath)
            dcgm_agent.dcgmInit()

            # Start DCGM in embedded mode to use the shared library
            dcgm_handle = dcgm_agent.dcgmStartEmbedded(
                structs.DCGM_OPERATION_MODE_MANUAL
            )

            # Create a GPU device for every supported DCGM device
            dcgm_device_ids = dcgm_agent.dcgmGetAllSupportedDevices(dcgm_handle)

            for device_id in dcgm_device_ids:
                device_attributes = dcgm_agent.dcgmGetDeviceAttributes(
                    dcgm_handle, device_id
                ).identifiers
                pci_bus_id = device_attributes.pciBusId
                device_uuid = device_attributes.uuid
                device_name = device_attributes.deviceName

                gpu_device = GPUDevice(device_name, device_id, pci_bus_id, device_uuid)

                self._devices.append(gpu_device)
                self._devices_by_bus_id[pci_bus_id] = gpu_device
                self._devices_by_uuid[device_uuid] = gpu_device

            dcgm_agent.dcgmShutdown()

    def get_device_by_bus_id(self, bus_id, dcgmPath=None):
        """
        Get a GPU device by its PCI bus ID.

        Parameters
        ----------
        bus_id : bytes
            Bus ID corresponding to the GPU, created by converting the
            colon-separated hex notation (e.g. "00:65:00") into a bytes
            type using ASCII encoding.

        Returns
        -------
        Device
            The device associated with this bus ID.
        """

        if bus_id in self._devices_by_bus_id:
            return self._devices_by_bus_id[bus_id]
        else:
            raise TritonModelAnalyzerException(
                f"GPU with {bus_id} bus id is either not supported by DCGM or not present."
            )

    def get_device_by_cuda_index(self, index):
        """
        Get a GPU device using its CUDA index. This includes the index
        provided by CUDA visible devices.

        Parameters
        ----------
        index : int
            Index of the device in the list of visible CUDA devices.

        Returns
        -------
        Device
            The device associated with the index provided.

        Raises
        ------
        IndexError
            If the index is out of bounds.
        """

        devices = numba.cuda.list_devices()
        if index > len(devices) - 1:
            raise IndexError

        cuda_device = devices[index]
        device_identity = cuda_device.get_device_identity()
        pci_domain_id = device_identity["pci_domain_id"]
        pci_device_id = device_identity["pci_device_id"]
        pci_bus_id = device_identity["pci_bus_id"]
        device_bus_id = f"{pci_domain_id:08X}:{pci_bus_id:02X}:{pci_device_id:02X}.0"

        return self.get_device_by_bus_id(device_bus_id)

    def get_device_by_uuid(self, uuid, dcgmPath=None):
        """
        Get a GPU device using the GPU UUID.

        Parameters
        ----------
        uuid : str
            UUID of the GPU device.

        Returns
        -------
        Device
            The device associated with the UUID.

        Raises
        ------
        TritonModelAnalyzerException
            If the UUID does not exist.
        """

        if uuid in self._devices_by_uuid:
            return self._devices_by_uuid[uuid]
        else:
            raise TritonModelAnalyzerException(f"GPU UUID {uuid} was not found.")

    def verify_requested_gpus(self, requested_gpus):
        """
        Creates a list of GPUDevices corresponding to the GPUs visible
        to numba.cuda among the requested GPUs.

        Parameters
        ----------
        requested_gpus : list of str or list of int
            Can either be GPU UUIDs or CUDA device IDs

        Returns
        -------
        list of GPUDevice
            GPUDevices corresponding to the visible GPUs among those requested

        Raises
        ------
        TritonModelAnalyzerException
        """

        cuda_visible_gpus = self.get_cuda_visible_gpus()

        if len(requested_gpus) == 1:
            if requested_gpus[0] == "all":
                self._log_gpus_used(cuda_visible_gpus)
                return cuda_visible_gpus
            elif requested_gpus[0] == "[]":
                logger.info("No GPUs requested")
                return []

        try:
            # Check if each string in the list can be parsed as an int
            requested_cuda_indices = list(map(int, requested_gpus))
            requested_gpus = []

            for idx in requested_cuda_indices:
                try:
                    requested_gpus.append(self.get_device_by_cuda_index(idx))
                except TritonModelAnalyzerException:
                    raise TritonModelAnalyzerException(
                        f"Requested GPU with device id : {idx}. This GPU is not supported by DCGM."
                    )
        except ValueError:
            # requested_gpus are assumed to be UUIDs
            requested_gpus = [self.get_device_by_uuid(uuid) for uuid in requested_gpus]

        # Return the intersection of CUDA-visible UUIDs and requested/supported UUIDs.
        available_gpus = list(set(cuda_visible_gpus) & set(requested_gpus))
        self._log_gpus_used(available_gpus)

        return available_gpus

    def get_cuda_visible_gpus(self):
        """
        Returns
        -------
        list of GPUDevice
            The DCGM-supported devices visible to CUDA
        """

        cuda_visible_gpus = []
        if numba.cuda.is_available():
            for cuda_device in numba.cuda.list_devices():
                try:
                    cuda_visible_gpus.append(
                        self.get_device_by_cuda_index(cuda_device.id)
                    )
                except TritonModelAnalyzerException:
                    # Device not supported by DCGM, log warning
                    logger.warning(
                        f"Device '{str(cuda_device.name, encoding='ascii')}' with "
                        f"cuda device id {cuda_device.id} is not supported by DCGM."
                    )
        return cuda_visible_gpus

    def _log_gpus_used(self, gpus):
        """
        Log the info for the GPUDevices in use
        """

        for gpu in gpus:
            logger.info(
                f"Using GPU {gpu.device_id()} {gpu.device_name()} with UUID {gpu.device_uuid()}"
            )
```
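The key translation step in `get_device_by_cuda_index()` is normalizing the integer PCI identity that CUDA reports into the bus-ID string form that DCGM uses as a key. A standalone sketch of just that formatting (the helper name is invented here for illustration):

```python
# Zero-padded upper-case hex: 8 digits for the PCI domain, 2 each for
# bus and device, with the PCI function fixed at 0 -- mirroring the
# f-string in get_device_by_cuda_index() above.
def format_pci_bus_id(pci_domain_id: int, pci_bus_id: int, pci_device_id: int) -> str:
    return f"{pci_domain_id:08X}:{pci_bus_id:02X}:{pci_device_id:02X}.0"


# e.g. domain 0, bus 0x65, device 0 -> "00000000:65:00.0"
assert format_pci_bus_id(0, 0x65, 0) == "00000000:65:00.0"
```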
model_analyzer/entrypoint.py (new file)

@@ -0,0 +1,307 @@

```python
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import shutil
import sys
from pprint import pformat

from model_analyzer.constants import LOGGER_NAME
from model_analyzer.device.gpu_device_factory import GPUDeviceFactory

from .analyzer import Analyzer
from .cli.cli import CLI
from .config.input.config_command_profile import ConfigCommandProfile
from .config.input.config_command_report import ConfigCommandReport
from .log_formatter import setup_logging
from .model_analyzer_exceptions import TritonModelAnalyzerException
from .state.analyzer_state_manager import AnalyzerStateManager
from .triton.client.client_factory import TritonClientFactory
from .triton.server.server_factory import TritonServerFactory

logger = logging.getLogger(LOGGER_NAME)


def get_client_handle(config):
    """
    Creates and returns a TritonClient with the specified arguments.

    Parameters
    ----------
    config : namespace
        Arguments parsed from the CLI
    """

    if config.client_protocol == "http":
        http_ssl_options = get_http_ssl_options(config)
        client = TritonClientFactory.create_http_client(
            server_url=config.triton_http_endpoint, ssl_options=http_ssl_options
        )
    elif config.client_protocol == "grpc":
        grpc_ssl_options = get_grpc_ssl_options(config)
        client = TritonClientFactory.create_grpc_client(
            server_url=config.triton_grpc_endpoint, ssl_options=grpc_ssl_options
        )
    else:
        raise TritonModelAnalyzerException(
            f"Unrecognized client-protocol : {config.client_protocol}"
        )

    return client


def get_http_ssl_options(config):
    """
    Returns the HTTP SSL options dictionary.

    Parameters
    ----------
    config : namespace
        Arguments parsed from the CLI
    """

    ssl_option_keys = [
        "ssl-https-verify-peer",
        "ssl-https-verify-host",
        "ssl-https-ca-certificates-file",
        "ssl-https-client-certificate-file",
        "ssl-https-client-certificate-type",
        "ssl-https-private-key-file",
        "ssl-https-private-key-type",
    ]

    return {
        key: config.perf_analyzer_flags[key]
        for key in ssl_option_keys
        if key in config.perf_analyzer_flags
    }


def get_grpc_ssl_options(config):
    """
    Returns the gRPC SSL options dictionary.

    Parameters
    ----------
    config : namespace
        Arguments parsed from the CLI
    """

    ssl_option_keys = [
        "ssl-grpc-use-ssl",
        "ssl-grpc-root-certifications-file",
        "ssl-grpc-private-key-file",
        "ssl-grpc-certificate-chain-file",
    ]

    return {
        key: config.perf_analyzer_flags[key]
        for key in ssl_option_keys
        if key in config.perf_analyzer_flags
    }


def get_triton_handles(config, gpus):
    """
    Creates a TritonServer and starts it. Creates a TritonClient.

    Parameters
    ----------
    config : namespace
        The arguments passed into the CLI
    gpus : list of str
        Available, supported, visible requested GPU UUIDs

    Returns
    -------
    TritonClient, TritonServer
        Handles for the Triton client/server pair.
    """

    client = get_client_handle(config)
    fail_if_server_already_running(client, config)
    server = TritonServerFactory.get_server_handle(config, gpus)

    return client, server


def get_cli_and_config_options():
    """
    Parses CLI and YAML config file options into Namespace and Config
    objects for the correct subcommand.

    Returns
    -------
    args : Namespace
        Object that contains the parsed CLI commands; used for the
        global options.
    config : CommandConfig
        The config corresponding to the command being run, already
        filled in with values from the CLI or YAML.
    """

    # Parse CLI options
    try:
        config_profile = ConfigCommandProfile()
        config_report = ConfigCommandReport()

        cli = CLI()
        cli.add_subcommand(
            cmd="profile",
            help="Run model inference profiling based on specified CLI or config options.",
            config=config_profile,
        )
        cli.add_subcommand(
            cmd="analyze",
            help="DEPRECATED: Aliased to profile - please use profile subcommand.",
            config=config_profile,
        )
        cli.add_subcommand(
            cmd="report",
            help="Generate detailed reports for a single config",
            config=config_report,
        )
        return cli.parse()

    except TritonModelAnalyzerException as e:
        logger.error(f"Model Analyzer encountered an error: {e}")
        sys.exit(1)


def create_output_model_repository(config):
    """
    Creates the output model repository.

    Parameters
    ----------
    config : ConfigCommandProfile
        The config containing the output_model_repository_path
    """

    try:
        os.mkdir(config.output_model_repository_path)
    except FileExistsError:
        if not config.override_output_model_repository:
            raise TritonModelAnalyzerException(
                f'Path "{config.output_model_repository_path}" already exists. '
                'Please set or modify "--output-model-repository-path" flag or remove this directory.'
                " You can also allow overriding of the output directory using"
                ' the "--override-output-model-repository" flag.'
            )
        else:
            shutil.rmtree(config.output_model_repository_path)
            logger.warning(
                "Overriding the output model repo path "
                f'"{config.output_model_repository_path}"'
            )
            os.mkdir(config.output_model_repository_path)


def fail_if_server_already_running(client, config):
    """
    Checks whether a Triton server is already running. If one is and
    the launch mode is not 'remote' or 'c_api', raise an exception;
    otherwise do nothing.
    """
    if config.triton_launch_mode == "remote" or config.triton_launch_mode == "c_api":
        return

    is_server_running = True
    try:
        client.is_server_ready()
    except Exception:
        is_server_running = False
    finally:
        if is_server_running:
            raise TritonModelAnalyzerException(
                "Another application (likely a Triton Server) is already using the "
                f"desired port. In '{config.triton_launch_mode}' mode, Model Analyzer "
                "will launch a Triton Server and requires that the HTTP/GRPC port is "
                "not occupied by another application. Please kill the other "
                "application or specify a different port."
            )


def main():
    """
    Main entrypoint of model_analyzer
    """

    # Need to create a basic logging format for logs we print
    # before we have enough information to configure the full logger
    logging.basicConfig(format="[Model Analyzer] %(message)s")

    args, config = get_cli_and_config_options()

    setup_logging(quiet=args.quiet, verbose=args.verbose)

    logger.debug("\n%s", pformat(config.get_all_config()))

    # Launch subcommand handlers
    server = None
    try:
        # Make calls to correct analyzer subcommand functions
        if args.subcommand == "profile" or args.subcommand == "analyze":
            if _is_a_model_repository_required(args, config):
                raise TritonModelAnalyzerException(
                    "No model repository specified. Please specify it using the YAML "
                    "config file or using the --model-repository flag in CLI."
                )

            # Set up devices
            if config.dcgm_disable:
                gpus = []
            else:
                gpus = GPUDeviceFactory().verify_requested_gpus(config.gpus)

            # Check/create output model repository
            create_output_model_repository(config)

            client, server = get_triton_handles(config, gpus)
            state_manager = AnalyzerStateManager(config=config, server=server)

            # Only check for exit after the events that take a long time.
            if state_manager.exiting():
                return

            analyzer = Analyzer(config, server, state_manager, checkpoint_required=False)
            analyzer.profile(client=client, gpus=gpus, mode=args.mode, verbose=args.verbose)
        elif args.subcommand == "report":
            analyzer = Analyzer(
                config,
                server,
                AnalyzerStateManager(config=config, server=server),
                checkpoint_required=True,
            )
            analyzer.report(mode=args.mode)
    finally:
        if server is not None:
            server.stop()


def _is_a_model_repository_required(args, config):
    # True when the subcommand needs a model repository but none was
    # provided (remote launch mode supplies its own repository).
    model_repository_required = (
        args.subcommand == "profile"
        and not config.model_repository
        and not config.triton_launch_mode == "remote"
    )
    return model_repository_required


if __name__ == "__main__":
    main()
```
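The port-collision check in `fail_if_server_already_running()` inverts the usual readiness probe: a *successful* `is_server_ready()` call means some server already owns the port, while a connection failure means the port is free. A self-contained sketch of that logic (the class and function names below are invented for illustration, not part of the package):

```python
# Treat any successful readiness probe as "something is already
# listening on the port"; any exception means the port is free.
class _FakeReadyClient:
    def is_server_ready(self):
        return True  # probe succeeds -> a server is already listening


class _FakeDownClient:
    def is_server_ready(self):
        raise ConnectionError("nothing listening")


def server_already_running(client) -> bool:
    try:
        client.is_server_ready()
    except Exception:
        return False
    return True


assert server_already_running(_FakeReadyClient()) is True
assert server_already_running(_FakeDownClient()) is False
```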
model_analyzer/log_formatter.py (new file)

@@ -0,0 +1,65 @@

```python
#!/usr/bin/env python3

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import sys

from model_analyzer.constants import LOGGER_NAME


class LogFormatter(logging.Formatter):
    """Class to handle formatting of the logger outputs"""

    def __init__(self):
        logger = logging.getLogger(LOGGER_NAME)
        self._log_level = logger.getEffectiveLevel()
        super().__init__(datefmt="%H:%M:%S")

    def format(self, record):
        # Prefix a timestamp only when running at DEBUG level
        front = "%(asctime)s " if self._log_level == logging.DEBUG else ""
        if record.levelno == logging.INFO:
            self._style._fmt = f"{front}[Model Analyzer] %(message)s"
        else:
            self._style._fmt = f"{front}[Model Analyzer] %(levelname)s: %(message)s"
        return super().format(record)


def setup_logging(quiet, verbose):
    """
    Set up the logger format.

    Parameters
    ----------
    quiet : bool
        If true, don't print anything other than errors
    verbose : bool
        If true and quiet is not true, print debug information
    """

    if quiet:
        log_level = logging.ERROR
    elif verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO

    logger = logging.getLogger(LOGGER_NAME)
    logger.setLevel(level=log_level)

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(LogFormatter())
    logger.addHandler(handler)
    logger.propagate = False
```
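Assuming the wheel is installed, a minimal usage sketch of the formatter above: `setup_logging` wires a stdout handler onto the package logger, INFO records render without a level name, and the timestamp prefix appears only at DEBUG level.

```python
# Usage sketch: default (non-quiet, non-verbose) setup prints INFO
# records as "[Model Analyzer] <message>" with no timestamp.
import logging

from model_analyzer.constants import LOGGER_NAME
from model_analyzer.log_formatter import setup_logging

setup_logging(quiet=False, verbose=False)
logging.getLogger(LOGGER_NAME).info("profiling started")
# -> [Model Analyzer] profiling started
```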
model_analyzer/model_analyzer_exceptions.py (new file)

@@ -0,0 +1,24 @@

```python
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class TritonModelAnalyzerException(Exception):
    """
    A custom exception specific to the Triton Model Analyzer
    """

    pass
```
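A short, hypothetical usage sketch of the exception class: analyzer code raises it for recoverable configuration errors (as `entrypoint.py` above does for a missing model repository), letting callers catch analyzer-specific failures without swallowing unrelated ones.

```python
# Sketch only: the helper below is invented for illustration and is
# not part of the package.
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException


def require_model_repository(path: str) -> str:
    if not path:
        raise TritonModelAnalyzerException("No model repository specified.")
    return path
```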