triton-model-analyzer 1.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
--- /dev/null
+++ model_analyzer/triton/server/server_factory.py
@@ -0,0 +1,306 @@
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
from urllib.parse import urlparse

from model_analyzer.config.input.config_utils import binary_path_validator
from model_analyzer.constants import CONFIG_PARSER_FAILURE, LOGGER_NAME
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException

from .server_config import TritonServerConfig
from .server_docker import TritonServerDocker
from .server_local import TritonServerLocal

logger = logging.getLogger(LOGGER_NAME)


class TritonServerFactory:
    """
    A factory for creating TritonServer instances
    """

    @staticmethod
    def create_server_docker(
        image,
        config,
        gpus,
        log_path=None,
        mounts=None,
        labels=None,
        shm_size=None,
        args=None,
    ):
        """
        Parameters
        ----------
        image : str
            The tritonserver docker image to pull and run
        config : TritonServerConfig
            the config object containing arguments for this server instance
        gpus : list of str
            List of GPU UUIDs to be mounted and used in the container
            Use ["all"] to include all GPUs
        log_path: str
            Absolute path to the triton log file
        mounts: list of str
            The volumes to be mounted to the tritonserver container
        labels: dict
            name-value pairs for label to set metadata for triton docker
            container. (Not the same as environment variables)
        shm_size: str
            The size of /dev/shm for the triton docker container.
        args: dict
            name-value pairs of triton docker args
        Returns
        -------
        TritonServerDocker
        """

        return TritonServerDocker(
            image=image,
            config=config,
            gpus=gpus,
            log_path=log_path,
            mounts=mounts,
            labels=labels,
            shm_size=shm_size,
            args=args,
        )

    @staticmethod
    def create_server_local(path, config, gpus, log_path=None):
        """
        Parameters
        ----------
        path : str
            The absolute path to the tritonserver executable
        config : TritonServerConfig
            the config object containing arguments for this server instance
        gpus: list of str
            List of GPU UUIDs to be made visible to Triton
            Use ["all"] to include all GPUs
        log_path: str
            Absolute path to the triton log file

        Returns
        -------
        TritonServerLocal
        """

        return TritonServerLocal(path=path, config=config, gpus=gpus, log_path=log_path)

    @staticmethod
    def get_server_handle(config, gpus, use_model_repository=False):
        """
        Creates and returns a TritonServer
        with specified arguments

        Parameters
        ----------
        config : namespace
            Arguments parsed from the CLI
        gpus : list of str
            Available, supported, visible requested GPU UUIDs
        use_model_repository : bool
            Optional flag to use the model directory instead
            of the output model directory
        Returns
        -------
        TritonServer
            Handle to the Triton Server
        """

        if config.triton_launch_mode == "remote":
            server = TritonServerFactory._get_remote_server_handle(
                config, print_warning_message=use_model_repository
            )
        elif config.triton_launch_mode == "local":
            server = TritonServerFactory._get_local_server_handle(
                config, gpus, use_model_repository=True
            )
        elif config.triton_launch_mode == "docker":
            server = TritonServerFactory._get_docker_server_handle(
                config, gpus, use_model_repository=True
            )
        elif config.triton_launch_mode == "c_api":
            server = TritonServerFactory._get_c_api_server_handle(
                config, use_model_repository
            )
        else:
            raise TritonModelAnalyzerException(
                f"Unrecognized triton-launch-mode : {config.triton_launch_mode}"
            )

        return server

    @staticmethod
    def _get_remote_server_handle(config, print_warning_message=True):
        triton_config = TritonServerConfig()
        triton_config.update_config(config.triton_server_flags)
        triton_config["model-repository"] = "remote-model-repository"
        logger.info("Using remote Triton Server")
        server = TritonServerFactory.create_server_local(
            path=None, config=triton_config, gpus=[], log_path=""
        )
        if print_warning_message:
            logger.warning(
                "GPU memory metrics reported in the remote mode are not"
                " accurate. Model Analyzer uses Triton explicit model control to"
                " load/unload models. Some frameworks do not release the GPU"
                " memory even when the memory is not being used. Consider"
                ' using the "local" or "docker" mode if you want to accurately'
                " monitor the GPU memory usage for different models."
            )

        return server

    @staticmethod
    def _get_local_server_handle(config, gpus, use_model_repository):
        TritonServerFactory._validate_triton_server_path(config)

        triton_config = TritonServerConfig()
        triton_config.update_config(config.triton_server_flags)

        triton_config["model-repository"] = (
            config.model_repository
            if use_model_repository and config.model_repository
            else config.output_model_repository_path
        )

        if use_model_repository and config.model_repository:
            triton_config["strict-model-config"] = "false"

        triton_config["http-port"] = config.triton_http_endpoint.split(":")[-1]
        triton_config["grpc-port"] = config.triton_grpc_endpoint.split(":")[-1]
        triton_config["metrics-port"] = urlparse(config.triton_metrics_url).port
        triton_config["model-control-mode"] = "explicit"
        triton_config["metrics-interval-ms"] = int(config.monitoring_interval * 1e3)
        logger.info("Starting a local Triton Server")
        server = TritonServerFactory.create_server_local(
            path=config.triton_server_path,
            config=triton_config,
            gpus=gpus,
            log_path=config.triton_output_path,
        )

        return server

    @staticmethod
    def _get_docker_server_handle(config, gpus, use_model_repository):
        triton_config = TritonServerConfig()
        triton_config.update_config(config.triton_server_flags)

        if use_model_repository:
            triton_config["model-repository"] = os.path.abspath(config.model_repository)
        else:
            triton_config["model-repository"] = os.path.abspath(
                config.output_model_repository_path
            )

        if use_model_repository:
            triton_config["strict-model-config"] = "false"

        triton_config["http-port"] = config.triton_http_endpoint.split(":")[-1]
        triton_config["grpc-port"] = config.triton_grpc_endpoint.split(":")[-1]
        triton_config["metrics-port"] = urlparse(config.triton_metrics_url).port
        triton_config["model-control-mode"] = "explicit"
        triton_config["metrics-interval-ms"] = int(config.monitoring_interval * 1e3)
        logger.info("Starting a Triton Server using docker")
        server = TritonServerFactory.create_server_docker(
            image=config.triton_docker_image,
            config=triton_config,
            gpus=gpus,
            log_path=config.triton_output_path,
            mounts=config.triton_docker_mounts,
            labels=config.triton_docker_labels,
            shm_size=config.triton_docker_shm_size,
            args=config.triton_docker_args,
        )

        return server

    @staticmethod
    def _get_c_api_server_handle(config, use_model_repository):
        TritonServerFactory._validate_triton_install_path(config)

        triton_config = TritonServerConfig()

        if use_model_repository:
            triton_config["model-repository"] = os.path.abspath(config.model_repository)
        else:
            triton_config["model-repository"] = os.path.abspath(
                config.output_model_repository_path
            )

        if use_model_repository:
            triton_config["strict-model-config"] = "false"

        logger.info("Starting a Triton Server using perf_analyzer's C_API")
        server = TritonServerFactory.create_server_local(
            path=None, config=triton_config, gpus=[], log_path=""
        )
        logger.warning(
            "When profiling with perf_analyzer's C_API, some metrics may be "
            "affected. Triton is not launched with explicit model control "
            "mode, and as a result, loads all model config variants as they "
            "are created in the output_model_repository."
        )

        return server

    @staticmethod
    def _validate_triton_server_path(config):
        """
        Raises an exception if 'triton_server_path' doesn't exist

        Parameters
        ----------
        config : namespace
            Arguments parsed from the CLI
        """
        path = config.get_config()["triton_server_path"].value()
        config_status = binary_path_validator(path)
        if config_status.status() == CONFIG_PARSER_FAILURE:
            raise TritonModelAnalyzerException(config_status.message())

    @staticmethod
    def _validate_triton_install_path(config):
        """
        Raises an exception in the following cases:
        - 'triton_install_path' doesn't exist
        - 'triton_install_path' exists, but contains no files

        Parameters
        ----------
        config : namespace
            Arguments parsed from the CLI
        """
        path = config.get_config()["triton_install_path"].value()

        # Check the file system
        if not path or not os.path.exists(path) or not os.path.isdir(path):
            raise TritonModelAnalyzerException(
                f"triton_install_path {path} is not specified, does not exist, "
                "or is not a directory."
            )

        # Make sure that files exist in the install directory
        if len(os.listdir(path)) == 0:
            raise TritonModelAnalyzerException(
                f"triton_install_path {path} should not be empty."
            )
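For orientation: `get_server_handle` dispatches on the `--triton-launch-mode` CLI setting, and every mode funnels into one of the two `create_server_*` constructors above. Below is a minimal sketch of driving the factory directly; the binary path, model repository, and log path are illustrative placeholders, not values shipped with the package.

```python
# Sketch only: exercising TritonServerFactory outside the model-analyzer CLI.
from model_analyzer.triton.server.server_config import TritonServerConfig
from model_analyzer.triton.server.server_factory import TritonServerFactory

config = TritonServerConfig()
config["model-repository"] = "/path/to/models"  # required; asserted by TritonServerLocal
config["model-control-mode"] = "explicit"       # same flag _get_local_server_handle sets

server = TritonServerFactory.create_server_local(
    path="/opt/tritonserver/bin/tritonserver",  # illustrative tritonserver binary path
    config=config,
    gpus=[],                                    # no GPUs made visible in this sketch
    log_path="/tmp/triton.log",                 # illustrative log destination
)
server.start()
# ... load models and profile ...
server.stop()
```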
--- /dev/null
+++ model_analyzer/triton/server/server_local.py
@@ -0,0 +1,158 @@
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import tempfile
from io import TextIOWrapper
from subprocess import DEVNULL, STDOUT, Popen, TimeoutExpired

import psutil

from model_analyzer.constants import LOGGER_NAME, SERVER_OUTPUT_TIMEOUT_SECS
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException

from .server import TritonServer

logger = logging.getLogger(LOGGER_NAME)


class TritonServerLocal(TritonServer):
    """
    Concrete Implementation of TritonServer interface that runs
    tritonserver locally as a subprocess.
    """

    def __init__(self, path, config, gpus, log_path):
        """
        Parameters
        ----------
        path : str
            The absolute path to the tritonserver executable
        config : TritonServerConfig
            the config object containing arguments for this server instance
        gpus: list of str
            List of GPU UUIDs to be made visible to Triton
        log_path: str
            Absolute path to the triton log file
        """

        self._tritonserver_process = None
        self._server_config = config
        self._server_path = path
        self._gpus = gpus
        self._log_path = log_path
        self._log_file = DEVNULL
        self._is_first_time_starting_server = True

        assert self._server_config[
            "model-repository"
        ], "Triton Server requires --model-repository argument to be set."

    def start(self, env=None):
        """
        Starts the tritonserver container locally
        """

        if self._server_path:
            # Create command list and run subprocess
            cmd = [self._server_path]
            cmd += self._server_config.to_args_list()

            # Set environment, update with user config env
            triton_env = os.environ.copy()

            if env:
                # Filter env variables that use env lookups
                for variable, value in env.items():
                    if value.find("$") == -1:
                        triton_env[variable] = value
                    else:
                        # Collect the ones that need lookups to give to the shell
                        triton_env[variable] = os.path.expandvars(value)

            # List GPUs to be used by tritonserver
            triton_env["CUDA_VISIBLE_DEVICES"] = ",".join(
                [gpu.device_uuid() for gpu in self._gpus]
            )

            if self._log_path:
                try:
                    if self._is_first_time_starting_server:
                        if os.path.exists(self._log_path):
                            os.remove(self._log_path)
                    self._log_file = open(self._log_path, "a+")
                except OSError as e:
                    raise TritonModelAnalyzerException(e)
            else:
                self._log_file = tempfile.NamedTemporaryFile()

            self._is_first_time_starting_server = False

            # Construct Popen command
            try:
                self._tritonserver_process = Popen(
                    cmd,
                    stdout=self._log_file,
                    stderr=STDOUT,
                    start_new_session=True,
                    universal_newlines=True,
                    env=triton_env,
                )

                logger.debug("Triton Server started.")
            except Exception as e:
                raise TritonModelAnalyzerException(e)

    def stop(self):
        """
        Stops the running tritonserver
        """

        # Terminate process, capture output
        if self._tritonserver_process is not None:
            self._tritonserver_process.terminate()
            try:
                self._tritonserver_process.communicate(
                    timeout=SERVER_OUTPUT_TIMEOUT_SECS
                )
            except TimeoutExpired:
                self._tritonserver_process.kill()
                self._tritonserver_process.communicate()
            self._tritonserver_process = None
            if self._log_path:
                self._log_file.close()
            logger.debug("Stopped Triton Server.")

    def cpu_stats(self):
        """
        Returns the CPU memory usage and CPU available memory in MB
        """

        if self._tritonserver_process:
            server_process = psutil.Process(self._tritonserver_process.pid)
            process_memory_info = server_process.memory_full_info()
            system_memory_info = psutil.virtual_memory()

            # Divide by 1.0e6 to convert from bytes to MB
            return (process_memory_info.uss // 1.0e6), (
                system_memory_info.available // 1.0e6
            )
        else:
            return 0.0, 0.0

    def log_file(self) -> TextIOWrapper:
        return self._log_file
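One detail of `start()` above that is easy to miss: user-supplied environment values are only expanded when they contain a `$`; everything else is copied through verbatim. A self-contained illustration of that branch (the variable names here are invented for the example):

```python
import os

os.environ["MODEL_DIR"] = "/models"  # pretend this was already in the environment

env = {"TRITON_LOG_VERBOSE": "1", "REPO": "$MODEL_DIR/resnet50"}
triton_env = os.environ.copy()
for variable, value in env.items():
    if value.find("$") == -1:
        triton_env[variable] = value  # literal value: copied as-is
    else:
        # values containing "$" are resolved against os.environ, as in start()
        triton_env[variable] = os.path.expandvars(value)

print(triton_env["REPO"])  # -> /models/resnet50
```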
--- /dev/null
+++ triton_model_analyzer-1.48.0.dist-info/METADATA
@@ -0,0 +1,52 @@
Metadata-Version: 2.4
Name: triton-model-analyzer
Version: 1.48.0
Summary: Triton Model Analyzer is a tool to profile and analyze the runtime performance of one or more models on the Triton Inference Server
Author-email: "NVIDIA Inc." <sw-dl-triton@nvidia.com>
License-Expression: Apache-2.0
Project-URL: Homepage, https://developer.nvidia.com/nvidia-triton-inference-server
Project-URL: Repository, https://github.com/triton-inference-server/model_analyzer
Keywords: triton,tensorrt,inference,server,service,analyzer,nvidia
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: Intended Audience :: Information Technology
Classifier: Topic :: Scientific/Engineering
Classifier: Topic :: Scientific/Engineering :: Image Recognition
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Classifier: Topic :: Software Development :: Libraries
Classifier: Topic :: Utilities
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Environment :: Console
Classifier: Natural Language :: English
Classifier: Operating System :: POSIX :: Linux
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: cryptography>=3.3.2
Requires-Dist: distro>=1.5.0
Requires-Dist: docker>=4.3.1
Requires-Dist: gevent>=22.08.0
Requires-Dist: grpcio<1.68,>=1.63.0
Requires-Dist: httplib2>=0.19.0
Requires-Dist: importlib_metadata>=7.1.0
Requires-Dist: matplotlib>=3.3.4
Requires-Dist: numba>=0.51.2
Requires-Dist: optuna==3.6.1
Requires-Dist: pdfkit>=0.6.1
Requires-Dist: prometheus_client>=0.9.0
Requires-Dist: protobuf
Requires-Dist: psutil>=5.8.0
Requires-Dist: pyyaml>=5.3.1
Requires-Dist: requests>=2.24.0
Requires-Dist: tritonclient[all]>=2.4.0
Requires-Dist: urllib3>=2.0.7
Provides-Extra: perf-analyzer
Requires-Dist: perf-analyzer; extra == "perf-analyzer"
Dynamic: license-file

See the Model Analyzer's [installation documentation](https://github.com/triton-inference-server/model_analyzer/blob/main/docs/install.md#using-pip3) for package details. The [quick start](https://github.com/triton-inference-server/model_analyzer/blob/main/docs/quick_start.md) documentation describes how to get started with profiling and analysis using Triton Model Analyzer.
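For completeness, the fields above are exactly what the standard library exposes once the wheel is installed; a quick check of the installed version and its `Requires-Dist` entries (assuming the package has already been pip-installed):

```python
from importlib.metadata import metadata, requires, version

print(version("triton-model-analyzer"))              # 1.48.0
print(metadata("triton-model-analyzer")["Summary"])  # the Summary field above
for req in requires("triton-model-analyzer"):        # one line per Requires-Dist
    print(req)
```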