triton_model_analyzer-1.48.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+
+# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- /dev/null
+++ b/model_analyzer/triton/server/server.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+
+# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+from io import TextIOWrapper
+
+
+class TritonServer(ABC):
+    """
+    Defines the interface for the objects created by
+    TritonServerFactory
+    """
+
+    @abstractmethod
+    def start(self, env=None):
+        """
+        Starts the tritonserver
+
+        Parameters
+        ----------
+        env: dict
+            The environment to set for this tritonserver launch
+        """
+
+    @abstractmethod
+    def stop(self):
+        """
+        Stops and cleans up after the server
+        """
+
+    @abstractmethod
+    def log_file(self) -> TextIOWrapper:
+        """
+        Returns the server's log file
+        """
+
+    @abstractmethod
+    def cpu_stats(self):
+        """
+        Returns the CPU memory usage and CPU available memory in MB
+        """
+
+    def update_config(self, params):
+        """
+        Update the server's arguments
+
+        Parameters
+        ----------
+        params: dict
+            keys are argument names and values are their values.
+        """
+
+        self._server_config.update_config(params)
+
+    def config(self):
+        """
+        Returns
+        -------
+        TritonServerConfig
+            This server's config
+        """
+
+        return self._server_config
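For orientation, here is a minimal sketch (not part of the package) of how a concrete subclass is expected to satisfy this interface. `EchoServer` and its behavior are hypothetical; the `_server_config` attribute is assumed because the concrete `update_config()` and `config()` methods above read it.

```python
from model_analyzer.triton.server.server import TritonServer
from model_analyzer.triton.server.server_config import TritonServerConfig


class EchoServer(TritonServer):
    """Hypothetical no-op server, shown only to illustrate the contract."""

    def __init__(self, config: TritonServerConfig, log_path: str):
        self._server_config = config  # read by update_config()/config()
        self._log_path = log_path
        self._log_file = None

    def start(self, env=None):
        # A real implementation would launch tritonserver here, passing
        # env and self._server_config.to_cli_string() to the process.
        self._log_file = open(self._log_path, "a+")

    def stop(self):
        if self._log_file:
            self._log_file.close()

    def log_file(self):
        return self._log_file

    def cpu_stats(self):
        # (cpu_used_memory_mb, cpu_available_memory_mb)
        return 0.0, 0.0
```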
--- /dev/null
+++ b/model_analyzer/triton/server/server_config.py
@@ -0,0 +1,269 @@
+#!/usr/bin/env python3
+
+# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
+
+
+class TritonServerConfig:
+    """
+    A config class to set arguments to the Triton Inference
+    Server. An argument set to None will use the server default.
+    """
+
+    server_arg_keys = [
+        # Server
+        "id",
+        "exit-timeout-secs",
+        # Logging
+        "log-verbose",
+        "log-info",
+        "log-warning",
+        "log-error",
+        "log-format",
+        "log-file",
+        # Model Repository
+        "model-store",
+        "model-repository",
+        "exit-on-error",
+        "disable-auto-complete-config",
+        "strict-readiness",
+        "model-control-mode",
+        "repository-poll-secs",
+        "load-model",
+        "model-load-thread-count",
+        "model-load-retry-count",
+        "model-namespacing",
+        # HTTP
+        "allow-http",
+        "http-address",
+        "http-port",
+        "reuse-http-port",
+        "http-header-forward-pattern",
+        "http-thread-count",
+        "http-restricted-api",
+        # GRPC
+        "allow-grpc",
+        "grpc-address",
+        "grpc-port",
+        "reuse-grpc-port",
+        "grpc-header-forward-pattern",
+        "grpc-infer-allocation-pool-size",
+        "grpc-use-ssl",
+        "grpc-use-ssl-mutual",
+        "grpc-server-cert",
+        "grpc-server-key",
+        "grpc-root-cert",
+        "grpc-infer-response-compression-level",
+        "grpc-keepalive-time",
+        "grpc-keepalive-timeout",
+        "grpc-keepalive-permit-without-calls",
+        "grpc-http2-max-pings-without-data",
+        "grpc-http2-min-recv-ping-interval-without-data",
+        "grpc-http2-max-ping-strikes",
+        "grpc-max-connection-age",
+        "grpc-max-connection-age-grace",
+        "grpc-restricted-protocol",
+        # Sagemaker
+        "allow-sagemaker",
+        "sagemaker-port",
+        "sagemaker-safe-port-range",
+        "sagemaker-thread-count",
+        # Vertex
+        "allow-vertex-ai",
+        "vertex-ai-port",
+        "vertex-ai-thread-count",
+        "vertex-ai-default-model",
+        # Metrics
+        "allow-metrics",
+        "allow-gpu-metrics",
+        "allow-cpu-metrics",
+        "metrics-address",
+        "metrics-port",
+        "metrics-interval-ms",
+        "metrics-config",
+        # Tracing
+        "trace-config",
+        # Backend
+        "backend-directory",
+        "backend-config",
+        # Repository Agent
+        "repoagent-directory",
+        # Response Cache
+        "cache-config",
+        "cache-directory",
+        # Rate Limiter
+        "rate-limit",
+        "rate-limit-resource",
+        # Memory/Device Management
+        "pinned-memory-pool-byte-size",
+        "cuda-memory-pool-byte-size",
+        "cuda-virtual-address-size",
+        "min-supported-compute-capability",
+        "buffer-management-thread-count",
+        "host-policy",
+        "model-load-gpu-limit",
+        # DEPRECATED
+        "strict-model-config",
+        "response-cache-byte-size",
+        "trace-file",
+        "trace-level",
+        "trace-rate",
+        "trace-count",
+        "trace-log-frequency",
+    ]
+
+    def __init__(self):
+        """
+        Construct TritonServerConfig
+        """
+
+        self._server_args = {k: None for k in self.server_arg_keys}
+
+    @classmethod
+    def allowed_keys(cls):
+        """
+        Returns
+        -------
+        list of str
+            The keys that can be used to configure a tritonserver instance
+        """
+
+        snake_cased_keys = [key.replace("-", "_") for key in cls.server_arg_keys]
+        return cls.server_arg_keys + snake_cased_keys
+
+    def update_config(self, params=None):
+        """
+        Allows setting values from a params dict
+
+        Parameters
+        ----------
+        params: dict
+            keys are allowed args to tritonserver
+        """
+
+        if params:
+            for key in params:
+                self[key.strip().replace("_", "-")] = params[key]
+
+    def to_cli_string(self):
+        """
+        Utility function to convert a config into a
+        string of arguments to the server with CLI.
+
+        Returns
+        -------
+        str
+            the command consisting of all set arguments to
+            the tritonserver.
+            e.g. '--model-repository=/models --log-verbose=True'
+        """
+
+        return " ".join(
+            [f"--{key}={val}" for key, val in self._server_args.items() if val]
+        )
+
+    def to_args_list(self):
+        """
+        Utility function to convert a cli string into a list of arguments while
+        taking into account "smart" delimiters. Notice in the example below
+        that only the first equals sign is used as the split delimiter.
+
+        Returns
+        -------
+        list
+            the list of arguments consisting of all set arguments to
+            the tritonserver.
+
+        Example:
+        input cli_string: "--model-control-mode=explicit
+                           --backend-config=tensorflow,version=2"
+
+        output: ['--model-control-mode', 'explicit',
+                 '--backend-config', 'tensorflow,version=2']
+        """
+        args_list = []
+        args = self.to_cli_string().split()
+        for arg in args:
+            args_list += arg.split("=", 1)
+        return args_list
+
+    def copy(self):
+        """
+        Returns
+        -------
+        TritonServerConfig
+            object that has the same args as this one
+        """
+
+        config_copy = TritonServerConfig()
+        config_copy.update_config(params=self._server_args)
+        return config_copy
+
+    def server_args(self):
+        """
+        Returns
+        -------
+        dict
+            keys are server arguments
+            values are their values
+        """
+
+        return self._server_args
+
+    def __getitem__(self, key):
+        """
+        Gets an argument's value in the config
+
+        Parameters
+        ----------
+        key : str
+            The name of the argument to the tritonserver
+
+        Returns
+        -------
+        The value that the argument is set to in this config
+        """
+
+        return self._server_args[key.strip().replace("_", "-")]
+
+    def __setitem__(self, key, value):
+        """
+        Sets an argument's value in the config
+        after checking that it is defined/supported.
+
+        Parameters
+        ----------
+        key : str
+            The name of the argument to the tritonserver
+        value : (any)
+            The value to which the argument is being set
+
+        Raises
+        ------
+        TritonModelAnalyzerException
+            If the key is unsupported or undefined in the
+            config class
+        """
+
+        kebab_cased_key = key.strip().replace("_", "-")
+        if kebab_cased_key in self._server_args:
+            self._server_args[kebab_cased_key] = value
+        else:
+            raise TritonModelAnalyzerException(
+                f"The argument '{key}' to the Triton Inference "
+                "Server is not supported by the model analyzer."
+            )
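A short usage sketch of `TritonServerConfig`, grounded only in the methods above. `update_config()` and the item accessors normalize snake_cased keys to the kebab-cased `server_arg_keys` before storing them:

```python
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
from model_analyzer.triton.server.server_config import TritonServerConfig

config = TritonServerConfig()

# Snake-cased keys are accepted and stored kebab-cased.
config.update_config({"model_repository": "/models", "http_port": 8005})

print(config.to_cli_string())
# --model-repository=/models --http-port=8005

print(config.to_args_list())
# ['--model-repository', '/models', '--http-port', '8005']

# Keys outside server_arg_keys are rejected by __setitem__:
try:
    config["not-a-real-flag"] = 1
except TritonModelAnalyzerException as exc:
    print(exc)
```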
--- /dev/null
+++ b/model_analyzer/triton/server/server_docker.py
@@ -0,0 +1,229 @@
+#!/usr/bin/env python3
+
+# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import tempfile
+from io import TextIOWrapper
+from multiprocessing.pool import ThreadPool
+from subprocess import DEVNULL
+
+import docker
+
+from model_analyzer.constants import LOGGER_NAME
+from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
+
+from .server import TritonServer
+
+LOCAL_HTTP_PORT = 8000
+LOCAL_GRPC_PORT = 8001
+LOCAL_METRICS_PORT = 8002
+
+logger = logging.getLogger(LOGGER_NAME)
+
+
+class TritonServerDocker(TritonServer):
+    """
+    Concrete Implementation of TritonServer interface that runs
+    triton in a docker container.
+    """
+
+    def __init__(self, image, config, gpus, log_path, mounts, labels, shm_size, args):
+        """
+        Parameters
+        ----------
+        image : str
+            The tritonserver docker image to pull and run
+        config : TritonServerConfig
+            the config object containing arguments for this server instance
+        gpus : list
+            List of GPU device objects (exposing device_uuid()) to be
+            mounted and used in the container
+        log_path: str
+            Absolute path to the triton log file
+        mounts: list of str
+            The volumes to be mounted to the tritonserver container
+        labels: dict
+            name-value pairs for labels to set metadata for the triton
+            docker container. (Not the same as environment variables)
+        shm_size: str
+            The size of /dev/shm for the triton docker container.
+        args: dict
+            name-value pairs for triton docker args
+        """
+
+        self._server_config = config
+        self._docker_client = docker.from_env()
+        self._tritonserver_image = image
+        self._tritonserver_container = None
+        self._log_path = log_path
+        self._log_file = DEVNULL
+        self._mounts = mounts
+        self._labels = labels if labels else {}
+        self._gpus = gpus
+        self._shm_size = shm_size
+        self._args = args if args else {}
+
+        assert self._server_config[
+            "model-repository"
+        ], "Triton Server requires --model-repository argument to be set."
+
+        try:
+            self._docker_client.images.get(self._tritonserver_image)
+        except Exception:
+            logger.info(f"Pulling docker image {self._tritonserver_image}")
+            self._docker_client.images.pull(self._tritonserver_image)
+
+    def start(self, env=None):
+        """
+        Starts the tritonserver docker container using docker-py
+        """
+
+        # List GPUs to be mounted and used inside the docker container
+        devices = []
+        if len(self._gpus):
+            devices = [
+                docker.types.DeviceRequest(
+                    device_ids=[gpu.device_uuid() for gpu in self._gpus],
+                    capabilities=[["gpu"]],
+                )
+            ]
+
+        # Set environment inside container.
+        # Supports only strings, and value lookups/concats
+        env_cmds = [
+            f"CUDA_VISIBLE_DEVICES={','.join([gpu.device_uuid() for gpu in self._gpus])}"
+        ]
+        if env:
+            # Set all environment variables inside the container
+            for env_variable in list(env):
+                env_cmds.append(f"{env_variable}={env[env_variable]}")
+
+        # Mount required directories
+        volumes = {}
+        if self._mounts:
+            for volume_str in self._mounts:
+                host_path, dest, mode = volume_str.split(":")
+                volumes[host_path] = {"bind": dest, "mode": mode}
+
+        volumes[self._server_config["model-repository"]] = {
+            "bind": self._server_config["model-repository"],
+            "mode": "ro",
+        }
+
+        # Map ports, use config values but set to server defaults if not
+        # specified
+        server_http_port = self._server_config["http-port"] or 8000
+        server_grpc_port = self._server_config["grpc-port"] or 8001
+        server_metrics_port = self._server_config["metrics-port"] or 8002
+
+        ports = {
+            server_http_port: server_http_port,
+            server_grpc_port: server_grpc_port,
+            server_metrics_port: server_metrics_port,
+        }
+
+        # Construct run command
+        command = " ".join(
+            env_cmds + ["tritonserver", self._server_config.to_cli_string()]
+        )
+        try:
+            # Run the docker container and run the command in the container
+            self._tritonserver_container = self._docker_client.containers.run(
+                command=f'bash -c "{command}"',
+                init=True,
+                image=self._tritonserver_image,
+                device_requests=devices,
+                volumes=volumes,
+                labels=self._labels,
+                ports=ports,
+                publish_all_ports=True,
+                tty=False,
+                stdin_open=False,
+                detach=True,
+                shm_size=self._shm_size,
+                **self._args,
+            )
+            logger.debug("Triton Server started.")
+        except docker.errors.APIError as e:
+            if e.explanation.find("port is already allocated") != -1:
+                raise TritonModelAnalyzerException(
+                    "One of the following port(s) are already allocated: "
+                    f"{server_http_port}, {server_grpc_port}, "
+                    f"{server_metrics_port}.\n"
+                    "Change the Triton server ports using"
+                    " --triton-http-endpoint, --triton-grpc-endpoint,"
+                    " and --triton-metrics-endpoint flags."
+                )
+            else:
+                raise TritonModelAnalyzerException(e)
+
+        if self._log_path:
+            try:
+                self._log_file = open(self._log_path, "a+")
+                self._log_pool = ThreadPool(processes=1)
+                self._log_pool.apply_async(self._logging_worker)
+            except OSError as e:
+                raise TritonModelAnalyzerException(e)
+        else:
+            self._log_file = tempfile.NamedTemporaryFile()
+
+    def _logging_worker(self):
+        """
+        Streams container logs to the log file
+        """
+
+        for chunk in self._tritonserver_container.logs(stream=True):
+            self._log_file.write(chunk.decode("utf-8"))
+
+    def stop(self):
+        """
+        Stops the tritonserver docker container
+        and cleans up the docker client
+        """
+
+        if self._tritonserver_container is not None:
+            if self._log_path:
+                if self._log_pool:
+                    self._log_pool.terminate()
+                    self._log_pool.close()
+                if self._log_file:
+                    self._log_file.close()
+            self._tritonserver_container.stop()
+            self._tritonserver_container.remove(force=True)
+            self._tritonserver_container = None
+            logger.debug("Stopped Triton Server.")
+        self._docker_client.close()
+
+    def cpu_stats(self):
+        """
+        Returns the CPU memory usage and CPU available memory in MB
+        """
+
+        cmd = "bash -c \"pmap -x $(pgrep tritonserver) | tail -n1 | awk '{print $4}'\""
+        _, used_mem_bytes = self._tritonserver_container.exec_run(cmd=cmd, stream=False)
+        cmd = "bash -c \"free | awk '{if(NR==2)print $7}'\""
+        _, available_mem_bytes = self._tritonserver_container.exec_run(
+            cmd=cmd, stream=False
+        )
+
+        # Divide by 1.0e3 to convert from kilobytes to MB
+        return (
+            float(used_mem_bytes.decode("utf-8")) // 1.0e3,
+            float(available_mem_bytes.decode("utf-8")) // 1.0e3,
+        )
+
+    def log_file(self) -> TextIOWrapper:
+        return self._log_file
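Finally, a hedged sketch of driving `TritonServerDocker` directly. In the package, server objects are normally built by `TritonServerFactory` (`triton/server/server_factory.py`); the image tag and paths below are illustrative, and `gpus` is assumed to hold objects exposing `device_uuid()`, as `model_analyzer/device/gpu_device.py` provides.

```python
from model_analyzer.triton.server.server_config import TritonServerConfig
from model_analyzer.triton.server.server_docker import TritonServerDocker

config = TritonServerConfig()
config["model-repository"] = "/models"  # asserted non-empty in __init__

server = TritonServerDocker(
    image="nvcr.io/nvidia/tritonserver:24.01-py3",  # illustrative tag
    config=config,
    gpus=[],                     # CPU-only: no DeviceRequest is created
    log_path="/tmp/triton.log",  # container logs are streamed here
    mounts=["/data:/data:ro"],   # host:destination:mode triples
    labels={"owner": "model-analyzer"},
    shm_size="1g",
    args={},                     # extra kwargs forwarded to containers.run()
)

server.start()
used_mb, available_mb = server.cpu_stats()
server.stop()
```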