triton-model-analyzer 1.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from typing import Dict, List, Optional
|
|
19
|
+
|
|
20
|
+
from model_analyzer.config.generate.model_variant_name_manager import (
|
|
21
|
+
ModelVariantNameManager,
|
|
22
|
+
)
|
|
23
|
+
from model_analyzer.config.generate.run_config_generator_factory import (
|
|
24
|
+
RunConfigGeneratorFactory,
|
|
25
|
+
)
|
|
26
|
+
from model_analyzer.config.generate.search_parameters import SearchParameters
|
|
27
|
+
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
|
|
28
|
+
from model_analyzer.config.input.objects.config_model_profile_spec import (
|
|
29
|
+
ConfigModelProfileSpec,
|
|
30
|
+
)
|
|
31
|
+
from model_analyzer.constants import INVALID_MEASUREMENT_THRESHOLD, LOGGER_NAME
|
|
32
|
+
from model_analyzer.device.gpu_device import GPUDevice
|
|
33
|
+
from model_analyzer.record.metrics_manager import MetricsManager
|
|
34
|
+
from model_analyzer.result.constraint_manager import ConstraintManager
|
|
35
|
+
from model_analyzer.result.result_manager import ResultManager
|
|
36
|
+
from model_analyzer.result.run_config_measurement import RunConfigMeasurement
|
|
37
|
+
from model_analyzer.state.analyzer_state_manager import AnalyzerStateManager
|
|
38
|
+
from model_analyzer.triton.client.client import TritonClient
|
|
39
|
+
from model_analyzer.triton.model.model_config import ModelConfig
|
|
40
|
+
from model_analyzer.triton.server.server import TritonServer
|
|
41
|
+
|
|
42
|
+
from .model_analyzer_exceptions import TritonModelAnalyzerException
|
|
43
|
+
|
|
44
|
+
logger = logging.getLogger(LOGGER_NAME)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class ModelManager:
    """
    This class handles the search for, creation of, and execution of run configs.
    It also records the best results for each model.
    """

    def __init__(
        self,
        config: ConfigCommandProfile,
        gpus: List[GPUDevice],
        client: TritonClient,
        server: TritonServer,
        metrics_manager: MetricsManager,
        result_manager: ResultManager,
        state_manager: AnalyzerStateManager,
        constraint_manager: ConstraintManager,
        search_parameters: Dict[str, SearchParameters],
        composing_search_parameters: Dict[str, SearchParameters],
    ):
        """
        Parameters
        ----------
        config: ConfigCommandProfile
            The config for the model analyzer
        gpus: List of GPUDevice
        client: TritonClient
            The client handle used to send requests to Triton
        server: TritonServer
            The server handle used to start and stop Triton instances
        metrics_manager: MetricsManager
            The object that handles launching perf analyzer instances and profiling.
        result_manager: ResultManager
            The object that handles storing and sorting the results from the perf analyzer
        state_manager: AnalyzerStateManager
            The object that handles serializing the state of the analyzer and saving.
        constraint_manager: ConstraintManager
            The object that handles processing and applying
            constraints on a given measurements
        search_parameters: Dict[str, SearchParameters]
            The object that handles the users configuration search parameters
        composing_search_parameters: Dict[str, SearchParameters]
            The object that handles the users configuration search parameters for composing models
        """

        self._config = config
        self._gpus = gpus
        self._client = client
        self._server = server
        self._metrics_manager = metrics_manager
        self._result_manager = result_manager
        self._state_manager = state_manager
        self._constraint_manager = constraint_manager
        self._search_parameters = search_parameters
        self._composing_search_parameters = composing_search_parameters

        # Only seed the state variables on a fresh run; a resumed run
        # restores them from the checkpoint instead.
        if state_manager.starting_fresh_run():
            self._init_state()

        self._failed_measurement_attempts = 0
        self._received_measurement_values_from_pa = False

        self._model_variant_name_manager = ModelVariantNameManager.from_dict(
            self._state_manager.get_state_variable(
                "ModelManager.model_variant_name_manager"
            )
        )

    def run_models(self, models: List[ConfigModelProfileSpec]) -> None:
        """
        Generates configs, runs inferences, gets
        measurements for a list of models

        Parameters
        ----------
        models : List of ConfigModelProfileSpec
            The models to run
        """

        # Note: this is not done in config_command, because there isn't a ModelConfig yet,
        # so we cannot determine if the model is an ensemble
        self._check_for_ensemble_model_incompatibility(models)

        self._metrics_manager.start_new_model()

        # Save the global server config and update the server's config for this model run
        server_config_copy = self._server.config().copy()

        triton_server_flags = self._get_triton_server_flags(models)
        self._server.update_config(params=triton_server_flags)

        rcg = RunConfigGeneratorFactory.create_run_config_generator(
            command_config=self._config,
            state_manager=self._state_manager,
            gpus=self._gpus,
            models=models,
            client=self._client,
            result_manager=self._result_manager,
            search_parameters=self._search_parameters,
            composing_search_parameters=self._composing_search_parameters,
            model_variant_name_manager=self._model_variant_name_manager,
        )

        for run_config in rcg.get_configs():
            if self._state_manager.exiting():
                break

            if run_config.is_legal_combination():
                measurement = self._metrics_manager.execute_run_config(run_config)

                # Track success/failure so we can abort early if PA never
                # returns a valid measurement.
                self._check_for_valid_measurement(measurement)
                self._stop_ma_if_no_valid_measurement_threshold_reached()
            else:
                logger.info("Skipping illegal run configuration")
                measurement = None

            if measurement:
                objectives = [model.objectives() for model in models]
                weightings = [model.weighting() for model in models]

                measurement.set_metric_weightings(metric_objectives=objectives)
                measurement.set_constraint_manager(
                    constraint_manager=self._constraint_manager
                )
                measurement.set_model_config_weighting(model_config_weights=weightings)

                # Feed the measurement back so the generator can steer its search.
                rcg.set_last_results([measurement])

            # Checkpoint after every config so an interrupted run can resume.
            self._state_manager.save_checkpoint()

        self._metrics_manager.finalize()

        # Reset the server args to global config
        self._server.update_config(params=server_config_copy.server_args())

        model_variant_name_manager_dict = self._state_manager.default_encode(
            self._model_variant_name_manager
        )

        self._state_manager.set_state_variable(
            "ModelManager.model_variant_name_manager", model_variant_name_manager_dict
        )

    def _get_triton_server_flags(self, models: List[ConfigModelProfileSpec]):
        """
        Return the Triton server flags shared by all models.

        All models in a concurrent run share one Triton server instance,
        so every model must specify identical server flags.

        Raises
        ------
        TritonModelAnalyzerException
            If any model's flags differ from the others'
        """

        triton_server_flags = models[0].triton_server_flags()

        for model in models:
            if model.triton_server_flags() != triton_server_flags:
                raise TritonModelAnalyzerException(
                    f"Triton server flags must be the same for all models to run concurrently"
                )

        # Fix: the method previously fell off the end and returned None,
        # causing run_models() to call update_config(params=None).
        return triton_server_flags

    def _check_for_ensemble_model_incompatibility(
        self, models: List[ConfigModelProfileSpec]
    ) -> None:
        """
        Validate ensemble-related restrictions: ensembles cannot be profiled
        alongside other models and cannot use brute search, and
        --cpu-only-composing-models is only valid for ensemble/BLS models.
        """

        for model in models:
            model_config = ModelConfig.create_from_profile_spec(
                model, self._config, self._client, self._gpus
            )

            if model_config.is_ensemble():
                if len(models) > 1:
                    raise TritonModelAnalyzerException(
                        f"\nProfiling of multiple models is not supported for ensemble models"
                    )

                if self._config.run_config_search_mode == "brute":
                    if self._config.get_config()[
                        "run_config_search_mode"
                    ].is_set_by_user():
                        raise TritonModelAnalyzerException(
                            f"\nBrute search mode is not supported for ensemble models"
                            "\nPlease use quick search mode (--run-config-search-mode quick)"
                        )
                    else:
                        # Brute was only the default; silently fall back to quick.
                        self._config.run_config_search_mode = "quick"
            elif not self._config.bls_composing_models:
                # Model is neither an ensemble nor a BLS model, so composing-model
                # options do not apply to it.
                if len(self._config.cpu_only_composing_models) > 0:
                    raise TritonModelAnalyzerException(
                        f"\nCan only specify --cpu-only-composing-models for ensemble or BLS models."
                    )

    def _init_state(self):
        """
        Sets ModelManager object managed
        state variables in AnalyzerState
        """

        self._state_manager.set_state_variable(
            "ModelManager.model_variant_name_manager",
            self._state_manager.default_encode(ModelVariantNameManager()),
        )

    def _check_for_valid_measurement(
        self, measurement: Optional[RunConfigMeasurement]
    ) -> None:
        """Record whether PA returned a usable measurement for the last run."""

        if measurement:
            self._received_measurement_values_from_pa = True
        else:
            self._failed_measurement_attempts += 1

    def _stop_ma_if_no_valid_measurement_threshold_reached(self) -> None:
        """
        Raise if the first INVALID_MEASUREMENT_THRESHOLD attempts all failed
        without ever receiving a valid measurement from PA.
        """

        if self._received_measurement_values_from_pa:
            return

        if self._failed_measurement_attempts >= INVALID_MEASUREMENT_THRESHOLD:
            raise TritonModelAnalyzerException(
                f"The first {INVALID_MEASUREMENT_THRESHOLD} attempts to acquire measurements "
                "have failed. Please examine the Tritonserver/PA error logs "
                "to determine what has gone wrong."
            )
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from model_analyzer.record.types.cpu_available_ram import CPUAvailableRAM
|
|
18
|
+
from model_analyzer.record.types.cpu_used_ram import CPUUsedRAM
|
|
19
|
+
|
|
20
|
+
from .monitor import Monitor
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class CPUMonitor(Monitor):
    """
    Polls the tritonserver process for CPU memory statistics during
    inference and accumulates them as Record objects.
    """

    # The record types this monitor knows how to produce.
    cpu_metrics = {CPUAvailableRAM, CPUUsedRAM}

    def __init__(self, server, frequency, metrics):
        """
        Parameters
        ----------
        server : TritonServer
            A handle to the TritonServer
        frequency : float
            How often the metrics should be monitored.
        metrics : list
            A list of Record objects that will be monitored.
        """

        super().__init__(frequency, metrics)
        self._server = server
        self._cpu_memory_records = []

    def is_monitoring_connected(self) -> bool:
        # CPU stats are read in-process; there is no remote endpoint that
        # could be disconnected.
        return True

    def _monitoring_iteration(self):
        """
        Sample the server's CPU memory usage once and append a record for
        each requested metric type.
        """

        wants_used = CPUUsedRAM in self._metrics
        wants_free = CPUAvailableRAM in self._metrics
        if not (wants_used or wants_free):
            return

        used_mem, free_mem = self._server.cpu_stats()
        if wants_used:
            self._cpu_memory_records.append(CPUUsedRAM(value=used_mem))
        if wants_free:
            self._cpu_memory_records.append(CPUAvailableRAM(value=free_mem))

    def _collect_records(self):
        """
        Returns
        -------
        List of Records
            every record gathered since monitoring began
        """

        return self._cpu_memory_records
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import model_analyzer.monitor.dcgm.dcgm_structs as dcgm_structs
|
|
16
|
+
import model_analyzer.monitor.dcgm.dcgm_agent as dcgm_agent
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class DcgmDiag:
|
|
20
|
+
|
|
21
|
+
# Maps version codes to simple version values for range comparisons
|
|
22
|
+
_versionMap = {dcgm_structs.dcgmRunDiag_version: 5}
|
|
23
|
+
|
|
24
|
+
def __init__(self,
             gpuIds=None,
             testNamesStr='',
             paramsStr='',
             verbose=True,
             version=dcgm_structs.dcgmRunDiag_version):
    """
    Build a dcgmRunDiag request struct.

    Parameters
    ----------
    gpuIds : iterable of GPU ids to diagnose (None means unspecified)
    testNamesStr : '' or '1'-'4' for a validation level, or a
        comma-separated list of test names
    paramsStr : ';'-separated test parameter strings
    verbose : whether to request verbose diag output
    version : dcgmRunDiag struct version code

    Raises
    ------
    ValueError
        On an unknown version, an invalid bare-number test name, or too
        many tests/parameters.
    """
    # Make sure version is valid
    if version not in DcgmDiag._versionMap:
        raise ValueError("'%s' is not a valid version for dcgmRunDiag." %
                         version)
    self.version = version

    if self.version == dcgm_structs.dcgmRunDiag_version7:
        self.runDiagInfo = dcgm_structs.c_dcgmRunDiag_v7()
    else:
        self.runDiagInfo = dcgm_structs.c_dcgmRunDiag_t()

    self.numTests = 0
    self.numParams = 0
    self.SetVerbose(verbose)

    if testNamesStr == '':
        # default to a level 1 test
        self.runDiagInfo.validate = 1
    elif testNamesStr in ('1', '2', '3', '4'):
        # A bare level number selects the matching validation level.
        self.runDiagInfo.validate = int(testNamesStr)
    else:
        # Make sure no number other that 1-4 were submitted
        if testNamesStr.isdigit():
            raise ValueError("'%s' is not a valid test name." %
                             testNamesStr)

        # Copy to the testNames portion of the object
        names = testNamesStr.split(',')
        if len(names) > dcgm_structs.DCGM_MAX_TEST_NAMES:
            err = 'DcgmDiag cannot initialize: %d test names were specified exceeding the limit of %d.' %\
                  (len(names), dcgm_structs.DCGM_MAX_TEST_NAMES)
            raise ValueError(err)

        for testName in names:
            self.AddTest(testName)

    if paramsStr != '':
        params = paramsStr.split(';')
        if len(params) >= dcgm_structs.DCGM_MAX_TEST_PARMS:
            err = 'DcgmDiag cannot initialize: %d parameters were specified, exceeding the limit of %d.' %\
                  (len(params), dcgm_structs.DCGM_MAX_TEST_PARMS)
            raise ValueError(err)

        for param in params:
            self.AddParameter(param)

    if gpuIds:
        # The struct stores the GPU list as a comma-separated string.
        self.runDiagInfo.gpuList = ",".join(str(gpu) for gpu in gpuIds)
|
|
90
|
+
|
|
91
|
+
def SetVerbose(self, val):
    """Set or clear the VERBOSE flag bit based on the truthiness of *val*."""
    # Fixed anti-idiom `val == True` (PEP 8): any truthy value now enables
    # verbose output instead of only the exact object True.
    if val:
        self.runDiagInfo.flags |= dcgm_structs.DCGM_RUN_FLAGS_VERBOSE
    else:
        self.runDiagInfo.flags &= ~dcgm_structs.DCGM_RUN_FLAGS_VERBOSE
|
|
96
|
+
|
|
97
|
+
def UseFakeGpus(self):
    """Run the diag against fake GPUs by mirroring the configured GPU list
    into the struct's fake-GPU list field."""
    configured_gpus = self.runDiagInfo.gpuList
    self.runDiagInfo.fakeGpuList = configured_gpus
|
|
99
|
+
|
|
100
|
+
def GetStruct(self):
    """Expose the underlying dcgmRunDiag request struct."""
    request = self.runDiagInfo
    return request
|
|
102
|
+
|
|
103
|
+
def AddParameter(self, parameterStr):
    """Append one test-parameter string to the request struct.

    Raises
    ------
    ValueError
        If the parameter string would overflow the fixed-size C buffer.
    """
    if len(parameterStr) >= dcgm_structs.DCGM_MAX_TEST_PARMS_LEN:
        err = 'DcgmDiag cannot add parameter \'%s\' because it exceeds max length %d.' % \
              (parameterStr, dcgm_structs.DCGM_MAX_TEST_PARMS_LEN)
        raise ValueError(err)

    # Copy characters into the fixed-size C char array one byte at a time.
    # (enumerate replaces the original manual index counter.)
    for index, c in enumerate(parameterStr):
        self.runDiagInfo.testParms[self.numParams][index] = ord(c)

    self.numParams += 1
|
|
115
|
+
|
|
116
|
+
def AddTest(self, testNameStr):
    """Append one test name to the request struct.

    Raises
    ------
    ValueError
        If the test name would overflow the fixed-size C buffer.
    """
    if len(testNameStr) >= dcgm_structs.DCGM_MAX_TEST_NAMES_LEN:
        err = 'DcgmDiag cannot add test name \'%s\' because it exceeds max length %d.' % \
              (testNameStr, dcgm_structs.DCGM_MAX_TEST_NAMES_LEN)
        raise ValueError(err)

    # Copy characters into the fixed-size C char array one byte at a time.
    # (enumerate replaces the original manual index counter.)
    for index, c in enumerate(testNameStr):
        self.runDiagInfo.testNames[self.numTests][index] = ord(c)

    self.numTests += 1
|
|
128
|
+
|
|
129
|
+
def SetStatsOnFail(self, val):
    """Request that diag statistics be saved when a test fails.

    Note: this flag is one-way — a falsy *val* is a no-op; the bit is
    never cleared here (matching the original behavior).
    """
    # Fixed anti-idiom `val == True` (PEP 8): any truthy value now sets
    # the flag instead of only the exact object True.
    if val:
        self.runDiagInfo.flags |= dcgm_structs.DCGM_RUN_FLAGS_STATSONFAIL
|
|
132
|
+
|
|
133
|
+
def SetThrottleMask(self, value):
    """Set the throttle-reason mask for the diagnostic run.

    Requires dcgmRunDiag struct version >= 3. The value is stored as a
    string; a string value longer than the fixed buffer raises ValueError.
    """
    if DcgmDiag._versionMap[self.version] < 3:
        raise ValueError(
            "Throttle mask requires minimum version 3 for dcgmRunDiag.")

    too_long = (isinstance(value, str)
                and len(value) >= dcgm_structs.DCGM_THROTTLE_MASK_LEN)
    if too_long:
        raise ValueError("Throttle mask value '%s' exceeds max length %d." %
                         (value, dcgm_structs.DCGM_THROTTLE_MASK_LEN - 1))

    self.runDiagInfo.throttleMask = str(value)
|
|
144
|
+
|
|
145
|
+
def SetFailEarly(self, enable=True, checkInterval=5):
    """Enable or disable early-failure mode for the diagnostic.

    enable        - truthy sets DCGM_RUN_FLAGS_FAIL_EARLY and records the
                    check interval; falsy clears the flag.
    checkInterval - seconds between early-failure checks; must be an int.

    Requires dcgmRunDiag struct version >= 5.
    """
    if DcgmDiag._versionMap[self.version] < 5:
        raise ValueError(
            "Fail early requires minimum version 5 for dcgmRunDiag.")
    if not isinstance(checkInterval, int):
        raise ValueError("Invalid checkInterval value: %s" % checkInterval)

    if not enable:
        self.runDiagInfo.flags &= ~dcgm_structs.DCGM_RUN_FLAGS_FAIL_EARLY
        return

    self.runDiagInfo.flags |= dcgm_structs.DCGM_RUN_FLAGS_FAIL_EARLY
    self.runDiagInfo.failCheckInterval = checkInterval
|
|
157
|
+
|
|
158
|
+
def Execute(self, handle):
    """Run the configured diagnostic via dcgmActionValidate_v2 and return its response."""
    response = dcgm_agent.dcgmActionValidate_v2(
        handle, self.runDiagInfo, self.version)
    return response
|
|
161
|
+
|
|
162
|
+
def SetStatsPath(self, statsPath):
    """Set the directory where diagnostic stats files are written.

    Raises ValueError if the path does not fit in the fixed-size field.
    """
    if len(statsPath) >= dcgm_structs.DCGM_PATH_LEN:
        raise ValueError(
            "DcgmDiag cannot set statsPath '%s' because it exceeds max length %d."
            % (statsPath, dcgm_structs.DCGM_PATH_LEN))

    self.runDiagInfo.statsPath = statsPath
|
|
169
|
+
|
|
170
|
+
def SetConfigFileContents(self, configFileContents):
    """Set the inline diagnostic configuration-file contents.

    Raises ValueError if the contents do not fit in the fixed-size field.
    """
    if len(configFileContents) >= dcgm_structs.DCGM_MAX_CONFIG_FILE_LEN:
        raise ValueError(
            "Dcgm Diag cannot set config file contents to '%s' because it exceeds max length %d."
            % (configFileContents, dcgm_structs.DCGM_MAX_CONFIG_FILE_LEN))

    self.runDiagInfo.configFileContents = configFileContents
|
|
177
|
+
|
|
178
|
+
def SetDebugLogFile(self, logFileName):
    """Set the file that receives diagnostic debug logging.

    Raises ValueError if the name does not fit in the fixed-size field.
    """
    if len(logFileName) >= dcgm_structs.DCGM_FILE_LEN:
        raise ValueError(
            "Cannot set debug file to '%s' because it exceeds max length %d."
            % (logFileName, dcgm_structs.DCGM_FILE_LEN))

    self.runDiagInfo.debugLogFile = logFileName
|
|
184
|
+
|
|
185
|
+
def SetDebugLevel(self, debugLevel):
    """Set the diagnostic debug verbosity level.

    debugLevel - integer in the inclusive range [0, 5].

    Raises ValueError for any value outside that range.
    """
    if debugLevel < 0 or debugLevel > 5:
        # Bug fix: the original message contained an unformatted %d (the
        # "% debugLevel" was missing), so the raised error showed the
        # literal "%d" instead of the offending value.
        raise ValueError(
            "Cannot set debug level to %d. Debug Level must be a value from 0-5 inclusive."
            % debugLevel)

    self.runDiagInfo.debugLevel = debugLevel
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import model_analyzer.monitor.dcgm.dcgm_agent as dcgm_agent
|
|
16
|
+
import model_analyzer.monitor.dcgm.dcgm_structs as dcgm_structs
|
|
17
|
+
'''
|
|
18
|
+
Class for managing a group of field IDs in the host engine.
|
|
19
|
+
'''
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DcgmFieldGroup:
    '''
    Constructor

    dcgmHandle - DcgmHandle() instance to use for communicating with the host engine
    name - Name of the field group to use within DCGM. This must be unique
    fieldIds - Fields that are part of this group
    fieldGroupId - If provided, this is used to initialize the object from an existing field group ID
    '''

    def __init__(self, dcgmHandle, name="", fieldIds=None, fieldGroupId=None):
        # Avoid a mutable default argument; treat None (or empty) as "no fields".
        fieldIds = fieldIds or []
        self.name = name
        self.fieldIds = fieldIds
        self._dcgmHandle = dcgmHandle
        # True only when this object created the group and therefore owns
        # its destruction in Delete().
        self.wasCreated = False

        #If the user passed in an ID, the field group already exists. Fetch live info
        if fieldGroupId is not None:
            self.fieldGroupId = fieldGroupId
            # Overwrite name/fieldIds with the authoritative values from the
            # host engine rather than trusting the caller's arguments.
            fieldGroupInfo = dcgm_agent.dcgmFieldGroupGetInfo(
                self._dcgmHandle.handle, self.fieldGroupId)
            self.name = fieldGroupInfo.fieldGroupName
            self.fieldIds = fieldGroupInfo.fieldIds
        else:
            self.fieldGroupId = None #Assign here so the destructor doesn't fail if the call below fails
            self.fieldGroupId = dcgm_agent.dcgmFieldGroupCreate(
                self._dcgmHandle.handle, fieldIds, name)
            self.wasCreated = True

    '''
    Remove this field group from DCGM. This object can no longer be passed to other APIs after this call.
    '''

    def Delete(self):
        # Only destroy groups we created; attached-by-ID groups are owned
        # elsewhere.
        if self.wasCreated and self.fieldGroupId is not None:
            # The nested try is deliberate: the inner except clauses call
            # dcgm_structs.dcgmExceptionClass(...) at match time, and during
            # interpreter shutdown those module globals may already be None —
            # which itself raises AttributeError/TypeError, caught by the
            # outer try. Do not flatten this structure.
            try:
                try:
                    dcgm_agent.dcgmFieldGroupDestroy(self._dcgmHandle.handle,
                                                     self.fieldGroupId)
                except dcgm_structs.dcgmExceptionClass(
                        dcgm_structs.DCGM_ST_NO_DATA):
                    # someone may have deleted the group under us. That's ok.
                    pass
                except dcgm_structs.dcgmExceptionClass(
                        dcgm_structs.DCGM_ST_CONNECTION_NOT_VALID):
                    # We lost our connection, but we're destructing this object anyway.
                    pass
            except AttributeError as ae:
                # When we're cleaning up at the end, dcgm_agent and dcgm_structs have been unloaded and we'll
                # get an AttributeError: "'NoneType' object has no 'dcgmExceptionClass'" Ignore this
                pass
            except TypeError as te:
                # When we're cleaning up at the end, dcgm_agent and dcgm_structs have been unloaded and we might
                # get a TypeError: "'NoneType' object is not callable'" Ignore this
                pass
        # Invalidate unconditionally so the object cannot be reused and so
        # __del__ is a no-op on a second call.
        self.fieldGroupId = None
        self._dcgmHandle = None

    #Destructor
    def __del__(self):
        self.Delete()
|