triton-model-analyzer 1.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,313 @@
+#!/usr/bin/env python3
+
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Dict, List, Optional
+
+from model_analyzer.constants import LOGGER_NAME
+from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
+from model_analyzer.triton.model.model_config import ModelConfig
+from model_analyzer.triton.model.model_config_variant import ModelConfigVariant
+
+logger = logging.getLogger(LOGGER_NAME)
+
+
+class ModelRunConfig:
+    """
+    Encapsulates all the information (ModelConfigVariant + PerfConfig) needed to run
+    a model in Perf Analyzer
+    """
+
+    DEFAULT_MAX_BATCH_SIZE = 1
+    DEFAULT_PERF_BATCH_SIZE = 1
+
+    def __init__(
+        self,
+        model_name: str,
+        model_config_variant: ModelConfigVariant,
+        perf_config: PerfAnalyzerConfig,
+    ) -> None:
+        """
+        Parameters
+        ----------
+        model_name: str
+            The name of the model
+        model_config_variant : ModelConfigVariant
+            model config variant corresponding to this run
+        perf_config : PerfAnalyzerConfig
+            List of possible run parameters to pass
+            to Perf Analyzer
+        """
+
+        self._model_name = model_name
+        self._model_config_variant = model_config_variant
+        self._perf_config = perf_config
+        self._composing_config_variants: List[ModelConfigVariant] = []
+
+    def model_name(self) -> str:
+        """
+        Get the original model name for this run config.
+
+        Returns
+        -------
+        str
+            Original model name
+        """
+
+        return self._model_name
+
+    def model_variant_name(self) -> str:
+        """
+        Get the model config variant name for this config.
+
+        Returns
+        -------
+        str
+            Model variant name
+        """
+        return (
+            self._model_config_variant.variant_name
+            if self._model_config_variant
+            else ""
+        )
+
+    def model_config_variant(self) -> ModelConfigVariant:
+        """
+        Returns
+        -------
+        ModelConfigVariant
+            The ModelConfigVariant corresponding to this run
+        """
+
+        return self._model_config_variant
+
+    def model_config(self) -> Optional[ModelConfig]:
+        """
+        Returns
+        -------
+        ModelConfig
+            The ModelConfig corresponding to this run
+        """
+
+        return (
+            self._model_config_variant.model_config
+            if self._model_config_variant
+            else None
+        )
+
+    def perf_config(self) -> PerfAnalyzerConfig:
+        """
+        Returns
+        -------
+        PerfAnalyzerConfig
+            run parameters corresponding to this run of
+            the perf analyzer
+        """
+
+        return self._perf_config
+
+    def composing_config_variants(self) -> List[ModelConfigVariant]:
+        """
+        Returns the list of composing model config variants
+        """
+
+        return self._composing_config_variants
+
+    def composing_configs(self) -> List[ModelConfig]:
+        """
+        Returns the list of composing model configs
+        """
+
+        if self._composing_config_variants:
+            composing_configs = [
+                composing_config_variant.model_config
+                for composing_config_variant in self._composing_config_variants
+            ]
+            return composing_configs
+        else:
+            return []
+
+    def representation(self) -> str:
+        """
+        Returns a representation string for the ModelRunConfig that can be used
+        as a key to uniquely identify it
+        """
+        repr = self.model_variant_name()
+        repr += " " + self.perf_config().representation()
+
+        if self._composing_config_variants:
+            repr += " " + (",").join(self.get_composing_config_names())  # type: ignore
+
+        return repr
+
+    def _check_for_client_vs_model_batch_size(self) -> bool:
+        """
+        Returns false if client batch size is greater than model batch size. Else true
+        """
+        model_config = self._model_config_variant.model_config.get_config()
+
+        max_batch_size = (
+            model_config["max_batch_size"]
+            if "max_batch_size" in model_config
+            else self.DEFAULT_MAX_BATCH_SIZE
+        )
+        perf_batch_size = (
+            self._perf_config["batch-size"]
+            if "batch-size" in self._perf_config
+            else self.DEFAULT_PERF_BATCH_SIZE
+        )
+
+        legal = max_batch_size >= perf_batch_size
+        if not legal:
+            logger.debug(
+                f"Illegal model run config because client batch size {perf_batch_size} is greater than model max batch size {max_batch_size}"
+            )
+
+        return legal
+
+    def _check_for_preferred_vs_model_batch_size(self) -> bool:
+        """
+        Returns false if maximum of preferred batch size is greater than model batch size. Else true
+        """
+        legal = True
+
+        model_configs = self._create_model_config_dicts()
+
+        for model_config in model_configs:
+            max_batch_size = (
+                model_config["max_batch_size"]
+                if "max_batch_size" in model_config
+                else self.DEFAULT_MAX_BATCH_SIZE
+            )
+
+            if (
+                "dynamic_batching" in model_config
+                and "preferred_batch_size" in model_config["dynamic_batching"]
+            ):
+                max_preferred_batch_size = max(
+                    model_config["dynamic_batching"]["preferred_batch_size"]
+                )
+                legal = max_batch_size >= max_preferred_batch_size
+
+                if not legal:
+                    logger.debug(
+                        f"Illegal model run config because maximum of {model_config['name']}'s preferred batch size {max_preferred_batch_size} is greater than model max batch size {max_batch_size}"
+                    )
+                    return legal
+
+        return legal
+
+    def _create_model_config_dicts(self) -> List[Dict]:
+        """
+        Create a list of model config dictionaries for
+        the given model + composing models
+        """
+        model_configs = (
+            []
+            if self.is_ensemble_model()
+            else [self._model_config_variant.model_config.get_config()]
+        )
+
+        model_configs.extend(
+            [
+                composing_config_variant.model_config.get_config()
+                for composing_config_variant in self._composing_config_variants
+            ]
+        )
+
+        return model_configs
+
+    def is_legal_combination(self):
+        """
+        Returns true if the run_config is valid and should be run. Else false
+        """
+        legal = (
+            self._check_for_client_vs_model_batch_size()
+            and self._check_for_preferred_vs_model_batch_size()
+        )
+
+        return legal
+
+    def is_ensemble_model(self) -> bool:
+        """
+        Returns true if the model config is an ensemble model
+        """
+        return self._model_config_variant.model_config.is_ensemble()
+
+    def is_bls_model(self) -> bool:
+        """
+        Returns true if the model config is a BLS model
+        """
+        # If composing configs are present and it's not an ensemble it must be a BLS
+        # Note: this will need to change if we allow ensembles to contain BLS models
+        return (
+            not self._model_config_variant.model_config.is_ensemble()
+            and len(self._composing_config_variants) > 0
+        )
+
+    def get_composing_config_names(self) -> Optional[List[str]]:
+        """
+        Returns list of composing config names
+        """
+        return [
+            composing_config_variant.variant_name
+            for composing_config_variant in self._composing_config_variants
+        ]
+
+    def add_composing_model_config_variants(
+        self, composing_model_config_variants: List[ModelConfigVariant]
+    ) -> None:
+        """
+        Adds a list of composing model config variants
+        """
+        for composing_model_config_variant in composing_model_config_variants:
+            self._composing_config_variants.append(composing_model_config_variant)
+
+    @classmethod
+    def from_dict(cls, model_run_config_dict):
+        model_run_config = ModelRunConfig(None, None, None)
+        model_run_config._model_name = model_run_config_dict["_model_name"]
+
+        if "_model_config_variant" in model_run_config_dict:
+            model_config = ModelConfig.from_dict(
+                model_run_config_dict["_model_config_variant"]["model_config"]
+            )
+            variant_name = model_run_config_dict["_model_config_variant"][
+                "variant_name"
+            ]
+
+            model_run_config._model_config_variant = ModelConfigVariant(
+                model_config, variant_name
+            )
+
+        model_run_config._perf_config = PerfAnalyzerConfig.from_dict(
+            model_run_config_dict["_perf_config"]
+        )
+
+        if "_composing_config_variants" in model_run_config_dict:
+            model_run_config._composing_config_variants = [
+                ModelConfigVariant(
+                    ModelConfig.from_dict(
+                        composing_config_variant_dict["model_config"]
+                    ),
+                    composing_config_variant_dict["variant_name"],
+                )
+                for composing_config_variant_dict in model_run_config_dict[
+                    "_composing_config_variants"
+                ]
+            ]
+
+        return model_run_config
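For orientation, here is a minimal usage sketch (not part of the package) showing how a ModelRunConfig ties a ModelConfigVariant to a PerfAnalyzerConfig, and how is_legal_combination() rejects a client batch size larger than the model's max_batch_size. It assumes ModelConfig.from_dict() accepts the same dictionary layout that get_config() returns, and that PerfAnalyzerConfig supports item assignment for the "batch-size" flag; verify both against the package before relying on this.

from model_analyzer.config.run.model_run_config import ModelRunConfig
from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
from model_analyzer.triton.model.model_config import ModelConfig
from model_analyzer.triton.model.model_config_variant import ModelConfigVariant

# Hypothetical values; the dict layout is assumed to match what get_config() returns.
model_config = ModelConfig.from_dict({"name": "my_model", "max_batch_size": 4})
variant = ModelConfigVariant(model_config, "my_model_config_0")

perf_config = PerfAnalyzerConfig()
perf_config["batch-size"] = 8  # assumed item assignment; deliberately larger than max_batch_size

run_config = ModelRunConfig("my_model", variant, perf_config)
print(run_config.model_variant_name())    # -> "my_model_config_0"
print(run_config.is_legal_combination())  # -> False (client batch 8 > model max_batch_size 4)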
@@ -0,0 +1,168 @@
+#!/usr/bin/env python3
+
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+
+from model_analyzer.config.run.model_run_config import ModelRunConfig
+from model_analyzer.perf_analyzer.genai_perf_config import GenaiPerfConfig
+
+
+class RunConfig:
+    """
+    Encapsulates all the information needed to run one or more models
+    at the same time in Perf Analyzer
+    """
+
+    def __init__(self, triton_env, genai_perf_flags=None):
+        """
+        Parameters
+        ----------
+        triton_env : dict
+            A dictionary of environment variables to set
+            when launching tritonserver
+
+        genai_perf_flags: dict
+            The set of flags used when calling genai_perf for LLM models
+        """
+
+        self._triton_env = triton_env
+        self._genai_perf_config = GenaiPerfConfig()
+        self._genai_perf_config.update_config(genai_perf_flags)
+        self._model_run_configs: List[ModelRunConfig] = []
+
+    def add_model_run_config(self, model_run_config):
+        """
+        Add a ModelRunConfig to this RunConfig
+        """
+        self._model_run_configs.append(model_run_config)
+
+    def model_run_configs(self) -> List[ModelRunConfig]:
+        """
+        Returns the list of ModelRunConfigs to run concurrently
+        """
+        return self._model_run_configs
+
+    def representation(self):
+        """
+        Returns a representation string for the RunConfig that can be used
+        as a key to uniquely identify it
+        """
+        return "".join([mrc.representation() for mrc in self.model_run_configs()])
+
+    def is_legal_combination(self):
+        """
+        Returns true if all model_run_configs are valid
+        """
+        return all(
+            [
+                model_run_config.is_legal_combination()
+                for model_run_config in self._model_run_configs
+            ]
+        )
+
+    def is_ensemble_model(self) -> bool:
+        """
+        Returns true if the first model config is an ensemble
+        (an ensemble cannot be part of a multi-model)
+        """
+        return self._model_run_configs[0].is_ensemble_model()
+
+    def is_bls_model(self) -> bool:
+        """
+        Returns true if the first model config is a BLS model
+        (a BLS cannot be part of a multi-model)
+        """
+        return self._model_run_configs[0].is_bls_model()
+
+    def cpu_only(self):
+        """
+        Returns true if all model_run_configs only operate on the CPU
+        """
+        return all(
+            [
+                model_run_config.model_config_variant().cpu_only
+                for model_run_config in self._model_run_configs
+            ]
+        )
+
+    def triton_environment(self):
+        """
+        Returns
+        -------
+        dict
+            The environment that tritonserver
+            was run with for this RunConfig
+        """
+
+        return self._triton_env
+
+    def genai_perf_config(self):
+        return self._genai_perf_config
+
+    def models_name(self):
+        """Returns a single comma-joined name of the original model names"""
+        return ",".join([mrc.model_name() for mrc in self.model_run_configs()])
+
+    def model_variants_name(self):
+        """Returns a single comma-joined name of the model variant names"""
+        return ",".join([mrc.model_variant_name() for mrc in self.model_run_configs()])
+
+    def composing_config_variants(self):
+        """
+        Returns a list of composing model config variants from the first model run config
+        (an ensemble/BLS cannot be part of a multi-model profile)
+        """
+        return self._model_run_configs[0].composing_config_variants()
+
+    def composing_model_variants_name(self):
+        """
+        Returns a single comma-joined name of the composing model variant names
+        (an ensemble/BLS cannot be part of a multi-model profile)
+        """
+        return ",".join(
+            [
+                cvc.variant_name
+                for cvc in self.model_run_configs()[0].composing_config_variants()
+            ]
+        )
+
+    def composing_configs(self):
+        """
+        Returns a list of composing model configs from the first model run config
+        (an ensemble/BLS cannot be part of a multi-model profile)
+        """
+        return self._model_run_configs[0].composing_configs()
+
+    def combined_model_variants_name(self):
+        """
+        Combines the model + composing model's variant names (joined with a '::')
+        """
+        if self.composing_model_variants_name():
+            return (
+                f"{self.model_variants_name()}::{self.composing_model_variants_name()}"
+            )
+        else:
+            return self.model_variants_name()
+
+    @classmethod
+    def from_dict(cls, run_config_dict):
+        run_config = RunConfig({})
+
+        run_config._triton_env = run_config_dict["_triton_env"]
+        for mrc_dict in run_config_dict["_model_run_configs"]:
+            run_config._model_run_configs.append(ModelRunConfig.from_dict(mrc_dict))
+
+        return run_config
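A RunConfig simply groups the ModelRunConfigs that should be profiled concurrently, together with the tritonserver environment and optional genai-perf flags. A small illustrative helper (not part of the package; RunConfig({}) mirrors the no-flag construction used by RunConfig.from_dict() above):

from typing import List, Optional

from model_analyzer.config.run.model_run_config import ModelRunConfig
from model_analyzer.config.run.run_config import RunConfig


def build_run_config(
    model_run_configs: List[ModelRunConfig],
    triton_env: Optional[dict] = None,
) -> RunConfig:
    """Group ModelRunConfigs that should be profiled concurrently."""
    run_config = RunConfig(triton_env or {})  # genai_perf_flags left at its None default
    for model_run_config in model_run_configs:
        run_config.add_model_run_config(model_run_config)
    return run_config


# On the result:
#   run_config.models_name()          -> comma-joined original model names
#   run_config.model_variants_name()  -> comma-joined variant names
#   run_config.is_legal_combination() -> True only if every member ModelRunConfig is legal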
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict
+
+# Config constants
+CONFIG_PARSER_SUCCESS = 1
+CONFIG_PARSER_FAILURE = 0
+
+# Result Table constants
+RESULT_TABLE_COLUMN_PADDING = 2
+
+# Result Comparator Constants
+COMPARISON_SCORE_THRESHOLD = 0
+
+# Dict of parameters to apply on top of the default
+# config to result in the default config (empty dict)
+DEFAULT_CONFIG_PARAMS: Dict[str, Any] = {}
+
+# Run Search
+THROUGHPUT_MINIMUM_GAIN = 0.05
+THROUGHPUT_MINIMUM_CONSECUTIVE_PARAMETER_TRIES = 4
+THROUGHPUT_MINIMUM_CONSECUTIVE_BATCH_SIZE_TRIES = 4
+
+# Quick search algorithm constants
+RADIUS = 3
+MIN_INITIALIZED = 3
+
+# Reports
+TOP_MODELS_REPORT_KEY = "Best Configs Across All Models"
+
+# State Management
+MAX_NUMBER_OF_INTERRUPTS = 3
+
+# Perf Analyzer
+MEASUREMENT_WINDOW_STEP = 1000
+MEASUREMENT_REQUEST_COUNT_STEP = 50
+INTERVAL_SLEEP_TIME = 1
+PERF_ANALYZER_MEASUREMENT_WINDOW = 5000
+PERF_ANALYZER_MINIMUM_REQUEST_COUNT = 50
+SECONDS_TO_MILLISECONDS_MULTIPLIER = 1000
+
+# Triton Server
+SERVER_OUTPUT_TIMEOUT_SECS = 5
+
+# Logging
+LOGGER_NAME = "model_analyzer_logger"
+
+# PA Error Log Filename
+PA_ERROR_LOG_FILENAME = "perf_analyzer_error.log"
+
+# Constraints
+GLOBAL_CONSTRAINTS_KEY = "__default__"
+
+# Measurement constants
+INVALID_MEASUREMENT_THRESHOLD = 2
+
+# Model analyzer package name
+PACKAGE_NAME = "triton-model-analyzer"
+
+# GENAI-PERF
+GENAI_PERF_CSV = "profile_export_genai_perf.csv"
+GENAI_PERF_COLLATERAL = ["llm_inputs.json", "profile_export.json"]
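Because the package's modules log through LOGGER_NAME (as seen in model_run_config.py above), callers can raise or lower Model Analyzer's verbosity without touching the root logger, for example to surface the "Illegal model run config" debug messages. Illustrative snippet only:

import logging

from model_analyzer.constants import LOGGER_NAME

logging.basicConfig()  # make sure a handler exists
logging.getLogger(LOGGER_NAME).setLevel(logging.DEBUG)  # debug output for Model Analyzer only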
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+
+# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+
+# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class Device:
+    """
+    Generic device class representing devices being monitored
+    """
+
+    def __init__(self):
+        pass
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+
+# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from model_analyzer.device.device import Device
+
+
+class GPUDevice(Device):
+    """
+    Representing a GPU device
+    """
+
+    def __init__(self, device_name, device_id, pci_bus_id, device_uuid):
+        """
+        Parameters
+        ----------
+        device_name: str
+            Human readable name of the device
+        device_id : int
+            Device id according to the `nvidia-smi` output
+        pci_bus_id : str
+            PCI bus id
+        device_uuid : str
+            Device UUID
+        """
+
+        assert type(device_name) is str
+        assert type(device_id) is int
+        assert type(pci_bus_id) is str
+        assert type(device_uuid) is str
+
+        self._device_name = device_name
+        self._device_id = device_id
+        self._pci_bus_id = pci_bus_id
+        self._device_uuid = device_uuid
+
+    def device_name(self):
+        """
+        Returns
+        -------
+        str
+            device name
+        """
+
+        return self._device_name
+
+    def device_id(self):
+        """
+        Returns
+        -------
+        int
+            device id of this GPU
+        """
+
+        return self._device_id
+
+    def pci_bus_id(self):
+        """
+        Returns
+        -------
+        bytes
+            PCI bus id of this GPU
+        """
+
+        return self._pci_bus_id
+
+    def device_uuid(self):
+        """
+        Returns
+        -------
+        str
+            UUID of this GPU
+        """
+
+        return self._device_uuid
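The GPUDevice constructor asserts exact types, so callers must pass a str name, an int id, and str PCI bus id and UUID (note the pci_bus_id accessor's docstring says bytes, but the constructor asserts str). A tiny illustrative example with made-up values:

from model_analyzer.device.gpu_device import GPUDevice

# All values below are placeholders, not real hardware identifiers.
gpu = GPUDevice(
    device_name="NVIDIA A100-SXM4-40GB",  # str: human readable name
    device_id=0,                          # int: id as reported by nvidia-smi
    pci_bus_id="00000000:07:00.0",        # str: PCI bus id
    device_uuid="GPU-00000000-0000-0000-0000-000000000000",  # str: device UUID
)

print(gpu.device_id(), gpu.pci_bus_id(), gpu.device_uuid())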