triton-model-analyzer 1.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from typing import Dict, List
|
|
19
|
+
|
|
20
|
+
from model_analyzer.config.generate.model_variant_name_manager import (
|
|
21
|
+
ModelVariantNameManager,
|
|
22
|
+
)
|
|
23
|
+
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
|
|
24
|
+
from model_analyzer.constants import DEFAULT_CONFIG_PARAMS, LOGGER_NAME
|
|
25
|
+
from model_analyzer.device.gpu_device import GPUDevice
|
|
26
|
+
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
|
|
27
|
+
from model_analyzer.triton.client.client import TritonClient
|
|
28
|
+
from model_analyzer.triton.model.model_config_variant import ModelConfigVariant
|
|
29
|
+
|
|
30
|
+
from .base_model_config_generator import BaseModelConfigGenerator
|
|
31
|
+
from .generator_utils import GeneratorUtils
|
|
32
|
+
from .model_profile_spec import ModelProfileSpec
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(LOGGER_NAME)
|
|
35
|
+
from copy import deepcopy
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ManualModelConfigGenerator(BaseModelConfigGenerator):
    """Given a model, generates model configs in manual search mode"""

    # Class-level flag: set to True once the "Starting manual brute search"
    # banner has been logged, so the banner is printed only for the first
    # instance created in a run.
    _log_first_run = False

    def __init__(
        self,
        config: ConfigCommandProfile,
        gpus: List[GPUDevice],
        model: ModelProfileSpec,
        client: TritonClient,
        model_variant_name_manager: ModelVariantNameManager,
        default_only: bool,
        early_exit_enable: bool,
    ) -> None:
        """
        Parameters
        ----------
        config: ModelAnalyzerConfig
        gpus: List of GPUDevices
        model: The model to generate ModelConfigs for
        client: TritonClient
        model_variant_name_manager: ModelVariantNameManager
        default_only: Bool
            If true, only the default config will be generated
            If false, the default config will NOT be generated
        early_exit_enable: Bool
            If true, the generator can early exit if throughput plateaus
        """
        super().__init__(
            config,
            gpus,
            model,
            client,
            model_variant_name_manager,
            default_only,
            early_exit_enable,
        )

        if not ManualModelConfigGenerator._log_first_run:
            logger.info("")
            logger.info("Starting manual brute search")
            logger.info("")
            ManualModelConfigGenerator._log_first_run = True

        self._search_disabled = config.run_config_search_disable
        self._curr_config_index = 0
        self._curr_max_batch_size_index = 0

        self._max_batch_sizes = None
        self._non_max_batch_size_param_combos: List[Dict] = []
        self._determine_max_batch_sizes_and_param_combos()

        # All configs are pregenerated in _configs[][]
        # Indexed as follows:
        #    _configs[_curr_config_index][_curr_max_batch_size_index]
        #
        self._configs = self._generate_model_config_variants()

    def _done_walking(self) -> bool:
        # >= (rather than ==) so an extra _step_config() call can never
        # walk past the end of the pregenerated config list
        return self._curr_config_index >= len(self._configs)

    def _done_walking_max_batch_size(self) -> bool:
        if (
            self._max_batch_sizes is None
            or self._curr_max_batch_size_index >= len(self._max_batch_sizes)
        ):
            return True

        if self._early_exit_enable and self._last_results_erroneous():
            return True

        if self._early_exit_enable and not self._last_results_increased_throughput():
            self._print_max_batch_size_plateau_warning()
            return True
        return False

    def _step(self) -> None:
        self._step_max_batch_size()

        if self._done_walking_max_batch_size():
            self._reset_max_batch_size()
            self._step_config()

    def _reset_max_batch_size(self) -> None:
        super()._reset_max_batch_size()
        self._curr_max_batch_size_index = 0

    def _step_config(self) -> None:
        self._curr_config_index += 1

    def _step_max_batch_size(self) -> None:
        self._curr_max_batch_size_index += 1

        # Track per-batch-size throughput so plateau detection can compare
        # against earlier max_batch_size results
        last_max_throughput = self._get_last_results_max_throughput()
        if last_max_throughput:
            self._curr_max_batch_size_throughputs.append(last_max_throughput)

    def _get_next_model_config_variant(self) -> ModelConfigVariant:
        return self._configs[self._curr_config_index][self._curr_max_batch_size_index]

    def _generate_model_config_variants(self) -> List[List[ModelConfigVariant]]:
        """Generate all model config combinations"""

        model_config_variants = []
        for param_combo in self._non_max_batch_size_param_combos:
            configs_with_max_batch_size = []
            if self._max_batch_sizes:
                for mbs in self._max_batch_sizes:
                    # Build a fresh dict per max_batch_size instead of
                    # mutating the shared param_combo in place: if the callee
                    # stores a reference, all variants would otherwise alias
                    # the same dict holding the last max_batch_size value
                    combo = {**param_combo, "max_batch_size": mbs}
                    model_config_variant = self._make_direct_mode_model_config_variant(
                        combo
                    )
                    configs_with_max_batch_size.append(model_config_variant)
            else:
                model_config_variant = self._make_direct_mode_model_config_variant(
                    param_combo
                )
                configs_with_max_batch_size.append(model_config_variant)

            model_config_variants.append(configs_with_max_batch_size)

        return model_config_variants

    def _determine_max_batch_sizes_and_param_combos(self) -> None:
        """
        Determine self._max_batch_sizes and self._non_max_batch_size_param_combos
        """
        if self._default_only:
            self._non_max_batch_size_param_combos = [DEFAULT_CONFIG_PARAMS]
        else:
            # deepcopy so popping "max_batch_size" does not mutate the
            # model's own config-parameter dict
            model_config_params = deepcopy(self._base_model.model_config_parameters())
            if model_config_params:
                self._max_batch_sizes = model_config_params.pop("max_batch_size", None)
                self._non_max_batch_size_param_combos = (
                    GeneratorUtils.generate_combinations(model_config_params)
                )
            else:
                if self._search_disabled:
                    self._non_max_batch_size_param_combos = (
                        self._generate_search_disabled_param_combos()
                    )
                else:
                    # No f-prefix needed: the message has no placeholders
                    raise TritonModelAnalyzerException(
                        "Automatic search not supported in ManualModelConfigGenerator"
                    )

    def _generate_search_disabled_param_combos(self) -> List[Dict]:
        """Return the configs when we want to search but searching is disabled"""
        return [DEFAULT_CONFIG_PARAMS]
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from typing import List
|
|
18
|
+
|
|
19
|
+
from model_analyzer.config.generate.model_variant_name_manager import (
|
|
20
|
+
ModelVariantNameManager,
|
|
21
|
+
)
|
|
22
|
+
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
|
|
23
|
+
from model_analyzer.device.gpu_device import GPUDevice
|
|
24
|
+
from model_analyzer.triton.client.client import TritonClient
|
|
25
|
+
|
|
26
|
+
from .automatic_model_config_generator import AutomaticModelConfigGenerator
|
|
27
|
+
from .config_generator_interface import ConfigGeneratorInterface
|
|
28
|
+
from .manual_model_config_generator import ManualModelConfigGenerator
|
|
29
|
+
from .model_profile_spec import ModelProfileSpec
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ModelConfigGeneratorFactory:
    """
    Factory that creates the correct Config Generators
    """

    @staticmethod
    def create_model_config_generator(
        config: ConfigCommandProfile,
        gpus: List[GPUDevice],
        model: ModelProfileSpec,
        client: TritonClient,
        model_variant_name_manager: ModelVariantNameManager,
        default_only: bool,
        early_exit_enable: bool,
    ) -> ConfigGeneratorInterface:
        """
        Parameters
        ----------
        config: ConfigCommandProfile
            The Model Analyzer config file for the profile step
        gpus: List of GPUDevices
        model: ConfigModelProfileSpec
            The model to generate ModelRunConfigs for
        client: TritonClient
            The client handle used to send requests to Triton
        model_variant_name_manager: ModelVariantNameManager
            Used to manage the model variant names
        default_only: Bool
            If true, only the default config will be generated by the created generator
            If false, the default config will NOT be generated by the created generator
        early_exit_enable: Bool
            If true, the created generator can early exit if throughput plateaus

        Returns
        -------
        A generator that implements ConfigGeneratorInterface and creates ModelConfigs
        """

        # Manual search is used when the user disabled automatic search or
        # supplied explicit model-config parameters to sweep over; otherwise
        # the automatic generator drives the sweep.
        use_manual_search = (
            config.run_config_search_disable or model.model_config_parameters()
        )

        generator_cls = (
            ManualModelConfigGenerator
            if use_manual_search
            else AutomaticModelConfigGenerator
        )

        return generator_cls(
            config,
            gpus,
            model,
            client,
            model_variant_name_manager,
            default_only,
            early_exit_enable,
        )
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from copy import deepcopy
|
|
18
|
+
from typing import List
|
|
19
|
+
|
|
20
|
+
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
|
|
21
|
+
from model_analyzer.config.input.objects.config_model_profile_spec import (
|
|
22
|
+
ConfigModelProfileSpec,
|
|
23
|
+
)
|
|
24
|
+
from model_analyzer.device.gpu_device import GPUDevice
|
|
25
|
+
from model_analyzer.triton.client.client import TritonClient
|
|
26
|
+
from model_analyzer.triton.model.model_config import ModelConfig
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ModelProfileSpec(ConfigModelProfileSpec):
    """
    The profile configuration and default model config for a single model to be profiled
    """

    def __init__(
        self,
        spec: ConfigModelProfileSpec,
        config: ConfigCommandProfile,
        client: TritonClient,
        gpus: List[GPUDevice],
    ):
        super().__init__(spec.model_name())
        # Clone every attribute from the source spec onto this instance so
        # the two objects share no mutable state
        self.__dict__ = deepcopy(spec.__dict__)

        # Fetch the model's default configuration from the model repository
        self._default_model_config = ModelConfig.create_model_config_dict(
            config, client, gpus, config.model_repository, spec.model_name()
        )

        # Honor a user override forcing this composing model onto CPU
        if spec.model_name() in config.cpu_only_composing_models:
            self._cpu_only = True

    def get_default_config(self) -> dict:
        """Returns the default configuration for this model"""
        # Deep copy so callers can freely mutate the returned dict
        return deepcopy(self._default_model_config)

    def supports_batching(self) -> bool:
        """Returns True if this model supports batching. Else False"""
        # A missing or zero max_batch_size means batching is unsupported
        return self._default_model_config.get("max_batch_size", 0) != 0

    def supports_dynamic_batching(self) -> bool:
        """Returns True if this model supports dynamic batching. Else False"""
        # Sequence batching takes precedence and rules out dynamic batching
        if "sequence_batching" in self._default_model_config:
            return False
        return self.supports_batching()

    def is_ensemble(self) -> bool:
        """Returns true if the model is an ensemble"""
        return "ensemble_scheduling" in self._default_model_config
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from typing import Generator, List, Optional
|
|
18
|
+
|
|
19
|
+
from model_analyzer.config.generate.model_variant_name_manager import (
|
|
20
|
+
ModelVariantNameManager,
|
|
21
|
+
)
|
|
22
|
+
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
|
|
23
|
+
from model_analyzer.config.run.model_run_config import ModelRunConfig
|
|
24
|
+
from model_analyzer.device.gpu_device import GPUDevice
|
|
25
|
+
from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
|
|
26
|
+
from model_analyzer.result.run_config_measurement import RunConfigMeasurement
|
|
27
|
+
from model_analyzer.triton.client.client import TritonClient
|
|
28
|
+
from model_analyzer.triton.model.model_config_variant import ModelConfigVariant
|
|
29
|
+
|
|
30
|
+
from .config_generator_interface import ConfigGeneratorInterface
|
|
31
|
+
from .model_config_generator_factory import ModelConfigGeneratorFactory
|
|
32
|
+
from .model_profile_spec import ModelProfileSpec
|
|
33
|
+
from .perf_analyzer_config_generator import PerfAnalyzerConfigGenerator
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ModelRunConfigGenerator(ConfigGeneratorInterface):
|
|
37
|
+
"""
|
|
38
|
+
Given a model, generates all ModelRunConfigs (combination of
|
|
39
|
+
ModelConfig and PerfConfig)
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
def __init__(
|
|
43
|
+
self,
|
|
44
|
+
config: ConfigCommandProfile,
|
|
45
|
+
gpus: List[GPUDevice],
|
|
46
|
+
model: ModelProfileSpec,
|
|
47
|
+
client: TritonClient,
|
|
48
|
+
model_variant_name_manager: ModelVariantNameManager,
|
|
49
|
+
default_only: bool,
|
|
50
|
+
) -> None:
|
|
51
|
+
"""
|
|
52
|
+
Parameters
|
|
53
|
+
----------
|
|
54
|
+
config: ModelAnalyzerConfig
|
|
55
|
+
|
|
56
|
+
gpus: List of GPUDevices
|
|
57
|
+
|
|
58
|
+
model: ConfigModelProfileSpec
|
|
59
|
+
The model to generate ModelRunConfigs for
|
|
60
|
+
|
|
61
|
+
client: TritonClient
|
|
62
|
+
|
|
63
|
+
model_variant_name_manager: ModelVariantNameManager
|
|
64
|
+
|
|
65
|
+
default_only: Bool
|
|
66
|
+
"""
|
|
67
|
+
self._config = config
|
|
68
|
+
self._gpus = gpus
|
|
69
|
+
self._model = model
|
|
70
|
+
self._client = client
|
|
71
|
+
self._model_variant_name_manager = model_variant_name_manager
|
|
72
|
+
|
|
73
|
+
self._model_name = model.model_name()
|
|
74
|
+
|
|
75
|
+
self._model_pa_flags = model.perf_analyzer_flags()
|
|
76
|
+
self._model_parameters = model.parameters()
|
|
77
|
+
self._triton_server_env = model.triton_server_environment()
|
|
78
|
+
|
|
79
|
+
self._determine_early_exit_enables(config, model)
|
|
80
|
+
|
|
81
|
+
self._mcg = ModelConfigGeneratorFactory.create_model_config_generator(
|
|
82
|
+
self._config,
|
|
83
|
+
self._gpus,
|
|
84
|
+
model,
|
|
85
|
+
self._client,
|
|
86
|
+
self._model_variant_name_manager,
|
|
87
|
+
default_only,
|
|
88
|
+
self._mcg_early_exit_enable,
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
self._curr_mc_measurements: List[Optional[RunConfigMeasurement]] = []
|
|
92
|
+
|
|
93
|
+
def get_configs(self) -> Generator[ModelRunConfig, None, None]:
|
|
94
|
+
"""
|
|
95
|
+
Returns
|
|
96
|
+
-------
|
|
97
|
+
ModelRunConfig
|
|
98
|
+
The next ModelRunConfig generated by this class
|
|
99
|
+
"""
|
|
100
|
+
for model_config_variant in self._mcg.get_configs():
|
|
101
|
+
self._pacg = PerfAnalyzerConfigGenerator(
|
|
102
|
+
self._config,
|
|
103
|
+
model_config_variant.model_config.get_field("name"),
|
|
104
|
+
self._model_pa_flags,
|
|
105
|
+
self._model_parameters,
|
|
106
|
+
self._pacg_early_exit_enable,
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
for perf_analyzer_config in self._pacg.get_configs():
|
|
110
|
+
run_config = self._generate_model_run_config(
|
|
111
|
+
model_config_variant, perf_analyzer_config
|
|
112
|
+
)
|
|
113
|
+
yield run_config
|
|
114
|
+
|
|
115
|
+
self._set_last_results_model_config_generator()
|
|
116
|
+
|
|
117
|
+
def set_last_results(
|
|
118
|
+
self, measurements: List[Optional[RunConfigMeasurement]]
|
|
119
|
+
) -> None:
|
|
120
|
+
"""
|
|
121
|
+
Given the results from the last ModelRunConfig, make decisions
|
|
122
|
+
about future configurations to generate
|
|
123
|
+
|
|
124
|
+
Parameters
|
|
125
|
+
----------
|
|
126
|
+
measurements: List of Measurements from the last run(s)
|
|
127
|
+
"""
|
|
128
|
+
self._pacg.set_last_results(measurements)
|
|
129
|
+
self._curr_mc_measurements.extend(measurements)
|
|
130
|
+
|
|
131
|
+
def _set_last_results_model_config_generator(self) -> None:
    """Flush all measurements accumulated for the current model config
    variant to the model config generator, then reset the accumulator."""
    accumulated = self._curr_mc_measurements
    self._mcg.set_last_results(accumulated)
    self._curr_mc_measurements = []
|
|
134
|
+
|
|
135
|
+
def _generate_model_run_config(
    self,
    model_config_variant: ModelConfigVariant,
    perf_analyzer_config: PerfAnalyzerConfig,
) -> ModelRunConfig:
    """Bundle a model config variant and a perf analyzer config
    into a single ModelRunConfig for this model."""
    return ModelRunConfig(
        self._model_name, model_config_variant, perf_analyzer_config
    )
|
|
145
|
+
|
|
146
|
+
def _determine_early_exit_enables(
    self, config: ConfigCommandProfile, model: ModelProfileSpec
) -> None:
    """Decide whether the perf analyzer config generator (pacg) and the
    model config generator (mcg) are allowed to end their sweeps early,
    based on the global flag and on what the user specified for the model."""
    globally_enabled = config.early_exit_enable

    # PA sweep may exit early unless the user pinned explicit concurrencies
    concurrency_was_given = model.parameters()["concurrency"]
    self._pacg_early_exit_enable = globally_enabled or not concurrency_was_given

    # MC sweep may exit early unless explicit model config parameters exist
    config_params_given = model.model_config_parameters()
    self._mcg_early_exit_enable = globally_enabled or not config_params_given
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from copy import deepcopy
|
|
18
|
+
from typing import Dict, List, Optional, Tuple
|
|
19
|
+
|
|
20
|
+
from model_analyzer.constants import DEFAULT_CONFIG_PARAMS
|
|
21
|
+
from model_analyzer.triton.model.model_config_variant import ModelConfigVariant
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ModelVariantNameManager:
    """
    Hands out stable variant names for model configurations.

    Identical config dicts always resolve to the same variant name
    (<model_name>_config_<N>), default configurations resolve to
    <model_name>_config_default, and the mapping can be restored from a
    plain dict (for checkpointing) via from_dict().
    """

    def __init__(self) -> None:
        # Dict of {model_config_name: model_config_dict}
        self._model_config_dicts: Dict[str, Dict] = {}

        # Dict of {base_model_name: current_count_integer}
        self._model_name_index: Dict[str, int] = {}

    @classmethod
    def from_dict(
        cls, model_variant_name_manager_dict: Dict
    ) -> "ModelVariantNameManager":
        """
        Reconstruct a ModelVariantNameManager from the dict form of its
        internal state (as produced for checkpointing)
        """
        # Instantiate via cls() (was hard-coded ModelVariantNameManager())
        # so subclasses deserialize into the correct type
        model_variant_name_manager = cls()

        model_variant_name_manager._model_config_dicts = (
            model_variant_name_manager_dict["_model_config_dicts"]
        )
        model_variant_name_manager._model_name_index = model_variant_name_manager_dict[
            "_model_name_index"
        ]

        return model_variant_name_manager

    @staticmethod
    def make_ensemble_composing_model_key(
        ensemble_model_config_variants: List[ModelConfigVariant],
    ) -> Dict[str, str]:
        """
        Create the lookup key for an ensemble: a dict with a single "key"
        entry holding the comma-joined composing-model variant names
        """
        ensemble_names = [emcv.variant_name for emcv in ensemble_model_config_variants]
        ensemble_key = ",".join(ensemble_names)

        return {"key": ensemble_key}

    def get_model_variant_name(
        self, model_name: str, model_config_dict: Dict, param_combo: Dict
    ) -> Tuple[bool, str]:
        """
        Given a base model name and a dict of parameters to be applied
        to the base model config, return if the variant already existed
        and the name of the model variant

        If the same input values are provided to this function multiple times,
        the same value will be returned
        """
        return self._get_variant_name(
            model_name, model_config_dict, is_ensemble=False, param_combo=param_combo
        )

    def get_ensemble_model_variant_name(
        self, model_name: str, ensemble_dict: Dict
    ) -> Tuple[bool, str]:
        """
        Given a base ensemble model name and a dict of ensemble composing configs,
        return if the variant already existed and the name of the model variant

        If the same input values are provided to this function multiple times,
        the same value will be returned
        """
        return self._get_variant_name(model_name, ensemble_dict, is_ensemble=True)

    def _get_variant_name(
        self,
        model_name: str,
        config_dict: Dict,
        is_ensemble: bool,
        param_combo: Optional[Dict] = None,
    ) -> Tuple[bool, str]:
        """
        Resolve (variant_already_existed, variant_name) for the given config.

        Default configurations short-circuit to <model_name>_config_default
        and are never recorded in the variant table.
        """
        # FIX: param_combo previously defaulted to a mutable dict ({}),
        # which is created once and shared across all calls; a None
        # sentinel is the safe, equivalent idiom.
        if param_combo is None:
            param_combo = {}

        model_config_dict = self._copy_and_restore_model_config_dict_name(
            model_name, config_dict
        )

        variant_found, model_variant_name = self._find_existing_variant(
            model_config_dict
        )

        # Default configurations bypass the variant table entirely
        if is_ensemble:
            if self._is_ensemble_default_config(config_dict):
                return (False, model_name + "_config_default")
        else:
            if self._is_default_config(param_combo):
                return (False, model_name + "_config_default")

        if variant_found:
            return (True, model_variant_name)

        model_variant_name = self._create_new_model_variant(
            model_name, model_config_dict
        )

        return (False, model_variant_name)

    def _copy_and_restore_model_config_dict_name(
        self, model_name: str, model_config_dict: Dict
    ) -> Dict:
        """
        Return a copy of the config dict with its "name" field reset to the
        base model name, so the variant name itself never influences the
        dict-equality comparison used for dedup
        """
        model_config_dict_copy = deepcopy(model_config_dict)
        model_config_dict_copy["name"] = model_name

        return model_config_dict_copy

    def _find_existing_variant(self, model_config_dict: Dict) -> Tuple[bool, str]:
        """Return (True, variant_name) if an identical config dict was already
        registered, else (False, "")."""
        for (
            model_config_name,
            model_config_variant_dict,
        ) in self._model_config_dicts.items():
            if model_config_dict == model_config_variant_dict:
                return (True, model_config_name)

        return (False, "")

    def _is_default_config(self, param_combo: Dict) -> bool:
        """A param combo equal to DEFAULT_CONFIG_PARAMS denotes the default config."""
        return param_combo == DEFAULT_CONFIG_PARAMS

    def _is_ensemble_default_config(self, ensemble_dict: Dict) -> bool:
        """An ensemble is default when any composing variant name in its key
        is a default config."""
        return "_config_default" in ensemble_dict["key"]

    def _create_new_model_variant(
        self, model_name: str, model_config_dict: Dict
    ) -> str:
        """
        Register model_config_dict under the next free
        <model_name>_config_<N> name and return that name
        """
        # Indices start at 0 for the first variant of each base model
        if model_name not in self._model_name_index:
            new_index = 0
        else:
            new_index = self._model_name_index[model_name] + 1

        self._model_name_index[model_name] = new_index
        model_config_name = model_name + "_config_" + str(new_index)
        self._model_config_dicts[model_config_name] = model_config_dict

        return model_config_name
|