triton-model-analyzer 1.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from math import log2
|
|
18
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
19
|
+
|
|
20
|
+
from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
|
|
21
|
+
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
|
|
22
|
+
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
|
|
23
|
+
|
|
24
|
+
from .search_parameter import ParameterCategory, ParameterUsage, SearchParameter
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SearchParameters:
    """
    Contains information about all configuration parameters the user wants to
    search: model-config parameters (e.g. max_batch_size, instance_group) and
    runtime perf-analyzer parameters (e.g. concurrency, request_rate).
    """

    # These map to the run-config-search fields
    # See github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md
    exponential_rcs_parameters = [
        "max_batch_size",
        "batch_sizes",
        "concurrency",
        "request_rate",
    ]
    linear_rcs_parameters = ["instance_group"]

    model_parameters = [
        "max_batch_size",
        "instance_group",
        "max_queue_delay_microseconds",
    ]
    runtime_parameters = ["batch_sizes", "concurrency", "request_rate"]

    def __init__(
        self,
        model: ModelProfileSpec,
        config: Optional[ConfigCommandProfile] = None,
        is_bls_model: bool = False,
        is_ensemble_model: bool = False,
        is_composing_model: bool = False,
    ):
        """
        Parameters
        ----------
        model : ModelProfileSpec
            The model whose parameters/model-config parameters are searched
        config : ConfigCommandProfile, optional
            Profile configuration; a fresh default is created when omitted
        is_bls_model / is_ensemble_model / is_composing_model : bool
            Model-composition flags that restrict which parameters get populated
        """
        # FIX: the original default was `config=ConfigCommandProfile()`, a
        # mutable default evaluated once and shared by every instance. The
        # None sentinel is backward compatible and avoids cross-instance state.
        self._config = ConfigCommandProfile() if config is None else config
        self._parameters = model.parameters()
        self._model_config_parameters = model.model_config_parameters()
        self._supports_max_batch_size = model.supports_batching()
        self._search_parameters: Dict[str, SearchParameter] = {}
        self._is_ensemble_model = is_ensemble_model
        self._is_bls_model = is_bls_model
        self._is_composing_model = is_composing_model

        self._populate_search_parameters()

    def get_search_parameters(self) -> Dict[str, SearchParameter]:
        """Return all populated search parameters, keyed by parameter name."""
        return self._search_parameters

    def get_parameter(self, name: str) -> Optional[SearchParameter]:
        """Return the named parameter, or None when it was not populated."""
        return self._search_parameters.get(name)

    def get_type(self, name: str) -> ParameterUsage:
        """Return the usage (MODEL vs RUNTIME) of the named parameter."""
        return self._search_parameters[name].usage

    def get_category(self, name: str) -> ParameterCategory:
        """Return the category (INTEGER/EXPONENTIAL/*_LIST) of the named parameter."""
        return self._search_parameters[name].category

    def get_range(self, name: str) -> Tuple[Optional[int], Optional[int]]:
        """Return (min_range, max_range) for the named parameter (None for lists)."""
        parameter = self._search_parameters[name]
        return (parameter.min_range, parameter.max_range)

    def get_list(self, name: str) -> Optional[List[Any]]:
        """Return the enumerated list for the named parameter (empty for ranges)."""
        return self._search_parameters[name].enumerated_list

    def number_of_total_possible_configurations(self) -> int:
        """Return the size of the full cartesian search space."""
        total_number_of_configs = 1
        for parameter in self._search_parameters.values():
            total_number_of_configs *= self._number_of_configurations_for_parameter(
                parameter
            )

        return total_number_of_configs

    def print_info(self, name: str) -> str:
        """Return a human-readable summary line for the named parameter."""
        info_string = f"  {name}: "

        parameter = self._search_parameters[name]
        if parameter.category is ParameterCategory.INTEGER:
            info_string += f"{parameter.min_range} to {parameter.max_range}"
        elif parameter.category is ParameterCategory.EXPONENTIAL:
            # Exponential ranges are stored as log2 values; report real values.
            info_string += f"{2**parameter.min_range} to {2**parameter.max_range}"  # type: ignore
        elif (
            parameter.category is ParameterCategory.INT_LIST
            or parameter.category is ParameterCategory.STR_LIST
        ):
            info_string += f"{parameter.enumerated_list}"

        info_string += f" ({self._number_of_configurations_for_parameter(parameter)})"

        return info_string

    def _number_of_configurations_for_parameter(
        self, parameter: SearchParameter
    ) -> int:
        """Return how many distinct values this single parameter can take."""
        if (
            parameter.category is ParameterCategory.INTEGER
            or parameter.category is ParameterCategory.EXPONENTIAL
        ):
            number_of_parameter_configs = parameter.max_range - parameter.min_range + 1  # type: ignore
        else:
            number_of_parameter_configs = len(parameter.enumerated_list)  # type: ignore

        return number_of_parameter_configs

    def _populate_search_parameters(self) -> None:
        """Populate both runtime and model-config search parameters."""
        self._populate_parameters()
        self._populate_model_config_parameters()

    def _populate_parameters(self) -> None:
        """Populate runtime parameters; composing models only get batch sizes."""
        self._populate_batch_sizes()

        if not self._is_composing_model:
            # Request rate and concurrency are mutually exclusive load settings
            if self._config.is_request_rate_specified(self._parameters):
                self._populate_request_rate()
            else:
                self._populate_concurrency()

    def _populate_model_config_parameters(self) -> None:
        """Populate parameters that live inside the Triton model config."""
        self._populate_max_batch_size()
        self._populate_instance_group()
        self._populate_max_queue_delay_microseconds()

    def _populate_batch_sizes(self) -> None:
        """Populate batch_sizes only when the user explicitly listed values."""
        if self._parameters and self._parameters["batch_sizes"]:
            self._populate_list_parameter(
                parameter_name="batch_sizes",
                parameter_list=self._parameters["batch_sizes"],
                parameter_category=ParameterCategory.INT_LIST,
            )

    def _populate_concurrency(self) -> None:
        """Populate concurrency from a user list, or fall back to RCS min/max."""
        if self._parameters and self._parameters["concurrency"]:
            self._populate_list_parameter(
                parameter_name="concurrency",
                parameter_list=self._parameters["concurrency"],
                parameter_category=ParameterCategory.INT_LIST,
            )
        elif self._config.use_concurrency_formula:
            # The formula computes concurrency elsewhere; nothing to search.
            return
        else:
            self._populate_rcs_parameter(
                parameter_name="concurrency",
                rcs_parameter_min_value=self._config.run_config_search_min_concurrency,
                rcs_parameter_max_value=self._config.run_config_search_max_concurrency,
            )

    def _populate_request_rate(self) -> None:
        """Populate request_rate from a user list, or fall back to RCS min/max."""
        if self._parameters and self._parameters["request_rate"]:
            self._populate_list_parameter(
                parameter_name="request_rate",
                parameter_list=self._parameters["request_rate"],
                parameter_category=ParameterCategory.INT_LIST,
            )
        else:
            self._populate_rcs_parameter(
                parameter_name="request_rate",
                rcs_parameter_min_value=self._config.run_config_search_min_request_rate,
                rcs_parameter_max_value=self._config.run_config_search_max_request_rate,
            )

    def _populate_max_batch_size(self) -> None:
        """Populate max_batch_size from the model config or RCS min/max values."""
        # Example config format:
        # model_config_parameters:
        #   max_batch_size: [1, 4, 16]
        if self._is_key_in_model_config_parameters("max_batch_size"):
            parameter_list = self._model_config_parameters["max_batch_size"]
            self._populate_list_parameter(
                parameter_name="max_batch_size",
                parameter_list=parameter_list,
                parameter_category=ParameterCategory.INT_LIST,
            )
        elif self._supports_max_batch_size and not self._is_bls_model:
            # Need to populate max_batch_size based on RCS min/max values
            # when no model config parameters are present
            self._populate_rcs_parameter(
                parameter_name="max_batch_size",
                rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size,
                rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size,
            )

    def _populate_instance_group(self) -> None:
        """Populate instance_group count from the model config or RCS min/max."""
        # Example config format:
        #
        # model_config_parameters:
        #   instance_group:
        #   - kind: KIND_GPU
        #     count: [1, 2, 3, 4]
        if self._is_key_in_model_config_parameters("instance_group"):
            parameter_list = self._model_config_parameters["instance_group"][0][0][
                "count"
            ]

            self._populate_list_parameter(
                parameter_name="instance_group",
                parameter_list=parameter_list,
                parameter_category=ParameterCategory.INT_LIST,
            )
        elif not self._is_ensemble_model:
            # Need to populate instance_group based on RCS min/max values
            # when no model config parameters are present
            self._populate_rcs_parameter(
                parameter_name="instance_group",
                rcs_parameter_min_value=self._config.run_config_search_min_instance_count,
                rcs_parameter_max_value=self._config.run_config_search_max_instance_count,
            )

    def _is_key_in_model_config_parameters(self, key: str) -> bool:
        """Return True when the user supplied `key` under model_config_parameters."""
        return bool(
            self._model_config_parameters and key in self._model_config_parameters
        )

    def _populate_max_queue_delay_microseconds(self) -> None:
        """Populate max_queue_delay_microseconds (list-only; no RCS fallback)."""
        # Example format
        #
        # model_config_parameters:
        #   dynamic_batching:
        #     max_queue_delay_microseconds: [100, 200, 300]

        # There is no RCS field for max_queue_delay_microseconds
        if self._is_max_queue_delay_in_model_config_parameters():
            self._populate_list_parameter(
                parameter_name="max_queue_delay_microseconds",
                parameter_list=self._model_config_parameters["dynamic_batching"][0][
                    "max_queue_delay_microseconds"
                ],
                parameter_category=ParameterCategory.INT_LIST,
            )

    def _is_max_queue_delay_in_model_config_parameters(self) -> bool:
        """Return True when dynamic_batching.max_queue_delay_microseconds is set."""
        if not self._model_config_parameters:
            return False

        return "dynamic_batching" in self._model_config_parameters.keys() and (
            "max_queue_delay_microseconds"
            in self._model_config_parameters["dynamic_batching"][0]
        )

    def _populate_list_parameter(
        self,
        parameter_name: str,
        parameter_list: List[Union[int, str]],
        parameter_category: ParameterCategory,
    ) -> None:
        """Add a parameter searched over an explicit list of values."""
        usage = self._determine_parameter_usage(parameter_name)

        self._add_search_parameter(
            name=parameter_name,
            usage=usage,
            category=parameter_category,
            enumerated_list=parameter_list,
        )

    def _populate_rcs_parameter(
        self,
        parameter_name: str,
        rcs_parameter_min_value: int,
        rcs_parameter_max_value: int,
    ) -> None:
        """
        Add a parameter searched over a run-config-search min/max range.
        Exponential parameters store the range as log2 of the real values.
        """
        usage = self._determine_parameter_usage(parameter_name)
        category = self._determine_parameter_category(parameter_name)

        if category == ParameterCategory.EXPONENTIAL:
            min_range = int(log2(rcs_parameter_min_value))  # type: ignore
            max_range = int(log2(rcs_parameter_max_value))  # type: ignore
        else:
            min_range = rcs_parameter_min_value  # type: ignore
            max_range = rcs_parameter_max_value  # type: ignore

        self._add_search_parameter(
            name=parameter_name,
            usage=usage,
            category=category,
            min_range=min_range,
            max_range=max_range,
        )

    def _determine_parameter_category(self, name: str) -> ParameterCategory:
        """
        Map an RCS parameter name to its category.

        Raises
        ------
        TritonModelAnalyzerException
            If the name is not a known RCS parameter.
        """
        if name in SearchParameters.exponential_rcs_parameters:
            return ParameterCategory.EXPONENTIAL
        if name in SearchParameters.linear_rcs_parameters:
            return ParameterCategory.INTEGER

        # FIX: the original constructed this exception without raising it and
        # then crashed with UnboundLocalError on the return statement.
        raise TritonModelAnalyzerException(f"ParameterCategory not found for {name}")

    def _determine_parameter_usage(self, name: str) -> ParameterUsage:
        """
        Map a parameter name to its usage (model config vs runtime).

        Raises
        ------
        TritonModelAnalyzerException
            If the name is not a known parameter.
        """
        if name in SearchParameters.model_parameters:
            return ParameterUsage.MODEL
        if name in SearchParameters.runtime_parameters:
            return ParameterUsage.RUNTIME

        # FIX: the original constructed this exception without raising it and
        # then crashed with UnboundLocalError on the return statement.
        raise TritonModelAnalyzerException(f"ParameterUsage not found for {name}")

    def _add_search_parameter(
        self,
        name: str,
        usage: ParameterUsage,
        category: ParameterCategory,
        min_range: Optional[int] = None,
        max_range: Optional[int] = None,
        enumerated_list: Optional[List[Any]] = None,
    ) -> None:
        """Validate the inputs and store a new SearchParameter under `name`."""
        # FIX: the original used a mutable default (`enumerated_list=[]`),
        # which is shared across calls; normalize a None sentinel instead.
        enumerated_list = [] if enumerated_list is None else enumerated_list

        self._check_for_illegal_input(category, min_range, max_range, enumerated_list)

        self._search_parameters[name] = SearchParameter(
            usage=usage,
            category=category,
            enumerated_list=enumerated_list,
            min_range=min_range,
            max_range=max_range,
        )

    def _check_for_illegal_input(
        self,
        category: ParameterCategory,
        min_range: Optional[int],
        max_range: Optional[int],
        enumerated_list: List[Any],
    ) -> None:
        """Raise TritonModelAnalyzerException for inconsistent parameter specs."""
        if (
            category is ParameterCategory.INT_LIST
            or category is ParameterCategory.STR_LIST
        ):
            self._check_for_illegal_list_input(min_range, max_range, enumerated_list)
        else:
            if min_range is None or max_range is None:
                raise TritonModelAnalyzerException(
                    "Both min_range and max_range must be specified"
                )

            # FIX: the original guarded this with `if min_range and max_range`,
            # which silently skipped the ordering check when min_range == 0
            # (a legal value, e.g. log2(1) for exponential parameters).
            if min_range > max_range:
                raise TritonModelAnalyzerException(
                    "min_range cannot be larger than max_range"
                )

    def _check_for_illegal_list_input(
        self,
        min_range: Optional[int],
        max_range: Optional[int],
        enumerated_list: List[Any],
    ) -> None:
        """Raise if a list-category parameter mixes list and range settings."""
        if not enumerated_list:
            raise TritonModelAnalyzerException(
                "enumerated_list must be specified for a ParameterCategory.LIST"
            )
        elif min_range is not None:
            raise TritonModelAnalyzerException(
                "min_range cannot be specified for a ParameterCategory.LIST"
            )
        elif max_range is not None:
            raise TritonModelAnalyzerException(
                "max_range cannot be specified for a ParameterCategory.LIST"
            )
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|