triton-model-analyzer 1.48.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
model_analyzer/config/generate/run_config_generator_factory.py
@@ -0,0 +1,329 @@
#!/usr/bin/env python3

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict, List

from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
from model_analyzer.config.generate.model_variant_name_manager import (
    ModelVariantNameManager,
)
from model_analyzer.config.generate.search_parameters import SearchParameters
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
from model_analyzer.config.input.objects.config_model_profile_spec import (
    ConfigModelProfileSpec,
)
from model_analyzer.constants import MIN_INITIALIZED, RADIUS
from model_analyzer.device.gpu_device import GPUDevice
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
from model_analyzer.result.result_manager import ResultManager
from model_analyzer.state.analyzer_state_manager import AnalyzerStateManager
from model_analyzer.triton.client.client import TritonClient
from model_analyzer.triton.model.model_config import ModelConfig

from .brute_plus_binary_parameter_search_run_config_generator import (
    BrutePlusBinaryParameterSearchRunConfigGenerator,
)
from .config_generator_interface import ConfigGeneratorInterface
from .optuna_plus_concurrency_sweep_run_config_generator import (
    OptunaPlusConcurrencySweepRunConfigGenerator,
)
from .quick_plus_concurrency_sweep_run_config_generator import (
    QuickPlusConcurrencySweepRunConfigGenerator,
)
from .search_config import SearchConfig
from .search_dimension import SearchDimension
from .search_dimensions import SearchDimensions


class RunConfigGeneratorFactory:
    """
    Factory that creates the correct RunConfig Generators
    """

    @staticmethod
    def create_run_config_generator(
        command_config: ConfigCommandProfile,
        state_manager: AnalyzerStateManager,
        gpus: List[GPUDevice],
        models: List[ConfigModelProfileSpec],
        client: TritonClient,
        result_manager: ResultManager,
        model_variant_name_manager: ModelVariantNameManager,
        search_parameters: Dict[str, SearchParameters],
        composing_search_parameters: Dict[str, SearchParameters],
    ) -> ConfigGeneratorInterface:
        """
        Parameters
        ----------
        command_config: ConfigCommandProfile
            The Model Analyzer config file for the profile step
        state_manager: AnalyzerStateManager
            The object that allows control and update of checkpoint state
        gpus: List of GPUDevices
        models: list of ConfigModelProfileSpec
            The models to generate RunConfigs for
        client: TritonClient
            The client handle used to send requests to Triton
        result_manager: ResultManager
            The object that handles storing and sorting the results from the perf analyzer
        model_variant_name_manager: ModelVariantNameManager
            Maps model variants to config names
        search_parameters: SearchParameters
            The object that handles the users configuration search parameters
        composing_search_parameters: SearchParameters
            The object that handles the users configuration search parameters for composing models

        Returns
        -------
        A generator that implements ConfigGeneratorInterface and creates RunConfigs
        """

        new_models = []
        for model in models:
            new_models.append(ModelProfileSpec(model, command_config, client, gpus))

        composing_models = RunConfigGeneratorFactory._create_composing_models(
            new_models, command_config, client, gpus
        )

        for composing_model in composing_models:
            composing_search_parameters[
                composing_model.model_name()
            ] = SearchParameters(
                config=command_config,
                model=composing_model,
                is_composing_model=True,
            )

        if command_config.run_config_search_mode == "optuna":
            return RunConfigGeneratorFactory._create_optuna_plus_concurrency_sweep_run_config_generator(
                command_config=command_config,
                state_manager=state_manager,
                gpu_count=len(gpus),
                models=new_models,
                composing_models=composing_models,
                result_manager=result_manager,
                search_parameters=search_parameters,
                composing_search_parameters=composing_search_parameters,
                model_variant_name_manager=model_variant_name_manager,
            )
        elif command_config.run_config_search_mode == "quick" or composing_models:
            return RunConfigGeneratorFactory._create_quick_plus_concurrency_sweep_run_config_generator(
                command_config=command_config,
                gpu_count=len(gpus),
                models=new_models,
                composing_models=composing_models,
                result_manager=result_manager,
                model_variant_name_manager=model_variant_name_manager,
            )
        elif command_config.run_config_search_mode == "brute":
            return RunConfigGeneratorFactory._create_brute_plus_binary_parameter_search_run_config_generator(
                command_config=command_config,
                gpus=gpus,
                models=new_models,
                client=client,
                result_manager=result_manager,
                model_variant_name_manager=model_variant_name_manager,
            )
        else:
            raise TritonModelAnalyzerException(
                f"Unexpected search mode {command_config.run_config_search_mode}"
            )

    @staticmethod
    def _create_brute_plus_binary_parameter_search_run_config_generator(
        command_config: ConfigCommandProfile,
        gpus: List[GPUDevice],
        models: List[ModelProfileSpec],
        client: TritonClient,
        result_manager: ResultManager,
        model_variant_name_manager: ModelVariantNameManager,
    ) -> ConfigGeneratorInterface:
        return BrutePlusBinaryParameterSearchRunConfigGenerator(
            config=command_config,
            gpus=gpus,
            models=models,
            client=client,
            result_manager=result_manager,
            model_variant_name_manager=model_variant_name_manager,
        )

    @staticmethod
    def _create_optuna_plus_concurrency_sweep_run_config_generator(
        command_config: ConfigCommandProfile,
        state_manager: AnalyzerStateManager,
        gpu_count: int,
        models: List[ModelProfileSpec],
        composing_models: List[ModelProfileSpec],
        result_manager: ResultManager,
        model_variant_name_manager: ModelVariantNameManager,
        search_parameters: Dict[str, SearchParameters],
        composing_search_parameters: Dict[str, SearchParameters],
    ) -> ConfigGeneratorInterface:
        return OptunaPlusConcurrencySweepRunConfigGenerator(
            config=command_config,
            state_manager=state_manager,
            gpu_count=gpu_count,
            composing_models=composing_models,
            models=models,
            result_manager=result_manager,
            model_variant_name_manager=model_variant_name_manager,
            search_parameters=search_parameters,
            composing_search_parameters=composing_search_parameters,
        )

    @staticmethod
    def _create_quick_plus_concurrency_sweep_run_config_generator(
        command_config: ConfigCommandProfile,
        gpu_count: int,
        models: List[ModelProfileSpec],
        composing_models: List[ModelProfileSpec],
        result_manager: ResultManager,
        model_variant_name_manager: ModelVariantNameManager,
    ) -> ConfigGeneratorInterface:
        search_config = RunConfigGeneratorFactory._create_search_config(
            models, composing_models
        )
        return QuickPlusConcurrencySweepRunConfigGenerator(
            search_config=search_config,
            config=command_config,
            gpu_count=gpu_count,
            models=models,
            composing_models=composing_models,
            result_manager=result_manager,
            model_variant_name_manager=model_variant_name_manager,
        )

    @staticmethod
    def _create_search_config(
        models: List[ModelProfileSpec], composing_models: List[ModelProfileSpec]
    ) -> SearchConfig:
        dimensions = SearchDimensions()

        index = 0
        all_models = models + composing_models

        for model in all_models:
            # Top level ensemble models don't have any dimensions
            if model.is_ensemble():
                continue

            dims = RunConfigGeneratorFactory._get_dimensions_for_model(
                model.supports_batching()
            )
            dimensions.add_dimensions(index, dims)
            index += 1

        search_config = SearchConfig(
            dimensions=dimensions, radius=RADIUS, min_initialized=MIN_INITIALIZED
        )

        return search_config

    @staticmethod
    def _get_dimensions_for_model(is_batching_supported: bool) -> List[SearchDimension]:
        if is_batching_supported:
            return RunConfigGeneratorFactory._get_batching_supported_dimensions()
        else:
            return RunConfigGeneratorFactory._get_batching_not_supported_dimensions()

    @staticmethod
    def _get_batching_supported_dimensions() -> List[SearchDimension]:
        return [
            SearchDimension(
                f"max_batch_size", SearchDimension.DIMENSION_TYPE_EXPONENTIAL
            ),
            SearchDimension(f"instance_count", SearchDimension.DIMENSION_TYPE_LINEAR),
        ]

    @staticmethod
    def _get_batching_not_supported_dimensions() -> List[SearchDimension]:
        return [
            SearchDimension(f"instance_count", SearchDimension.DIMENSION_TYPE_LINEAR)
        ]

    @staticmethod
    def _create_composing_models(
        models: List[ModelProfileSpec],
        config: ConfigCommandProfile,
        client: TritonClient,
        gpus: List[GPUDevice],
    ) -> List[ModelProfileSpec]:
        """
        Given a list of models create a list of all the composing models (BLS + Ensemble)
        """
        composing_models = RunConfigGeneratorFactory._create_bls_composing_models(
            config, client, gpus
        )

        for model in models:
            composing_models.extend(
                RunConfigGeneratorFactory._create_ensemble_composing_models(
                    model, config, client, gpus
                )
            )

        for composing_model in composing_models:
            if composing_model.is_ensemble():
                raise TritonModelAnalyzerException(
                    f"Model Analyzer does not support ensembles as a composing model type: {composing_model.model_name()}"
                )

        return composing_models

    @staticmethod
    def _create_bls_composing_models(
        config: ConfigCommandProfile, client: TritonClient, gpus: List[GPUDevice]
    ) -> List[ModelProfileSpec]:
        """
        Creates a list of BLS composing model configs based on the profile command config
        """
        bls_composing_model_configs = [
            ModelProfileSpec(bls_composing_model_spec, config, client, gpus)
            for bls_composing_model_spec in config.bls_composing_models
        ]

        return bls_composing_model_configs

    @staticmethod
    def _create_ensemble_composing_models(
        model: ModelProfileSpec,
        config: ConfigCommandProfile,
        client: TritonClient,
        gpus: List[GPUDevice],
    ) -> List[ModelProfileSpec]:
        """
        Creates a list of Ensemble composing model configs based on the model
        """
        model_config = ModelConfig.create_from_profile_spec(model, config, client, gpus)

        if not model_config.is_ensemble():
            return []

        ensemble_composing_model_names = model_config.get_ensemble_composing_models()

        ensemble_composing_model_specs = (
            ConfigModelProfileSpec.model_list_to_config_model_profile_spec(
                ensemble_composing_model_names
            )
        )

        ensemble_composing_model_configs = [
            ModelProfileSpec(ensemble_composing_model_spec, config, client, gpus)
            for ensemble_composing_model_spec in ensemble_composing_model_specs
        ]

        return ensemble_composing_model_configs
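The factory's selection logic is the key behavior in this file: "optuna" mode gets the Optuna generator, "quick" mode (or any profile that involves composing models, i.e. BLS or ensemble models) gets the quick generator, and "brute" gets the brute-force generator. The sketch below is not part of the package; it only restates that dispatch rule in isolation so the precedence (composing models override "brute") is easy to see.

# Illustrative restatement of the dispatch rule in
# RunConfigGeneratorFactory.create_run_config_generator (not part of the package).
def select_generator_kind(search_mode: str, has_composing_models: bool) -> str:
    if search_mode == "optuna":
        return "optuna_plus_concurrency_sweep"
    elif search_mode == "quick" or has_composing_models:
        return "quick_plus_concurrency_sweep"
    elif search_mode == "brute":
        return "brute_plus_binary_parameter_search"
    raise ValueError(f"Unexpected search mode {search_mode}")

# Composing models take precedence over "brute", mirroring the elif ordering above.
assert select_generator_kind("brute", has_composing_models=True) == "quick_plus_concurrency_sweep"
assert select_generator_kind("brute", has_composing_models=False) == "brute_plus_binary_parameter_search"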
model_analyzer/config/generate/search_config.py
@@ -0,0 +1,112 @@
#!/usr/bin/env python3

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Optional

from model_analyzer.config.generate.search_dimension import SearchDimension

from .search_dimensions import SearchDimensions


class NeighborhoodConfig:
    """
    Defines the configuration for a Neighborhood object
    """

    def __init__(self, dimensions: SearchDimensions, radius: int, min_initialized: int):
        """
        Parameters
        ----------
        dimensions: SearchDimensions
        radius: int
            All points within distance=radius from a location will be in
            its neighborhood
        min_initialized: int
            Minimum number of initialized values in a neighborhood
            before a step can be taken
        """
        self._dimensions = dimensions
        self._radius = radius
        self._min_initialized = min_initialized

    def get_num_dimensions(self) -> int:
        """Returns the number of dimensions in this search"""
        return len(self._dimensions)

    def get_dimensions(self) -> SearchDimensions:
        """Returns the SearchDimensions"""
        return self._dimensions

    def get_dimension(self, idx: int) -> SearchDimension:
        """Returns the SearchDimension at the given index"""
        return self._dimensions[idx]

    def get_min_indexes(self) -> List[int]:
        """
        Returns a list corresponding to the minimum index of all SearchDimensions
        """
        min_indexes = []
        for dimension in self._dimensions:
            min_indexes.append(dimension.get_min_idx())
        return min_indexes

    def get_min_initialized(self) -> int:
        """
        Returns the minimum number of initialized coordinates needed
        in a neighborhood before a step can be taken
        """
        return self._min_initialized

    def get_radius(self) -> int:
        """Returns the base radius of a neighborhood"""
        return self._radius


class SearchConfig(NeighborhoodConfig):
    """
    Defines all dimensions to search
    """

    def __init__(self, dimensions: SearchDimensions, radius: int, min_initialized: int):
        """
        Parameters
        ----------
        dimensions: SearchDimensions
        radius: int
            All points within distance=radius from a location will be in
            each neighborhood
        min_initialized: int
            Minimum number of initialized values in a neighborhood
            before a step can be taken

        """
        super().__init__(
            dimensions=dimensions, radius=radius, min_initialized=min_initialized
        )

    def get_neighborhood_config(
        self, radius: Optional[int] = None
    ) -> NeighborhoodConfig:
        """
        Return a NeighborhoodConfig with an optional override to the radius
        """
        radius_to_use = radius if radius is not None else self.get_radius()

        return NeighborhoodConfig(
            dimensions=self._dimensions,
            radius=radius_to_use,
            min_initialized=self._min_initialized,
        )
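A minimal usage sketch of SearchConfig follows. It is not taken from the package's tests; it assumes the wheel and its dependencies are installed, and the radius and min_initialized literals are arbitrary stand-ins for the package's RADIUS and MIN_INITIALIZED constants.

# Minimal usage sketch (assumes the triton-model-analyzer wheel is installed).
from model_analyzer.config.generate.search_config import SearchConfig
from model_analyzer.config.generate.search_dimension import SearchDimension
from model_analyzer.config.generate.search_dimensions import SearchDimensions

dims = SearchDimensions()
dims.add_dimensions(
    0,
    [
        SearchDimension("max_batch_size", SearchDimension.DIMENSION_TYPE_EXPONENTIAL),
        SearchDimension("instance_count", SearchDimension.DIMENSION_TYPE_LINEAR),
    ],
)

# Illustrative values; the package itself passes its RADIUS and MIN_INITIALIZED constants.
search_config = SearchConfig(dimensions=dims, radius=3, min_initialized=2)

# get_neighborhood_config() reuses the base radius unless an override is passed in.
assert search_config.get_neighborhood_config().get_radius() == 3
assert search_config.get_neighborhood_config(radius=5).get_radius() == 5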
model_analyzer/config/generate/search_dimension.py
@@ -0,0 +1,73 @@
#!/usr/bin/env python3

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import sys


class SearchDimension:
    """
    Defines a single dimension to search, and how the values
    of that dimension grow
    """

    DIMENSION_TYPE_LINEAR = 0
    DIMENSION_TYPE_EXPONENTIAL = 1
    DIMENSION_NO_MAX = sys.maxsize

    def __init__(self, name: str, type: int, min: int = 0, max: int = DIMENSION_NO_MAX):
        """
        Parameters
        ----------
        name: str
        type: enum
            Enum indicating how the values of this dimension grow
        min: int
            The minimum index for this search dimension. If unspecified, min is 0
        max: int
            The maximum index for this search dimension. If unspecified, then there is no max

        """
        self._name = name
        self._type = type
        self._min = min
        self._max = max

    def get_min_idx(self) -> int:
        """Return the minimum index for this dimension"""
        return self._min

    def get_max_idx(self) -> int:
        """Return the maximum index for this dimension"""
        return self._max

    def get_name(self) -> str:
        """Return the name for this dimension"""
        return self._name

    def get_value_at_idx(self, idx: int) -> int:
        """Return the value of the dimension at the given index"""
        if idx < self._min or idx > self._max:
            raise IndexError(
                f"Index {idx} is out of range for search dimension {self._name}"
            )

        if self._type == SearchDimension.DIMENSION_TYPE_LINEAR:
            return idx + 1
        elif self._type == SearchDimension.DIMENSION_TYPE_EXPONENTIAL:
            return int(math.pow(2, idx))
        else:
            raise Exception(f"Unknown type {self._type}")
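The two dimension types differ only in how an index maps to a value, which the short sketch below illustrates. It assumes the wheel is installed; the dimension names are just examples.

# Illustration of how the two dimension types grow (not part of the package).
from model_analyzer.config.generate.search_dimension import SearchDimension

linear = SearchDimension("instance_count", SearchDimension.DIMENSION_TYPE_LINEAR)
exponential = SearchDimension("max_batch_size", SearchDimension.DIMENSION_TYPE_EXPONENTIAL)

# Linear dimensions map index i to i + 1; exponential dimensions map i to 2**i.
print([linear.get_value_at_idx(i) for i in range(5)])       # [1, 2, 3, 4, 5]
print([exponential.get_value_at_idx(i) for i in range(5)])  # [1, 2, 4, 8, 16]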
model_analyzer/config/generate/search_dimensions.py
@@ -0,0 +1,85 @@
#!/usr/bin/env python3

# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Dict, Iterator, List

from .coordinate import Coordinate
from .search_dimension import SearchDimension


class SearchDimensions:
    """
    Data class that holds one or more dimensions and associates each one to a key
    """

    def __init__(self) -> None:
        self._dimensions: List[SearchDimension] = []
        self._dimension_keys: List[Any] = []

    def add_dimensions(self, key: Any, dimensions: List[SearchDimension]) -> None:
        """
        Add dimensions and associate them all with the given key

        Parameters
        ----------
        Key: int
            The key to associate the dimensions with

        Dimensions: List of SearchDimension
            Dimensions to add and associate with the key
        """
        for dim in dimensions:
            self._dimensions.append(dim)
            self._dimension_keys.append(key)

    def get_values_for_coordinate(
        self, coordinate: Coordinate
    ) -> Dict[Any, Dict[str, Any]]:
        """
        Given a Coordinate, return all dimension_name:dimension_value pairs associated with
        that coordinate, organized by the dimension's key

        Parameters
        ----------

        coordinate: Coordinate
            The coordinate to get values for

        Returns: Dict of Dicts
            ret[key][SearchDimension name] = value

        """
        vals: Dict[Any, Dict[str, Any]] = {}
        for i, v in enumerate(coordinate):
            key = self._dimension_keys[i]
            if key not in vals:
                vals[key] = {}

            dim = self._dimensions[i]
            name = dim.get_name()
            val = dim.get_value_at_idx(v)
            vals[key][name] = val

        return vals

    def __iter__(self) -> Iterator:
        return iter(self._dimensions)

    def __len__(self) -> int:
        return len(self._dimensions)

    def __getitem__(self, index: int) -> SearchDimension:
        return self._dimensions[index]
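The sketch below shows how dimensions added under different keys are grouped when values are looked up. It assumes the wheel is installed, and it passes a plain list of indices where the package's call sites pass a Coordinate; that substitution only works because get_values_for_coordinate simply iterates its argument.

# Sketch of how per-model dimensions are grouped by key (not part of the package).
from model_analyzer.config.generate.search_dimension import SearchDimension
from model_analyzer.config.generate.search_dimensions import SearchDimensions

dims = SearchDimensions()
# Key 0: first model gets two dimensions; key 1: second model gets one.
dims.add_dimensions(
    0,
    [
        SearchDimension("max_batch_size", SearchDimension.DIMENSION_TYPE_EXPONENTIAL),
        SearchDimension("instance_count", SearchDimension.DIMENSION_TYPE_LINEAR),
    ],
)
dims.add_dimensions(
    1, [SearchDimension("instance_count", SearchDimension.DIMENSION_TYPE_LINEAR)]
)

# A plain list of indices stands in for a Coordinate here.
values = dims.get_values_for_coordinate([3, 1, 2])
print(values)  # {0: {'max_batch_size': 8, 'instance_count': 2}, 1: {'instance_count': 3}}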
model_analyzer/config/generate/search_parameter.py
@@ -0,0 +1,49 @@
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass
from enum import Enum, auto
from typing import Any, List, Optional


class ParameterUsage(Enum):
    MODEL = auto()
    RUNTIME = auto()
    BUILD = auto()


class ParameterCategory(Enum):
    INTEGER = auto()
    EXPONENTIAL = auto()
    STR_LIST = auto()
    INT_LIST = auto()


@dataclass
class SearchParameter:
    """
    A dataclass that holds information about a configuration's search parameter
    """

    usage: ParameterUsage
    category: ParameterCategory

    # This is only applicable to the LIST categories
    enumerated_list: Optional[List[Any]] = None

    # These are only applicable to INTEGER and EXPONENTIAL categories
    min_range: Optional[int] = None
    max_range: Optional[int] = None