triton-model-analyzer 1.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
import os
|
|
19
|
+
|
|
20
|
+
from model_analyzer.config.input.config_utils import (
|
|
21
|
+
file_path_validator,
|
|
22
|
+
parent_path_validator,
|
|
23
|
+
)
|
|
24
|
+
from model_analyzer.constants import LOGGER_NAME
|
|
25
|
+
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
|
|
26
|
+
|
|
27
|
+
from .config_command import ConfigCommand
|
|
28
|
+
from .config_defaults import (
|
|
29
|
+
DEFAULT_ALWAYS_REPORT_GPU_METRICS,
|
|
30
|
+
DEFAULT_CHECKPOINT_DIRECTORY,
|
|
31
|
+
DEFAULT_EXPORT_PATH,
|
|
32
|
+
DEFAULT_OFFLINE_REPORT_PLOTS,
|
|
33
|
+
DEFAULT_ONLINE_REPORT_PLOTS,
|
|
34
|
+
DEFAULT_REPORT_FORMAT,
|
|
35
|
+
)
|
|
36
|
+
from .config_enum import ConfigEnum
|
|
37
|
+
from .config_field import ConfigField
|
|
38
|
+
from .config_list_generic import ConfigListGeneric
|
|
39
|
+
from .config_list_string import ConfigListString
|
|
40
|
+
from .config_object import ConfigObject
|
|
41
|
+
from .config_primitive import ConfigPrimitive
|
|
42
|
+
from .config_union import ConfigUnion
|
|
43
|
+
from .objects.config_model_report_spec import ConfigModelReportSpec
|
|
44
|
+
from .objects.config_plot import ConfigPlot
|
|
45
|
+
|
|
46
|
+
logger = logging.getLogger(LOGGER_NAME)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class ConfigCommandReport(ConfigCommand):
    """
    Model Analyzer config object for the 'report' subcommand.

    Declares every CLI/YAML option the report step accepts, applies
    mode-dependent default plots, and normalizes user-supplied report
    model configs into a uniform dict form.
    """

    def __init__(self):
        super().__init__()
        self._fill_config()

    def _fill_config(self):
        """
        Builder function makes calls to add config to
        fill the config with options
        """

        self._add_config(
            ConfigField(
                "config_file",
                flags=["-f", "--config-file"],
                field_type=ConfigPrimitive(str),
                description="Path to Config File for subcommand 'report'.",
            )
        )
        self._add_config(
            ConfigField(
                "checkpoint_directory",
                flags=["-s", "--checkpoint-directory"],
                default_value=DEFAULT_CHECKPOINT_DIRECTORY,
                field_type=ConfigPrimitive(str, validator=parent_path_validator),
                description="Full path to directory to which to read and write checkpoints and profile data.",
            )
        )
        self._add_config(
            ConfigField(
                "export_path",
                flags=["-e", "--export-path"],
                default_value=DEFAULT_EXPORT_PATH,
                field_type=ConfigPrimitive(str, validator=file_path_validator),
                description="Full path to directory in which to store the results",
            )
        )

        # Schema shared by the top-level 'plots' option and the per-model
        # 'plots' override inside report_model_configs. The "*" key means
        # any plot name is accepted.
        plots_scheme = ConfigObject(
            schema={
                "*": ConfigObject(
                    schema={
                        "title": ConfigPrimitive(type_=str),
                        "x_axis": ConfigPrimitive(type_=str),
                        "y_axis": ConfigPrimitive(type_=str),
                        "monotonic": ConfigPrimitive(type_=bool),
                    }
                )
            },
            output_mapper=ConfigPlot.from_object,
        )
        self._add_config(
            ConfigField(
                "plots",
                field_type=ConfigUnion(
                    [
                        plots_scheme,
                        ConfigListGeneric(
                            type_=plots_scheme, output_mapper=ConfigPlot.from_list
                        ),
                    ]
                ),
                default_value=DEFAULT_ONLINE_REPORT_PLOTS,
                description="Model analyzer uses the information in this section to construct plots of the results.",
            )
        )

        report_model_scheme = ConfigObject(
            required=True,
            schema={
                # Any key is allowed, but the keys must follow the pattern
                # below
                "*": ConfigObject(schema={"plots": plots_scheme})
            },
            output_mapper=ConfigModelReportSpec.model_object_to_config_model_report_spec,
        )
        # 'report_model_configs' accepts several spellings: a dict of model
        # configs, a list mixing dicts and bare model-name strings, or a
        # plain comma-delimited string list. Each form has its own mapper.
        self._add_config(
            ConfigField(
                "report_model_configs",
                flags=["-n", "--report-model-configs"],
                field_type=ConfigUnion(
                    [
                        report_model_scheme,
                        ConfigListGeneric(
                            ConfigUnion(
                                [
                                    report_model_scheme,
                                    ConfigPrimitive(
                                        str,
                                        output_mapper=ConfigModelReportSpec.model_str_to_config_model_report_spec,
                                    ),
                                ]
                            ),
                            required=True,
                            output_mapper=ConfigModelReportSpec.model_mixed_to_config_model_report_spec,
                        ),
                        ConfigListString(
                            output_mapper=ConfigModelReportSpec.model_list_to_config_model_report_spec
                        ),
                    ],
                    required=True,
                ),
                description=(
                    "Comma delimited list of the names of model configs"
                    " for which to generate detailed reports."
                ),
            )
        )
        self._add_config(
            ConfigField(
                "output_formats",
                flags=["-o", "--output-formats"],
                default_value=DEFAULT_REPORT_FORMAT,
                field_type=ConfigUnion(
                    [
                        ConfigListGeneric(
                            type_=ConfigEnum(choices=["pdf", "csv", "png"])
                        ),
                        ConfigListString(),
                    ]
                ),
                description="Output file format for detailed report.",
            )
        )
        self._add_config(
            ConfigField(
                "always_report_gpu_metrics",
                # NOTE(review): this flag mixes '_' and '-'; it looks like a
                # typo for "--always-report-gpu-metrics", but renaming it
                # would break existing command lines — confirm upstream
                # intent before changing.
                flags=["--always_report-gpu-metrics"],
                field_type=ConfigPrimitive(bool),
                parser_args={"action": "store_true"},
                default_value=DEFAULT_ALWAYS_REPORT_GPU_METRICS,
                description="Report GPU metrics, even when the model is `cpu_only`.",
            )
        )

    def set_config_values(self, args):
        """
        Set the config values. This function sets all the values for the
        config. CLI arguments have the highest priority, then YAML config
        values and then default values.

        Parameters
        ----------
        args : argparse.Namespace
            Parsed arguments from the CLI

        Raises
        ------
        TritonModelAnalyzerException
            If the required fields are not specified, it will raise
            this exception
        """

        # Swap the default plot set based on the profiling mode before the
        # base class resolves CLI/YAML/default precedence.
        if args.mode == "online":
            self._fields["plots"].set_default_value(DEFAULT_ONLINE_REPORT_PLOTS)
        elif args.mode == "offline":
            self._fields["plots"].set_default_value(DEFAULT_OFFLINE_REPORT_PLOTS)

        super().set_config_values(args)

    def _preprocess_and_verify_arguments(self):
        """
        Enforces some rules on the config.

        Raises
        ------
        TritonModelAnalyzerException
            If there is a problem with arguments or config.
        """

        if not self.export_path:
            logger.warning(
                f"--export-path not specified. Using {self._fields['export_path'].default_value()}"
            )
        elif self.export_path and not os.path.isdir(self.export_path):
            raise TritonModelAnalyzerException(
                f"Export path {self.export_path} is not a directory."
            )

    @staticmethod
    def _plot_dicts(plots):
        """
        Serialize an iterable of ConfigPlot objects back into the raw
        {name: {title, x_axis, y_axis, monotonic}} schema expected by
        the 'plots' field.
        """
        return {
            plot.name(): {
                "title": plot.title(),
                "x_axis": plot.x_axis(),
                "y_axis": plot.y_axis(),
                "monotonic": plot.monotonic(),
            }
            for plot in plots
        }

    def _autofill_values(self):
        """
        Fill in the implied or default
        config values.
        """

        new_report_model_configs = {}
        for model in self.report_model_configs:
            # Fall back to the global 'plots' setting for models that
            # did not specify their own plots.
            plots = model.plots() or self.plots
            new_report_model_configs[model.model_config_name()] = {
                "plots": self._plot_dicts(plots)
            }

        self._fields["report_model_configs"].set_value(new_report_model_configs)
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import os
|
|
18
|
+
|
|
19
|
+
#
# Common defaults
#

# Checkpoints/profile data are written under the current working directory
# unless overridden on the CLI.
DEFAULT_CHECKPOINT_DIRECTORY = os.path.join(os.getcwd(), "checkpoints")
# Objectives map metric name -> weight; online and offline currently share
# the same throughput-weighted default.
DEFAULT_ONLINE_OBJECTIVES = {"perf_throughput": 10}
DEFAULT_OFFLINE_OBJECTIVES = {"perf_throughput": 10}
DEFAULT_MODEL_WEIGHTING = 1

#
# Profile Config defaults
#

# Seconds between monitor samples (float) and per-measurement duration.
DEFAULT_MONITORING_INTERVAL = 1.0
DEFAULT_DURATION_SECONDS = 3
DEFAULT_COLLECT_CPU_METRICS = False
DEFAULT_LOG_LEVEL = "INFO"
DEFAULT_GPUS = "all"
DEFAULT_ALWAYS_REPORT_GPU_METRICS = False
DEFAULT_SKIP_SUMMARY_REPORTS = False
DEFAULT_SKIP_DETAILED_REPORTS = False
DEFAULT_OUTPUT_MODEL_REPOSITORY = os.path.join(os.getcwd(), "output_model_repository")
DEFAULT_OVERRIDE_OUTPUT_REPOSITORY_FLAG = False
DEFAULT_BATCH_SIZES = 1
DEFAULT_MAX_RETRIES = 50
DEFAULT_CLIENT_PROTOCOL = "grpc"
# Search-space bounds for the brute/quick run-config searches.
DEFAULT_RUN_CONFIG_MAX_CONCURRENCY = 1024
DEFAULT_RUN_CONFIG_MIN_CONCURRENCY = 1
DEFAULT_RUN_CONFIG_MAX_REQUEST_RATE = 8192
DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE = 16
DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT = 5
DEFAULT_RUN_CONFIG_MIN_INSTANCE_COUNT = 1
DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE = 1
DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE = 128
DEFAULT_RUN_CONFIG_MAX_BINARY_SEARCH_STEPS = 5
DEFAULT_RUN_CONFIG_SEARCH_DISABLE = False
DEFAULT_RUN_CONFIG_SEARCH_MODE = "brute"
DEFAULT_RUN_CONFIG_PROFILE_MODELS_CONCURRENTLY_ENABLE = False
# Optuna search-mode knobs: trial counts are derived from a percentage of
# the search space, clamped to the min/max trial bounds below.
DEFAULT_OPTUNA_MIN_PERCENTAGE_OF_SEARCH_SPACE = 5
DEFAULT_OPTUNA_MAX_PERCENTAGE_OF_SEARCH_SPACE = 10
DEFAULT_OPTUNA_MIN_TRIALS = 20
DEFAULT_OPTUNA_MAX_TRIALS = 200
DEFAULT_OPTUNA_EARLY_EXIT_THRESHOLD = 10
DEFAULT_USE_CONCURRENCY_FORMULA = False
DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False
DEFAULT_CONCURRENCY_SWEEP_DISABLE = False
DEFAULT_DCGM_DISABLE = False
# Triton server launch/connection defaults (local launch, standard ports).
DEFAULT_TRITON_LAUNCH_MODE = "local"
DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:25.11-py3"
DEFAULT_TRITON_HTTP_ENDPOINT = "localhost:8000"
DEFAULT_TRITON_GRPC_ENDPOINT = "localhost:8001"
DEFAULT_TRITON_METRICS_URL = "http://localhost:8002/metrics"
DEFAULT_TRITON_SERVER_PATH = "tritonserver"
DEFAULT_TRITON_INSTALL_PATH = "/opt/tritonserver"
# perf_analyzer invocation defaults: timeout in seconds, CPU-util cap in
# percent, and the maximum number of automatic parameter adjustments.
DEFAULT_PERF_ANALYZER_TIMEOUT = 600
DEFAULT_PERF_ANALYZER_CPU_UTIL = 80.0
DEFAULT_PERF_ANALYZER_PATH = "perf_analyzer"
DEFAULT_PERF_OUTPUT_FLAG = False
DEFAULT_PERF_MAX_AUTO_ADJUSTS = 10
DEFAULT_MEASUREMENT_MODE = "count_windows"
DEFAULT_MODEL_TYPE = "generic"

# Summary plots shown for online (concurrency-sweep) profiling.
DEFAULT_ONLINE_PLOTS = {
    "throughput_v_latency": {
        "title": "Throughput vs. Latency",
        "x_axis": "perf_latency_p99",
        "y_axis": "perf_throughput",
        "monotonic": True,
    },
    "gpu_mem_v_latency": {
        "title": "GPU Memory vs. Latency",
        "x_axis": "perf_latency_p99",
        "y_axis": "gpu_used_memory",
        "monotonic": False,
    },
}

# Summary plots shown for offline (batch-size-sweep) profiling.
# NOTE(review): the key "through_v_batch_size" looks like a typo for
# "throughput_v_batch_size" (cf. DEFAULT_OFFLINE_REPORT_PLOTS below), but
# plot keys are user-visible — confirm before renaming.
DEFAULT_OFFLINE_PLOTS = {
    "through_v_batch_size": {
        "title": "Throughput vs. Batch Size",
        "x_axis": "batch_size",
        "y_axis": "perf_throughput",
        "monotonic": False,
    }
}

# Extra plot added when CPU metrics collection is enabled.
DEFAULT_CPU_MEM_PLOT = {
    "cpu_mem_v_latency": {
        "title": "CPU Memory vs. Latency",
        "x_axis": "perf_latency_p99",
        "y_axis": "cpu_used_ram",
        "monotonic": False,
    }
}

DEFAULT_EXPORT_PATH = os.getcwd()
# CSV filenames written under the export path.
DEFAULT_FILENAME_MODEL_INFERENCE = "metrics-model-inference.csv"
DEFAULT_FILENAME_MODEL_GPU = "metrics-model-gpu.csv"
DEFAULT_FILENAME_SERVER_ONLY = "metrics-server-only.csv"

# Column orderings for the exported result tables. The request-rate
# variants swap "concurrency" for "request_rate"; the LLM variant appends
# token-level latency/throughput metrics.
DEFAULT_INFERENCE_OUTPUT_FIELDS = [
    "model_name",
    "batch_size",
    "concurrency",
    "model_config_path",
    "instance_group",
    "max_batch_size",
    "satisfies_constraints",
    "perf_throughput",
    "perf_latency_p99",
]
DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS = [
    "model_name",
    "batch_size",
    "concurrency",
    "model_config_path",
    "instance_group",
    "max_batch_size",
    "satisfies_constraints",
    "perf_throughput",
    "perf_latency_p99",
    "inter_token_latency_p99",
    "time_to_first_token_p99",
    "output_token_throughput",
]
DEFAULT_REQUEST_RATE_INFERENCE_OUTPUT_FIELDS = [
    "model_name",
    "batch_size",
    "request_rate",
    "model_config_path",
    "instance_group",
    "max_batch_size",
    "satisfies_constraints",
    "perf_throughput",
    "perf_latency_p99",
]
DEFAULT_GPU_OUTPUT_FIELDS = [
    "model_name",
    "gpu_uuid",
    "batch_size",
    "concurrency",
    "model_config_path",
    "instance_group",
    "satisfies_constraints",
    "gpu_used_memory",
    "gpu_utilization",
    "gpu_power_usage",
]
DEFAULT_REQUEST_RATE_GPU_OUTPUT_FIELDS = [
    "model_name",
    "gpu_uuid",
    "batch_size",
    "request_rate",
    "model_config_path",
    "instance_group",
    "satisfies_constraints",
    "gpu_used_memory",
    "gpu_utilization",
    "gpu_power_usage",
]
DEFAULT_SERVER_OUTPUT_FIELDS = [
    "model_name",
    "gpu_uuid",
    "gpu_used_memory",
    "gpu_utilization",
    "gpu_power_usage",
]

DEFAULT_NUM_CONFIGS_PER_MODEL = 3
# 0 means "do not limit" the number of top model configs reported.
DEFAULT_NUM_TOP_MODEL_CONFIGS = 0

#
# Report Config defaults
#

DEFAULT_REPORT_FORMAT = "pdf"

# Detailed-report plots for online profiling.
DEFAULT_ONLINE_REPORT_PLOTS = {
    "gpu_mem_v_latency": {
        "title": "GPU Memory vs. Latency",
        "x_axis": "perf_latency_p99",
        "y_axis": "gpu_used_memory",
        "monotonic": False,
    },
    "gpu_util_v_latency": {
        "title": "GPU Utilization vs. Latency",
        "x_axis": "perf_latency_p99",
        "y_axis": "gpu_utilization",
        "monotonic": False,
    },
    "cpu_mem_v_latency": {
        "title": "RAM Usage vs. Latency",
        "x_axis": "perf_latency_p99",
        "y_axis": "cpu_used_ram",
        "monotonic": False,
    },
    "gpu_power_v_latency": {
        "title": "GPU Power vs. Latency",
        "x_axis": "perf_latency_p99",
        "y_axis": "gpu_power_usage",
        "monotonic": False,
    },
}

# Detailed-report plots for offline profiling.
DEFAULT_OFFLINE_REPORT_PLOTS = {
    "throughput_v_batch_size": {
        "title": "Throughput vs. Batch Size",
        "x_axis": "batch_size",
        "y_axis": "perf_throughput",
        "monotonic": False,
    },
    "latency_v_batch_size": {
        "title": "p99 Latency vs. Batch Size",
        "x_axis": "batch_size",
        "y_axis": "perf_latency_p99",
        "monotonic": False,
    },
}
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from model_analyzer.constants import CONFIG_PARSER_FAILURE
|
|
18
|
+
|
|
19
|
+
from .config_status import ConfigStatus
|
|
20
|
+
from .config_value import ConfigValue
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ConfigEnum(ConfigValue):
    """
    Enum type support for config.

    Restricts a config field to a fixed set of allowed choices and
    rejects any other value at set time.
    """

    def __init__(
        self,
        choices,
        preprocess=None,
        required=False,
        validator=None,
        output_mapper=None,
        name=None,
    ):
        """
        Create a new enum config field.

        Parameters
        ----------
        choices : A list of allowed choices
            The type of elements in the list
        preprocess : callable
            Function be called before setting new values.
        required : bool
            Whether a given config is required or not.
        validator : callable or None
            A validator for the final value of the field.
        output_mapper: callable
            This callable unifies the output value of this field.
        name : str
            Fully qualified name for this field.
        """

        self._choices = choices
        # The field acts as its own type marker.
        self._type = self
        super().__init__(preprocess, required, validator, output_mapper, name)

    def set_value(self, value):
        """
        Accept the value only if it is one of the allowed choices;
        otherwise report a parser failure without mutating the field.
        """

        # Guard clause: delegate to the base class on a valid choice.
        if value in self._choices:
            return super().set_value(value)

        return ConfigStatus(
            CONFIG_PARSER_FAILURE,
            f'Value "{value}" for field "{self.name()}" is not acceptable.'
            f' Value should be one of the following values: "{self._choices}".',
            self,
        )

    def cli_type(self):
        """
        Get the type of this field for CLI.

        Returns
        -------
        type
            str
        """

        return str