triton-model-analyzer 1.48.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
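The dist-info manifest above includes an `entry_points.txt`, which is what registers the package's console command when the wheel is installed. A minimal sketch (ours, not from the package) for listing what the wheel registers, assuming it is installed in the current environment:

```python
# Minimal sketch (not from the package): list the console scripts this wheel
# registers via its entry_points.txt. Assumes the wheel is installed locally.
from importlib.metadata import distribution

dist = distribution("triton-model-analyzer")
for ep in dist.entry_points:
    print(f"{ep.name} -> {ep.value}")
```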
|
@@ -0,0 +1,1747 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import logging
|
|
19
|
+
import os
|
|
20
|
+
from copy import deepcopy
|
|
21
|
+
|
|
22
|
+
import numba.cuda
|
|
23
|
+
import psutil
|
|
24
|
+
from google.protobuf.descriptor import FieldDescriptor
|
|
25
|
+
from tritonclient.grpc.model_config_pb2 import ModelConfig
|
|
26
|
+
|
|
27
|
+
from model_analyzer.config.input.config_utils import (
|
|
28
|
+
binary_path_validator,
|
|
29
|
+
file_path_validator,
|
|
30
|
+
objective_list_output_mapper,
|
|
31
|
+
parent_path_validator,
|
|
32
|
+
)
|
|
33
|
+
from model_analyzer.constants import LOGGER_NAME
|
|
34
|
+
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
|
|
35
|
+
from model_analyzer.perf_analyzer.genai_perf_config import GenaiPerfConfig
|
|
36
|
+
from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
|
|
37
|
+
from model_analyzer.record.record import RecordType
|
|
38
|
+
from model_analyzer.triton.server.server_config import TritonServerConfig
|
|
39
|
+
|
|
40
|
+
from .config_command import ConfigCommand
|
|
41
|
+
from .config_defaults import (
|
|
42
|
+
DEFAULT_ALWAYS_REPORT_GPU_METRICS,
|
|
43
|
+
DEFAULT_BATCH_SIZES,
|
|
44
|
+
DEFAULT_CHECKPOINT_DIRECTORY,
|
|
45
|
+
DEFAULT_CLIENT_PROTOCOL,
|
|
46
|
+
DEFAULT_COLLECT_CPU_METRICS,
|
|
47
|
+
DEFAULT_CONCURRENCY_SWEEP_DISABLE,
|
|
48
|
+
DEFAULT_DCGM_DISABLE,
|
|
49
|
+
DEFAULT_DURATION_SECONDS,
|
|
50
|
+
DEFAULT_EXPORT_PATH,
|
|
51
|
+
DEFAULT_FILENAME_MODEL_GPU,
|
|
52
|
+
DEFAULT_FILENAME_MODEL_INFERENCE,
|
|
53
|
+
DEFAULT_FILENAME_SERVER_ONLY,
|
|
54
|
+
DEFAULT_GPU_OUTPUT_FIELDS,
|
|
55
|
+
DEFAULT_GPUS,
|
|
56
|
+
DEFAULT_INFERENCE_OUTPUT_FIELDS,
|
|
57
|
+
DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS,
|
|
58
|
+
DEFAULT_MAX_RETRIES,
|
|
59
|
+
DEFAULT_MODEL_TYPE,
|
|
60
|
+
DEFAULT_MODEL_WEIGHTING,
|
|
61
|
+
DEFAULT_MONITORING_INTERVAL,
|
|
62
|
+
DEFAULT_NUM_CONFIGS_PER_MODEL,
|
|
63
|
+
DEFAULT_NUM_TOP_MODEL_CONFIGS,
|
|
64
|
+
DEFAULT_OFFLINE_OBJECTIVES,
|
|
65
|
+
DEFAULT_OFFLINE_PLOTS,
|
|
66
|
+
DEFAULT_ONLINE_OBJECTIVES,
|
|
67
|
+
DEFAULT_ONLINE_PLOTS,
|
|
68
|
+
DEFAULT_OPTUNA_EARLY_EXIT_THRESHOLD,
|
|
69
|
+
DEFAULT_OPTUNA_MAX_PERCENTAGE_OF_SEARCH_SPACE,
|
|
70
|
+
DEFAULT_OPTUNA_MAX_TRIALS,
|
|
71
|
+
DEFAULT_OPTUNA_MIN_PERCENTAGE_OF_SEARCH_SPACE,
|
|
72
|
+
DEFAULT_OPTUNA_MIN_TRIALS,
|
|
73
|
+
DEFAULT_OUTPUT_MODEL_REPOSITORY,
|
|
74
|
+
DEFAULT_OVERRIDE_OUTPUT_REPOSITORY_FLAG,
|
|
75
|
+
DEFAULT_PERF_ANALYZER_CPU_UTIL,
|
|
76
|
+
DEFAULT_PERF_ANALYZER_PATH,
|
|
77
|
+
DEFAULT_PERF_ANALYZER_TIMEOUT,
|
|
78
|
+
DEFAULT_PERF_MAX_AUTO_ADJUSTS,
|
|
79
|
+
DEFAULT_PERF_OUTPUT_FLAG,
|
|
80
|
+
DEFAULT_REQUEST_RATE_GPU_OUTPUT_FIELDS,
|
|
81
|
+
DEFAULT_REQUEST_RATE_INFERENCE_OUTPUT_FIELDS,
|
|
82
|
+
DEFAULT_REQUEST_RATE_SEARCH_ENABLE,
|
|
83
|
+
DEFAULT_RUN_CONFIG_MAX_BINARY_SEARCH_STEPS,
|
|
84
|
+
DEFAULT_RUN_CONFIG_MAX_CONCURRENCY,
|
|
85
|
+
DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT,
|
|
86
|
+
DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE,
|
|
87
|
+
DEFAULT_RUN_CONFIG_MAX_REQUEST_RATE,
|
|
88
|
+
DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
|
|
89
|
+
DEFAULT_RUN_CONFIG_MIN_INSTANCE_COUNT,
|
|
90
|
+
DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE,
|
|
91
|
+
DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
|
|
92
|
+
DEFAULT_RUN_CONFIG_PROFILE_MODELS_CONCURRENTLY_ENABLE,
|
|
93
|
+
DEFAULT_RUN_CONFIG_SEARCH_DISABLE,
|
|
94
|
+
DEFAULT_RUN_CONFIG_SEARCH_MODE,
|
|
95
|
+
DEFAULT_SERVER_OUTPUT_FIELDS,
|
|
96
|
+
DEFAULT_SKIP_DETAILED_REPORTS,
|
|
97
|
+
DEFAULT_SKIP_SUMMARY_REPORTS,
|
|
98
|
+
DEFAULT_TRITON_DOCKER_IMAGE,
|
|
99
|
+
DEFAULT_TRITON_GRPC_ENDPOINT,
|
|
100
|
+
DEFAULT_TRITON_HTTP_ENDPOINT,
|
|
101
|
+
DEFAULT_TRITON_INSTALL_PATH,
|
|
102
|
+
DEFAULT_TRITON_LAUNCH_MODE,
|
|
103
|
+
DEFAULT_TRITON_METRICS_URL,
|
|
104
|
+
DEFAULT_TRITON_SERVER_PATH,
|
|
105
|
+
DEFAULT_USE_CONCURRENCY_FORMULA,
|
|
106
|
+
)
|
|
107
|
+
from .config_enum import ConfigEnum
|
|
108
|
+
from .config_field import ConfigField
|
|
109
|
+
from .config_list_generic import ConfigListGeneric
|
|
110
|
+
from .config_list_numeric import ConfigListNumeric
|
|
111
|
+
from .config_list_string import ConfigListString
|
|
112
|
+
from .config_none import ConfigNone
|
|
113
|
+
from .config_object import ConfigObject
|
|
114
|
+
from .config_primitive import ConfigPrimitive
|
|
115
|
+
from .config_sweep import ConfigSweep
|
|
116
|
+
from .config_union import ConfigUnion
|
|
117
|
+
from .objects.config_model_profile_spec import ConfigModelProfileSpec
|
|
118
|
+
from .objects.config_plot import ConfigPlot
|
|
119
|
+
from .objects.config_protobuf_utils import (
|
|
120
|
+
is_protobuf_type_primitive,
|
|
121
|
+
protobuf_to_config_type,
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
logger = logging.getLogger(LOGGER_NAME)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class ConfigCommandProfile(ConfigCommand):
|
|
128
|
+
"""
|
|
129
|
+
Model Analyzer config object.
|
|
130
|
+
"""
|
|
131
|
+
|
|
132
|
+
def __init__(self):
|
|
133
|
+
super().__init__()
|
|
134
|
+
self._fill_config()
|
|
135
|
+
|
|
136
|
+
def _resolve_protobuf_field(self, field: FieldDescriptor) -> ConfigSweep:
|
|
137
|
+
"""
|
|
138
|
+
Recursively resolve protobuf fields.
|
|
139
|
+
|
|
140
|
+
Parameters
|
|
141
|
+
----------
|
|
142
|
+
field : google.protobuf.pyext._message.FieldDescriptor
|
|
143
|
+
|
|
144
|
+
Returns
|
|
145
|
+
-------
|
|
146
|
+
ConfigValue
|
|
147
|
+
A config type equivalent to the protobuf type.
|
|
148
|
+
|
|
149
|
+
Raises
|
|
150
|
+
------
|
|
151
|
+
TritonModelAnalyzerException
|
|
152
|
+
If the protobuf config field cannot be resolved, this exception
|
|
153
|
+
will be raised.
|
|
154
|
+
"""
|
|
155
|
+
|
|
156
|
+
if is_protobuf_type_primitive(field.type):
|
|
157
|
+
config_type = protobuf_to_config_type(field.type)
|
|
158
|
+
|
|
159
|
+
# If it is a repeated field, we should use ConfigListGeneric
|
|
160
|
+
if field.label == FieldDescriptor.LABEL_REPEATED:
|
|
161
|
+
config_type = ConfigListGeneric(ConfigPrimitive(config_type))
|
|
162
|
+
else:
|
|
163
|
+
config_type = ConfigPrimitive(config_type)
|
|
164
|
+
|
|
165
|
+
elif field.type == FieldDescriptor.TYPE_MESSAGE:
|
|
166
|
+
# If the field type is TYPE_MESSAGE, we need to create a new
|
|
167
|
+
# message of type ConfigObject
|
|
168
|
+
sub_field_schema = {}
|
|
169
|
+
|
|
170
|
+
# Custom handling for map field
|
|
171
|
+
# TODO: Add support for types in the keys
|
|
172
|
+
if (
|
|
173
|
+
field.message_type.has_options
|
|
174
|
+
and field.message_type.GetOptions().map_entry
|
|
175
|
+
):
|
|
176
|
+
value_field_type = self._resolve_protobuf_field(
|
|
177
|
+
field.message_type.fields_by_name["value"]
|
|
178
|
+
)
|
|
179
|
+
sub_field_schema["*"] = value_field_type
|
|
180
|
+
config_type = ConfigObject(schema=sub_field_schema)
|
|
181
|
+
|
|
182
|
+
else:
|
|
183
|
+
fields = field.message_type.fields
|
|
184
|
+
for sub_field in fields:
|
|
185
|
+
sub_field_schema[sub_field.name] = self._resolve_protobuf_field(
|
|
186
|
+
sub_field
|
|
187
|
+
)
|
|
188
|
+
if field.label == FieldDescriptor.LABEL_REPEATED:
|
|
189
|
+
config_type = ConfigListGeneric(
|
|
190
|
+
ConfigObject(schema=sub_field_schema)
|
|
191
|
+
)
|
|
192
|
+
else:
|
|
193
|
+
config_type = ConfigObject(schema=sub_field_schema)
|
|
194
|
+
elif field.type == FieldDescriptor.TYPE_ENUM:
|
|
195
|
+
choices = []
|
|
196
|
+
enum_values = field.enum_type.values
|
|
197
|
+
for enum_value in enum_values:
|
|
198
|
+
choices.append(enum_value.name)
|
|
199
|
+
config_type = ConfigEnum(choices)
|
|
200
|
+
else:
|
|
201
|
+
raise TritonModelAnalyzerException(
|
|
202
|
+
"The current version of Model Config is not supported by Model Analyzer."
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
return ConfigSweep(ConfigUnion([config_type, ConfigNone()]))
|
|
206
|
+
|
|
207
|
+
def _get_model_config_fields(self):
|
|
208
|
+
"""
|
|
209
|
+
Constructs a ConfigObject from the ModelConfig protobuf.
|
|
210
|
+
"""
|
|
211
|
+
|
|
212
|
+
model_config_prototype = ModelConfig()
|
|
213
|
+
fields = model_config_prototype.DESCRIPTOR.fields
|
|
214
|
+
|
|
215
|
+
schema = {}
|
|
216
|
+
for field in fields:
|
|
217
|
+
schema[field.name] = self._resolve_protobuf_field(field)
|
|
218
|
+
|
|
219
|
+
return ConfigObject(schema)
|
|
220
|
+
|
|
221
|
+
def _fill_config(self):
|
|
222
|
+
"""
|
|
223
|
+
Builder function makes calls to add config to
|
|
224
|
+
fill the config with options
|
|
225
|
+
"""
|
|
226
|
+
|
|
227
|
+
self._add_config(
|
|
228
|
+
ConfigField(
|
|
229
|
+
"config_file",
|
|
230
|
+
field_type=ConfigPrimitive(str),
|
|
231
|
+
flags=["-f", "--config-file"],
|
|
232
|
+
description="Path to Config File for subcommand 'profile'.",
|
|
233
|
+
)
|
|
234
|
+
)
|
|
235
|
+
self._add_config(
|
|
236
|
+
ConfigField(
|
|
237
|
+
"checkpoint_directory",
|
|
238
|
+
flags=["-s", "--checkpoint-directory"],
|
|
239
|
+
default_value=DEFAULT_CHECKPOINT_DIRECTORY,
|
|
240
|
+
field_type=ConfigPrimitive(str, validator=parent_path_validator),
|
|
241
|
+
description="Full path to directory to which to read and write checkpoints and profile data.",
|
|
242
|
+
)
|
|
243
|
+
)
|
|
244
|
+
self._add_config(
|
|
245
|
+
ConfigField(
|
|
246
|
+
"monitoring_interval",
|
|
247
|
+
flags=["-i", "--monitoring-interval"],
|
|
248
|
+
field_type=ConfigPrimitive(float),
|
|
249
|
+
default_value=DEFAULT_MONITORING_INTERVAL,
|
|
250
|
+
description="Interval of time between metrics measurements in seconds",
|
|
251
|
+
)
|
|
252
|
+
)
|
|
253
|
+
self._add_config(
|
|
254
|
+
ConfigField(
|
|
255
|
+
"duration_seconds",
|
|
256
|
+
field_type=ConfigPrimitive(int),
|
|
257
|
+
flags=["-d", "--duration-seconds"],
|
|
258
|
+
default_value=DEFAULT_DURATION_SECONDS,
|
|
259
|
+
description="Specifies how long (seconds) to gather server-only metrics",
|
|
260
|
+
)
|
|
261
|
+
)
|
|
262
|
+
self._add_config(
|
|
263
|
+
ConfigField(
|
|
264
|
+
"collect_cpu_metrics",
|
|
265
|
+
field_type=ConfigPrimitive(bool),
|
|
266
|
+
flags=["--collect-cpu-metrics"],
|
|
267
|
+
parser_args={"action": "store_true"},
|
|
268
|
+
default_value=DEFAULT_COLLECT_CPU_METRICS,
|
|
269
|
+
description="Specify whether CPU metrics are collected or not",
|
|
270
|
+
)
|
|
271
|
+
)
|
|
272
|
+
self._add_config(
|
|
273
|
+
ConfigField(
|
|
274
|
+
"gpus",
|
|
275
|
+
flags=["--gpus"],
|
|
276
|
+
field_type=ConfigListString(),
|
|
277
|
+
default_value=DEFAULT_GPUS,
|
|
278
|
+
description="List of GPU UUIDs to be used for the profiling. "
|
|
279
|
+
"Use 'all' to profile all the GPUs visible by CUDA.",
|
|
280
|
+
)
|
|
281
|
+
)
|
|
282
|
+
self._add_config(
|
|
283
|
+
ConfigField(
|
|
284
|
+
"always_report_gpu_metrics",
|
|
285
|
+
flags=["--always-report-gpu-metrics"],
|
|
286
|
+
field_type=ConfigPrimitive(bool),
|
|
287
|
+
parser_args={"action": "store_true"},
|
|
288
|
+
default_value=DEFAULT_ALWAYS_REPORT_GPU_METRICS,
|
|
289
|
+
description="Report GPU metrics, even when the model is `cpu_only`.",
|
|
290
|
+
)
|
|
291
|
+
)
|
|
292
|
+
self._add_config(
|
|
293
|
+
ConfigField(
|
|
294
|
+
"dcgm_disable",
|
|
295
|
+
field_type=ConfigPrimitive(bool),
|
|
296
|
+
flags=["--dcgm-disable"],
|
|
297
|
+
parser_args={"action": "store_true"},
|
|
298
|
+
default_value=DEFAULT_DCGM_DISABLE,
|
|
299
|
+
description="Disables DCGM, which prevents obtaining information about GPUs",
|
|
300
|
+
)
|
|
301
|
+
)
|
|
302
|
+
self._add_config(
|
|
303
|
+
ConfigField(
|
|
304
|
+
"skip_summary_reports",
|
|
305
|
+
flags=["--skip-summary-reports"],
|
|
306
|
+
field_type=ConfigPrimitive(bool),
|
|
307
|
+
parser_args={"action": "store_true"},
|
|
308
|
+
default_value=DEFAULT_SKIP_SUMMARY_REPORTS,
|
|
309
|
+
description="Skips the generation of analysis summary reports and tables.",
|
|
310
|
+
)
|
|
311
|
+
)
|
|
312
|
+
self._add_config(
|
|
313
|
+
ConfigField(
|
|
314
|
+
"skip_detailed_reports",
|
|
315
|
+
flags=["--skip-detailed-reports"],
|
|
316
|
+
field_type=ConfigPrimitive(bool),
|
|
317
|
+
parser_args={"action": "store_true"},
|
|
318
|
+
default_value=DEFAULT_SKIP_DETAILED_REPORTS,
|
|
319
|
+
description="Skips the generation of detailed summary reports and tables.",
|
|
320
|
+
)
|
|
321
|
+
)
|
|
322
|
+
self._add_config(
|
|
323
|
+
ConfigField(
|
|
324
|
+
"model_type",
|
|
325
|
+
flags=["--model-type"],
|
|
326
|
+
field_type=ConfigPrimitive(str),
|
|
327
|
+
default_value=DEFAULT_MODEL_TYPE,
|
|
328
|
+
description="Type of model being profiled: generic or LLM",
|
|
329
|
+
)
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
self._add_repository_configs()
|
|
333
|
+
self._add_client_configs()
|
|
334
|
+
self._add_profile_models_configs()
|
|
335
|
+
self._add_perf_analyzer_configs()
|
|
336
|
+
self._add_triton_configs()
|
|
337
|
+
self._add_run_search_configs()
|
|
338
|
+
self._add_export_configs()
|
|
339
|
+
self._add_report_configs()
|
|
340
|
+
self._add_table_configs()
|
|
341
|
+
self._add_shorthand_configs()
|
|
342
|
+
|
|
343
|
+
def _add_repository_configs(self):
|
|
344
|
+
"""
|
|
345
|
+
Adds configs specific to model repository
|
|
346
|
+
"""
|
|
347
|
+
self._add_config(
|
|
348
|
+
ConfigField(
|
|
349
|
+
"model_repository",
|
|
350
|
+
flags=["-m", "--model-repository"],
|
|
351
|
+
field_type=ConfigPrimitive(str, validator=file_path_validator),
|
|
352
|
+
description="Triton Model repository location",
|
|
353
|
+
)
|
|
354
|
+
)
|
|
355
|
+
self._add_config(
|
|
356
|
+
ConfigField(
|
|
357
|
+
"output_model_repository_path",
|
|
358
|
+
field_type=ConfigPrimitive(str),
|
|
359
|
+
default_value=DEFAULT_OUTPUT_MODEL_REPOSITORY,
|
|
360
|
+
flags=["--output-model-repository-path"],
|
|
361
|
+
description="Output model repository path used by Model Analyzer."
|
|
362
|
+
" This is the directory that will contain all the generated model configurations",
|
|
363
|
+
)
|
|
364
|
+
)
|
|
365
|
+
self._add_config(
|
|
366
|
+
ConfigField(
|
|
367
|
+
"override_output_model_repository",
|
|
368
|
+
field_type=ConfigPrimitive(bool),
|
|
369
|
+
parser_args={"action": "store_true"},
|
|
370
|
+
default_value=DEFAULT_OVERRIDE_OUTPUT_REPOSITORY_FLAG,
|
|
371
|
+
flags=["--override-output-model-repository"],
|
|
372
|
+
description="Will override the contents of the output model repository"
|
|
373
|
+
" and replace it with the new results.",
|
|
374
|
+
)
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
def _add_profile_models_configs(self):
|
|
378
|
+
"""
|
|
379
|
+
Adds configs specific to model specifications
|
|
380
|
+
"""
|
|
381
|
+
triton_server_flags_scheme = ConfigObject(
|
|
382
|
+
schema={k: ConfigPrimitive(str) for k in TritonServerConfig.allowed_keys()}
|
|
383
|
+
)
|
|
384
|
+
perf_analyzer_additive_keys = {
|
|
385
|
+
k: None for k in PerfAnalyzerConfig.additive_keys()
|
|
386
|
+
}
|
|
387
|
+
perf_analyzer_flags_scheme = ConfigObject(
|
|
388
|
+
schema={
|
|
389
|
+
k: (
|
|
390
|
+
(ConfigUnion([ConfigPrimitive(type_=str), ConfigListString()]))
|
|
391
|
+
if (k in perf_analyzer_additive_keys)
|
|
392
|
+
else ConfigPrimitive(type_=str)
|
|
393
|
+
)
|
|
394
|
+
for k in PerfAnalyzerConfig.allowed_keys()
|
|
395
|
+
}
|
|
396
|
+
)
|
|
397
|
+
|
|
398
|
+
genai_perf_flags_scheme = ConfigObject(
|
|
399
|
+
schema={k: ConfigPrimitive(str) for k in GenaiPerfConfig.allowed_keys()}
|
|
400
|
+
)
|
|
401
|
+
|
|
402
|
+
triton_server_environment_scheme = ConfigObject(
|
|
403
|
+
schema={"*": ConfigPrimitive(str)}
|
|
404
|
+
)
|
|
405
|
+
|
|
406
|
+
# This comes from the installed python package:
|
|
407
|
+
# <install_path>/lib/python3.8/dist-packages/docker/models/containers.py
|
|
408
|
+
# Only supporting values that are bool, int, string, or lists of strings
|
|
409
|
+
triton_docker_args_scheme = ConfigObject(
|
|
410
|
+
schema={
|
|
411
|
+
"image": ConfigPrimitive(str),
|
|
412
|
+
"command": ConfigPrimitive(str),
|
|
413
|
+
"auto_remove": ConfigPrimitive(bool),
|
|
414
|
+
"blkio_weight_device": ConfigListString(),
|
|
415
|
+
"blkio_weight": ConfigPrimitive(int),
|
|
416
|
+
"cap_add": ConfigListString(),
|
|
417
|
+
"cap_drop": ConfigListString(),
|
|
418
|
+
"cgroup_parent": ConfigPrimitive(str),
|
|
419
|
+
"cgroupns": ConfigPrimitive(str),
|
|
420
|
+
"cpu_count": ConfigPrimitive(int),
|
|
421
|
+
"cpu_percent": ConfigPrimitive(int),
|
|
422
|
+
"cpu_period": ConfigPrimitive(int),
|
|
423
|
+
"cpu_quota": ConfigPrimitive(int),
|
|
424
|
+
"cpu_rt_period": ConfigPrimitive(int),
|
|
425
|
+
"cpu_shares": ConfigPrimitive(int),
|
|
426
|
+
"cpuset_cpus": ConfigPrimitive(str),
|
|
427
|
+
"cpuset_mems": ConfigPrimitive(str),
|
|
428
|
+
"detach": ConfigPrimitive(bool),
|
|
429
|
+
"domainname": ConfigPrimitive(str),
|
|
430
|
+
"entrypoint": ConfigPrimitive(str),
|
|
431
|
+
"environment": ConfigListString(),
|
|
432
|
+
"hostname": ConfigPrimitive(str),
|
|
433
|
+
"init": ConfigPrimitive(bool),
|
|
434
|
+
"init_path": ConfigPrimitive(str),
|
|
435
|
+
"ipc_mode": ConfigPrimitive(str),
|
|
436
|
+
"isolation": ConfigPrimitive(str),
|
|
437
|
+
"kernel_memory": ConfigPrimitive(str),
|
|
438
|
+
"labels": ConfigListString(),
|
|
439
|
+
"mac_address": ConfigPrimitive(str),
|
|
440
|
+
"mem_limit": ConfigPrimitive(str),
|
|
441
|
+
"mem_reservation": ConfigPrimitive(str),
|
|
442
|
+
"memswap_limit": ConfigPrimitive(str),
|
|
443
|
+
"name": ConfigPrimitive(str),
|
|
444
|
+
"nano_cpus": ConfigPrimitive(int),
|
|
445
|
+
"network": ConfigPrimitive(str),
|
|
446
|
+
"network_disabled": ConfigPrimitive(bool),
|
|
447
|
+
"network_mode": ConfigPrimitive(str),
|
|
448
|
+
"oom_kill_disable": ConfigPrimitive(bool),
|
|
449
|
+
"oom_score_adj": ConfigPrimitive(int),
|
|
450
|
+
"pid_mode": ConfigPrimitive(str),
|
|
451
|
+
"pids_limit": ConfigPrimitive(int),
|
|
452
|
+
"platform": ConfigPrimitive(str),
|
|
453
|
+
"privileged": ConfigPrimitive(bool),
|
|
454
|
+
"publish_all_ports": ConfigPrimitive(bool),
|
|
455
|
+
"remove": ConfigPrimitive(bool),
|
|
456
|
+
"runtime": ConfigPrimitive(str),
|
|
457
|
+
"shm_size": ConfigPrimitive(str),
|
|
458
|
+
"stdin_open": ConfigPrimitive(bool),
|
|
459
|
+
"stdout": ConfigPrimitive(bool),
|
|
460
|
+
"stderr": ConfigPrimitive(bool),
|
|
461
|
+
"stop_signal": ConfigPrimitive(str),
|
|
462
|
+
"stream": ConfigPrimitive(bool),
|
|
463
|
+
"tty": ConfigPrimitive(bool),
|
|
464
|
+
"use_config_proxy": ConfigPrimitive(bool),
|
|
465
|
+
"user": ConfigPrimitive(str),
|
|
466
|
+
"userns_mode": ConfigPrimitive(str),
|
|
467
|
+
"uts_mode": ConfigPrimitive(str),
|
|
468
|
+
"version": ConfigPrimitive(str),
|
|
469
|
+
"volume_driver": ConfigPrimitive(str),
|
|
470
|
+
"volumes": ConfigListString(),
|
|
471
|
+
"working_dir": ConfigPrimitive(str),
|
|
472
|
+
}
|
|
473
|
+
)
|
|
474
|
+
|
|
475
|
+
self._add_config(
|
|
476
|
+
ConfigField(
|
|
477
|
+
"perf_analyzer_flags",
|
|
478
|
+
field_type=perf_analyzer_flags_scheme,
|
|
479
|
+
description="Allows custom configuration of the perf analyzer instances used by model analyzer.",
|
|
480
|
+
)
|
|
481
|
+
)
|
|
482
|
+
self._add_config(
|
|
483
|
+
ConfigField(
|
|
484
|
+
"genai_perf_flags",
|
|
485
|
+
field_type=genai_perf_flags_scheme,
|
|
486
|
+
description="Allows custom configuration of the GenAI Perf instances used by model analyzer.",
|
|
487
|
+
)
|
|
488
|
+
)
|
|
489
|
+
self._add_config(
|
|
490
|
+
ConfigField(
|
|
491
|
+
"triton_server_flags",
|
|
492
|
+
field_type=triton_server_flags_scheme,
|
|
493
|
+
description="Allows custom configuration of the triton instances used by model analyzer.",
|
|
494
|
+
)
|
|
495
|
+
)
|
|
496
|
+
self._add_config(
|
|
497
|
+
ConfigField(
|
|
498
|
+
"triton_server_environment",
|
|
499
|
+
field_type=triton_server_environment_scheme,
|
|
500
|
+
description="Allows setting environment variables for tritonserver server instances launched by Model Analyzer",
|
|
501
|
+
)
|
|
502
|
+
)
|
|
503
|
+
self._add_config(
|
|
504
|
+
ConfigField(
|
|
505
|
+
"triton_docker_args",
|
|
506
|
+
field_type=triton_docker_args_scheme,
|
|
507
|
+
description="Allows setting docker variables for tritonserver server instances launched by Model Analyzer",
|
|
508
|
+
)
|
|
509
|
+
)
|
|
510
|
+
|
|
511
|
+
objectives_scheme = ConfigUnion(
|
|
512
|
+
[
|
|
513
|
+
ConfigObject(
|
|
514
|
+
schema={
|
|
515
|
+
tag: ConfigPrimitive(type_=int)
|
|
516
|
+
for tag in RecordType.get_all_record_types().keys()
|
|
517
|
+
}
|
|
518
|
+
),
|
|
519
|
+
ConfigListString(output_mapper=objective_list_output_mapper),
|
|
520
|
+
]
|
|
521
|
+
)
|
|
522
|
+
constraints_scheme = ConfigObject(
|
|
523
|
+
schema={
|
|
524
|
+
"perf_throughput": ConfigObject(
|
|
525
|
+
schema={
|
|
526
|
+
"min": ConfigPrimitive(int),
|
|
527
|
+
}
|
|
528
|
+
),
|
|
529
|
+
"output_token_throughput": ConfigObject(
|
|
530
|
+
schema={
|
|
531
|
+
"min": ConfigPrimitive(int),
|
|
532
|
+
}
|
|
533
|
+
),
|
|
534
|
+
"perf_latency_avg": ConfigObject(
|
|
535
|
+
schema={
|
|
536
|
+
"max": ConfigPrimitive(int),
|
|
537
|
+
}
|
|
538
|
+
),
|
|
539
|
+
"perf_latency_p90": ConfigObject(
|
|
540
|
+
schema={
|
|
541
|
+
"max": ConfigPrimitive(int),
|
|
542
|
+
}
|
|
543
|
+
),
|
|
544
|
+
"perf_latency_p95": ConfigObject(
|
|
545
|
+
schema={
|
|
546
|
+
"max": ConfigPrimitive(int),
|
|
547
|
+
}
|
|
548
|
+
),
|
|
549
|
+
"perf_latency_p99": ConfigObject(
|
|
550
|
+
schema={
|
|
551
|
+
"max": ConfigPrimitive(int),
|
|
552
|
+
}
|
|
553
|
+
),
|
|
554
|
+
"perf_latency": ConfigObject(
|
|
555
|
+
schema={
|
|
556
|
+
"max": ConfigPrimitive(int),
|
|
557
|
+
}
|
|
558
|
+
),
|
|
559
|
+
"gpu_used_memory": ConfigObject(
|
|
560
|
+
schema={
|
|
561
|
+
"max": ConfigPrimitive(int),
|
|
562
|
+
}
|
|
563
|
+
),
|
|
564
|
+
"inter_token_latency_p99": ConfigObject(
|
|
565
|
+
schema={
|
|
566
|
+
"max": ConfigPrimitive(int),
|
|
567
|
+
}
|
|
568
|
+
),
|
|
569
|
+
"inter_token_latency_p95": ConfigObject(
|
|
570
|
+
schema={
|
|
571
|
+
"max": ConfigPrimitive(int),
|
|
572
|
+
}
|
|
573
|
+
),
|
|
574
|
+
"inter_token_latency_p90": ConfigObject(
|
|
575
|
+
schema={
|
|
576
|
+
"max": ConfigPrimitive(int),
|
|
577
|
+
}
|
|
578
|
+
),
|
|
579
|
+
"inter_token_latency_p75": ConfigObject(
|
|
580
|
+
schema={
|
|
581
|
+
"max": ConfigPrimitive(int),
|
|
582
|
+
}
|
|
583
|
+
),
|
|
584
|
+
"inter_token_latency_p50": ConfigObject(
|
|
585
|
+
schema={
|
|
586
|
+
"max": ConfigPrimitive(int),
|
|
587
|
+
}
|
|
588
|
+
),
|
|
589
|
+
"inter_token_latency_p25": ConfigObject(
|
|
590
|
+
schema={
|
|
591
|
+
"max": ConfigPrimitive(int),
|
|
592
|
+
}
|
|
593
|
+
),
|
|
594
|
+
"inter_token_latency_min": ConfigObject(
|
|
595
|
+
schema={
|
|
596
|
+
"max": ConfigPrimitive(int),
|
|
597
|
+
}
|
|
598
|
+
),
|
|
599
|
+
"inter_token_latency_max": ConfigObject(
|
|
600
|
+
schema={
|
|
601
|
+
"max": ConfigPrimitive(int),
|
|
602
|
+
}
|
|
603
|
+
),
|
|
604
|
+
"inter_token_latency_avg": ConfigObject(
|
|
605
|
+
schema={
|
|
606
|
+
"max": ConfigPrimitive(int),
|
|
607
|
+
}
|
|
608
|
+
),
|
|
609
|
+
"time_to_first_token_p99": ConfigObject(
|
|
610
|
+
schema={
|
|
611
|
+
"max": ConfigPrimitive(int),
|
|
612
|
+
}
|
|
613
|
+
),
|
|
614
|
+
"time_to_first_token_p95": ConfigObject(
|
|
615
|
+
schema={
|
|
616
|
+
"max": ConfigPrimitive(int),
|
|
617
|
+
}
|
|
618
|
+
),
|
|
619
|
+
"time_to_first_token_p90": ConfigObject(
|
|
620
|
+
schema={
|
|
621
|
+
"max": ConfigPrimitive(int),
|
|
622
|
+
}
|
|
623
|
+
),
|
|
624
|
+
"time_to_first_token_p75": ConfigObject(
|
|
625
|
+
schema={
|
|
626
|
+
"max": ConfigPrimitive(int),
|
|
627
|
+
}
|
|
628
|
+
),
|
|
629
|
+
"time_to_first_token_p50": ConfigObject(
|
|
630
|
+
schema={
|
|
631
|
+
"max": ConfigPrimitive(int),
|
|
632
|
+
}
|
|
633
|
+
),
|
|
634
|
+
"time_to_first_token_p25": ConfigObject(
|
|
635
|
+
schema={
|
|
636
|
+
"max": ConfigPrimitive(int),
|
|
637
|
+
}
|
|
638
|
+
),
|
|
639
|
+
"time_to_first_token_min": ConfigObject(
|
|
640
|
+
schema={
|
|
641
|
+
"max": ConfigPrimitive(int),
|
|
642
|
+
}
|
|
643
|
+
),
|
|
644
|
+
"time_to_first_token_max": ConfigObject(
|
|
645
|
+
schema={
|
|
646
|
+
"max": ConfigPrimitive(int),
|
|
647
|
+
}
|
|
648
|
+
),
|
|
649
|
+
"time_to_first_token_avg": ConfigObject(
|
|
650
|
+
schema={
|
|
651
|
+
"max": ConfigPrimitive(int),
|
|
652
|
+
}
|
|
653
|
+
),
|
|
654
|
+
}
|
|
655
|
+
)
|
|
656
|
+
self._add_config(
|
|
657
|
+
ConfigField(
|
|
658
|
+
"objectives",
|
|
659
|
+
field_type=objectives_scheme,
|
|
660
|
+
default_value=DEFAULT_OFFLINE_OBJECTIVES,
|
|
661
|
+
description="Model Analyzer uses the objectives described here to find the best configuration for each model.",
|
|
662
|
+
)
|
|
663
|
+
)
|
|
664
|
+
self._add_config(
|
|
665
|
+
ConfigField(
|
|
666
|
+
"constraints",
|
|
667
|
+
field_type=constraints_scheme,
|
|
668
|
+
description='Constraints on the objectives specified in the "objectives" field of the config.',
|
|
669
|
+
)
|
|
670
|
+
)
|
|
671
|
+
self._add_config(
|
|
672
|
+
ConfigField(
|
|
673
|
+
"weighting",
|
|
674
|
+
field_type=ConfigPrimitive(int),
|
|
675
|
+
description="A weighting used to bias the model when determining the best configuration",
|
|
676
|
+
)
|
|
677
|
+
)
|
|
678
|
+
|
|
679
|
+
model_config_fields = self._get_model_config_fields()
|
|
680
|
+
profile_model_scheme = ConfigObject(
|
|
681
|
+
required=True,
|
|
682
|
+
schema={
|
|
683
|
+
# Any key is allowed, but the keys must follow the pattern
|
|
684
|
+
# below
|
|
685
|
+
"*": ConfigObject(
|
|
686
|
+
schema={
|
|
687
|
+
"cpu_only": ConfigPrimitive(bool),
|
|
688
|
+
"parameters": ConfigObject(
|
|
689
|
+
schema={
|
|
690
|
+
"batch_sizes": ConfigListNumeric(type_=int),
|
|
691
|
+
"concurrency": ConfigListNumeric(type_=int),
|
|
692
|
+
"request_rate": ConfigListNumeric(type_=int),
|
|
693
|
+
}
|
|
694
|
+
),
|
|
695
|
+
"objectives": objectives_scheme,
|
|
696
|
+
"constraints": constraints_scheme,
|
|
697
|
+
"weighting": ConfigPrimitive(type_=int),
|
|
698
|
+
"model_config_parameters": model_config_fields,
|
|
699
|
+
"perf_analyzer_flags": perf_analyzer_flags_scheme,
|
|
700
|
+
"genai_perf_flags": genai_perf_flags_scheme,
|
|
701
|
+
"triton_server_flags": triton_server_flags_scheme,
|
|
702
|
+
"triton_server_environment": triton_server_environment_scheme,
|
|
703
|
+
"triton_docker_args": triton_docker_args_scheme,
|
|
704
|
+
}
|
|
705
|
+
)
|
|
706
|
+
},
|
|
707
|
+
output_mapper=ConfigModelProfileSpec.model_object_to_config_model_profile_spec,
|
|
708
|
+
)
|
|
709
|
+
self._add_config(
|
|
710
|
+
ConfigField(
|
|
711
|
+
"profile_models",
|
|
712
|
+
flags=["--profile-models"],
|
|
713
|
+
field_type=ConfigUnion(
|
|
714
|
+
[
|
|
715
|
+
profile_model_scheme,
|
|
716
|
+
ConfigListGeneric(
|
|
717
|
+
ConfigUnion(
|
|
718
|
+
[
|
|
719
|
+
profile_model_scheme,
|
|
720
|
+
ConfigPrimitive(
|
|
721
|
+
str,
|
|
722
|
+
output_mapper=ConfigModelProfileSpec.model_str_to_config_model_profile_spec,
|
|
723
|
+
),
|
|
724
|
+
]
|
|
725
|
+
),
|
|
726
|
+
required=True,
|
|
727
|
+
output_mapper=ConfigModelProfileSpec.model_mixed_to_config_model_profile_spec,
|
|
728
|
+
),
|
|
729
|
+
ConfigListString(
|
|
730
|
+
output_mapper=ConfigModelProfileSpec.model_list_to_config_model_profile_spec
|
|
731
|
+
),
|
|
732
|
+
],
|
|
733
|
+
required=True,
|
|
734
|
+
),
|
|
735
|
+
description="List of the models to be profiled",
|
|
736
|
+
)
|
|
737
|
+
)
|
|
738
|
+
self._add_config(
|
|
739
|
+
ConfigField(
|
|
740
|
+
"batch_sizes",
|
|
741
|
+
flags=["-b", "--batch-sizes"],
|
|
742
|
+
field_type=ConfigListNumeric(int),
|
|
743
|
+
default_value=DEFAULT_BATCH_SIZES,
|
|
744
|
+
description="Comma-delimited list of batch sizes to use for the profiling",
|
|
745
|
+
)
|
|
746
|
+
)
|
|
747
|
+
self._add_config(
|
|
748
|
+
ConfigField(
|
|
749
|
+
"concurrency",
|
|
750
|
+
flags=["-c", "--concurrency"],
|
|
751
|
+
field_type=ConfigListNumeric(int),
|
|
752
|
+
description="Comma-delimited list of concurrency values or ranges <start:end:step>"
|
|
753
|
+
" to be used during profiling",
|
|
754
|
+
)
|
|
755
|
+
)
|
|
756
|
+
self._add_config(
|
|
757
|
+
ConfigField(
|
|
758
|
+
"request_rate",
|
|
759
|
+
flags=["--request-rate"],
|
|
760
|
+
field_type=ConfigListNumeric(int),
|
|
761
|
+
description="Comma-delimited list of request rate values or ranges <start:end:step>"
|
|
762
|
+
" to be used during profiling",
|
|
763
|
+
)
|
|
764
|
+
)
|
|
765
|
+
self._add_config(
|
|
766
|
+
ConfigField(
|
|
767
|
+
"reload_model_disable",
|
|
768
|
+
field_type=ConfigPrimitive(bool),
|
|
769
|
+
parser_args={"action": "store_true"},
|
|
770
|
+
default_value=False,
|
|
771
|
+
flags=["--reload-model-disable"],
|
|
772
|
+
description="Flag to indicate whether or not to disable model "
|
|
773
|
+
"loading and unloading in remote mode.",
|
|
774
|
+
)
|
|
775
|
+
)
|
|
776
|
+
self._add_config(
|
|
777
|
+
ConfigField(
|
|
778
|
+
"bls_composing_models",
|
|
779
|
+
flags=["--bls-composing-models"],
|
|
780
|
+
field_type=ConfigUnion(
|
|
781
|
+
[
|
|
782
|
+
profile_model_scheme,
|
|
783
|
+
ConfigListGeneric(
|
|
784
|
+
ConfigUnion(
|
|
785
|
+
[
|
|
786
|
+
profile_model_scheme,
|
|
787
|
+
ConfigPrimitive(
|
|
788
|
+
str,
|
|
789
|
+
output_mapper=ConfigModelProfileSpec.model_str_to_config_model_profile_spec,
|
|
790
|
+
),
|
|
791
|
+
]
|
|
792
|
+
),
|
|
793
|
+
required=True,
|
|
794
|
+
output_mapper=ConfigModelProfileSpec.model_mixed_to_config_model_profile_spec,
|
|
795
|
+
),
|
|
796
|
+
ConfigListString(
|
|
797
|
+
output_mapper=ConfigModelProfileSpec.model_list_to_config_model_profile_spec
|
|
798
|
+
),
|
|
799
|
+
],
|
|
800
|
+
required=True,
|
|
801
|
+
),
|
|
802
|
+
default_value=[],
|
|
803
|
+
description="List of the models to be profiled",
|
|
804
|
+
)
|
|
805
|
+
)
|
|
806
|
+
self._add_config(
|
|
807
|
+
ConfigField(
|
|
808
|
+
"cpu_only_composing_models",
|
|
809
|
+
field_type=ConfigListString(),
|
|
810
|
+
flags=["--cpu-only-composing-models"],
|
|
811
|
+
description=(
|
|
812
|
+
"A list of strings representing composing models that should be profiled using CPU instances only. "
|
|
813
|
+
),
|
|
814
|
+
)
|
|
815
|
+
)
|
|
816
|
+
|
|
817
|
+
def _add_client_configs(self):
|
|
818
|
+
"""
|
|
819
|
+
Adds configs specific to tritonclient
|
|
820
|
+
"""
|
|
821
|
+
self._add_config(
|
|
822
|
+
ConfigField(
|
|
823
|
+
"client_max_retries",
|
|
824
|
+
flags=["-r", "--client-max-retries"],
|
|
825
|
+
field_type=ConfigPrimitive(int),
|
|
826
|
+
default_value=DEFAULT_MAX_RETRIES,
|
|
827
|
+
description="Specifies the max number of retries for any requests to Triton server.",
|
|
828
|
+
)
|
|
829
|
+
)
|
|
830
|
+
self._add_config(
|
|
831
|
+
ConfigField(
|
|
832
|
+
"client_protocol",
|
|
833
|
+
flags=["--client-protocol"],
|
|
834
|
+
choices=["http", "grpc"],
|
|
835
|
+
field_type=ConfigPrimitive(str),
|
|
836
|
+
default_value=DEFAULT_CLIENT_PROTOCOL,
|
|
837
|
+
description="The protocol used to communicate with the Triton Inference Server",
|
|
838
|
+
)
|
|
839
|
+
)
|
|
840
|
+
|
|
841
|
+
def _add_run_search_configs(self):
|
|
842
|
+
"""
|
|
843
|
+
Add the config options related
|
|
844
|
+
to the run search
|
|
845
|
+
"""
|
|
846
|
+
|
|
847
|
+
self._add_config(
|
|
848
|
+
ConfigField(
|
|
849
|
+
"early_exit_enable",
|
|
850
|
+
field_type=ConfigPrimitive(bool),
|
|
851
|
+
parser_args={"action": "store_true"},
|
|
852
|
+
default_value=False,
|
|
853
|
+
flags=["--early-exit-enable"],
|
|
854
|
+
description="Flag to indicate if Model Analyzer can skip some configurations when manually searching concurrency/request rate, or max_batch_size",
|
|
855
|
+
)
|
|
856
|
+
)
|
|
857
|
+
self._add_config(
|
|
858
|
+
ConfigField(
|
|
859
|
+
"run_config_search_max_concurrency",
|
|
860
|
+
flags=["--run-config-search-max-concurrency"],
|
|
861
|
+
field_type=ConfigPrimitive(int),
|
|
862
|
+
default_value=DEFAULT_RUN_CONFIG_MAX_CONCURRENCY,
|
|
863
|
+
description="Max concurrency value that run config search should not go beyond that.",
|
|
864
|
+
)
|
|
865
|
+
)
|
|
866
|
+
self._add_config(
|
|
867
|
+
ConfigField(
|
|
868
|
+
"run_config_search_min_concurrency",
|
|
869
|
+
flags=["--run-config-search-min-concurrency"],
|
|
870
|
+
field_type=ConfigPrimitive(int),
|
|
871
|
+
default_value=DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
|
|
872
|
+
description="Min concurrency value that run config search should start with.",
|
|
873
|
+
)
|
|
874
|
+
)
|
|
875
|
+
self._add_config(
|
|
876
|
+
ConfigField(
|
|
877
|
+
"run_config_search_max_request_rate",
|
|
878
|
+
flags=["--run-config-search-max-request-rate"],
|
|
879
|
+
field_type=ConfigPrimitive(int),
|
|
880
|
+
default_value=DEFAULT_RUN_CONFIG_MAX_REQUEST_RATE,
|
|
881
|
+
description="Max request rate value that run config search should not go beyond that.",
|
|
882
|
+
)
|
|
883
|
+
)
|
|
884
|
+
self._add_config(
|
|
885
|
+
ConfigField(
|
|
886
|
+
"run_config_search_min_request_rate",
|
|
887
|
+
flags=["--run-config-search-min-request-rate"],
|
|
888
|
+
field_type=ConfigPrimitive(int),
|
|
889
|
+
default_value=DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
|
|
890
|
+
description="Min request rate value that run config search should start with.",
|
|
891
|
+
)
|
|
892
|
+
)
|
|
893
|
+
self._add_config(
|
|
894
|
+
ConfigField(
|
|
895
|
+
"run_config_search_max_instance_count",
|
|
896
|
+
flags=["--run-config-search-max-instance-count"],
|
|
897
|
+
field_type=ConfigPrimitive(int),
|
|
898
|
+
default_value=DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT,
|
|
899
|
+
description="Max instance count value that run config search should not go beyond that.",
|
|
900
|
+
)
|
|
901
|
+
)
|
|
902
|
+
self._add_config(
|
|
903
|
+
ConfigField(
|
|
904
|
+
"run_config_search_min_instance_count",
|
|
905
|
+
flags=["--run-config-search-min-instance-count"],
|
|
906
|
+
field_type=ConfigPrimitive(int),
|
|
907
|
+
default_value=DEFAULT_RUN_CONFIG_MIN_INSTANCE_COUNT,
|
|
908
|
+
description="Min instance count value that run config search should start with.",
|
|
909
|
+
)
|
|
910
|
+
)
|
|
911
|
+
self._add_config(
|
|
912
|
+
ConfigField(
|
|
913
|
+
"run_config_search_max_model_batch_size",
|
|
914
|
+
flags=["--run-config-search-max-model-batch-size"],
|
|
915
|
+
field_type=ConfigPrimitive(int),
|
|
916
|
+
default_value=DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE,
|
|
917
|
+
description="Value for the model's max_batch_size that run config search will not go beyond.",
|
|
918
|
+
)
|
|
919
|
+
)
|
|
920
|
+
self._add_config(
|
|
921
|
+
ConfigField(
|
|
922
|
+
"run_config_search_min_model_batch_size",
|
|
923
|
+
flags=["--run-config-search-min-model-batch-size"],
|
|
924
|
+
field_type=ConfigPrimitive(int),
|
|
925
|
+
default_value=DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE,
|
|
926
|
+
description="Value for the model's max_batch_size that run config search will start from.",
|
|
927
|
+
)
|
|
928
|
+
)
|
|
929
|
+
self._add_config(
|
|
930
|
+
ConfigField(
|
|
931
|
+
"run_config_search_max_binary_search_steps",
|
|
932
|
+
flags=["--run-config-search-max-binary-search-steps"],
|
|
933
|
+
field_type=ConfigPrimitive(int),
|
|
934
|
+
default_value=DEFAULT_RUN_CONFIG_MAX_BINARY_SEARCH_STEPS,
|
|
935
|
+
description="Maximum number of steps take during the binary concurrency search.",
|
|
936
|
+
)
|
|
937
|
+
)
|
|
938
|
+
self._add_config(
|
|
939
|
+
ConfigField(
|
|
940
|
+
"min_percentage_of_search_space",
|
|
941
|
+
flags=["--min-percentage-of-search-space"],
|
|
942
|
+
field_type=ConfigPrimitive(int),
|
|
943
|
+
default_value=DEFAULT_OPTUNA_MIN_PERCENTAGE_OF_SEARCH_SPACE,
|
|
944
|
+
description="Minimum percentage of the search space to profile when using Optuna",
|
|
945
|
+
)
|
|
946
|
+
)
|
|
947
|
+
self._add_config(
|
|
948
|
+
ConfigField(
|
|
949
|
+
"max_percentage_of_search_space",
|
|
950
|
+
flags=["--max-percentage-of-search-space"],
|
|
951
|
+
field_type=ConfigPrimitive(int),
|
|
952
|
+
default_value=DEFAULT_OPTUNA_MAX_PERCENTAGE_OF_SEARCH_SPACE,
|
|
953
|
+
description="Maximum percentage of the search space to profile when using Optuna",
|
|
954
|
+
)
|
|
955
|
+
)
|
|
956
|
+
self._add_config(
|
|
957
|
+
ConfigField(
|
|
958
|
+
"optuna_min_trials",
|
|
959
|
+
flags=["--optuna-min-trials"],
|
|
960
|
+
field_type=ConfigPrimitive(int),
|
|
961
|
+
default_value=DEFAULT_OPTUNA_MIN_TRIALS,
|
|
962
|
+
description="Minimum number of trials to profile when using Optuna",
|
|
963
|
+
)
|
|
964
|
+
)
|
|
965
|
+
self._add_config(
|
|
966
|
+
ConfigField(
|
|
967
|
+
"optuna_max_trials",
|
|
968
|
+
flags=["--optuna-max-trials"],
|
|
969
|
+
field_type=ConfigPrimitive(int),
|
|
970
|
+
default_value=DEFAULT_OPTUNA_MAX_TRIALS,
|
|
971
|
+
description="Maximum number of trials to profile when using Optuna",
|
|
972
|
+
)
|
|
973
|
+
)
|
|
974
|
+
self._add_config(
|
|
975
|
+
ConfigField(
|
|
976
|
+
"optuna_early_exit_threshold",
|
|
977
|
+
flags=["--optuna-early-exit-threshold"],
|
|
978
|
+
field_type=ConfigPrimitive(int),
|
|
979
|
+
default_value=DEFAULT_OPTUNA_EARLY_EXIT_THRESHOLD,
|
|
980
|
+
description="Number of trials without improvement before triggering early exit when using Optuna",
|
|
981
|
+
)
|
|
982
|
+
)
|
|
983
|
+
self._add_config(
|
|
984
|
+
ConfigField(
|
|
985
|
+
"use_concurrency_formula",
|
|
986
|
+
flags=["--use-concurrency-formula"],
|
|
987
|
+
field_type=ConfigPrimitive(bool),
|
|
988
|
+
parser_args={"action": "store_true"},
|
|
989
|
+
default_value=DEFAULT_USE_CONCURRENCY_FORMULA,
|
|
990
|
+
description="Use the concurrency formula instead of searching the concurrency space in Optuna search mode",
|
|
991
|
+
)
|
|
992
|
+
)
|
|
993
|
+
self._add_config(
|
|
994
|
+
ConfigField(
|
|
995
|
+
"run_config_search_mode",
|
|
996
|
+
flags=["--run-config-search-mode"],
|
|
997
|
+
choices=["brute", "quick", "optuna"],
|
|
998
|
+
field_type=ConfigPrimitive(str),
|
|
999
|
+
default_value=DEFAULT_RUN_CONFIG_SEARCH_MODE,
|
|
1000
|
+
description="The search mode for Model Analyzer to find and evaluate"
|
|
1001
|
+
" model configurations. 'brute' will brute force all combinations of"
|
|
1002
|
+
" configuration options. 'quick' will attempt to find a near-optimal"
|
|
1003
|
+
" configuration as fast as possible, but isn't guaranteed to find the"
|
|
1004
|
+
" best. 'optuna' is a more generalized search algorithm allowing "
|
|
1005
|
+
" the user to quickly search over any set of parameters.",
|
|
1006
|
+
)
|
|
1007
|
+
)
|
|
1008
|
+
self._add_config(
|
|
1009
|
+
ConfigField(
|
|
1010
|
+
"run_config_search_disable",
|
|
1011
|
+
flags=["--run-config-search-disable"],
|
|
1012
|
+
field_type=ConfigPrimitive(bool),
|
|
1013
|
+
parser_args={"action": "store_true"},
|
|
1014
|
+
default_value=DEFAULT_RUN_CONFIG_SEARCH_DISABLE,
|
|
1015
|
+
description="Disable run config search.",
|
|
1016
|
+
)
|
|
1017
|
+
)
|
|
1018
|
+
self._add_config(
|
|
1019
|
+
ConfigField(
|
|
1020
|
+
"run_config_profile_models_concurrently_enable",
|
|
1021
|
+
flags=["--run-config-profile-models-concurrently-enable"],
|
|
1022
|
+
field_type=ConfigPrimitive(bool),
|
|
1023
|
+
parser_args={"action": "store_true"},
|
|
1024
|
+
default_value=DEFAULT_RUN_CONFIG_PROFILE_MODELS_CONCURRENTLY_ENABLE,
|
|
1025
|
+
description="Enable the profiling of all supplied models concurrently.",
|
|
1026
|
+
)
|
|
1027
|
+
)
|
|
1028
|
+
self._add_config(
|
|
1029
|
+
ConfigField(
|
|
1030
|
+
"request_rate_search_enable",
|
|
1031
|
+
flags=["--request-rate-search-enable"],
|
|
1032
|
+
field_type=ConfigPrimitive(bool),
|
|
1033
|
+
parser_args={"action": "store_true"},
|
|
1034
|
+
default_value=DEFAULT_REQUEST_RATE_SEARCH_ENABLE,
|
|
1035
|
+
description="Enables the searching of request rate (instead of concurrency).",
|
|
1036
|
+
)
|
|
1037
|
+
)
|
|
1038
|
+
self._add_config(
|
|
1039
|
+
ConfigField(
|
|
1040
|
+
"concurrency_sweep_disable",
|
|
1041
|
+
flags=["--concurrency-sweep-disable"],
|
|
1042
|
+
field_type=ConfigPrimitive(bool),
|
|
1043
|
+
parser_args={"action": "store_true"},
|
|
1044
|
+
default_value=DEFAULT_CONCURRENCY_SWEEP_DISABLE,
|
|
1045
|
+
description="Disables the sweeping of concurrencies for the top-N models after quick/optuna search completion.",
|
|
1046
|
+
)
|
|
1047
|
+
)
|
|
1048
|
+
|
|
1049
|
+
def _add_triton_configs(self):
|
|
1050
|
+
"""
|
|
1051
|
+
Adds the triton related flags
|
|
1052
|
+
and config options
|
|
1053
|
+
"""
|
|
1054
|
+
|
|
1055
|
+
self._add_config(
|
|
1056
|
+
ConfigField(
|
|
1057
|
+
"triton_launch_mode",
|
|
1058
|
+
field_type=ConfigPrimitive(str),
|
|
1059
|
+
flags=["--triton-launch-mode"],
|
|
1060
|
+
default_value=DEFAULT_TRITON_LAUNCH_MODE,
|
|
1061
|
+
choices=["local", "docker", "remote", "c_api"],
|
|
1062
|
+
description="The method by which to launch Triton Server. "
|
|
1063
|
+
"'local' assumes tritonserver binary is available locally. "
|
|
1064
|
+
"'docker' pulls and launches a triton docker container with "
|
|
1065
|
+
"the specified version. 'remote' connects to a running "
|
|
1066
|
+
"server using given http, grpc and metrics endpoints. "
|
|
1067
|
+
"'c_api' allows direct benchmarking of Triton locally"
|
|
1068
|
+
"without the use of endpoints.",
|
|
1069
|
+
)
|
|
1070
|
+
)
|
|
1071
|
+
self._add_config(
|
|
1072
|
+
ConfigField(
|
|
1073
|
+
"triton_docker_image",
|
|
1074
|
+
flags=["--triton-docker-image"],
|
|
1075
|
+
field_type=ConfigPrimitive(str),
|
|
1076
|
+
default_value=DEFAULT_TRITON_DOCKER_IMAGE,
|
|
1077
|
+
description="Triton Server Docker image tag",
|
|
1078
|
+
)
|
|
1079
|
+
)
|
|
1080
|
+
self._add_config(
|
|
1081
|
+
ConfigField(
|
|
1082
|
+
"triton_http_endpoint",
|
|
1083
|
+
flags=["--triton-http-endpoint"],
|
|
1084
|
+
field_type=ConfigPrimitive(str),
|
|
1085
|
+
default_value=DEFAULT_TRITON_HTTP_ENDPOINT,
|
|
1086
|
+
description="Triton Server HTTP endpoint url used by Model Analyzer client.",
|
|
1087
|
+
)
|
|
1088
|
+
)
|
|
1089
|
+
self._add_config(
|
|
1090
|
+
ConfigField(
|
|
1091
|
+
"triton_grpc_endpoint",
|
|
1092
|
+
flags=["--triton-grpc-endpoint"],
|
|
1093
|
+
field_type=ConfigPrimitive(str),
|
|
1094
|
+
default_value=DEFAULT_TRITON_GRPC_ENDPOINT,
|
|
1095
|
+
description="Triton Server HTTP endpoint url used by Model Analyzer client.",
|
|
1096
|
+
)
|
|
1097
|
+
)
|
|
1098
|
+
self._add_config(
|
|
1099
|
+
ConfigField(
|
|
1100
|
+
"triton_metrics_url",
|
|
1101
|
+
field_type=ConfigPrimitive(str),
|
|
1102
|
+
flags=["--triton-metrics-url"],
|
|
1103
|
+
default_value=DEFAULT_TRITON_METRICS_URL,
|
|
1104
|
+
description="Triton Server Metrics endpoint url. ",
|
|
1105
|
+
)
|
|
1106
|
+
)
|
|
1107
|
+
self._add_config(
|
|
1108
|
+
ConfigField(
|
|
1109
|
+
"triton_server_path",
|
|
1110
|
+
field_type=ConfigPrimitive(str),
|
|
1111
|
+
flags=["--triton-server-path"],
|
|
1112
|
+
default_value=DEFAULT_TRITON_SERVER_PATH,
|
|
1113
|
+
description="The full path to the tritonserver binary executable",
|
|
1114
|
+
)
|
|
1115
|
+
)
|
|
1116
|
+
self._add_config(
|
|
1117
|
+
ConfigField(
|
|
1118
|
+
"triton_output_path",
|
|
1119
|
+
field_type=ConfigPrimitive(str),
|
|
1120
|
+
flags=["--triton-output-path"],
|
|
1121
|
+
description=(
|
|
1122
|
+
"The full path to the file to which Triton server instance will "
|
|
1123
|
+
"append their log output. If not specified, they are not written."
|
|
1124
|
+
),
|
|
1125
|
+
)
|
|
1126
|
+
)
|
|
1127
|
+
self._add_config(
|
|
1128
|
+
ConfigField(
|
|
1129
|
+
"triton_docker_mounts",
|
|
1130
|
+
field_type=ConfigListString(),
|
|
1131
|
+
flags=["--triton-docker-mounts"],
|
|
1132
|
+
description=(
|
|
1133
|
+
"A list of strings representing volumes to be mounted. "
|
|
1134
|
+
"The strings should have the format '<host path>:<container path>:<access mode>'."
|
|
1135
|
+
),
|
|
1136
|
+
)
|
|
1137
|
+
)
        self._add_config(
            ConfigField(
                "triton_docker_labels",
                field_type=ConfigObject(schema={"*": ConfigPrimitive(str)}),
                description="A dictionary of name-value labels to set metadata for the Triton "
                "server docker container in docker launch mode",
            )
        )
        self._add_config(
            ConfigField(
                "triton_docker_shm_size",
                field_type=ConfigPrimitive(str),
                flags=["--triton-docker-shm-size"],
                description=(
                    "The size of the /dev/shm for the triton docker container"
                ),
            )
        )
        self._add_config(
            ConfigField(
                "triton_install_path",
                field_type=ConfigPrimitive(str),
                default_value=DEFAULT_TRITON_INSTALL_PATH,
                flags=["--triton-install-path"],
                description=(
                    "Path to Triton install directory, i.e. the parent directory of 'lib/libtritonserver.so'. "
                    "Required only when using triton_launch_mode=c_api."
                ),
            )
        )
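        # Editor's note (illustrative sketch, not part of the package source):
        # in c_api mode Model Analyzer loads libtritonserver.so directly, so a
        # typical invocation (hypothetical install path) would be:
        #
        #   model-analyzer profile --triton-launch-mode=c_api \
        #       --triton-install-path=/opt/tritonserver ...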

    def _add_perf_analyzer_configs(self):
        """
        Add the perf_analyzer related config options
        """

        self._add_config(
            ConfigField(
                "perf_analyzer_timeout",
                flags=["--perf-analyzer-timeout"],
                field_type=ConfigPrimitive(int),
                default_value=DEFAULT_PERF_ANALYZER_TIMEOUT,
                description="Perf analyzer timeout value in seconds.",
            )
        )
        self._add_config(
            ConfigField(
                "perf_analyzer_cpu_util",
                flags=["--perf-analyzer-cpu-util"],
                field_type=ConfigPrimitive(float),
                default_value=psutil.cpu_count() * DEFAULT_PERF_ANALYZER_CPU_UTIL,
                description="Maximum CPU utilization value allowed for the perf_analyzer.",
            )
        )
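        # Editor's note (illustrative, not part of the package source): the
        # default threshold scales with logical core count. Assuming
        # DEFAULT_PERF_ANALYZER_CPU_UTIL is a per-core percentage such as 80.0,
        # a 16-core machine gets 16 * 80.0 = 1280.0, i.e. the combined
        # utilization across all cores that perf_analyzer may not exceed.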
        self._add_config(
            ConfigField(
                "perf_analyzer_path",
                flags=["--perf-analyzer-path"],
                field_type=ConfigPrimitive(str, validator=binary_path_validator),
                default_value=DEFAULT_PERF_ANALYZER_PATH,
                description="The full path to the perf_analyzer binary executable",
            )
        )
        self._add_config(
            ConfigField(
                "perf_output",
                flags=["--perf-output"],
                parser_args={"action": "store_true"},
                field_type=ConfigPrimitive(bool),
                default_value=DEFAULT_PERF_OUTPUT_FLAG,
                description="Enables the output from the perf_analyzer to a file specified by"
                " perf_output_path. If perf_output_path is None, output will be"
                " written to stdout.",
            )
        )
        self._add_config(
            ConfigField(
                "perf_output_path",
                flags=["--perf-output-path"],
                field_type=ConfigPrimitive(str),
                description="Path to the file to which perf_analyzer output is written, if enabled.",
            )
        )
        self._add_config(
            ConfigField(
                "perf_analyzer_max_auto_adjusts",
                flags=["--perf-analyzer-max-auto-adjusts"],
                field_type=ConfigPrimitive(int),
                default_value=DEFAULT_PERF_MAX_AUTO_ADJUSTS,
                description="Maximum number of times perf_analyzer is "
                "launched with auto adjusted parameters in an attempt to profile a model.",
            )
        )

    def _add_export_configs(self):
        """
        Add configs related to exporting data
        """
        self._add_config(
            ConfigField(
                "export_path",
                flags=["-e", "--export-path"],
                default_value=DEFAULT_EXPORT_PATH,
                field_type=ConfigPrimitive(str, validator=parent_path_validator),
                description="Full path to directory in which to store the results",
            )
        )
        self._add_config(
            ConfigField(
                "filename_model_inference",
                flags=["--filename-model-inference"],
                default_value=DEFAULT_FILENAME_MODEL_INFERENCE,
                field_type=ConfigPrimitive(str),
                description="Specifies filename for storing model inference metrics",
            )
        )
        self._add_config(
            ConfigField(
                "filename_model_gpu",
                flags=["--filename-model-gpu"],
                field_type=ConfigPrimitive(str),
                default_value=DEFAULT_FILENAME_MODEL_GPU,
                description="Specifies filename for storing model GPU metrics",
            )
        )
        self._add_config(
            ConfigField(
                "filename_server_only",
                flags=["--filename-server-only"],
                field_type=ConfigPrimitive(str),
                default_value=DEFAULT_FILENAME_SERVER_ONLY,
                description="Specifies filename for server-only metrics",
            )
        )

    def _add_report_configs(self):
        """
        Adds report related configs
        """
        self._add_config(
            ConfigField(
                "num_configs_per_model",
                flags=["--num-configs-per-model"],
                field_type=ConfigPrimitive(int),
                default_value=DEFAULT_NUM_CONFIGS_PER_MODEL,
                description="The number of configurations to plot per model in the summary.",
            )
        )
        self._add_config(
            ConfigField(
                "num_top_model_configs",
                flags=["--num-top-model-configs"],
                field_type=ConfigPrimitive(int),
                default_value=DEFAULT_NUM_TOP_MODEL_CONFIGS,
                description="Model Analyzer will compare this many of the top model configs across all models.",
            )
        )

    def _add_table_configs(self):
        """
        Adds result table related configs
        """
        self._add_config(
            ConfigField(
                "inference_output_fields",
                flags=["--inference-output-fields"],
                field_type=ConfigListString(),
                default_value=DEFAULT_INFERENCE_OUTPUT_FIELDS,
                description="Specifies column keys for model inference metrics table",
            )
        )
        self._add_config(
            ConfigField(
                "gpu_output_fields",
                flags=["--gpu-output-fields"],
                field_type=ConfigListString(),
                default_value=DEFAULT_GPU_OUTPUT_FIELDS,
                description="Specifies column keys for model gpu metrics table",
            )
        )
        self._add_config(
            ConfigField(
                "server_output_fields",
                flags=["--server-output-fields"],
                field_type=ConfigListString(),
                default_value=DEFAULT_SERVER_OUTPUT_FIELDS,
                description="Specifies column keys for server-only metrics table",
            )
        )

    def _add_shorthand_configs(self):
        """
        Adds configs for various shorthands
        """
        self._add_config(
            ConfigField(
                "latency_budget",
                flags=["--latency-budget"],
                field_type=ConfigPrimitive(int),
                description="Shorthand flag for specifying a maximum latency in ms.",
            )
        )

        self._add_config(
            ConfigField(
                "min_throughput",
                flags=["--min-throughput"],
                field_type=ConfigPrimitive(int),
                description="Shorthand flag for specifying a minimum throughput.",
            )
        )

    def set_config_values(self, args: argparse.Namespace) -> None:
        """
        Set the config values. This function sets all the values for the
        config. CLI arguments have the highest priority, then YAML config
        values, and then default values.

        Parameters
        ----------
        args : argparse.Namespace
            Parsed arguments from the CLI

        Raises
        ------
        TritonModelAnalyzerException
            If the required fields are not specified, this exception
            is raised
        """
        if args.mode == "online" and "latency_budget" not in args:
            self._fields["objectives"].set_default_value(DEFAULT_ONLINE_OBJECTIVES)

        super().set_config_values(args)

        # Add plot configs after config parse. Users should not be
        # able to edit these plots.
        self._add_plot_configs()
        if args.mode == "online":
            self._fields["plots"].set_value(DEFAULT_ONLINE_PLOTS)
        elif args.mode == "offline":
            self._fields["plots"].set_value(DEFAULT_OFFLINE_PLOTS)
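        # Editor's note (illustrative, not part of the package source): under
        # the precedence described above, if a YAML config sets
        # `perf_analyzer_timeout: 1200` while the CLI passes
        # `--perf-analyzer-timeout 600` (hypothetical values), the field
        # resolves to 600; with neither given, the default value applies.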

    def _add_plot_configs(self):
        """
        Add plots to the config
        """
        plots_scheme = ConfigObject(
            schema={
                "*": ConfigObject(
                    schema={
                        "title": ConfigPrimitive(type_=str),
                        "x_axis": ConfigPrimitive(type_=str),
                        "y_axis": ConfigPrimitive(type_=str),
                        "monotonic": ConfigPrimitive(type_=bool),
                    }
                )
            },
            output_mapper=ConfigPlot.from_object,
        )
        self._add_config(
            ConfigField(
                "plots",
                field_type=ConfigUnion(
                    [
                        plots_scheme,
                        ConfigListGeneric(
                            type_=plots_scheme, output_mapper=ConfigPlot.from_list
                        ),
                    ]
                ),
                description="Model analyzer uses the information in this section to construct plots of the results.",
            )
        )
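        # Editor's note (illustrative sketch, not part of the package source):
        # the schema above accepts YAML entries along these lines (hypothetical
        # plot name and axes):
        #
        #   plots:
        #     throughput_v_latency:
        #       title: Throughput vs. Latency
        #       x_axis: perf_latency_p99
        #       y_axis: perf_throughput
        #       monotonic: true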

    def _preprocess_and_verify_arguments(self):
        """
        Enforces some rules on the config.

        Raises
        ------
        TritonModelAnalyzerException
            If there is a problem with arguments or config.
        """

        if self.triton_launch_mode == "remote":
            if self.client_protocol == "http" and not self.triton_http_endpoint:
                raise TritonModelAnalyzerException(
                    "client-protocol is 'http'. Must specify triton-http-endpoint "
                    "if connecting to an already running server, or change the "
                    "protocol using --client-protocol."
                )
            if self.client_protocol == "grpc" and not self.triton_grpc_endpoint:
                raise TritonModelAnalyzerException(
                    "client-protocol is 'grpc'. Must specify triton-grpc-endpoint "
                    "if connecting to an already running server, or change the "
                    "protocol using --client-protocol."
                )
        elif self.triton_docker_mounts or self.triton_docker_labels:
            if self.triton_launch_mode == "docker":
                # Verify format
                if self.triton_docker_mounts:
                    for volume_str in self.triton_docker_mounts:
                        if volume_str.count(":") != 2:
                            raise TritonModelAnalyzerException(
                                "triton_docker_mounts needs to be a list of strings. Each string "
                                "should be of the format <host path>:<container dest>:<access mode>"
                            )
            else:
                logger.warning(
                    f"Triton launch mode is set to {self.triton_launch_mode}. "
                    "Ignoring triton_docker_mounts and triton_docker_labels."
                )

        if self.triton_launch_mode == "docker":
            if not self.triton_docker_image or self.triton_docker_image.isspace():
                raise TritonModelAnalyzerException(
                    "triton_docker_image provided but is empty."
                )

        if self.triton_launch_mode == "c_api":
            if self.triton_server_flags:
                logger.warning(
                    "Triton launch mode is set to C_API. Model Analyzer cannot set "
                    "triton_server_flags."
                )
            if self.triton_output_path:
                logger.warning(
                    "Triton launch mode is set to C_API, so triton logs are not supported. "
                    "Triton server error output can be obtained by setting perf_output_path."
                )

        if self.triton_launch_mode != "docker":
            if self.triton_docker_args:
                logger.warning(
                    "Triton launch mode is not set to docker. Model Analyzer cannot set "
                    "triton_docker_args."
                )
        # If run config search is disabled and no concurrency or request rate is provided,
        # set the default value.
        if self.run_config_search_disable:
            if len(self.concurrency) == 0 and len(self.request_rate) == 0:
                self.concurrency = [1]

        if not self.export_path:
            logger.warning(
                f"--export-path not specified. Using {self._fields['export_path'].default_value()}"
            )
        elif os.path.exists(self.export_path) and not os.path.isdir(self.export_path):
            raise TritonModelAnalyzerException(
                f"Export path {self.export_path} is not a directory."
            )
        elif not os.path.exists(self.export_path):
            os.makedirs(self.export_path)

        if self.num_top_model_configs > 0 and not self.constraints:
            raise TritonModelAnalyzerException(
                "Setting num_top_model_configs > 0 requests a comparison across models. "
                "This requires that global constraints be specified in the config to be used as defaults."
            )

    def _autofill_values(self):
        """
        Fill in the implied or default config values.
        """
        cpu_only = False
        if self.triton_launch_mode != "remote" and (
            len(self.gpus) == 0 or not numba.cuda.is_available()
        ):
            cpu_only = True

        # Set global constraints if latency budget is specified
        if self.latency_budget:
            if self.constraints:
                constraints = self.constraints
                constraints["perf_latency_p99"] = {"max": self.latency_budget}
                if "perf_latency" in constraints:
                    # In case a tighter perf_latency is provided
                    constraints["perf_latency"] = constraints["perf_latency_p99"]
                self._fields["constraints"].set_value(constraints)
            else:
                self._fields["constraints"].set_value(
                    {"perf_latency_p99": {"max": self.latency_budget}}
                )
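        # Editor's note (illustrative, not part of the package source): the
        # shorthand `--latency-budget 100` (hypothetical value) is therefore
        # equivalent to the YAML:
        #
        #   constraints:
        #     perf_latency_p99:
        #       max: 100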

        # Set global constraints if minimum throughput is specified
        if self.min_throughput:
            if self.constraints:
                constraints = self.constraints
                constraints["perf_throughput"] = {"min": self.min_throughput}
                self._fields["constraints"].set_value(constraints)
            else:
                self._fields["constraints"].set_value(
                    {"perf_throughput": {"min": self.min_throughput}}
                )

        # Switch default output fields if request rate is being used
        # and the user didn't specify a custom output field
        if self._using_request_rate():
            if not self._fields["inference_output_fields"].is_set_by_user():
                self.inference_output_fields = (
                    DEFAULT_REQUEST_RATE_INFERENCE_OUTPUT_FIELDS
                )

            if not self._fields["gpu_output_fields"].is_set_by_user():
                self.gpu_output_fields = DEFAULT_REQUEST_RATE_GPU_OUTPUT_FIELDS

        # Switch default output fields if the user specifies a model type of LLM
        # and didn't specify a custom output field
        if self.model_type == "LLM":
            if not self._fields["inference_output_fields"].is_set_by_user():
                self.inference_output_fields = DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS

        new_profile_models = {}
        for i, model in enumerate(self.profile_models):
            new_model = {"cpu_only": (model.cpu_only() or cpu_only)}

            # Objectives
            if not model.objectives():
                new_model["objectives"] = self.objectives
            else:
                new_model["objectives"] = model.objectives()

            # Constraints
            if not model.constraints():
                if (
                    "constraints" in self._fields
                    and self._fields["constraints"].value()
                ):
                    new_model["constraints"] = self.constraints
            else:
                new_model["constraints"] = model.constraints().to_dict()

            # Weighting
            if not model.weighting():
                if "weighting" in self._fields and self.weighting:
                    raise TritonModelAnalyzerException(
                        "Weighting cannot be specified as a global parameter. Please make this a model parameter."
                    )
                else:
                    new_model["weighting"] = DEFAULT_MODEL_WEIGHTING
            else:
                new_model["weighting"] = model.weighting()

            # Shorthands
            if self.latency_budget:
                if "constraints" in new_model:
                    new_model["constraints"]["perf_latency_p99"] = {
                        "max": self.latency_budget
                    }
                    if "perf_latency" in new_model["constraints"]:
                        # In case a tighter perf_latency is provided
                        new_model["constraints"]["perf_latency"] = new_model[
                            "constraints"
                        ]["perf_latency_p99"]
                else:
                    new_model["constraints"] = {
                        "perf_latency_p99": {"max": self.latency_budget}
                    }

            if self.min_throughput:
                if "constraints" in new_model:
                    new_model["constraints"]["perf_throughput"] = {
                        "min": self.min_throughput
                    }
                else:
                    new_model["constraints"] = {
                        "perf_throughput": {"min": self.min_throughput}
                    }

            # Run parameters
            if not model.parameters():
                if self.run_config_search_mode != "optuna":
                    new_model["parameters"] = {
                        "batch_sizes": self.batch_sizes,
                        "concurrency": self.concurrency,
                        "request_rate": self.request_rate,
                    }
                else:
                    if self._fields["batch_sizes"].is_set_by_user():
                        new_model["parameters"] = {"batch_sizes": self.batch_sizes}
                    else:
                        new_model["parameters"] = {"batch_sizes": []}

                    new_model["parameters"]["concurrency"] = self.concurrency
                    new_model["parameters"]["request_rate"] = self.request_rate

            else:
                new_model["parameters"] = {}
                if "batch_sizes" in model.parameters():
                    new_model["parameters"].update(
                        {"batch_sizes": model.parameters()["batch_sizes"]}
                    )
                else:
                    if self.run_config_search_mode != "optuna":
                        new_model["parameters"].update(
                            {"batch_sizes": self.batch_sizes}
                        )
                    else:
                        new_model["parameters"].update({"batch_sizes": []})

                if "concurrency" in model.parameters():
                    new_model["parameters"].update(
                        {"concurrency": model.parameters()["concurrency"]}
                    )
                elif "request_rate" not in model.parameters():
                    new_model["parameters"].update({"concurrency": self.concurrency})
                else:
                    new_model["parameters"].update({"concurrency": []})

                if "request_rate" in model.parameters():
                    new_model["parameters"].update(
                        {"request_rate": model.parameters()["request_rate"]}
                    )
                else:
                    new_model["parameters"].update({"request_rate": self.request_rate})

                if (
                    new_model["parameters"]["request_rate"]
                    and new_model["parameters"]["concurrency"]
                ):
                    raise TritonModelAnalyzerException(
                        "Cannot specify both concurrency and request rate as model parameters."
                    )

            # Perf analyzer flags
            if not model.perf_analyzer_flags():
                new_model["perf_analyzer_flags"] = self.perf_analyzer_flags
            else:
                new_model["perf_analyzer_flags"] = model.perf_analyzer_flags()

            # GenAI Perf flags
            if not model.genai_perf_flags():
                new_model["genai_perf_flags"] = self.genai_perf_flags
            else:
                new_model["genai_perf_flags"] = model.genai_perf_flags()

            # triton server flags
            if not model.triton_server_flags():
                new_model["triton_server_flags"] = self.triton_server_flags
            else:
                new_model["triton_server_flags"] = model.triton_server_flags()

            # triton server env
            if not model.triton_server_environment():
                new_model["triton_server_environment"] = self.triton_server_environment
            else:
                new_model[
                    "triton_server_environment"
                ] = model.triton_server_environment()

            # triton docker args
            if not model.triton_docker_args():
                new_model["triton_docker_args"] = self.triton_docker_args
            else:
                new_model["triton_docker_args"] = model.triton_docker_args()

            # Transfer model config parameters directly
            if model.model_config_parameters():
                new_model["model_config_parameters"] = model.model_config_parameters()

            new_profile_models[model.model_name()] = new_model

        # deepcopy is necessary, else it gets overwritten when updating profile_models
        self._fields["bls_composing_models"] = deepcopy(
            self._fields["bls_composing_models"]
        )
        self._fields["profile_models"].set_value(new_profile_models)

    def _using_request_rate(self) -> bool:
        if self.request_rate or self.request_rate_search_enable:
            return True
        elif (
            self._fields["run_config_search_max_request_rate"].is_set_by_user()
            or self._fields["run_config_search_min_request_rate"].is_set_by_user()
        ):
            return True
        else:
            return self._are_models_using_request_rate()

    def _are_models_using_request_rate(self) -> bool:
        model_using_request_rate = False
        model_using_concurrency = False
        for i, model in enumerate(self.profile_models):
            if model.parameters() and "request_rate" in model.parameters():
                model_using_request_rate = True
            else:
                model_using_concurrency = True

        if model_using_request_rate and model_using_concurrency:
            raise TritonModelAnalyzerException(
                "Parameters in all profiled models must use request-rate-range. "
                "Model Analyzer does not support mixing concurrency-range and request-rate-range."
            )
        else:
            return model_using_request_rate

    def is_request_rate_specified(self, model_parameters: dict) -> bool:
        """
        Returns true if either the model or the config specified request rate
        """
        return (
            "request_rate" in model_parameters
            and model_parameters["request_rate"]
            or self.request_rate_search_enable
            or self.get_config()["run_config_search_min_request_rate"].is_set_by_user()
            or self.get_config()["run_config_search_max_request_rate"].is_set_by_user()
        )
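    # Editor's note (illustrative, not part of the package source): since `and`
    # binds tighter than `or`, the return expression above groups as
    #
    #   ("request_rate" in model_parameters and model_parameters["request_rate"])
    #   or self.request_rate_search_enable
    #   or <either run_config_search_*_request_rate field set by the user>
    #
    # so an empty request_rate entry in the model parameters does not by itself
    # count as "specified".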