triton-model-analyzer 1.48.0__py3-none-any.whl → 1.49.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +10 -14
- model_analyzer/config/generate/model_profile_spec.py +14 -14
- model_analyzer/config/generate/perf_analyzer_config_generator.py +14 -22
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +2 -16
- model_analyzer/config/generate/quick_run_config_generator.py +16 -28
- model_analyzer/config/input/config_defaults.py +3 -15
- model_analyzer/perf_analyzer/perf_analyzer.py +4 -16
- model_analyzer/perf_analyzer/perf_config.py +23 -15
- model_analyzer/plots/detailed_plot.py +41 -54
- model_analyzer/record/metrics_manager.py +7 -15
- model_analyzer/record/types/gpu_free_memory.py +13 -14
- model_analyzer/record/types/gpu_total_memory.py +13 -14
- model_analyzer/record/types/gpu_used_memory.py +13 -14
- model_analyzer/result/parameter_search.py +10 -17
- model_analyzer/result/run_config_measurement.py +29 -24
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/METADATA +1 -1
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/RECORD +21 -21
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/WHEEL +0 -0
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/entry_points.txt +0 -0
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/licenses/LICENSE +0 -0
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/top_level.txt +0 -0
model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 import logging
 from copy import deepcopy
@@ -132,9 +120,11 @@ class BrutePlusBinaryParameterSearchRunConfigGenerator(ConfigGeneratorInterface)
         for result in top_results:
             run_config = deepcopy(result.run_config())
             model_parameters = self._get_model_parameters(model_name)
+            perf_analyzer_flags = self._get_model_perf_analyzer_flags(model_name)
             parameter_search = ParameterSearch(
                 config=self._config,
                 model_parameters=model_parameters,
+                perf_analyzer_flags=perf_analyzer_flags,
                 skip_parameter_sweep=True,
             )
             for parameter in parameter_search.search_parameters():
@@ -151,6 +141,12 @@ class BrutePlusBinaryParameterSearchRunConfigGenerator(ConfigGeneratorInterface)
 
         return {}
 
+    def _get_model_perf_analyzer_flags(self, model_name: str) -> Dict:
+        for model in self._models:
+            if model_name == model.model_name():
+                return model.perf_analyzer_flags()
+        return {}
+
     def _set_parameter(
         self, run_config: RunConfig, model_parameters: Dict, parameter: int
     ) -> RunConfig:
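The change above forwards each model's perf_analyzer flags into ParameterSearch so that a custom stimulus (request-intervals) can suppress the sweep. For illustration only, a minimal standalone sketch of the lookup the new _get_model_perf_analyzer_flags() performs; FakeModel and the free function are hypothetical stand-ins, not Model Analyzer classes.

```python
from typing import Dict, List


class FakeModel:
    """Hypothetical stand-in for a profiled model spec."""

    def __init__(self, name: str, flags: Dict):
        self._name = name
        self._flags = flags

    def model_name(self) -> str:
        return self._name

    def perf_analyzer_flags(self) -> Dict:
        return self._flags


def get_model_perf_analyzer_flags(models: List[FakeModel], model_name: str) -> Dict:
    # Same shape as the new helper: first name match wins, empty dict otherwise.
    for model in models:
        if model_name == model.model_name():
            return model.perf_analyzer_flags()
    return {}


models = [FakeModel("resnet50", {"request-intervals": "intervals.json"})]
print(get_model_perf_analyzer_flags(models, "resnet50"))  # {'request-intervals': 'intervals.json'}
print(get_model_perf_analyzer_flags(models, "bert"))      # {}
```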
model_analyzer/config/generate/model_profile_spec.py

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 from copy import deepcopy
 from typing import List
@@ -22,6 +10,7 @@ from model_analyzer.config.input.objects.config_model_profile_spec import (
     ConfigModelProfileSpec,
 )
 from model_analyzer.device.gpu_device import GPUDevice
+from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
 from model_analyzer.triton.client.client import TritonClient
 from model_analyzer.triton.model.model_config import ModelConfig
 
@@ -72,3 +61,14 @@ class ModelProfileSpec(ConfigModelProfileSpec):
     def is_ensemble(self) -> bool:
         """Returns true if the model is an ensemble"""
         return "ensemble_scheduling" in self._default_model_config
+
+    def is_load_specified(self) -> bool:
+        """
+        Returns true if the model's PA config has specified any of the
+        inference load args (such as concurrency). Else returns false
+        """
+        load_args = PerfAnalyzerConfig.get_inference_load_args()
+        pa_flags = self.perf_analyzer_flags()
+        if pa_flags is None:
+            return False
+        return any(e in pa_flags for e in load_args)
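For illustration, a minimal sketch of the check behind the new is_load_specified() helper, written against a plain dict instead of ModelProfileSpec. INFERENCE_LOAD_ARGS mirrors the inference_load_args list added to PerfAnalyzerConfig in this release.

```python
from typing import Dict, Optional

# Mirrors PerfAnalyzerConfig.inference_load_args (see perf_config.py below).
INFERENCE_LOAD_ARGS = ["concurrency-range", "request-rate-range", "request-intervals"]


def is_load_specified(pa_flags: Optional[Dict]) -> bool:
    # True when the user's perf_analyzer flags already pin the inference load.
    if pa_flags is None:
        return False
    return any(arg in pa_flags for arg in INFERENCE_LOAD_ARGS)


print(is_load_specified({"concurrency-range": 64}))       # True
print(is_load_specified({"measurement-interval": 5000}))  # False
print(is_load_specified(None))                            # False
```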
model_analyzer/config/generate/perf_analyzer_config_generator.py

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 import logging
 from typing import Generator, List, Optional
@@ -169,10 +157,12 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
         self._parameter_results.extend(measurement)
 
     def _create_parameter_list(self) -> List[int]:
-        #
-        #
-        #
-        if self.
+        # Determines the inference load (concurrency or request-rate or request-intervals)
+        # and creates the list of values to use. If nothing is specified by the user, then
+        # concurrency will be used.
+        if "request-intervals" in self._perf_analyzer_flags:
+            return [self._perf_analyzer_flags["request-intervals"]]
+        elif self._cli_config.is_request_rate_specified(self._model_parameters):
             return self._create_request_rate_list()
         else:
             return self._create_concurrency_list()
@@ -207,7 +197,7 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
         for params in utils.generate_parameter_combinations(
             perf_config_non_parameter_values
         ):
-
+            configs_with_inference_load = []
             for parameter in self._parameters:
                 new_perf_config = PerfAnalyzerConfig()
 
@@ -217,7 +207,9 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
 
                 new_perf_config.update_config(params)
 
-                if self.
+                if "request-intervals" in self._perf_analyzer_flags:
+                    pass
+                elif self._cli_config.is_request_rate_specified(self._model_parameters):
                     new_perf_config.update_config({"request-rate-range": parameter})
                 else:
                     new_perf_config.update_config({"concurrency-range": parameter})
@@ -225,8 +217,8 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
                 # User provided flags can override the search parameters
                 new_perf_config.update_config(self._perf_analyzer_flags)
 
-
-                self._configs.append(
+                configs_with_inference_load.append(new_perf_config)
+            self._configs.append(configs_with_inference_load)
 
     def _create_non_parameter_perf_config_values(self) -> dict:
         perf_config_values = {
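The generator above now treats request-intervals as a third way to set the inference load, alongside request rate and concurrency. A rough standalone sketch of that selection logic, with plain arguments in place of the generator's internal state (the parameter names here are hypothetical):

```python
from typing import Dict, List


def create_parameter_list(
    pa_flags: Dict,
    request_rate_specified: bool,
    concurrencies: List[int],
    request_rates: List[int],
) -> List:
    # request-intervals points at a custom stimulus, so there is exactly one "value" to run.
    if "request-intervals" in pa_flags:
        return [pa_flags["request-intervals"]]
    elif request_rate_specified:
        return request_rates
    else:
        return concurrencies


print(create_parameter_list({"request-intervals": "load.json"}, False, [1, 2, 4], [16, 32]))
# ['load.json']
print(create_parameter_list({}, True, [1, 2, 4], [16, 32]))   # [16, 32]
print(create_parameter_list({}, False, [1, 2, 4], [16, 32]))  # [1, 2, 4]
```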
model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py

@@ -1,21 +1,8 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 import logging
-from copy import deepcopy
 from typing import Generator, List, Optional
 
 from model_analyzer.config.generate.concurrency_sweeper import ConcurrencySweeper
@@ -30,7 +17,6 @@ from model_analyzer.config.generate.search_config import SearchConfig
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
 from model_analyzer.config.run.run_config import RunConfig
 from model_analyzer.constants import LOGGER_NAME
-from model_analyzer.result.parameter_search import ParameterSearch
 from model_analyzer.result.result_manager import ResultManager
 from model_analyzer.result.run_config_measurement import RunConfigMeasurement
 
model_analyzer/config/generate/quick_run_config_generator.py

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 import logging
 from sys import maxsize
@@ -507,13 +495,13 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):
 
         perf_analyzer_config.update_config_from_profile_config(model_name, self._config)
 
-
-
-
-
-
-
-
+        if not model.is_load_specified():
+            concurrency = self._calculate_concurrency(dimension_values)
+            perf_config_params = {
+                "batch-size": DEFAULT_BATCH_SIZES,
+                "concurrency-range": concurrency,
+            }
+            perf_analyzer_config.update_config(perf_config_params)
 
         perf_analyzer_config.update_config(model.perf_analyzer_flags())
         return perf_analyzer_config
@@ -703,13 +691,13 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):
             model_config.get_field("name"), self._config
         )
 
-
-
-
-
-
-
-
+        if not model.is_load_specified():
+            default_concurrency = self._calculate_default_concurrency(model_config)
+            perf_config_params = {
+                "batch-size": DEFAULT_BATCH_SIZES,
+                "concurrency-range": default_concurrency,
+            }
+            default_perf_analyzer_config.update_config(perf_config_params)
 
         default_perf_analyzer_config.update_config(model.perf_analyzer_flags())
 
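Both quick-search paths above now skip their default batch-size/concurrency injection when the user has already pinned the load. A minimal sketch of that guard, using a plain dict in place of PerfAnalyzerConfig (apply_defaults is a hypothetical helper):

```python
from typing import Dict

INFERENCE_LOAD_ARGS = ["concurrency-range", "request-rate-range", "request-intervals"]
DEFAULT_BATCH_SIZES = 1


def apply_defaults(perf_config: Dict, user_flags: Dict, concurrency: int) -> Dict:
    load_specified = any(arg in user_flags for arg in INFERENCE_LOAD_ARGS)
    if not load_specified:
        # Defaults only when the user has not chosen a load themselves.
        perf_config.update({"batch-size": DEFAULT_BATCH_SIZES, "concurrency-range": concurrency})
    # User-provided flags still win, as before.
    perf_config.update(user_flags)
    return perf_config


print(apply_defaults({}, {}, 16))
# {'batch-size': 1, 'concurrency-range': 16}
print(apply_defaults({}, {"request-intervals": "load.json"}, 16))
# {'request-intervals': 'load.json'}
```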
model_analyzer/config/input/config_defaults.py

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 import os
 
@@ -64,7 +52,7 @@ DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False
 DEFAULT_CONCURRENCY_SWEEP_DISABLE = False
 DEFAULT_DCGM_DISABLE = False
 DEFAULT_TRITON_LAUNCH_MODE = "local"
-DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:25.
+DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:25.12-py3"
 DEFAULT_TRITON_HTTP_ENDPOINT = "localhost:8000"
 DEFAULT_TRITON_GRPC_ENDPOINT = "localhost:8001"
 DEFAULT_TRITON_METRICS_URL = "http://localhost:8002/metrics"
model_analyzer/perf_analyzer/perf_analyzer.py

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 import csv
 import glob
@@ -432,8 +420,8 @@ class PerfAnalyzer:
         return cmd
 
     def _get_single_model_cmd(self, index):
-        if self._model_type == "
-            cmd = ["genai-perf", "-m", self._config.models_name()]
+        if self._model_type.lower() == "llm":
+            cmd = ["genai-perf", "profile", "-m", self._config.models_name()]
             cmd += self._get_genai_perf_cli_command(index).replace("=", " ").split()
             cmd += ["--"]
             cmd += (
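For LLM models the analyzer now invokes the genai-perf "profile" subcommand instead of the bare binary. A sketch of the resulting command shape, with placeholder values (the model name and argument lists are hypothetical; the real values come from the analyzer's own configs):

```python
model_name = "llama-3-8b"                          # hypothetical model name
genai_perf_args: list = []                         # genai-perf options (omitted here)
perf_analyzer_args = ["--concurrency-range", "4"]  # hypothetical Perf Analyzer passthrough args

# New in 1.49.0: the explicit "profile" subcommand, with "--" separating
# the Perf Analyzer passthrough arguments.
cmd = ["genai-perf", "profile", "-m", model_name]
cmd += genai_perf_args
cmd += ["--"]
cmd += perf_analyzer_args
print(" ".join(cmd))
# genai-perf profile -m llama-3-8b -- --concurrency-range 4
```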
model_analyzer/perf_analyzer/perf_config.py

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 from typing import List
 
@@ -98,6 +86,13 @@ class PerfAnalyzerConfig:
         "collect-metrics",
     ]
 
+    # Only one of these args can be sent to PA, as each one controls the inference load in a different way
+    inference_load_args = [
+        "concurrency-range",
+        "request-rate-range",
+        "request-intervals",
+    ]
+
     def __init__(self):
         """
         Construct a PerfAnalyzerConfig
@@ -108,7 +103,9 @@ class PerfAnalyzerConfig:
         self._options = {
             "-m": None,
             "-x": None,
-
+            # Default to batch size of 1. This would be handled by PA if unspecified,
+            # but we want to be explicit so we can properly print/track values
+            "-b": 1,
             "-u": None,
             "-i": None,
             "-f": None,
@@ -160,6 +157,16 @@ class PerfAnalyzerConfig:
 
         return cls.additive_args[:]
 
+    @classmethod
+    def get_inference_load_args(cls):
+        """
+        Returns
+        -------
+        list of str
+            The Perf Analyzer args that control the inference load
+        """
+        return cls.inference_load_args
+
     def update_config(self, params=None):
         """
         Allows setting values from a params dict
@@ -275,6 +282,7 @@ class PerfAnalyzerConfig:
             "batch-size": self._options["-b"],
             "concurrency-range": self._args["concurrency-range"],
             "request-rate-range": self._args["request-rate-range"],
+            "request-intervals": self._args["request-intervals"],
         }
 
     @classmethod
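The new class-level inference_load_args list and its accessor give the rest of the code one place to ask which Perf Analyzer arguments set the load. A standalone sketch of what that amounts to (LoadArgs is a toy class, not the real PerfAnalyzerConfig):

```python
class LoadArgs:
    # Only one of these should be sent to Perf Analyzer; each controls the
    # inference load in a different way.
    inference_load_args = ["concurrency-range", "request-rate-range", "request-intervals"]

    @classmethod
    def get_inference_load_args(cls):
        return cls.inference_load_args


user_flags = {"request-rate-range": 100, "measurement-interval": 5000}
used = [arg for arg in LoadArgs.get_inference_load_args() if arg in user_flags]
print(used)  # ['request-rate-range'] -> exactly one load-controlling arg is in play
```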
model_analyzer/plots/detailed_plot.py

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 import logging
 import os
@@ -22,6 +10,7 @@ import matplotlib.pyplot as plt
 from matplotlib import patches as mpatches
 
 from model_analyzer.constants import LOGGER_NAME
+from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
 from model_analyzer.record.metrics_manager import MetricsManager
 
 logging.getLogger("matplotlib").setLevel(logging.ERROR)
@@ -89,7 +78,6 @@ class DetailedPlot:
         self._fig.set_figheight(8)
         self._fig.set_figwidth(12)
 
-        self._ax_latency.set_xlabel("Concurrent Client Requests")
         self._ax_latency.set_ylabel(latency_axis_label)
         self._ax_throughput.set_ylabel(throughput_axis_label)
 
@@ -120,29 +108,15 @@ class DetailedPlot:
         """
 
         # TODO-TMA-568: This needs to be updated because there will be multiple model configs
-
-
-
-
-
-
-
-
-
-            ]
-        )
-
-        if (
-            "request-rate-range" in run_config_measurement.model_specific_pa_params()[0]
-            and run_config_measurement.model_specific_pa_params()[0][
-                "request-rate-range"
-            ]
-        ):
-            self._data["request_rate"].append(
-                run_config_measurement.model_specific_pa_params()[0][
-                    "request-rate-range"
-                ]
-            )
+        for load_arg in PerfAnalyzerConfig.get_inference_load_args():
+            if (
+                load_arg in run_config_measurement.model_specific_pa_params()[0]
+                and run_config_measurement.model_specific_pa_params()[0][load_arg]
+            ):
+                data_key = self._get_data_key_from_load_arg(load_arg)
+                self._data[data_key].append(
+                    run_config_measurement.model_specific_pa_params()[0][load_arg]
+                )
 
         self._data["perf_throughput"].append(
             run_config_measurement.get_non_gpu_metric_value(tag="perf_throughput")
@@ -164,25 +138,28 @@ class DetailedPlot:
         on this plot's Axes object
         """
 
-        #
-        if "
+        # Update the x-axis plot title
+        if "request_intervals" in self._data and self._data["request_intervals"][0]:
+            self._ax_latency.set_xlabel("Request Intervals File")
+            sort_indices_key = "request_intervals"
+        elif "request_rate" in self._data and self._data["request_rate"][0]:
             self._ax_latency.set_xlabel("Client Request Rate")
-
-        # Sort the data by request rate or concurrency
-        if "request_rate" in self._data and self._data["request_rate"][0]:
-            sort_indices = list(
-                zip(*sorted(enumerate(self._data["request_rate"]), key=lambda x: x[1]))
-            )[0]
+            sort_indices_key = "request_rate"
         else:
-
-
-
+            self._ax_latency.set_xlabel("Concurrent Client Requests")
+            sort_indices_key = "concurrency"
+
+        sort_indices = list(
+            zip(*sorted(enumerate(self._data[sort_indices_key]), key=lambda x: x[1]))
+        )[0]
 
         sorted_data = {
             key: [data_list[i] for i in sort_indices]
             for key, data_list in self._data.items()
         }
 
+        sorted_data["indices"] = list(map(str, sorted_data[sort_indices_key]))
+
         # Plot latency breakdown bars
         labels = dict(
             zip(
@@ -197,11 +174,6 @@ class DetailedPlot:
         )
         bottoms = None
 
-        if "request_rate" in self._data:
-            sorted_data["indices"] = list(map(str, sorted_data["request_rate"]))
-        else:
-            sorted_data["indices"] = list(map(str, sorted_data["concurrency"]))
-
         # Plot latency breakdown with concurrency casted as string to make uniform x
         for metric, label in labels.items():
             self._ax_latency.bar(
@@ -264,3 +236,18 @@ class DetailedPlot:
         """
 
         self._fig.savefig(os.path.join(filepath, self._name))
+
+    def _get_data_key_from_load_arg(self, load_arg):
+        """
+        Gets the key into _data corresponding with the input load arg
+
+        For example, the load arg "request-rate-range" has the key "request_rate"
+        """
+        # Check if '-range' exists at the end of the input string and remove it
+        if load_arg.endswith("-range"):
+            load_arg = load_arg[:-6]
+
+        # Replace any '-' with '_' in the remaining string
+        data_key = load_arg.replace("-", "_")
+
+        return data_key
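The new _get_data_key_from_load_arg() maps a Perf Analyzer flag name onto the plot's internal _data key. The mapping, reproduced as a free function for clarity:

```python
def get_data_key_from_load_arg(load_arg: str) -> str:
    # Strip a trailing "-range", then swap '-' for '_' to match the _data keys.
    if load_arg.endswith("-range"):
        load_arg = load_arg[: -len("-range")]
    return load_arg.replace("-", "_")


for arg in ["concurrency-range", "request-rate-range", "request-intervals"]:
    print(arg, "->", get_data_key_from_load_arg(arg))
# concurrency-range -> concurrency
# request-rate-range -> request_rate
# request-intervals -> request_intervals
```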
model_analyzer/record/metrics_manager.py

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 import logging
 import os
@@ -782,7 +770,11 @@ class MetricsManager:
     def _print_run_config_info(self, run_config):
         for model_run_config in run_config.model_run_configs():
             perf_config = model_run_config.perf_config()
-            if perf_config["request-
+            if perf_config["request-intervals"]:
+                logger.info(
+                    f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}"
+                )
+            elif perf_config["request-rate-range"]:
                 if perf_config["batch-size"] != 1:
                     logger.info(
                         f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, request-rate-range={perf_config['request-rate-range']}"
model_analyzer/record/types/gpu_free_memory.py

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 from functools import total_ordering
 
@@ -27,6 +15,17 @@ class GPUFreeMemory(IncreasingGPURecord):
 
     tag = "gpu_free_memory"
 
+    @staticmethod
+    def value_function():
+        """
+        Returns the total value from a list
+
+        Returns
+        -------
+        Total value of the list
+        """
+        return sum
+
     def __init__(self, value, device_uuid=None, timestamp=0):
         """
         Parameters
model_analyzer/record/types/gpu_total_memory.py

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 from functools import total_ordering
 
@@ -27,6 +15,17 @@ class GPUTotalMemory(IncreasingGPURecord):
 
     tag = "gpu_total_memory"
 
+    @staticmethod
+    def value_function():
+        """
+        Returns the total value from a list
+
+        Returns
+        -------
+        Total value of the list
+        """
+        return sum
+
     def __init__(self, value, device_uuid=None, timestamp=0):
         """
         Parameters
model_analyzer/record/types/gpu_used_memory.py

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 from functools import total_ordering
 
@@ -27,6 +15,17 @@ class GPUUsedMemory(DecreasingGPURecord):
 
     tag = "gpu_used_memory"
 
+    @staticmethod
+    def value_function():
+        """
+        Returns the total value from a list
+
+        Returns
+        -------
+        Total value of the list
+        """
+        return sum
+
     def __init__(self, value, device_uuid=None, timestamp=0):
         """
         Parameters
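All three GPU memory record types above now expose value_function() returning the builtin sum, which tells the measurement code to total these metrics across GPUs rather than average them. A tiny illustration with made-up per-GPU numbers:

```python
values_per_gpu = [8192, 8192, 4096]  # hypothetical per-GPU memory values (MB)

value_function = sum                 # what GPUFreeMemory/GPUTotalMemory/GPUUsedMemory return
aggregated = value_function(values_per_gpu)
print(aggregated)                    # 20480 -> total across GPUs, not a per-GPU average
```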
model_analyzer/result/parameter_search.py

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 import logging
 from math import log2
@@ -37,6 +25,8 @@ class ParameterSearch:
     - Will sweep from by powers of two from min to max parameter
     - If the user specifies a constraint, the algorithm will perform a binary search
       around the boundary if the constraint is violated
+    - Will not sweep at all if custom stimulus is provided by the user (via the
+      "request-intervals" perf analyzer flag)
 
     Invariant: It is necessary for the user to add new measurements as they are taken
     """
@@ -45,6 +35,7 @@ class ParameterSearch:
         self,
         config: ConfigCommandProfile,
         model_parameters: dict = {},
+        perf_analyzer_flags: dict = {},
         skip_parameter_sweep: bool = False,
     ) -> None:
         """
@@ -59,6 +50,7 @@ class ParameterSearch:
         self._parameter_is_request_rate = config.is_request_rate_specified(
             model_parameters
         )
+        self._inference_load_is_custom = "request-intervals" in perf_analyzer_flags
 
         if self._parameter_is_request_rate:
             self._min_parameter_index = int(
@@ -98,10 +90,11 @@ class ParameterSearch:
         a binary parameter search around the point where the constraint
         violated
         """
-
+        if not self._inference_load_is_custom:
+            yield from self._perform_parameter_sweep()
 
-
-
+        if self._was_constraint_violated():
+            yield from self._perform_binary_parameter_search()
 
     def _perform_parameter_sweep(self) -> Generator[int, None, None]:
         for parameter in (
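With a custom stimulus (the request-intervals flag) the search now skips the sweep entirely, while the constraint-driven binary search still runs if needed. A toy generator showing that control flow, with stand-in values in place of the real sweep and binary search:

```python
from typing import Generator


def search_parameters(
    inference_load_is_custom: bool, constraint_violated: bool
) -> Generator[int, None, None]:
    if not inference_load_is_custom:
        yield from [1, 2, 4, 8]  # stand-in for the powers-of-two sweep
    if constraint_violated:
        yield from [6, 5]        # stand-in for the binary search around the boundary


print(list(search_parameters(inference_load_is_custom=True, constraint_violated=False)))   # []
print(list(search_parameters(inference_load_is_custom=False, constraint_violated=False)))  # [1, 2, 4, 8]
```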
model_analyzer/result/run_config_measurement.py

@@ -1,18 +1,6 @@
 #!/usr/bin/env python3
-
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
 import logging
 from copy import deepcopy
@@ -48,6 +36,9 @@ class RunConfigMeasurement:
         self._model_variants_name = model_variants_name
 
         self._gpu_data = gpu_data
+        # Note: "_avg_gpu_data" is a historical name. This actually contains
+        # aggregated GPU metrics: memory metrics are SUMMED, while utilization
+        # and power metrics are AVERAGED across GPUs.
         self._avg_gpu_data = self._average_list(list(self._gpu_data.values()))
         self._avg_gpu_data_from_tag = self._get_avg_gpu_data_from_tag()
 
@@ -213,7 +204,9 @@ class RunConfigMeasurement:
 
     def get_gpu_metric(self, tag: str) -> Optional[Record]:
         """
-        Returns the
+        Returns the aggregated Record associated with this GPU metric
+        across all GPUs. GPU memory metrics are summed; other metrics
+        like utilization are averaged.
 
         Parameters
         ----------
@@ -224,7 +217,7 @@ class RunConfigMeasurement:
         Returns
         -------
         Record:
-            of
+            of aggregated GPU metric Records corresponding to this tag,
             or None if tag not found
         """
         if tag in self._avg_gpu_data_from_tag:
@@ -320,8 +313,10 @@ class RunConfigMeasurement:
         Returns
         -------
         float :
-
+            Aggregated value of the GPU metric Records across all GPUs
             corresponding to the tag, default_value if tag not found.
+            GPU memory metrics are summed; other metrics like utilization
+            are averaged.
         """
         metric = self.get_gpu_metric(tag)
         if metric is None:
@@ -615,7 +610,9 @@ class RunConfigMeasurement:
 
     def _average_list(self, row_list):
         """
-
+        Aggregate a 2d list of GPU records across GPUs.
+        Uses each record type's value_function() to determine
+        whether to sum or average the metric.
         """
 
         if not row_list:
@@ -623,13 +620,21 @@ class RunConfigMeasurement:
         else:
             N = len(row_list)
             d = len(row_list[0])
-
+            agg = [0 for _ in range(d)]
             for i in range(d):
-
-
-
-                )
-
+                # Sum the records across all GPUs
+                summed_record = sum(
+                    [row_list[j][i] for j in range(1, N)], start=row_list[0][i]
+                )
+                # Get the aggregation function for this record type
+                value_func = row_list[0][i].value_function()
+                # If the value function is sum, use the sum directly
+                # If the value function is mean/average, divide by N
+                if value_func == sum:
+                    agg[i] = summed_record
+                else:
+                    agg[i] = summed_record / N
+            return agg
 
     def _deserialize_gpu_data(
         self, serialized_gpu_data: Dict
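The reworked _average_list() sums records across GPUs and only divides by the GPU count when the record type's value_function() is an average. A toy reproduction of that rule (ToyRecord stands in for the real Record classes, which support +, / and value_function()):

```python
class ToyRecord:
    def __init__(self, value, value_function):
        self.value = value
        self._vf = value_function

    def value_function(self):
        return self._vf

    def __add__(self, other):
        return ToyRecord(self.value + other.value, self._vf)

    def __truediv__(self, n):
        return ToyRecord(self.value / n, self._vf)


def mean(values):  # stand-in "average" value function
    return sum(values) / len(values)


def aggregate_across_gpus(row_list):
    # row_list[j][i] = metric i measured on GPU j
    N, d = len(row_list), len(row_list[0])
    agg = [0] * d
    for i in range(d):
        summed = sum([row_list[j][i] for j in range(1, N)], start=row_list[0][i])
        # Sum for memory-style records; divide by GPU count for utilization-style ones.
        agg[i] = summed if row_list[0][i].value_function() == sum else summed / N
    return agg


gpu0 = [ToyRecord(8192, sum), ToyRecord(60.0, mean)]  # [used memory (MB), utilization (%)]
gpu1 = [ToyRecord(4096, sum), ToyRecord(80.0, mean)]
memory, util = aggregate_across_gpus([gpu0, gpu1])
print(memory.value, util.value)  # 12288 70.0
```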
{triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: triton-model-analyzer
-Version: 1.48.0
+Version: 1.49.0
 Summary: Triton Model Analyzer is a tool to profile and analyze the runtime performance of one or more models on the Triton Inference Server
 Author-email: "NVIDIA Inc." <sw-dl-triton@nvidia.com>
 License-Expression: Apache-2.0
{triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/RECORD

@@ -11,7 +11,7 @@ model_analyzer/config/__init__.py,sha256=1vqDBvdG5Wrb-tDrHGyS9LC_cQst-QqLt5wOa3T
 model_analyzer/config/generate/__init__.py,sha256=4XgtPRt-6E1qrP6lxcqPIaEQb3V0yYu6EMtnVwAKdIY,647
 model_analyzer/config/generate/automatic_model_config_generator.py,sha256=EhpeYZlkAoSiKq1_QunXUBzbB7jPXSU2kyi3UpnzuOs,5797
 model_analyzer/config/generate/base_model_config_generator.py,sha256=sT8wghsLRgVT7-AWMTkrvdX-CeahFYdXzGEW0QgnQaI,12817
-model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py,sha256=
+model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py,sha256=fQ4NGO-1nUpX1wHbw6DPqnlXPGNCBLE_MrfWwYMpInk,6175
 model_analyzer/config/generate/brute_run_config_generator.py,sha256=bKYV3jFNpeE4cCIvBG8a3VYItjPzA0Lu_hyFukaQsUw,5484
 model_analyzer/config/generate/concurrency_sweeper.py,sha256=_VRCRw6iNgF3fJBdP46mywsy9Jz0QTBNSXyIHM4X0gk,2907
 model_analyzer/config/generate/config_generator_interface.py,sha256=4r2u5t7yrAqEeDTsfiPWEgqU0pesG3N5DChfweFPLm8,1681
@@ -20,15 +20,15 @@ model_analyzer/config/generate/coordinate_data.py,sha256=deqoRIvO0aN5D5sNDZcjmT0
 model_analyzer/config/generate/generator_utils.py,sha256=5hj7qGzVs-oHk6UK_ggBzKJiTDVdX6MSLrXyEfU-kFE,4157
 model_analyzer/config/generate/manual_model_config_generator.py,sha256=_Y78ORijg5E6c2a2zESwJzK55S8fbhoy2rqFTQq-seA,7088
 model_analyzer/config/generate/model_config_generator_factory.py,sha256=5BjSJxRmrEVy3LyjIPnMT-QCz8GABa62oY1VvJfmhbE,3334
-model_analyzer/config/generate/model_profile_spec.py,sha256=
+model_analyzer/config/generate/model_profile_spec.py,sha256=fOCTNSK_GOOQ1FaRuCo_rDeVEf8H_67hUVERFlRndtM,2739
 model_analyzer/config/generate/model_run_config_generator.py,sha256=Ip1djtZ3wlwG_Zs3YivIlJxmwS1TsNa_7aMCU9bCQP0,5484
 model_analyzer/config/generate/model_variant_name_manager.py,sha256=qPAcvDQhkbArkW-A1A1mdRyqALUgfLlyV3dhNV6jKgs,5362
 model_analyzer/config/generate/neighborhood.py,sha256=s9g9igetHvF6Xm9FhuzjbhPBVWO65SngABrX3RGOUBQ,19481
 model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py,sha256=iYCzT2gH-DH5mNbRMGM_UNyPNfRkObvjcrvy0TSKPD8,5861
 model_analyzer/config/generate/optuna_run_config_generator.py,sha256=Z09p5tZP8wl0hp9NpHXpGovv1dZZkZzH11FnuXe8HhU,33099
-model_analyzer/config/generate/perf_analyzer_config_generator.py,sha256=
-model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py,sha256=
-model_analyzer/config/generate/quick_run_config_generator.py,sha256=
+model_analyzer/config/generate/perf_analyzer_config_generator.py,sha256=cCLQTV5V65QelGFMP3B4JaYvejNZj4tHJUQSZ3nTZSY,11112
+model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py,sha256=hAL6YD6hqLDL7ttOgJuDIUvEe6BpHNlYv82cQdHUo5o,4516
+model_analyzer/config/generate/quick_run_config_generator.py,sha256=ccs8W7Bh20pmxel5iQF92pF7pirpaBQeveQNhUwWPq0,27719
 model_analyzer/config/generate/run_config_generator_factory.py,sha256=KPjZGz0R-upqf7pwrtWrVRroNC-aDPeGZd63XR92zDU,13008
 model_analyzer/config/generate/search_config.py,sha256=mvQaoQ9wIeNEHWJtIwXh0JujPQZqXLLXmWpI7uBHDGA,3655
 model_analyzer/config/generate/search_dimension.py,sha256=iUoh4IhhMy9zhMhaY-q9DoBEku4--4o4j8RYQfXUVVk,2375
@@ -39,7 +39,7 @@ model_analyzer/config/input/__init__.py,sha256=1vqDBvdG5Wrb-tDrHGyS9LC_cQst-QqLt
 model_analyzer/config/input/config_command.py,sha256=63Y6aJrGBA-tR5eljlZSNwgFbHc4hZYd8mEOpmdrJxc,18776
 model_analyzer/config/input/config_command_profile.py,sha256=P5V0y_J3p6BCDCu_QBsl5AXaz_Y73y7CpiessWbuCy4,70509
 model_analyzer/config/input/config_command_report.py,sha256=Igiid6XK5OB23jvXZKPI4Pg5ZsSDkXgHU-QAGOo6_-E,9535
-model_analyzer/config/input/config_defaults.py,sha256=
+model_analyzer/config/input/config_defaults.py,sha256=DYmjHUPQxqhlhRmE2VtzLBHhdwhk-zXkMrlTJuYQGTg,6202
 model_analyzer/config/input/config_enum.py,sha256=lu9cUz_MY52EcLEXxcLYWqB5If6FhsM1K8w1srzhDt4,2386
 model_analyzer/config/input/config_field.py,sha256=XlqlouNuPENEYCBLYtHxsfMACrFlcYZGkbh6yE1psNo,5156
 model_analyzer/config/input/config_list_generic.py,sha256=-4vhDxIEMPOwYcUIkMIFzxJ61mOmX6GEgGCpTfbCFsc,3355
@@ -103,24 +103,24 @@ model_analyzer/output/file_writer.py,sha256=NbAbjLFBcHZMYOJF-3Af9dqGVjS8WRGsQDid
 model_analyzer/output/output_writer.py,sha256=JZ-e0uzrpNybKaAup0hLmvIuGn9v1Z5OsB8yMHX074A,1200
 model_analyzer/perf_analyzer/__init__.py,sha256=oA0IPbS32uHtDFDRhaVyzOCU717OJAgXZkq9db1uGaA,647
 model_analyzer/perf_analyzer/genai_perf_config.py,sha256=9g4081ttiAqV1H3vft3Brqx3nArslyAFspJODboj72g,5673
-model_analyzer/perf_analyzer/perf_analyzer.py,sha256=
-model_analyzer/perf_analyzer/perf_config.py,sha256=
+model_analyzer/perf_analyzer/perf_analyzer.py,sha256=wcE6CIL_OKHFnJkipXb_mP8koBLXHOZb51OT4kR-JMQ,31067
+model_analyzer/perf_analyzer/perf_config.py,sha256=nQHxZaIGp--JoCeTp2c5spA_o5o6H4dAgnLyrmvlGvM,14718
 model_analyzer/plots/__init__.py,sha256=1vqDBvdG5Wrb-tDrHGyS9LC_cQst-QqLt5wOa3TD9rI,647
-model_analyzer/plots/detailed_plot.py,sha256=
+model_analyzer/plots/detailed_plot.py,sha256=nRxPM3MmsUsvmKpzJYST991pcmYCEXQWj2EhA11gTXg,8164
 model_analyzer/plots/plot_manager.py,sha256=CFDUSF8Xm-1Uesg8dQSC5z8JSXa40mKtDNN2kVKP0ek,8962
 model_analyzer/plots/simple_plot.py,sha256=udtWBXy_Y2UPWFEP4xhb-FWfjCjB4OWJwWN5lyJALhc,7163
 model_analyzer/record/__init__.py,sha256=oA0IPbS32uHtDFDRhaVyzOCU717OJAgXZkq9db1uGaA,647
 model_analyzer/record/gpu_record.py,sha256=7X9g6Y1efqiA9a9i80iIxLLWnk6121pYtk36WD8d62c,1872
-model_analyzer/record/metrics_manager.py,sha256=
+model_analyzer/record/metrics_manager.py,sha256=D3gBuL73maQbWNq_gNMUVXisfZnwVwhiMHFgJ2ZB_3c,32173
 model_analyzer/record/record.py,sha256=asjFF8E3t6abXYd9RZCOG-w1AVVZRb5u9PHgxvMqgGA,7284
 model_analyzer/record/record_aggregator.py,sha256=YFA1Av6m3oB_0kTGgvtUZEt4TZz8vL85uqnDCVRdvJI,8051
 model_analyzer/record/types/__init__.py,sha256=oA0IPbS32uHtDFDRhaVyzOCU717OJAgXZkq9db1uGaA,647
 model_analyzer/record/types/cpu_available_ram.py,sha256=DHhg_pePj8A0gSCPLIAHJnxZpdwoaWcaRMiBHyC5nTU,2431
 model_analyzer/record/types/cpu_used_ram.py,sha256=h26jXoyGBHEeiE2Mvmwo6hpVaGBBYvZNQ8Fo0EMIBL4,2412
-model_analyzer/record/types/gpu_free_memory.py,sha256=
+model_analyzer/record/types/gpu_free_memory.py,sha256=_pksqjMi3guIMBIGVEpBamY86nT1V8o2phvEfvhsHBM,2341
 model_analyzer/record/types/gpu_power_usage.py,sha256=eIlggXrIJ3E9qveprOiVNrMsITom4WiWKsLaDJMD8Lg,2903
-model_analyzer/record/types/gpu_total_memory.py,sha256=
-model_analyzer/record/types/gpu_used_memory.py,sha256=
+model_analyzer/record/types/gpu_total_memory.py,sha256=QI_MFIP2G38mmpsml6Qmyz3bvG5tgKCpa8RziWIfM4k,2346
+model_analyzer/record/types/gpu_used_memory.py,sha256=3tdjaSffCVLcfG7kLiNs3muwm-mBZld-n2VEKwJ450w,2337
 model_analyzer/record/types/gpu_utilization.py,sha256=h9wqr1NtBDpYzHKTmZyXUhRriEz1_DAJRfYITmSQdsE,2880
 model_analyzer/record/types/inter_token_latency_avg.py,sha256=i7jqWwdBcRyMblzdv-s8d1ET2-BdQq39WPt5WFaO7nE,1744
 model_analyzer/record/types/inter_token_latency_base.py,sha256=fUf-0aT_TbbIH7iKbQDEu2sAser6G24--MAn8Vac4bA,2062
@@ -167,14 +167,14 @@ model_analyzer/result/__init__.py,sha256=1vqDBvdG5Wrb-tDrHGyS9LC_cQst-QqLt5wOa3T
 model_analyzer/result/constraint_manager.py,sha256=nfHsBbX-66QN19uMH8ZBn68_mAaL0M30HMIG9N8KcBg,5005
 model_analyzer/result/model_config_measurement.py,sha256=qwsPF-ea17Gl5EyFu9OWNWJrvl5hWzeqAkQo5bPgwfE,10657
 model_analyzer/result/model_constraints.py,sha256=HXtbGK8mswXpCwWwrAantXb4sxhGo1bCWurwf9vW9KA,2687
-model_analyzer/result/parameter_search.py,sha256=
+model_analyzer/result/parameter_search.py,sha256=kw-kkxV7FlHKYThcdZoybaIxH-dILP5lAmdNc1OW_RI,8901
 model_analyzer/result/result_manager.py,sha256=e_PWQV_33IM4SiLI7BGcmVtEMAe6IE_dZWbaf-DkXBo,15477
 model_analyzer/result/result_statistics.py,sha256=wvqs64W8rZftN59rcQiH7Bmd0dfSAD7a9BGK_K1LYTI,4343
 model_analyzer/result/result_table.py,sha256=NYwiPtkD_uSBn61omvkMvuLi-FBdMq0wHDlgcz9Gk7w,5843
 model_analyzer/result/result_table_manager.py,sha256=UKAMSbS8o3CtM_4uSNaDNWxX6ZlGRqohgVcrZeihtgY,23643
 model_analyzer/result/result_utils.py,sha256=EJXMo7csgqwonROb8_-hFeysnu-JDfj2Gf8naWHsw2s,1379
 model_analyzer/result/results.py,sha256=YZlz5oJPPYNM4Nub4r_UNKMphKDT44_30iVjp5S4lds,8675
-model_analyzer/result/run_config_measurement.py,sha256=
+model_analyzer/result/run_config_measurement.py,sha256=CKlz8l1dvcFaThZpeKrEr9eLw3drlT3gVBSrsFZf6ZU,21577
 model_analyzer/result/run_config_result.py,sha256=cs2fE-ISeInBErfJYG_L_3BuJjXNPDzIASYmwgU1JYE,6422
 model_analyzer/result/run_config_result_comparator.py,sha256=xLRYEdF2TLRSbCl0Qj3OTfNiYa5RPtOkv_BtQibfwfc,3759
 model_analyzer/result/sorted_results.py,sha256=ScWaT44X4-kplFv4lo9AClXEpA22DtwA6t8GPS2BTiw,4694
@@ -196,9 +196,9 @@ model_analyzer/triton/server/server_config.py,sha256=3pNQAnUmSXVAO7pKLNWak7AFGm1
 model_analyzer/triton/server/server_docker.py,sha256=38e_tMniv2PBiEIh2KMxgqsMZlttL0yCrpD0lKd0r5g,8248
 model_analyzer/triton/server/server_factory.py,sha256=WMZfXFQfO9aeFxCWewQhRk9ygMcne0ershGZ75XQ5IE,11152
 model_analyzer/triton/server/server_local.py,sha256=i5t5MaDwfvWqBA3zG15GPkGiUm6I4Bb4i0d-wBP0WNs,5406
-triton_model_analyzer-1.
-triton_model_analyzer-1.
-triton_model_analyzer-1.
-triton_model_analyzer-1.
-triton_model_analyzer-1.
-triton_model_analyzer-1.
+triton_model_analyzer-1.49.0.dist-info/licenses/LICENSE,sha256=vs3gOjyLmy49WMe2XKf_2eGOmBYjlsw3QFKYPB-qpHw,9144
+triton_model_analyzer-1.49.0.dist-info/METADATA,sha256=ebuhiEKjcPklcz1N1BXDHRZRM2B_a1OyAHCGBeirh_I,2512
+triton_model_analyzer-1.49.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+triton_model_analyzer-1.49.0.dist-info/entry_points.txt,sha256=k6Q0D76sL3qUNxe80hUThftycJAT7Sj5dBk8IjHXJoI,66
+triton_model_analyzer-1.49.0.dist-info/top_level.txt,sha256=mTebknhUDSE7cCVyUtpzNVQ-tEUi8zZKFvp15xpIP2c,15
+triton_model_analyzer-1.49.0.dist-info/RECORD,,
{triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/WHEEL
RENAMED
File without changes
{triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/entry_points.txt
RENAMED
File without changes
{triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/licenses/LICENSE
RENAMED
File without changes
{triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/top_level.txt
RENAMED
File without changes