triton-model-analyzer 1.48.0__py3-none-any.whl → 1.50.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +10 -14
- model_analyzer/config/generate/model_profile_spec.py +76 -19
- model_analyzer/config/generate/perf_analyzer_config_generator.py +14 -22
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +2 -16
- model_analyzer/config/generate/quick_run_config_generator.py +86 -35
- model_analyzer/config/generate/run_config_generator_factory.py +195 -31
- model_analyzer/config/input/config_command.py +81 -20
- model_analyzer/config/input/config_command_profile.py +41 -14
- model_analyzer/config/input/config_defaults.py +3 -15
- model_analyzer/perf_analyzer/perf_analyzer.py +4 -16
- model_analyzer/perf_analyzer/perf_config.py +23 -15
- model_analyzer/plots/detailed_plot.py +41 -54
- model_analyzer/record/metrics_manager.py +7 -15
- model_analyzer/record/types/gpu_free_memory.py +13 -14
- model_analyzer/record/types/gpu_total_memory.py +13 -14
- model_analyzer/record/types/gpu_used_memory.py +13 -14
- model_analyzer/result/parameter_search.py +10 -17
- model_analyzer/result/run_config_measurement.py +29 -24
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/METADATA +1 -1
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/RECORD +24 -24
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/WHEEL +1 -1
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/entry_points.txt +0 -0
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/licenses/LICENSE +0 -0
- {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/top_level.txt +0 -0
|
@@ -1,18 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
#
|
|
4
|
-
#
|
|
5
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
-
# you may not use this file except in compliance with the License.
|
|
7
|
-
# You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
-
# See the License for the specific language governing permissions and
|
|
15
|
-
# limitations under the License.
|
|
2
|
+
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
4
|
|
|
17
5
|
import logging
|
|
18
6
|
import os
|
|
@@ -22,6 +10,7 @@ import matplotlib.pyplot as plt
|
|
|
22
10
|
from matplotlib import patches as mpatches
|
|
23
11
|
|
|
24
12
|
from model_analyzer.constants import LOGGER_NAME
|
|
13
|
+
from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
|
|
25
14
|
from model_analyzer.record.metrics_manager import MetricsManager
|
|
26
15
|
|
|
27
16
|
logging.getLogger("matplotlib").setLevel(logging.ERROR)
|
|
@@ -89,7 +78,6 @@ class DetailedPlot:
|
|
|
89
78
|
self._fig.set_figheight(8)
|
|
90
79
|
self._fig.set_figwidth(12)
|
|
91
80
|
|
|
92
|
-
self._ax_latency.set_xlabel("Concurrent Client Requests")
|
|
93
81
|
self._ax_latency.set_ylabel(latency_axis_label)
|
|
94
82
|
self._ax_throughput.set_ylabel(throughput_axis_label)
|
|
95
83
|
|
|
@@ -120,29 +108,15 @@ class DetailedPlot:
|
|
|
120
108
|
"""
|
|
121
109
|
|
|
122
110
|
# TODO-TMA-568: This needs to be updated because there will be multiple model configs
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
]
|
|
133
|
-
)
|
|
134
|
-
|
|
135
|
-
if (
|
|
136
|
-
"request-rate-range" in run_config_measurement.model_specific_pa_params()[0]
|
|
137
|
-
and run_config_measurement.model_specific_pa_params()[0][
|
|
138
|
-
"request-rate-range"
|
|
139
|
-
]
|
|
140
|
-
):
|
|
141
|
-
self._data["request_rate"].append(
|
|
142
|
-
run_config_measurement.model_specific_pa_params()[0][
|
|
143
|
-
"request-rate-range"
|
|
144
|
-
]
|
|
145
|
-
)
|
|
111
|
+
for load_arg in PerfAnalyzerConfig.get_inference_load_args():
|
|
112
|
+
if (
|
|
113
|
+
load_arg in run_config_measurement.model_specific_pa_params()[0]
|
|
114
|
+
and run_config_measurement.model_specific_pa_params()[0][load_arg]
|
|
115
|
+
):
|
|
116
|
+
data_key = self._get_data_key_from_load_arg(load_arg)
|
|
117
|
+
self._data[data_key].append(
|
|
118
|
+
run_config_measurement.model_specific_pa_params()[0][load_arg]
|
|
119
|
+
)
|
|
146
120
|
|
|
147
121
|
self._data["perf_throughput"].append(
|
|
148
122
|
run_config_measurement.get_non_gpu_metric_value(tag="perf_throughput")
|
|
@@ -164,25 +138,28 @@ class DetailedPlot:
|
|
|
164
138
|
on this plot's Axes object
|
|
165
139
|
"""
|
|
166
140
|
|
|
167
|
-
#
|
|
168
|
-
if "
|
|
141
|
+
# Update the x-axis plot title
|
|
142
|
+
if "request_intervals" in self._data and self._data["request_intervals"][0]:
|
|
143
|
+
self._ax_latency.set_xlabel("Request Intervals File")
|
|
144
|
+
sort_indices_key = "request_intervals"
|
|
145
|
+
elif "request_rate" in self._data and self._data["request_rate"][0]:
|
|
169
146
|
self._ax_latency.set_xlabel("Client Request Rate")
|
|
170
|
-
|
|
171
|
-
# Sort the data by request rate or concurrency
|
|
172
|
-
if "request_rate" in self._data and self._data["request_rate"][0]:
|
|
173
|
-
sort_indices = list(
|
|
174
|
-
zip(*sorted(enumerate(self._data["request_rate"]), key=lambda x: x[1]))
|
|
175
|
-
)[0]
|
|
147
|
+
sort_indices_key = "request_rate"
|
|
176
148
|
else:
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
149
|
+
self._ax_latency.set_xlabel("Concurrent Client Requests")
|
|
150
|
+
sort_indices_key = "concurrency"
|
|
151
|
+
|
|
152
|
+
sort_indices = list(
|
|
153
|
+
zip(*sorted(enumerate(self._data[sort_indices_key]), key=lambda x: x[1]))
|
|
154
|
+
)[0]
|
|
180
155
|
|
|
181
156
|
sorted_data = {
|
|
182
157
|
key: [data_list[i] for i in sort_indices]
|
|
183
158
|
for key, data_list in self._data.items()
|
|
184
159
|
}
|
|
185
160
|
|
|
161
|
+
sorted_data["indices"] = list(map(str, sorted_data[sort_indices_key]))
|
|
162
|
+
|
|
186
163
|
# Plot latency breakdown bars
|
|
187
164
|
labels = dict(
|
|
188
165
|
zip(
|
|
@@ -197,11 +174,6 @@ class DetailedPlot:
|
|
|
197
174
|
)
|
|
198
175
|
bottoms = None
|
|
199
176
|
|
|
200
|
-
if "request_rate" in self._data:
|
|
201
|
-
sorted_data["indices"] = list(map(str, sorted_data["request_rate"]))
|
|
202
|
-
else:
|
|
203
|
-
sorted_data["indices"] = list(map(str, sorted_data["concurrency"]))
|
|
204
|
-
|
|
205
177
|
# Plot latency breakdown with concurrency casted as string to make uniform x
|
|
206
178
|
for metric, label in labels.items():
|
|
207
179
|
self._ax_latency.bar(
|
|
@@ -264,3 +236,18 @@ class DetailedPlot:
|
|
|
264
236
|
"""
|
|
265
237
|
|
|
266
238
|
self._fig.savefig(os.path.join(filepath, self._name))
|
|
239
|
+
|
|
240
|
+
def _get_data_key_from_load_arg(self, load_arg):
|
|
241
|
+
"""
|
|
242
|
+
Gets the key into _data corresponding with the input load arg
|
|
243
|
+
|
|
244
|
+
For example, the load arg "request-rate-range" has the key "request_rate"
|
|
245
|
+
"""
|
|
246
|
+
# Check if '-range' exists at the end of the input string and remove it
|
|
247
|
+
if load_arg.endswith("-range"):
|
|
248
|
+
load_arg = load_arg[:-6]
|
|
249
|
+
|
|
250
|
+
# Replace any '-' with '_' in the remaining string
|
|
251
|
+
data_key = load_arg.replace("-", "_")
|
|
252
|
+
|
|
253
|
+
return data_key
|
|
@@ -1,18 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
#
|
|
4
|
-
#
|
|
5
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
-
# you may not use this file except in compliance with the License.
|
|
7
|
-
# You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
-
# See the License for the specific language governing permissions and
|
|
15
|
-
# limitations under the License.
|
|
2
|
+
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
4
|
|
|
17
5
|
import logging
|
|
18
6
|
import os
|
|
@@ -782,7 +770,11 @@ class MetricsManager:
|
|
|
782
770
|
def _print_run_config_info(self, run_config):
|
|
783
771
|
for model_run_config in run_config.model_run_configs():
|
|
784
772
|
perf_config = model_run_config.perf_config()
|
|
785
|
-
if perf_config["request-
|
|
773
|
+
if perf_config["request-intervals"]:
|
|
774
|
+
logger.info(
|
|
775
|
+
f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}"
|
|
776
|
+
)
|
|
777
|
+
elif perf_config["request-rate-range"]:
|
|
786
778
|
if perf_config["batch-size"] != 1:
|
|
787
779
|
logger.info(
|
|
788
780
|
f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, request-rate-range={perf_config['request-rate-range']}"
|
|
@@ -1,18 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
#
|
|
4
|
-
#
|
|
5
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
-
# you may not use this file except in compliance with the License.
|
|
7
|
-
# You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
-
# See the License for the specific language governing permissions and
|
|
15
|
-
# limitations under the License.
|
|
2
|
+
# SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
4
|
|
|
17
5
|
from functools import total_ordering
|
|
18
6
|
|
|
@@ -27,6 +15,17 @@ class GPUFreeMemory(IncreasingGPURecord):
|
|
|
27
15
|
|
|
28
16
|
tag = "gpu_free_memory"
|
|
29
17
|
|
|
18
|
+
@staticmethod
|
|
19
|
+
def value_function():
|
|
20
|
+
"""
|
|
21
|
+
Returns the total value from a list
|
|
22
|
+
|
|
23
|
+
Returns
|
|
24
|
+
-------
|
|
25
|
+
Total value of the list
|
|
26
|
+
"""
|
|
27
|
+
return sum
|
|
28
|
+
|
|
30
29
|
def __init__(self, value, device_uuid=None, timestamp=0):
|
|
31
30
|
"""
|
|
32
31
|
Parameters
|
|
@@ -1,18 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
#
|
|
4
|
-
#
|
|
5
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
-
# you may not use this file except in compliance with the License.
|
|
7
|
-
# You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
-
# See the License for the specific language governing permissions and
|
|
15
|
-
# limitations under the License.
|
|
2
|
+
# SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
4
|
|
|
17
5
|
from functools import total_ordering
|
|
18
6
|
|
|
@@ -27,6 +15,17 @@ class GPUTotalMemory(IncreasingGPURecord):
|
|
|
27
15
|
|
|
28
16
|
tag = "gpu_total_memory"
|
|
29
17
|
|
|
18
|
+
@staticmethod
|
|
19
|
+
def value_function():
|
|
20
|
+
"""
|
|
21
|
+
Returns the total value from a list
|
|
22
|
+
|
|
23
|
+
Returns
|
|
24
|
+
-------
|
|
25
|
+
Total value of the list
|
|
26
|
+
"""
|
|
27
|
+
return sum
|
|
28
|
+
|
|
30
29
|
def __init__(self, value, device_uuid=None, timestamp=0):
|
|
31
30
|
"""
|
|
32
31
|
Parameters
|
|
@@ -1,18 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
#
|
|
4
|
-
#
|
|
5
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
-
# you may not use this file except in compliance with the License.
|
|
7
|
-
# You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
-
# See the License for the specific language governing permissions and
|
|
15
|
-
# limitations under the License.
|
|
2
|
+
# SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
4
|
|
|
17
5
|
from functools import total_ordering
|
|
18
6
|
|
|
@@ -27,6 +15,17 @@ class GPUUsedMemory(DecreasingGPURecord):
|
|
|
27
15
|
|
|
28
16
|
tag = "gpu_used_memory"
|
|
29
17
|
|
|
18
|
+
@staticmethod
|
|
19
|
+
def value_function():
|
|
20
|
+
"""
|
|
21
|
+
Returns the total value from a list
|
|
22
|
+
|
|
23
|
+
Returns
|
|
24
|
+
-------
|
|
25
|
+
Total value of the list
|
|
26
|
+
"""
|
|
27
|
+
return sum
|
|
28
|
+
|
|
30
29
|
def __init__(self, value, device_uuid=None, timestamp=0):
|
|
31
30
|
"""
|
|
32
31
|
Parameters
|
|
@@ -1,18 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
#
|
|
4
|
-
#
|
|
5
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
-
# you may not use this file except in compliance with the License.
|
|
7
|
-
# You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
-
# See the License for the specific language governing permissions and
|
|
15
|
-
# limitations under the License.
|
|
2
|
+
# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
4
|
|
|
17
5
|
import logging
|
|
18
6
|
from math import log2
|
|
@@ -37,6 +25,8 @@ class ParameterSearch:
|
|
|
37
25
|
- Will sweep from by powers of two from min to max parameter
|
|
38
26
|
- If the user specifies a constraint, the algorithm will perform a binary search
|
|
39
27
|
around the boundary if the constraint is violated
|
|
28
|
+
- Will not sweep at all if custom stimulus is provided by the user (via the
|
|
29
|
+
"request-intervals" perf analyzer flag)
|
|
40
30
|
|
|
41
31
|
Invariant: It is necessary for the user to add new measurements as they are taken
|
|
42
32
|
"""
|
|
@@ -45,6 +35,7 @@ class ParameterSearch:
|
|
|
45
35
|
self,
|
|
46
36
|
config: ConfigCommandProfile,
|
|
47
37
|
model_parameters: dict = {},
|
|
38
|
+
perf_analyzer_flags: dict = {},
|
|
48
39
|
skip_parameter_sweep: bool = False,
|
|
49
40
|
) -> None:
|
|
50
41
|
"""
|
|
@@ -59,6 +50,7 @@ class ParameterSearch:
|
|
|
59
50
|
self._parameter_is_request_rate = config.is_request_rate_specified(
|
|
60
51
|
model_parameters
|
|
61
52
|
)
|
|
53
|
+
self._inference_load_is_custom = "request-intervals" in perf_analyzer_flags
|
|
62
54
|
|
|
63
55
|
if self._parameter_is_request_rate:
|
|
64
56
|
self._min_parameter_index = int(
|
|
@@ -98,10 +90,11 @@ class ParameterSearch:
|
|
|
98
90
|
a binary parameter search around the point where the constraint
|
|
99
91
|
violated
|
|
100
92
|
"""
|
|
101
|
-
|
|
93
|
+
if not self._inference_load_is_custom:
|
|
94
|
+
yield from self._perform_parameter_sweep()
|
|
102
95
|
|
|
103
|
-
|
|
104
|
-
|
|
96
|
+
if self._was_constraint_violated():
|
|
97
|
+
yield from self._perform_binary_parameter_search()
|
|
105
98
|
|
|
106
99
|
def _perform_parameter_sweep(self) -> Generator[int, None, None]:
|
|
107
100
|
for parameter in (
|
|
@@ -1,18 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
#
|
|
4
|
-
#
|
|
5
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
-
# you may not use this file except in compliance with the License.
|
|
7
|
-
# You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
-
# See the License for the specific language governing permissions and
|
|
15
|
-
# limitations under the License.
|
|
2
|
+
# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
16
4
|
|
|
17
5
|
import logging
|
|
18
6
|
from copy import deepcopy
|
|
@@ -48,6 +36,9 @@ class RunConfigMeasurement:
|
|
|
48
36
|
self._model_variants_name = model_variants_name
|
|
49
37
|
|
|
50
38
|
self._gpu_data = gpu_data
|
|
39
|
+
# Note: "_avg_gpu_data" is a historical name. This actually contains
|
|
40
|
+
# aggregated GPU metrics: memory metrics are SUMMED, while utilization
|
|
41
|
+
# and power metrics are AVERAGED across GPUs.
|
|
51
42
|
self._avg_gpu_data = self._average_list(list(self._gpu_data.values()))
|
|
52
43
|
self._avg_gpu_data_from_tag = self._get_avg_gpu_data_from_tag()
|
|
53
44
|
|
|
@@ -213,7 +204,9 @@ class RunConfigMeasurement:
|
|
|
213
204
|
|
|
214
205
|
def get_gpu_metric(self, tag: str) -> Optional[Record]:
|
|
215
206
|
"""
|
|
216
|
-
Returns the
|
|
207
|
+
Returns the aggregated Record associated with this GPU metric
|
|
208
|
+
across all GPUs. GPU memory metrics are summed; other metrics
|
|
209
|
+
like utilization are averaged.
|
|
217
210
|
|
|
218
211
|
Parameters
|
|
219
212
|
----------
|
|
@@ -224,7 +217,7 @@ class RunConfigMeasurement:
|
|
|
224
217
|
Returns
|
|
225
218
|
-------
|
|
226
219
|
Record:
|
|
227
|
-
of
|
|
220
|
+
of aggregated GPU metric Records corresponding to this tag,
|
|
228
221
|
or None if tag not found
|
|
229
222
|
"""
|
|
230
223
|
if tag in self._avg_gpu_data_from_tag:
|
|
@@ -320,8 +313,10 @@ class RunConfigMeasurement:
|
|
|
320
313
|
Returns
|
|
321
314
|
-------
|
|
322
315
|
float :
|
|
323
|
-
|
|
316
|
+
Aggregated value of the GPU metric Records across all GPUs
|
|
324
317
|
corresponding to the tag, default_value if tag not found.
|
|
318
|
+
GPU memory metrics are summed; other metrics like utilization
|
|
319
|
+
are averaged.
|
|
325
320
|
"""
|
|
326
321
|
metric = self.get_gpu_metric(tag)
|
|
327
322
|
if metric is None:
|
|
@@ -615,7 +610,9 @@ class RunConfigMeasurement:
|
|
|
615
610
|
|
|
616
611
|
def _average_list(self, row_list):
|
|
617
612
|
"""
|
|
618
|
-
|
|
613
|
+
Aggregate a 2d list of GPU records across GPUs.
|
|
614
|
+
Uses each record type's value_function() to determine
|
|
615
|
+
whether to sum or average the metric.
|
|
619
616
|
"""
|
|
620
617
|
|
|
621
618
|
if not row_list:
|
|
@@ -623,13 +620,21 @@ class RunConfigMeasurement:
|
|
|
623
620
|
else:
|
|
624
621
|
N = len(row_list)
|
|
625
622
|
d = len(row_list[0])
|
|
626
|
-
|
|
623
|
+
agg = [0 for _ in range(d)]
|
|
627
624
|
for i in range(d):
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
)
|
|
632
|
-
|
|
625
|
+
# Sum the records across all GPUs
|
|
626
|
+
summed_record = sum(
|
|
627
|
+
[row_list[j][i] for j in range(1, N)], start=row_list[0][i]
|
|
628
|
+
)
|
|
629
|
+
# Get the aggregation function for this record type
|
|
630
|
+
value_func = row_list[0][i].value_function()
|
|
631
|
+
# If the value function is sum, use the sum directly
|
|
632
|
+
# If the value function is mean/average, divide by N
|
|
633
|
+
if value_func == sum:
|
|
634
|
+
agg[i] = summed_record
|
|
635
|
+
else:
|
|
636
|
+
agg[i] = summed_record / N
|
|
637
|
+
return agg
|
|
633
638
|
|
|
634
639
|
def _deserialize_gpu_data(
|
|
635
640
|
self, serialized_gpu_data: Dict
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: triton-model-analyzer
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.50.0
|
|
4
4
|
Summary: Triton Model Analyzer is a tool to profile and analyze the runtime performance of one or more models on the Triton Inference Server
|
|
5
5
|
Author-email: "NVIDIA Inc." <sw-dl-triton@nvidia.com>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -11,7 +11,7 @@ model_analyzer/config/__init__.py,sha256=1vqDBvdG5Wrb-tDrHGyS9LC_cQst-QqLt5wOa3T
|
|
|
11
11
|
model_analyzer/config/generate/__init__.py,sha256=4XgtPRt-6E1qrP6lxcqPIaEQb3V0yYu6EMtnVwAKdIY,647
|
|
12
12
|
model_analyzer/config/generate/automatic_model_config_generator.py,sha256=EhpeYZlkAoSiKq1_QunXUBzbB7jPXSU2kyi3UpnzuOs,5797
|
|
13
13
|
model_analyzer/config/generate/base_model_config_generator.py,sha256=sT8wghsLRgVT7-AWMTkrvdX-CeahFYdXzGEW0QgnQaI,12817
|
|
14
|
-
model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py,sha256=
|
|
14
|
+
model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py,sha256=fQ4NGO-1nUpX1wHbw6DPqnlXPGNCBLE_MrfWwYMpInk,6175
|
|
15
15
|
model_analyzer/config/generate/brute_run_config_generator.py,sha256=bKYV3jFNpeE4cCIvBG8a3VYItjPzA0Lu_hyFukaQsUw,5484
|
|
16
16
|
model_analyzer/config/generate/concurrency_sweeper.py,sha256=_VRCRw6iNgF3fJBdP46mywsy9Jz0QTBNSXyIHM4X0gk,2907
|
|
17
17
|
model_analyzer/config/generate/config_generator_interface.py,sha256=4r2u5t7yrAqEeDTsfiPWEgqU0pesG3N5DChfweFPLm8,1681
|
|
@@ -20,26 +20,26 @@ model_analyzer/config/generate/coordinate_data.py,sha256=deqoRIvO0aN5D5sNDZcjmT0
|
|
|
20
20
|
model_analyzer/config/generate/generator_utils.py,sha256=5hj7qGzVs-oHk6UK_ggBzKJiTDVdX6MSLrXyEfU-kFE,4157
|
|
21
21
|
model_analyzer/config/generate/manual_model_config_generator.py,sha256=_Y78ORijg5E6c2a2zESwJzK55S8fbhoy2rqFTQq-seA,7088
|
|
22
22
|
model_analyzer/config/generate/model_config_generator_factory.py,sha256=5BjSJxRmrEVy3LyjIPnMT-QCz8GABa62oY1VvJfmhbE,3334
|
|
23
|
-
model_analyzer/config/generate/model_profile_spec.py,sha256=
|
|
23
|
+
model_analyzer/config/generate/model_profile_spec.py,sha256=WSznjBRO16vwet4f3XhrjmHx3tgAQU9ZJDh8CYI4E3c,5321
|
|
24
24
|
model_analyzer/config/generate/model_run_config_generator.py,sha256=Ip1djtZ3wlwG_Zs3YivIlJxmwS1TsNa_7aMCU9bCQP0,5484
|
|
25
25
|
model_analyzer/config/generate/model_variant_name_manager.py,sha256=qPAcvDQhkbArkW-A1A1mdRyqALUgfLlyV3dhNV6jKgs,5362
|
|
26
26
|
model_analyzer/config/generate/neighborhood.py,sha256=s9g9igetHvF6Xm9FhuzjbhPBVWO65SngABrX3RGOUBQ,19481
|
|
27
27
|
model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py,sha256=iYCzT2gH-DH5mNbRMGM_UNyPNfRkObvjcrvy0TSKPD8,5861
|
|
28
28
|
model_analyzer/config/generate/optuna_run_config_generator.py,sha256=Z09p5tZP8wl0hp9NpHXpGovv1dZZkZzH11FnuXe8HhU,33099
|
|
29
|
-
model_analyzer/config/generate/perf_analyzer_config_generator.py,sha256=
|
|
30
|
-
model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py,sha256=
|
|
31
|
-
model_analyzer/config/generate/quick_run_config_generator.py,sha256=
|
|
32
|
-
model_analyzer/config/generate/run_config_generator_factory.py,sha256=
|
|
29
|
+
model_analyzer/config/generate/perf_analyzer_config_generator.py,sha256=cCLQTV5V65QelGFMP3B4JaYvejNZj4tHJUQSZ3nTZSY,11112
|
|
30
|
+
model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py,sha256=hAL6YD6hqLDL7ttOgJuDIUvEe6BpHNlYv82cQdHUo5o,4516
|
|
31
|
+
model_analyzer/config/generate/quick_run_config_generator.py,sha256=1JQo3gaZeC2eUNrtuMeyQzuILZX3lBlNX-UhNaXMvBY,30880
|
|
32
|
+
model_analyzer/config/generate/run_config_generator_factory.py,sha256=DDAsqWQoKhBA2XFb68igVX7cO1WY5xt15_hkBxX6_Cg,19453
|
|
33
33
|
model_analyzer/config/generate/search_config.py,sha256=mvQaoQ9wIeNEHWJtIwXh0JujPQZqXLLXmWpI7uBHDGA,3655
|
|
34
34
|
model_analyzer/config/generate/search_dimension.py,sha256=iUoh4IhhMy9zhMhaY-q9DoBEku4--4o4j8RYQfXUVVk,2375
|
|
35
35
|
model_analyzer/config/generate/search_dimensions.py,sha256=_hbpnb3gUYuCVh_LLP4FMjB1sV-IKrvekpEhi2lTCIs,2618
|
|
36
36
|
model_analyzer/config/generate/search_parameter.py,sha256=E-mM5bWKdPUXjT8DCmVLWuJWTs-jBC8U8ZBDmx8GGMs,1392
|
|
37
37
|
model_analyzer/config/generate/search_parameters.py,sha256=knTE9Vw7lS37SuGi1hZpVb01DFLubJpzAPJSbTHlQDA,14876
|
|
38
38
|
model_analyzer/config/input/__init__.py,sha256=1vqDBvdG5Wrb-tDrHGyS9LC_cQst-QqLt5wOa3TD9rI,647
|
|
39
|
-
model_analyzer/config/input/config_command.py,sha256=
|
|
40
|
-
model_analyzer/config/input/config_command_profile.py,sha256=
|
|
39
|
+
model_analyzer/config/input/config_command.py,sha256=kS02U8c5AO2XNsxM8VFOMYhFaN5r8-aPm5tjFrCiyxA,21495
|
|
40
|
+
model_analyzer/config/input/config_command_profile.py,sha256=Rbx5x_jj0SC7r1SXEVPLkT8CXu_KX0iLzVySdOyQJ2c,72018
|
|
41
41
|
model_analyzer/config/input/config_command_report.py,sha256=Igiid6XK5OB23jvXZKPI4Pg5ZsSDkXgHU-QAGOo6_-E,9535
|
|
42
|
-
model_analyzer/config/input/config_defaults.py,sha256=
|
|
42
|
+
model_analyzer/config/input/config_defaults.py,sha256=t1hkTDqc82tNtyfr39NDEb_n2a4qtnf0GrR69BtzpAU,6202
|
|
43
43
|
model_analyzer/config/input/config_enum.py,sha256=lu9cUz_MY52EcLEXxcLYWqB5If6FhsM1K8w1srzhDt4,2386
|
|
44
44
|
model_analyzer/config/input/config_field.py,sha256=XlqlouNuPENEYCBLYtHxsfMACrFlcYZGkbh6yE1psNo,5156
|
|
45
45
|
model_analyzer/config/input/config_list_generic.py,sha256=-4vhDxIEMPOwYcUIkMIFzxJ61mOmX6GEgGCpTfbCFsc,3355
|
|
@@ -103,24 +103,24 @@ model_analyzer/output/file_writer.py,sha256=NbAbjLFBcHZMYOJF-3Af9dqGVjS8WRGsQDid
|
|
|
103
103
|
model_analyzer/output/output_writer.py,sha256=JZ-e0uzrpNybKaAup0hLmvIuGn9v1Z5OsB8yMHX074A,1200
|
|
104
104
|
model_analyzer/perf_analyzer/__init__.py,sha256=oA0IPbS32uHtDFDRhaVyzOCU717OJAgXZkq9db1uGaA,647
|
|
105
105
|
model_analyzer/perf_analyzer/genai_perf_config.py,sha256=9g4081ttiAqV1H3vft3Brqx3nArslyAFspJODboj72g,5673
|
|
106
|
-
model_analyzer/perf_analyzer/perf_analyzer.py,sha256=
|
|
107
|
-
model_analyzer/perf_analyzer/perf_config.py,sha256=
|
|
106
|
+
model_analyzer/perf_analyzer/perf_analyzer.py,sha256=wcE6CIL_OKHFnJkipXb_mP8koBLXHOZb51OT4kR-JMQ,31067
|
|
107
|
+
model_analyzer/perf_analyzer/perf_config.py,sha256=nQHxZaIGp--JoCeTp2c5spA_o5o6H4dAgnLyrmvlGvM,14718
|
|
108
108
|
model_analyzer/plots/__init__.py,sha256=1vqDBvdG5Wrb-tDrHGyS9LC_cQst-QqLt5wOa3TD9rI,647
|
|
109
|
-
model_analyzer/plots/detailed_plot.py,sha256=
|
|
109
|
+
model_analyzer/plots/detailed_plot.py,sha256=nRxPM3MmsUsvmKpzJYST991pcmYCEXQWj2EhA11gTXg,8164
|
|
110
110
|
model_analyzer/plots/plot_manager.py,sha256=CFDUSF8Xm-1Uesg8dQSC5z8JSXa40mKtDNN2kVKP0ek,8962
|
|
111
111
|
model_analyzer/plots/simple_plot.py,sha256=udtWBXy_Y2UPWFEP4xhb-FWfjCjB4OWJwWN5lyJALhc,7163
|
|
112
112
|
model_analyzer/record/__init__.py,sha256=oA0IPbS32uHtDFDRhaVyzOCU717OJAgXZkq9db1uGaA,647
|
|
113
113
|
model_analyzer/record/gpu_record.py,sha256=7X9g6Y1efqiA9a9i80iIxLLWnk6121pYtk36WD8d62c,1872
|
|
114
|
-
model_analyzer/record/metrics_manager.py,sha256=
|
|
114
|
+
model_analyzer/record/metrics_manager.py,sha256=D3gBuL73maQbWNq_gNMUVXisfZnwVwhiMHFgJ2ZB_3c,32173
|
|
115
115
|
model_analyzer/record/record.py,sha256=asjFF8E3t6abXYd9RZCOG-w1AVVZRb5u9PHgxvMqgGA,7284
|
|
116
116
|
model_analyzer/record/record_aggregator.py,sha256=YFA1Av6m3oB_0kTGgvtUZEt4TZz8vL85uqnDCVRdvJI,8051
|
|
117
117
|
model_analyzer/record/types/__init__.py,sha256=oA0IPbS32uHtDFDRhaVyzOCU717OJAgXZkq9db1uGaA,647
|
|
118
118
|
model_analyzer/record/types/cpu_available_ram.py,sha256=DHhg_pePj8A0gSCPLIAHJnxZpdwoaWcaRMiBHyC5nTU,2431
|
|
119
119
|
model_analyzer/record/types/cpu_used_ram.py,sha256=h26jXoyGBHEeiE2Mvmwo6hpVaGBBYvZNQ8Fo0EMIBL4,2412
|
|
120
|
-
model_analyzer/record/types/gpu_free_memory.py,sha256=
|
|
120
|
+
model_analyzer/record/types/gpu_free_memory.py,sha256=_pksqjMi3guIMBIGVEpBamY86nT1V8o2phvEfvhsHBM,2341
|
|
121
121
|
model_analyzer/record/types/gpu_power_usage.py,sha256=eIlggXrIJ3E9qveprOiVNrMsITom4WiWKsLaDJMD8Lg,2903
|
|
122
|
-
model_analyzer/record/types/gpu_total_memory.py,sha256=
|
|
123
|
-
model_analyzer/record/types/gpu_used_memory.py,sha256=
|
|
122
|
+
model_analyzer/record/types/gpu_total_memory.py,sha256=QI_MFIP2G38mmpsml6Qmyz3bvG5tgKCpa8RziWIfM4k,2346
|
|
123
|
+
model_analyzer/record/types/gpu_used_memory.py,sha256=3tdjaSffCVLcfG7kLiNs3muwm-mBZld-n2VEKwJ450w,2337
|
|
124
124
|
model_analyzer/record/types/gpu_utilization.py,sha256=h9wqr1NtBDpYzHKTmZyXUhRriEz1_DAJRfYITmSQdsE,2880
|
|
125
125
|
model_analyzer/record/types/inter_token_latency_avg.py,sha256=i7jqWwdBcRyMblzdv-s8d1ET2-BdQq39WPt5WFaO7nE,1744
|
|
126
126
|
model_analyzer/record/types/inter_token_latency_base.py,sha256=fUf-0aT_TbbIH7iKbQDEu2sAser6G24--MAn8Vac4bA,2062
|
|
@@ -167,14 +167,14 @@ model_analyzer/result/__init__.py,sha256=1vqDBvdG5Wrb-tDrHGyS9LC_cQst-QqLt5wOa3T
|
|
|
167
167
|
model_analyzer/result/constraint_manager.py,sha256=nfHsBbX-66QN19uMH8ZBn68_mAaL0M30HMIG9N8KcBg,5005
|
|
168
168
|
model_analyzer/result/model_config_measurement.py,sha256=qwsPF-ea17Gl5EyFu9OWNWJrvl5hWzeqAkQo5bPgwfE,10657
|
|
169
169
|
model_analyzer/result/model_constraints.py,sha256=HXtbGK8mswXpCwWwrAantXb4sxhGo1bCWurwf9vW9KA,2687
|
|
170
|
-
model_analyzer/result/parameter_search.py,sha256=
|
|
170
|
+
model_analyzer/result/parameter_search.py,sha256=kw-kkxV7FlHKYThcdZoybaIxH-dILP5lAmdNc1OW_RI,8901
|
|
171
171
|
model_analyzer/result/result_manager.py,sha256=e_PWQV_33IM4SiLI7BGcmVtEMAe6IE_dZWbaf-DkXBo,15477
|
|
172
172
|
model_analyzer/result/result_statistics.py,sha256=wvqs64W8rZftN59rcQiH7Bmd0dfSAD7a9BGK_K1LYTI,4343
|
|
173
173
|
model_analyzer/result/result_table.py,sha256=NYwiPtkD_uSBn61omvkMvuLi-FBdMq0wHDlgcz9Gk7w,5843
|
|
174
174
|
model_analyzer/result/result_table_manager.py,sha256=UKAMSbS8o3CtM_4uSNaDNWxX6ZlGRqohgVcrZeihtgY,23643
|
|
175
175
|
model_analyzer/result/result_utils.py,sha256=EJXMo7csgqwonROb8_-hFeysnu-JDfj2Gf8naWHsw2s,1379
|
|
176
176
|
model_analyzer/result/results.py,sha256=YZlz5oJPPYNM4Nub4r_UNKMphKDT44_30iVjp5S4lds,8675
|
|
177
|
-
model_analyzer/result/run_config_measurement.py,sha256=
|
|
177
|
+
model_analyzer/result/run_config_measurement.py,sha256=CKlz8l1dvcFaThZpeKrEr9eLw3drlT3gVBSrsFZf6ZU,21577
|
|
178
178
|
model_analyzer/result/run_config_result.py,sha256=cs2fE-ISeInBErfJYG_L_3BuJjXNPDzIASYmwgU1JYE,6422
|
|
179
179
|
model_analyzer/result/run_config_result_comparator.py,sha256=xLRYEdF2TLRSbCl0Qj3OTfNiYa5RPtOkv_BtQibfwfc,3759
|
|
180
180
|
model_analyzer/result/sorted_results.py,sha256=ScWaT44X4-kplFv4lo9AClXEpA22DtwA6t8GPS2BTiw,4694
|
|
@@ -196,9 +196,9 @@ model_analyzer/triton/server/server_config.py,sha256=3pNQAnUmSXVAO7pKLNWak7AFGm1
|
|
|
196
196
|
model_analyzer/triton/server/server_docker.py,sha256=38e_tMniv2PBiEIh2KMxgqsMZlttL0yCrpD0lKd0r5g,8248
|
|
197
197
|
model_analyzer/triton/server/server_factory.py,sha256=WMZfXFQfO9aeFxCWewQhRk9ygMcne0ershGZ75XQ5IE,11152
|
|
198
198
|
model_analyzer/triton/server/server_local.py,sha256=i5t5MaDwfvWqBA3zG15GPkGiUm6I4Bb4i0d-wBP0WNs,5406
|
|
199
|
-
triton_model_analyzer-1.
|
|
200
|
-
triton_model_analyzer-1.
|
|
201
|
-
triton_model_analyzer-1.
|
|
202
|
-
triton_model_analyzer-1.
|
|
203
|
-
triton_model_analyzer-1.
|
|
204
|
-
triton_model_analyzer-1.
|
|
199
|
+
triton_model_analyzer-1.50.0.dist-info/licenses/LICENSE,sha256=vs3gOjyLmy49WMe2XKf_2eGOmBYjlsw3QFKYPB-qpHw,9144
|
|
200
|
+
triton_model_analyzer-1.50.0.dist-info/METADATA,sha256=uDYfY5KZsLOHaugxXsqh2mCc6ByjTQvg2TT_j1IStBA,2512
|
|
201
|
+
triton_model_analyzer-1.50.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
202
|
+
triton_model_analyzer-1.50.0.dist-info/entry_points.txt,sha256=k6Q0D76sL3qUNxe80hUThftycJAT7Sj5dBk8IjHXJoI,66
|
|
203
|
+
triton_model_analyzer-1.50.0.dist-info/top_level.txt,sha256=mTebknhUDSE7cCVyUtpzNVQ-tEUi8zZKFvp15xpIP2c,15
|
|
204
|
+
triton_model_analyzer-1.50.0.dist-info/RECORD,,
|
{triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/top_level.txt
RENAMED
|
File without changes
|