triton-model-analyzer 1.48.0__py3-none-any.whl → 1.49.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +10 -14
  2. model_analyzer/config/generate/model_profile_spec.py +14 -14
  3. model_analyzer/config/generate/perf_analyzer_config_generator.py +14 -22
  4. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +2 -16
  5. model_analyzer/config/generate/quick_run_config_generator.py +16 -28
  6. model_analyzer/config/input/config_defaults.py +3 -15
  7. model_analyzer/perf_analyzer/perf_analyzer.py +4 -16
  8. model_analyzer/perf_analyzer/perf_config.py +23 -15
  9. model_analyzer/plots/detailed_plot.py +41 -54
  10. model_analyzer/record/metrics_manager.py +7 -15
  11. model_analyzer/record/types/gpu_free_memory.py +13 -14
  12. model_analyzer/record/types/gpu_total_memory.py +13 -14
  13. model_analyzer/record/types/gpu_used_memory.py +13 -14
  14. model_analyzer/result/parameter_search.py +10 -17
  15. model_analyzer/result/run_config_measurement.py +29 -24
  16. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/METADATA +1 -1
  17. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/RECORD +21 -21
  18. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/WHEEL +0 -0
  19. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/entry_points.txt +0 -0
  20. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/licenses/LICENSE +0 -0
  21. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.49.0.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  import logging
18
6
  from copy import deepcopy
@@ -132,9 +120,11 @@ class BrutePlusBinaryParameterSearchRunConfigGenerator(ConfigGeneratorInterface)
132
120
  for result in top_results:
133
121
  run_config = deepcopy(result.run_config())
134
122
  model_parameters = self._get_model_parameters(model_name)
123
+ perf_analyzer_flags = self._get_model_perf_analyzer_flags(model_name)
135
124
  parameter_search = ParameterSearch(
136
125
  config=self._config,
137
126
  model_parameters=model_parameters,
127
+ perf_analyzer_flags=perf_analyzer_flags,
138
128
  skip_parameter_sweep=True,
139
129
  )
140
130
  for parameter in parameter_search.search_parameters():
@@ -151,6 +141,12 @@ class BrutePlusBinaryParameterSearchRunConfigGenerator(ConfigGeneratorInterface)
151
141
 
152
142
  return {}
153
143
 
144
+ def _get_model_perf_analyzer_flags(self, model_name: str) -> Dict:
145
+ for model in self._models:
146
+ if model_name == model.model_name():
147
+ return model.perf_analyzer_flags()
148
+ return {}
149
+
154
150
  def _set_parameter(
155
151
  self, run_config: RunConfig, model_parameters: Dict, parameter: int
156
152
  ) -> RunConfig:
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  from copy import deepcopy
18
6
  from typing import List
@@ -22,6 +10,7 @@ from model_analyzer.config.input.objects.config_model_profile_spec import (
22
10
  ConfigModelProfileSpec,
23
11
  )
24
12
  from model_analyzer.device.gpu_device import GPUDevice
13
+ from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
25
14
  from model_analyzer.triton.client.client import TritonClient
26
15
  from model_analyzer.triton.model.model_config import ModelConfig
27
16
 
@@ -72,3 +61,14 @@ class ModelProfileSpec(ConfigModelProfileSpec):
72
61
  def is_ensemble(self) -> bool:
73
62
  """Returns true if the model is an ensemble"""
74
63
  return "ensemble_scheduling" in self._default_model_config
64
+
65
+ def is_load_specified(self) -> bool:
66
+ """
67
+ Returns true if the model's PA config has specified any of the
68
+ inference load args (such as concurrency). Else returns false
69
+ """
70
+ load_args = PerfAnalyzerConfig.get_inference_load_args()
71
+ pa_flags = self.perf_analyzer_flags()
72
+ if pa_flags is None:
73
+ return False
74
+ return any(e in pa_flags for e in load_args)
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  import logging
18
6
  from typing import Generator, List, Optional
@@ -169,10 +157,12 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
169
157
  self._parameter_results.extend(measurement)
170
158
 
171
159
  def _create_parameter_list(self) -> List[int]:
172
- # The two possible parameters are request rate or concurrency
173
- # Concurrency is the default and will be used unless the user specifies
174
- # request rate, either as a model parameter or a config option
175
- if self._cli_config.is_request_rate_specified(self._model_parameters):
160
+ # Determines the inference load (concurrency or request-rate or request-intervals)
161
+ # and creates the list of values to use. If nothing is specified by the user, then
162
+ # concurrency will be used.
163
+ if "request-intervals" in self._perf_analyzer_flags:
164
+ return [self._perf_analyzer_flags["request-intervals"]]
165
+ elif self._cli_config.is_request_rate_specified(self._model_parameters):
176
166
  return self._create_request_rate_list()
177
167
  else:
178
168
  return self._create_concurrency_list()
@@ -207,7 +197,7 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
207
197
  for params in utils.generate_parameter_combinations(
208
198
  perf_config_non_parameter_values
209
199
  ):
210
- configs_with_concurrency = []
200
+ configs_with_inference_load = []
211
201
  for parameter in self._parameters:
212
202
  new_perf_config = PerfAnalyzerConfig()
213
203
 
@@ -217,7 +207,9 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
217
207
 
218
208
  new_perf_config.update_config(params)
219
209
 
220
- if self._cli_config.is_request_rate_specified(self._model_parameters):
210
+ if "request-intervals" in self._perf_analyzer_flags:
211
+ pass
212
+ elif self._cli_config.is_request_rate_specified(self._model_parameters):
221
213
  new_perf_config.update_config({"request-rate-range": parameter})
222
214
  else:
223
215
  new_perf_config.update_config({"concurrency-range": parameter})
@@ -225,8 +217,8 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
225
217
  # User provided flags can override the search parameters
226
218
  new_perf_config.update_config(self._perf_analyzer_flags)
227
219
 
228
- configs_with_concurrency.append(new_perf_config)
229
- self._configs.append(configs_with_concurrency)
220
+ configs_with_inference_load.append(new_perf_config)
221
+ self._configs.append(configs_with_inference_load)
230
222
 
231
223
  def _create_non_parameter_perf_config_values(self) -> dict:
232
224
  perf_config_values = {
@@ -1,21 +1,8 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  import logging
18
- from copy import deepcopy
19
6
  from typing import Generator, List, Optional
20
7
 
21
8
  from model_analyzer.config.generate.concurrency_sweeper import ConcurrencySweeper
@@ -30,7 +17,6 @@ from model_analyzer.config.generate.search_config import SearchConfig
30
17
  from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
31
18
  from model_analyzer.config.run.run_config import RunConfig
32
19
  from model_analyzer.constants import LOGGER_NAME
33
- from model_analyzer.result.parameter_search import ParameterSearch
34
20
  from model_analyzer.result.result_manager import ResultManager
35
21
  from model_analyzer.result.run_config_measurement import RunConfigMeasurement
36
22
 
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  import logging
18
6
  from sys import maxsize
@@ -507,13 +495,13 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):
507
495
 
508
496
  perf_analyzer_config.update_config_from_profile_config(model_name, self._config)
509
497
 
510
- concurrency = self._calculate_concurrency(dimension_values)
511
-
512
- perf_config_params = {
513
- "batch-size": DEFAULT_BATCH_SIZES,
514
- "concurrency-range": concurrency,
515
- }
516
- perf_analyzer_config.update_config(perf_config_params)
498
+ if not model.is_load_specified():
499
+ concurrency = self._calculate_concurrency(dimension_values)
500
+ perf_config_params = {
501
+ "batch-size": DEFAULT_BATCH_SIZES,
502
+ "concurrency-range": concurrency,
503
+ }
504
+ perf_analyzer_config.update_config(perf_config_params)
517
505
 
518
506
  perf_analyzer_config.update_config(model.perf_analyzer_flags())
519
507
  return perf_analyzer_config
@@ -703,13 +691,13 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):
703
691
  model_config.get_field("name"), self._config
704
692
  )
705
693
 
706
- default_concurrency = self._calculate_default_concurrency(model_config)
707
-
708
- perf_config_params = {
709
- "batch-size": DEFAULT_BATCH_SIZES,
710
- "concurrency-range": default_concurrency,
711
- }
712
- default_perf_analyzer_config.update_config(perf_config_params)
694
+ if not model.is_load_specified():
695
+ default_concurrency = self._calculate_default_concurrency(model_config)
696
+ perf_config_params = {
697
+ "batch-size": DEFAULT_BATCH_SIZES,
698
+ "concurrency-range": default_concurrency,
699
+ }
700
+ default_perf_analyzer_config.update_config(perf_config_params)
713
701
 
714
702
  default_perf_analyzer_config.update_config(model.perf_analyzer_flags())
715
703
 
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  import os
18
6
 
@@ -64,7 +52,7 @@ DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False
64
52
  DEFAULT_CONCURRENCY_SWEEP_DISABLE = False
65
53
  DEFAULT_DCGM_DISABLE = False
66
54
  DEFAULT_TRITON_LAUNCH_MODE = "local"
67
- DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:25.11-py3"
55
+ DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:25.12-py3"
68
56
  DEFAULT_TRITON_HTTP_ENDPOINT = "localhost:8000"
69
57
  DEFAULT_TRITON_GRPC_ENDPOINT = "localhost:8001"
70
58
  DEFAULT_TRITON_METRICS_URL = "http://localhost:8002/metrics"
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  import csv
18
6
  import glob
@@ -432,8 +420,8 @@ class PerfAnalyzer:
432
420
  return cmd
433
421
 
434
422
  def _get_single_model_cmd(self, index):
435
- if self._model_type == "LLM":
436
- cmd = ["genai-perf", "-m", self._config.models_name()]
423
+ if self._model_type.lower() == "llm":
424
+ cmd = ["genai-perf", "profile", "-m", self._config.models_name()]
437
425
  cmd += self._get_genai_perf_cli_command(index).replace("=", " ").split()
438
426
  cmd += ["--"]
439
427
  cmd += (
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  from typing import List
18
6
 
@@ -98,6 +86,13 @@ class PerfAnalyzerConfig:
98
86
  "collect-metrics",
99
87
  ]
100
88
 
89
+ # Only one of these args can be sent to PA, as each one controls the inference load in a different way
90
+ inference_load_args = [
91
+ "concurrency-range",
92
+ "request-rate-range",
93
+ "request-intervals",
94
+ ]
95
+
101
96
  def __init__(self):
102
97
  """
103
98
  Construct a PerfAnalyzerConfig
@@ -108,7 +103,9 @@ class PerfAnalyzerConfig:
108
103
  self._options = {
109
104
  "-m": None,
110
105
  "-x": None,
111
- "-b": None,
106
+ # Default to batch size of 1. This would be handled by PA if unspecified,
107
+ # but we want to be explicit so we can properly print/track values
108
+ "-b": 1,
112
109
  "-u": None,
113
110
  "-i": None,
114
111
  "-f": None,
@@ -160,6 +157,16 @@ class PerfAnalyzerConfig:
160
157
 
161
158
  return cls.additive_args[:]
162
159
 
160
+ @classmethod
161
+ def get_inference_load_args(cls):
162
+ """
163
+ Returns
164
+ -------
165
+ list of str
166
+ The Perf Analyzer args that control the inference load
167
+ """
168
+ return cls.inference_load_args
169
+
163
170
  def update_config(self, params=None):
164
171
  """
165
172
  Allows setting values from a params dict
@@ -275,6 +282,7 @@ class PerfAnalyzerConfig:
275
282
  "batch-size": self._options["-b"],
276
283
  "concurrency-range": self._args["concurrency-range"],
277
284
  "request-rate-range": self._args["request-rate-range"],
285
+ "request-intervals": self._args["request-intervals"],
278
286
  }
279
287
 
280
288
  @classmethod
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  import logging
18
6
  import os
@@ -22,6 +10,7 @@ import matplotlib.pyplot as plt
22
10
  from matplotlib import patches as mpatches
23
11
 
24
12
  from model_analyzer.constants import LOGGER_NAME
13
+ from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
25
14
  from model_analyzer.record.metrics_manager import MetricsManager
26
15
 
27
16
  logging.getLogger("matplotlib").setLevel(logging.ERROR)
@@ -89,7 +78,6 @@ class DetailedPlot:
89
78
  self._fig.set_figheight(8)
90
79
  self._fig.set_figwidth(12)
91
80
 
92
- self._ax_latency.set_xlabel("Concurrent Client Requests")
93
81
  self._ax_latency.set_ylabel(latency_axis_label)
94
82
  self._ax_throughput.set_ylabel(throughput_axis_label)
95
83
 
@@ -120,29 +108,15 @@ class DetailedPlot:
120
108
  """
121
109
 
122
110
  # TODO-TMA-568: This needs to be updated because there will be multiple model configs
123
- if (
124
- "concurrency-range" in run_config_measurement.model_specific_pa_params()[0]
125
- and run_config_measurement.model_specific_pa_params()[0][
126
- "concurrency-range"
127
- ]
128
- ):
129
- self._data["concurrency"].append(
130
- run_config_measurement.model_specific_pa_params()[0][
131
- "concurrency-range"
132
- ]
133
- )
134
-
135
- if (
136
- "request-rate-range" in run_config_measurement.model_specific_pa_params()[0]
137
- and run_config_measurement.model_specific_pa_params()[0][
138
- "request-rate-range"
139
- ]
140
- ):
141
- self._data["request_rate"].append(
142
- run_config_measurement.model_specific_pa_params()[0][
143
- "request-rate-range"
144
- ]
145
- )
111
+ for load_arg in PerfAnalyzerConfig.get_inference_load_args():
112
+ if (
113
+ load_arg in run_config_measurement.model_specific_pa_params()[0]
114
+ and run_config_measurement.model_specific_pa_params()[0][load_arg]
115
+ ):
116
+ data_key = self._get_data_key_from_load_arg(load_arg)
117
+ self._data[data_key].append(
118
+ run_config_measurement.model_specific_pa_params()[0][load_arg]
119
+ )
146
120
 
147
121
  self._data["perf_throughput"].append(
148
122
  run_config_measurement.get_non_gpu_metric_value(tag="perf_throughput")
@@ -164,25 +138,28 @@ class DetailedPlot:
164
138
  on this plot's Axes object
165
139
  """
166
140
 
167
- # Need to change the default x-axis plot title for request rates
168
- if "request_rate" in self._data and self._data["request_rate"][0]:
141
+ # Update the x-axis plot title
142
+ if "request_intervals" in self._data and self._data["request_intervals"][0]:
143
+ self._ax_latency.set_xlabel("Request Intervals File")
144
+ sort_indices_key = "request_intervals"
145
+ elif "request_rate" in self._data and self._data["request_rate"][0]:
169
146
  self._ax_latency.set_xlabel("Client Request Rate")
170
-
171
- # Sort the data by request rate or concurrency
172
- if "request_rate" in self._data and self._data["request_rate"][0]:
173
- sort_indices = list(
174
- zip(*sorted(enumerate(self._data["request_rate"]), key=lambda x: x[1]))
175
- )[0]
147
+ sort_indices_key = "request_rate"
176
148
  else:
177
- sort_indices = list(
178
- zip(*sorted(enumerate(self._data["concurrency"]), key=lambda x: x[1]))
179
- )[0]
149
+ self._ax_latency.set_xlabel("Concurrent Client Requests")
150
+ sort_indices_key = "concurrency"
151
+
152
+ sort_indices = list(
153
+ zip(*sorted(enumerate(self._data[sort_indices_key]), key=lambda x: x[1]))
154
+ )[0]
180
155
 
181
156
  sorted_data = {
182
157
  key: [data_list[i] for i in sort_indices]
183
158
  for key, data_list in self._data.items()
184
159
  }
185
160
 
161
+ sorted_data["indices"] = list(map(str, sorted_data[sort_indices_key]))
162
+
186
163
  # Plot latency breakdown bars
187
164
  labels = dict(
188
165
  zip(
@@ -197,11 +174,6 @@ class DetailedPlot:
197
174
  )
198
175
  bottoms = None
199
176
 
200
- if "request_rate" in self._data:
201
- sorted_data["indices"] = list(map(str, sorted_data["request_rate"]))
202
- else:
203
- sorted_data["indices"] = list(map(str, sorted_data["concurrency"]))
204
-
205
177
  # Plot latency breakdown with concurrency casted as string to make uniform x
206
178
  for metric, label in labels.items():
207
179
  self._ax_latency.bar(
@@ -264,3 +236,18 @@ class DetailedPlot:
264
236
  """
265
237
 
266
238
  self._fig.savefig(os.path.join(filepath, self._name))
239
+
240
+ def _get_data_key_from_load_arg(self, load_arg):
241
+ """
242
+ Gets the key into _data corresponding with the input load arg
243
+
244
+ For example, the load arg "request-rate-range" has the key "request_rate"
245
+ """
246
+ # Check if '-range' exists at the end of the input string and remove it
247
+ if load_arg.endswith("-range"):
248
+ load_arg = load_arg[:-6]
249
+
250
+ # Replace any '-' with '_' in the remaining string
251
+ data_key = load_arg.replace("-", "_")
252
+
253
+ return data_key
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  import logging
18
6
  import os
@@ -782,7 +770,11 @@ class MetricsManager:
782
770
  def _print_run_config_info(self, run_config):
783
771
  for model_run_config in run_config.model_run_configs():
784
772
  perf_config = model_run_config.perf_config()
785
- if perf_config["request-rate-range"]:
773
+ if perf_config["request-intervals"]:
774
+ logger.info(
775
+ f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}"
776
+ )
777
+ elif perf_config["request-rate-range"]:
786
778
  if perf_config["batch-size"] != 1:
787
779
  logger.info(
788
780
  f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, request-rate-range={perf_config['request-rate-range']}"
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  from functools import total_ordering
18
6
 
@@ -27,6 +15,17 @@ class GPUFreeMemory(IncreasingGPURecord):
27
15
 
28
16
  tag = "gpu_free_memory"
29
17
 
18
+ @staticmethod
19
+ def value_function():
20
+ """
21
+ Returns the total value from a list
22
+
23
+ Returns
24
+ -------
25
+ Total value of the list
26
+ """
27
+ return sum
28
+
30
29
  def __init__(self, value, device_uuid=None, timestamp=0):
31
30
  """
32
31
  Parameters
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  from functools import total_ordering
18
6
 
@@ -27,6 +15,17 @@ class GPUTotalMemory(IncreasingGPURecord):
27
15
 
28
16
  tag = "gpu_total_memory"
29
17
 
18
+ @staticmethod
19
+ def value_function():
20
+ """
21
+ Returns the total value from a list
22
+
23
+ Returns
24
+ -------
25
+ Total value of the list
26
+ """
27
+ return sum
28
+
30
29
  def __init__(self, value, device_uuid=None, timestamp=0):
31
30
  """
32
31
  Parameters
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  from functools import total_ordering
18
6
 
@@ -27,6 +15,17 @@ class GPUUsedMemory(DecreasingGPURecord):
27
15
 
28
16
  tag = "gpu_used_memory"
29
17
 
18
+ @staticmethod
19
+ def value_function():
20
+ """
21
+ Returns the total value from a list
22
+
23
+ Returns
24
+ -------
25
+ Total value of the list
26
+ """
27
+ return sum
28
+
30
29
  def __init__(self, value, device_uuid=None, timestamp=0):
31
30
  """
32
31
  Parameters
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  import logging
18
6
  from math import log2
@@ -37,6 +25,8 @@ class ParameterSearch:
37
25
  - Will sweep from by powers of two from min to max parameter
38
26
  - If the user specifies a constraint, the algorithm will perform a binary search
39
27
  around the boundary if the constraint is violated
28
+ - Will not sweep at all if custom stimulus is provided by the user (via the
29
+ "request-intervals" perf analyzer flag)
40
30
 
41
31
  Invariant: It is necessary for the user to add new measurements as they are taken
42
32
  """
@@ -45,6 +35,7 @@ class ParameterSearch:
45
35
  self,
46
36
  config: ConfigCommandProfile,
47
37
  model_parameters: dict = {},
38
+ perf_analyzer_flags: dict = {},
48
39
  skip_parameter_sweep: bool = False,
49
40
  ) -> None:
50
41
  """
@@ -59,6 +50,7 @@ class ParameterSearch:
59
50
  self._parameter_is_request_rate = config.is_request_rate_specified(
60
51
  model_parameters
61
52
  )
53
+ self._inference_load_is_custom = "request-intervals" in perf_analyzer_flags
62
54
 
63
55
  if self._parameter_is_request_rate:
64
56
  self._min_parameter_index = int(
@@ -98,10 +90,11 @@ class ParameterSearch:
98
90
  a binary parameter search around the point where the constraint
99
91
  violated
100
92
  """
101
- yield from self._perform_parameter_sweep()
93
+ if not self._inference_load_is_custom:
94
+ yield from self._perform_parameter_sweep()
102
95
 
103
- if self._was_constraint_violated():
104
- yield from self._perform_binary_parameter_search()
96
+ if self._was_constraint_violated():
97
+ yield from self._perform_binary_parameter_search()
105
98
 
106
99
  def _perform_parameter_sweep(self) -> Generator[int, None, None]:
107
100
  for parameter in (
@@ -1,18 +1,6 @@
1
1
  #!/usr/bin/env python3
2
-
3
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
2
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
16
4
 
17
5
  import logging
18
6
  from copy import deepcopy
@@ -48,6 +36,9 @@ class RunConfigMeasurement:
48
36
  self._model_variants_name = model_variants_name
49
37
 
50
38
  self._gpu_data = gpu_data
39
+ # Note: "_avg_gpu_data" is a historical name. This actually contains
40
+ # aggregated GPU metrics: memory metrics are SUMMED, while utilization
41
+ # and power metrics are AVERAGED across GPUs.
51
42
  self._avg_gpu_data = self._average_list(list(self._gpu_data.values()))
52
43
  self._avg_gpu_data_from_tag = self._get_avg_gpu_data_from_tag()
53
44
 
@@ -213,7 +204,9 @@ class RunConfigMeasurement:
213
204
 
214
205
  def get_gpu_metric(self, tag: str) -> Optional[Record]:
215
206
  """
216
- Returns the average of Records associated with this GPU metric
207
+ Returns the aggregated Record associated with this GPU metric
208
+ across all GPUs. GPU memory metrics are summed; other metrics
209
+ like utilization are averaged.
217
210
 
218
211
  Parameters
219
212
  ----------
@@ -224,7 +217,7 @@ class RunConfigMeasurement:
224
217
  Returns
225
218
  -------
226
219
  Record:
227
- of average GPU metric Records corresponding to this tag,
220
+ of aggregated GPU metric Records corresponding to this tag,
228
221
  or None if tag not found
229
222
  """
230
223
  if tag in self._avg_gpu_data_from_tag:
@@ -320,8 +313,10 @@ class RunConfigMeasurement:
320
313
  Returns
321
314
  -------
322
315
  float :
323
- Average of the values of the GPU metric Records
316
+ Aggregated value of the GPU metric Records across all GPUs
324
317
  corresponding to the tag, default_value if tag not found.
318
+ GPU memory metrics are summed; other metrics like utilization
319
+ are averaged.
325
320
  """
326
321
  metric = self.get_gpu_metric(tag)
327
322
  if metric is None:
@@ -615,7 +610,9 @@ class RunConfigMeasurement:
615
610
 
616
611
  def _average_list(self, row_list):
617
612
  """
618
- Average a 2d list
613
+ Aggregate a 2d list of GPU records across GPUs.
614
+ Uses each record type's value_function() to determine
615
+ whether to sum or average the metric.
619
616
  """
620
617
 
621
618
  if not row_list:
@@ -623,13 +620,21 @@ class RunConfigMeasurement:
623
620
  else:
624
621
  N = len(row_list)
625
622
  d = len(row_list[0])
626
- avg = [0 for _ in range(d)]
623
+ agg = [0 for _ in range(d)]
627
624
  for i in range(d):
628
- avg[i] = (
629
- sum([row_list[j][i] for j in range(1, N)], start=row_list[0][i])
630
- * 1.0
631
- ) / N
632
- return avg
625
+ # Sum the records across all GPUs
626
+ summed_record = sum(
627
+ [row_list[j][i] for j in range(1, N)], start=row_list[0][i]
628
+ )
629
+ # Get the aggregation function for this record type
630
+ value_func = row_list[0][i].value_function()
631
+ # If the value function is sum, use the sum directly
632
+ # If the value function is mean/average, divide by N
633
+ if value_func == sum:
634
+ agg[i] = summed_record
635
+ else:
636
+ agg[i] = summed_record / N
637
+ return agg
633
638
 
634
639
  def _deserialize_gpu_data(
635
640
  self, serialized_gpu_data: Dict
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: triton-model-analyzer
3
- Version: 1.48.0
3
+ Version: 1.49.0
4
4
  Summary: Triton Model Analyzer is a tool to profile and analyze the runtime performance of one or more models on the Triton Inference Server
5
5
  Author-email: "NVIDIA Inc." <sw-dl-triton@nvidia.com>
6
6
  License-Expression: Apache-2.0
@@ -11,7 +11,7 @@ model_analyzer/config/__init__.py,sha256=1vqDBvdG5Wrb-tDrHGyS9LC_cQst-QqLt5wOa3T
11
11
  model_analyzer/config/generate/__init__.py,sha256=4XgtPRt-6E1qrP6lxcqPIaEQb3V0yYu6EMtnVwAKdIY,647
12
12
  model_analyzer/config/generate/automatic_model_config_generator.py,sha256=EhpeYZlkAoSiKq1_QunXUBzbB7jPXSU2kyi3UpnzuOs,5797
13
13
  model_analyzer/config/generate/base_model_config_generator.py,sha256=sT8wghsLRgVT7-AWMTkrvdX-CeahFYdXzGEW0QgnQaI,12817
14
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py,sha256=qFmo7O3XarsUqPvkjDxWJTGwm7BIZpACVmcG6bmKrUk,6285
14
+ model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py,sha256=fQ4NGO-1nUpX1wHbw6DPqnlXPGNCBLE_MrfWwYMpInk,6175
15
15
  model_analyzer/config/generate/brute_run_config_generator.py,sha256=bKYV3jFNpeE4cCIvBG8a3VYItjPzA0Lu_hyFukaQsUw,5484
16
16
  model_analyzer/config/generate/concurrency_sweeper.py,sha256=_VRCRw6iNgF3fJBdP46mywsy9Jz0QTBNSXyIHM4X0gk,2907
17
17
  model_analyzer/config/generate/config_generator_interface.py,sha256=4r2u5t7yrAqEeDTsfiPWEgqU0pesG3N5DChfweFPLm8,1681
@@ -20,15 +20,15 @@ model_analyzer/config/generate/coordinate_data.py,sha256=deqoRIvO0aN5D5sNDZcjmT0
20
20
  model_analyzer/config/generate/generator_utils.py,sha256=5hj7qGzVs-oHk6UK_ggBzKJiTDVdX6MSLrXyEfU-kFE,4157
21
21
  model_analyzer/config/generate/manual_model_config_generator.py,sha256=_Y78ORijg5E6c2a2zESwJzK55S8fbhoy2rqFTQq-seA,7088
22
22
  model_analyzer/config/generate/model_config_generator_factory.py,sha256=5BjSJxRmrEVy3LyjIPnMT-QCz8GABa62oY1VvJfmhbE,3334
23
- model_analyzer/config/generate/model_profile_spec.py,sha256=_OU1h_VTRbO7hdN7uKm_Emp-yZlpzRVk6_rQ5Kxx8Sc,2724
23
+ model_analyzer/config/generate/model_profile_spec.py,sha256=fOCTNSK_GOOQ1FaRuCo_rDeVEf8H_67hUVERFlRndtM,2739
24
24
  model_analyzer/config/generate/model_run_config_generator.py,sha256=Ip1djtZ3wlwG_Zs3YivIlJxmwS1TsNa_7aMCU9bCQP0,5484
25
25
  model_analyzer/config/generate/model_variant_name_manager.py,sha256=qPAcvDQhkbArkW-A1A1mdRyqALUgfLlyV3dhNV6jKgs,5362
26
26
  model_analyzer/config/generate/neighborhood.py,sha256=s9g9igetHvF6Xm9FhuzjbhPBVWO65SngABrX3RGOUBQ,19481
27
27
  model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py,sha256=iYCzT2gH-DH5mNbRMGM_UNyPNfRkObvjcrvy0TSKPD8,5861
28
28
  model_analyzer/config/generate/optuna_run_config_generator.py,sha256=Z09p5tZP8wl0hp9NpHXpGovv1dZZkZzH11FnuXe8HhU,33099
29
- model_analyzer/config/generate/perf_analyzer_config_generator.py,sha256=DVn5efgGWun8KwCnee1VdD9yCq0rtkB_8IvlkSaoqRk,11361
30
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py,sha256=QOd2qfQYgXbLmqxYZVQKllEvhfsyfAKGks-J5z42FLU,5091
31
- model_analyzer/config/generate/quick_run_config_generator.py,sha256=PEgmZuWL1R8VLgc15g0KJtsSrx-UIiTWy5XaIxxz-uw,28071
29
+ model_analyzer/config/generate/perf_analyzer_config_generator.py,sha256=cCLQTV5V65QelGFMP3B4JaYvejNZj4tHJUQSZ3nTZSY,11112
30
+ model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py,sha256=hAL6YD6hqLDL7ttOgJuDIUvEe6BpHNlYv82cQdHUo5o,4516
31
+ model_analyzer/config/generate/quick_run_config_generator.py,sha256=ccs8W7Bh20pmxel5iQF92pF7pirpaBQeveQNhUwWPq0,27719
32
32
  model_analyzer/config/generate/run_config_generator_factory.py,sha256=KPjZGz0R-upqf7pwrtWrVRroNC-aDPeGZd63XR92zDU,13008
33
33
  model_analyzer/config/generate/search_config.py,sha256=mvQaoQ9wIeNEHWJtIwXh0JujPQZqXLLXmWpI7uBHDGA,3655
34
34
  model_analyzer/config/generate/search_dimension.py,sha256=iUoh4IhhMy9zhMhaY-q9DoBEku4--4o4j8RYQfXUVVk,2375
@@ -39,7 +39,7 @@ model_analyzer/config/input/__init__.py,sha256=1vqDBvdG5Wrb-tDrHGyS9LC_cQst-QqLt
39
39
  model_analyzer/config/input/config_command.py,sha256=63Y6aJrGBA-tR5eljlZSNwgFbHc4hZYd8mEOpmdrJxc,18776
40
40
  model_analyzer/config/input/config_command_profile.py,sha256=P5V0y_J3p6BCDCu_QBsl5AXaz_Y73y7CpiessWbuCy4,70509
41
41
  model_analyzer/config/input/config_command_report.py,sha256=Igiid6XK5OB23jvXZKPI4Pg5ZsSDkXgHU-QAGOo6_-E,9535
42
- model_analyzer/config/input/config_defaults.py,sha256=CnaT5JjCq0ne3977y5Yvcpi9fjsUdNXc45NbUU6DWe4,6684
42
+ model_analyzer/config/input/config_defaults.py,sha256=DYmjHUPQxqhlhRmE2VtzLBHhdwhk-zXkMrlTJuYQGTg,6202
43
43
  model_analyzer/config/input/config_enum.py,sha256=lu9cUz_MY52EcLEXxcLYWqB5If6FhsM1K8w1srzhDt4,2386
44
44
  model_analyzer/config/input/config_field.py,sha256=XlqlouNuPENEYCBLYtHxsfMACrFlcYZGkbh6yE1psNo,5156
45
45
  model_analyzer/config/input/config_list_generic.py,sha256=-4vhDxIEMPOwYcUIkMIFzxJ61mOmX6GEgGCpTfbCFsc,3355
@@ -103,24 +103,24 @@ model_analyzer/output/file_writer.py,sha256=NbAbjLFBcHZMYOJF-3Af9dqGVjS8WRGsQDid
103
103
  model_analyzer/output/output_writer.py,sha256=JZ-e0uzrpNybKaAup0hLmvIuGn9v1Z5OsB8yMHX074A,1200
104
104
  model_analyzer/perf_analyzer/__init__.py,sha256=oA0IPbS32uHtDFDRhaVyzOCU717OJAgXZkq9db1uGaA,647
105
105
  model_analyzer/perf_analyzer/genai_perf_config.py,sha256=9g4081ttiAqV1H3vft3Brqx3nArslyAFspJODboj72g,5673
106
- model_analyzer/perf_analyzer/perf_analyzer.py,sha256=b9DZH6tyDP4yH2c2aHckKsWjoZG_07NBf-EuQigwqPU,31530
107
- model_analyzer/perf_analyzer/perf_config.py,sha256=jAtf10JIQCEwzeOzoQy6o66uIabP0ikmfEQKyy0XfrQ,14504
106
+ model_analyzer/perf_analyzer/perf_analyzer.py,sha256=wcE6CIL_OKHFnJkipXb_mP8koBLXHOZb51OT4kR-JMQ,31067
107
+ model_analyzer/perf_analyzer/perf_config.py,sha256=nQHxZaIGp--JoCeTp2c5spA_o5o6H4dAgnLyrmvlGvM,14718
108
108
  model_analyzer/plots/__init__.py,sha256=1vqDBvdG5Wrb-tDrHGyS9LC_cQst-QqLt5wOa3TD9rI,647
109
- model_analyzer/plots/detailed_plot.py,sha256=wEBl4mrE9k-JNKheY78b8pnziRoXiXd7xvDLivGPJsE,8567
109
+ model_analyzer/plots/detailed_plot.py,sha256=nRxPM3MmsUsvmKpzJYST991pcmYCEXQWj2EhA11gTXg,8164
110
110
  model_analyzer/plots/plot_manager.py,sha256=CFDUSF8Xm-1Uesg8dQSC5z8JSXa40mKtDNN2kVKP0ek,8962
111
111
  model_analyzer/plots/simple_plot.py,sha256=udtWBXy_Y2UPWFEP4xhb-FWfjCjB4OWJwWN5lyJALhc,7163
112
112
  model_analyzer/record/__init__.py,sha256=oA0IPbS32uHtDFDRhaVyzOCU717OJAgXZkq9db1uGaA,647
113
113
  model_analyzer/record/gpu_record.py,sha256=7X9g6Y1efqiA9a9i80iIxLLWnk6121pYtk36WD8d62c,1872
114
- model_analyzer/record/metrics_manager.py,sha256=yy_rfXEM2xNsOlMTd_wy__aPQovZE23o4Oc4GsPleBs,32437
114
+ model_analyzer/record/metrics_manager.py,sha256=D3gBuL73maQbWNq_gNMUVXisfZnwVwhiMHFgJ2ZB_3c,32173
115
115
  model_analyzer/record/record.py,sha256=asjFF8E3t6abXYd9RZCOG-w1AVVZRb5u9PHgxvMqgGA,7284
116
116
  model_analyzer/record/record_aggregator.py,sha256=YFA1Av6m3oB_0kTGgvtUZEt4TZz8vL85uqnDCVRdvJI,8051
117
117
  model_analyzer/record/types/__init__.py,sha256=oA0IPbS32uHtDFDRhaVyzOCU717OJAgXZkq9db1uGaA,647
118
118
  model_analyzer/record/types/cpu_available_ram.py,sha256=DHhg_pePj8A0gSCPLIAHJnxZpdwoaWcaRMiBHyC5nTU,2431
119
119
  model_analyzer/record/types/cpu_used_ram.py,sha256=h26jXoyGBHEeiE2Mvmwo6hpVaGBBYvZNQ8Fo0EMIBL4,2412
120
- model_analyzer/record/types/gpu_free_memory.py,sha256=aIttS0ZgBIlBB_u_OU4ZeaNZNnv0jyI-TV0mfVYJTQQ,2626
120
+ model_analyzer/record/types/gpu_free_memory.py,sha256=_pksqjMi3guIMBIGVEpBamY86nT1V8o2phvEfvhsHBM,2341
121
121
  model_analyzer/record/types/gpu_power_usage.py,sha256=eIlggXrIJ3E9qveprOiVNrMsITom4WiWKsLaDJMD8Lg,2903
122
- model_analyzer/record/types/gpu_total_memory.py,sha256=zIou4DWtx0XJ1isBYZQDGjaYkl0bgZKGFlZxxgRw-I0,2631
123
- model_analyzer/record/types/gpu_used_memory.py,sha256=MH8MfPbXJFXGVNdeQDg5QMDvmdq0BOmKjydMauLIdnU,2622
122
+ model_analyzer/record/types/gpu_total_memory.py,sha256=QI_MFIP2G38mmpsml6Qmyz3bvG5tgKCpa8RziWIfM4k,2346
123
+ model_analyzer/record/types/gpu_used_memory.py,sha256=3tdjaSffCVLcfG7kLiNs3muwm-mBZld-n2VEKwJ450w,2337
124
124
  model_analyzer/record/types/gpu_utilization.py,sha256=h9wqr1NtBDpYzHKTmZyXUhRriEz1_DAJRfYITmSQdsE,2880
125
125
  model_analyzer/record/types/inter_token_latency_avg.py,sha256=i7jqWwdBcRyMblzdv-s8d1ET2-BdQq39WPt5WFaO7nE,1744
126
126
  model_analyzer/record/types/inter_token_latency_base.py,sha256=fUf-0aT_TbbIH7iKbQDEu2sAser6G24--MAn8Vac4bA,2062
@@ -167,14 +167,14 @@ model_analyzer/result/__init__.py,sha256=1vqDBvdG5Wrb-tDrHGyS9LC_cQst-QqLt5wOa3T
167
167
  model_analyzer/result/constraint_manager.py,sha256=nfHsBbX-66QN19uMH8ZBn68_mAaL0M30HMIG9N8KcBg,5005
168
168
  model_analyzer/result/model_config_measurement.py,sha256=qwsPF-ea17Gl5EyFu9OWNWJrvl5hWzeqAkQo5bPgwfE,10657
169
169
  model_analyzer/result/model_constraints.py,sha256=HXtbGK8mswXpCwWwrAantXb4sxhGo1bCWurwf9vW9KA,2687
170
- model_analyzer/result/parameter_search.py,sha256=vvlt8A-vTU7FOTsJwCv8P6-4QmxNgnIwC-o1ts72LTo,9069
170
+ model_analyzer/result/parameter_search.py,sha256=kw-kkxV7FlHKYThcdZoybaIxH-dILP5lAmdNc1OW_RI,8901
171
171
  model_analyzer/result/result_manager.py,sha256=e_PWQV_33IM4SiLI7BGcmVtEMAe6IE_dZWbaf-DkXBo,15477
172
172
  model_analyzer/result/result_statistics.py,sha256=wvqs64W8rZftN59rcQiH7Bmd0dfSAD7a9BGK_K1LYTI,4343
173
173
  model_analyzer/result/result_table.py,sha256=NYwiPtkD_uSBn61omvkMvuLi-FBdMq0wHDlgcz9Gk7w,5843
174
174
  model_analyzer/result/result_table_manager.py,sha256=UKAMSbS8o3CtM_4uSNaDNWxX6ZlGRqohgVcrZeihtgY,23643
175
175
  model_analyzer/result/result_utils.py,sha256=EJXMo7csgqwonROb8_-hFeysnu-JDfj2Gf8naWHsw2s,1379
176
176
  model_analyzer/result/results.py,sha256=YZlz5oJPPYNM4Nub4r_UNKMphKDT44_30iVjp5S4lds,8675
177
- model_analyzer/result/run_config_measurement.py,sha256=E1pN6ozY2EMjLbycF2crpUDlJew3b3j2uDgHOJvnC40,21047
177
+ model_analyzer/result/run_config_measurement.py,sha256=CKlz8l1dvcFaThZpeKrEr9eLw3drlT3gVBSrsFZf6ZU,21577
178
178
  model_analyzer/result/run_config_result.py,sha256=cs2fE-ISeInBErfJYG_L_3BuJjXNPDzIASYmwgU1JYE,6422
179
179
  model_analyzer/result/run_config_result_comparator.py,sha256=xLRYEdF2TLRSbCl0Qj3OTfNiYa5RPtOkv_BtQibfwfc,3759
180
180
  model_analyzer/result/sorted_results.py,sha256=ScWaT44X4-kplFv4lo9AClXEpA22DtwA6t8GPS2BTiw,4694
@@ -196,9 +196,9 @@ model_analyzer/triton/server/server_config.py,sha256=3pNQAnUmSXVAO7pKLNWak7AFGm1
196
196
  model_analyzer/triton/server/server_docker.py,sha256=38e_tMniv2PBiEIh2KMxgqsMZlttL0yCrpD0lKd0r5g,8248
197
197
  model_analyzer/triton/server/server_factory.py,sha256=WMZfXFQfO9aeFxCWewQhRk9ygMcne0ershGZ75XQ5IE,11152
198
198
  model_analyzer/triton/server/server_local.py,sha256=i5t5MaDwfvWqBA3zG15GPkGiUm6I4Bb4i0d-wBP0WNs,5406
199
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE,sha256=vs3gOjyLmy49WMe2XKf_2eGOmBYjlsw3QFKYPB-qpHw,9144
200
- triton_model_analyzer-1.48.0.dist-info/METADATA,sha256=EndFgBdQhc5lMdwR8MGlxmOV8R8tehbxFX6MONLSXbM,2512
201
- triton_model_analyzer-1.48.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
202
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt,sha256=k6Q0D76sL3qUNxe80hUThftycJAT7Sj5dBk8IjHXJoI,66
203
- triton_model_analyzer-1.48.0.dist-info/top_level.txt,sha256=mTebknhUDSE7cCVyUtpzNVQ-tEUi8zZKFvp15xpIP2c,15
204
- triton_model_analyzer-1.48.0.dist-info/RECORD,,
199
+ triton_model_analyzer-1.49.0.dist-info/licenses/LICENSE,sha256=vs3gOjyLmy49WMe2XKf_2eGOmBYjlsw3QFKYPB-qpHw,9144
200
+ triton_model_analyzer-1.49.0.dist-info/METADATA,sha256=ebuhiEKjcPklcz1N1BXDHRZRM2B_a1OyAHCGBeirh_I,2512
201
+ triton_model_analyzer-1.49.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
202
+ triton_model_analyzer-1.49.0.dist-info/entry_points.txt,sha256=k6Q0D76sL3qUNxe80hUThftycJAT7Sj5dBk8IjHXJoI,66
203
+ triton_model_analyzer-1.49.0.dist-info/top_level.txt,sha256=mTebknhUDSE7cCVyUtpzNVQ-tEUi8zZKFvp15xpIP2c,15
204
+ triton_model_analyzer-1.49.0.dist-info/RECORD,,