triton-model-analyzer 1.48.0__py3-none-any.whl → 1.50.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +10 -14
  2. model_analyzer/config/generate/model_profile_spec.py +76 -19
  3. model_analyzer/config/generate/perf_analyzer_config_generator.py +14 -22
  4. model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +2 -16
  5. model_analyzer/config/generate/quick_run_config_generator.py +86 -35
  6. model_analyzer/config/generate/run_config_generator_factory.py +195 -31
  7. model_analyzer/config/input/config_command.py +81 -20
  8. model_analyzer/config/input/config_command_profile.py +41 -14
  9. model_analyzer/config/input/config_defaults.py +3 -15
  10. model_analyzer/perf_analyzer/perf_analyzer.py +4 -16
  11. model_analyzer/perf_analyzer/perf_config.py +23 -15
  12. model_analyzer/plots/detailed_plot.py +41 -54
  13. model_analyzer/record/metrics_manager.py +7 -15
  14. model_analyzer/record/types/gpu_free_memory.py +13 -14
  15. model_analyzer/record/types/gpu_total_memory.py +13 -14
  16. model_analyzer/record/types/gpu_used_memory.py +13 -14
  17. model_analyzer/result/parameter_search.py +10 -17
  18. model_analyzer/result/run_config_measurement.py +29 -24
  19. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/METADATA +1 -1
  20. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/RECORD +24 -24
  21. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/WHEEL +1 -1
  22. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/entry_points.txt +0 -0
  23. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/licenses/LICENSE +0 -0
  24. {triton_model_analyzer-1.48.0.dist-info → triton_model_analyzer-1.50.0.dist-info}/top_level.txt +0 -0

model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py

@@ -1,18 +1,6 @@
  #!/usr/bin/env python3
-
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0

  import logging
  from copy import deepcopy
@@ -132,9 +120,11 @@ class BrutePlusBinaryParameterSearchRunConfigGenerator(ConfigGeneratorInterface)
              for result in top_results:
                  run_config = deepcopy(result.run_config())
                  model_parameters = self._get_model_parameters(model_name)
+                 perf_analyzer_flags = self._get_model_perf_analyzer_flags(model_name)
                  parameter_search = ParameterSearch(
                      config=self._config,
                      model_parameters=model_parameters,
+                     perf_analyzer_flags=perf_analyzer_flags,
                      skip_parameter_sweep=True,
                  )
                  for parameter in parameter_search.search_parameters():
@@ -151,6 +141,12 @@ class BrutePlusBinaryParameterSearchRunConfigGenerator(ConfigGeneratorInterface)

          return {}

+     def _get_model_perf_analyzer_flags(self, model_name: str) -> Dict:
+         for model in self._models:
+             if model_name == model.model_name():
+                 return model.perf_analyzer_flags()
+         return {}
+
      def _set_parameter(
          self, run_config: RunConfig, model_parameters: Dict, parameter: int
      ) -> RunConfig:
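
The new `_get_model_perf_analyzer_flags` helper mirrors the existing `_get_model_parameters` lookup-by-name pattern, so each model's perf_analyzer_flags can be forwarded into `ParameterSearch` (which also changed in this release, file 17). A minimal runnable sketch of that pattern, using a hypothetical `FakeModel` stand-in for the real config model objects:

```python
from typing import Dict, List


class FakeModel:
    """Hypothetical stand-in for a configured model spec."""

    def __init__(self, name: str, flags: Dict):
        self._name = name
        self._flags = flags

    def model_name(self) -> str:
        return self._name

    def perf_analyzer_flags(self) -> Dict:
        return self._flags


def get_model_perf_analyzer_flags(models: List[FakeModel], model_name: str) -> Dict:
    # Same lookup-by-name shape as the new helper in the hunk above
    for model in models:
        if model_name == model.model_name():
            return model.perf_analyzer_flags()
    return {}


models = [FakeModel("resnet50", {"request-intervals": "intervals.json"})]
assert get_model_perf_analyzer_flags(models, "resnet50") == {
    "request-intervals": "intervals.json"
}
assert get_model_perf_analyzer_flags(models, "unknown") == {}
```
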

model_analyzer/config/generate/model_profile_spec.py

@@ -1,27 +1,16 @@
  #!/usr/bin/env python3
-
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0

  from copy import deepcopy
- from typing import List
+ from typing import List, Optional

  from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
  from model_analyzer.config.input.objects.config_model_profile_spec import (
      ConfigModelProfileSpec,
  )
  from model_analyzer.device.gpu_device import GPUDevice
+ from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
  from model_analyzer.triton.client.client import TritonClient
  from model_analyzer.triton.model.model_config import ModelConfig
@@ -38,15 +27,72 @@ class ModelProfileSpec(ConfigModelProfileSpec):
          client: TritonClient,
          gpus: List[GPUDevice],
      ):
-         super().__init__(spec.model_name())
-         self.__dict__ = deepcopy(spec.__dict__)
+         # Determine cpu_only based on priority (before copying spec's __dict__):
+         # 1) User-specified kind in instance_group (highest)
+         # 2) cpu_only_composing_models config
+         # 3) Inherited from spec (default False)
+         explicit_kind = self._get_explicit_instance_kind(spec)
+         if explicit_kind == "KIND_CPU":
+             cpu_only = True
+         elif explicit_kind == "KIND_GPU":
+             cpu_only = False
+         elif spec.model_name() in config.cpu_only_composing_models:
+             cpu_only = True
+         else:
+             cpu_only = spec.cpu_only()
+
+         super().__init__(spec.model_name(), cpu_only=cpu_only)
+
+         # Copy remaining attributes from spec (excluding _cpu_only which we set above)
+         spec_dict = deepcopy(spec.__dict__)
+         spec_dict.pop("_cpu_only", None)
+         self.__dict__.update(spec_dict)

          self._default_model_config = ModelConfig.create_model_config_dict(
              config, client, gpus, config.model_repository, spec.model_name()
          )

-         if spec.model_name() in config.cpu_only_composing_models:
-             self._cpu_only = True
+     @staticmethod
+     def _get_explicit_instance_kind(spec: ConfigModelProfileSpec) -> Optional[str]:
+         """
+         Check if the spec has an explicit kind specified in instance_group.
+
+         Returns the kind if explicitly specified, None otherwise.
+         This allows users to specify KIND_CPU or KIND_GPU directly in
+         model_config_parameters.instance_group instead of using the
+         separate cpu_only_composing_models config option.
+
+         The config parser may wrap values in lists for sweep support, so we need
+         to handle structures like:
+         - [[{'count': [1, 2, 4], 'kind': ['KIND_CPU']}]] (double-wrapped, kind is list)
+         - [{'count': [1, 2, 4], 'kind': 'KIND_CPU'}] (single-wrapped, kind is string)
+         """
+         model_config_params = spec.model_config_parameters()
+         if model_config_params is None:
+             return None
+
+         instance_group = model_config_params.get("instance_group")
+         if instance_group is None or not isinstance(instance_group, list):
+             return None
+
+         # instance_group structure can be doubly wrapped due to config parsing:
+         # [[ {'kind': ['KIND_GPU'], 'count': [1, 2, 4]} ]]
+         # Unwrap the nested structure if needed
+         if len(instance_group) > 0 and isinstance(instance_group[0], list):
+             instance_group = instance_group[0]
+
+         # instance_group is now a list of dicts, each potentially containing 'kind'
+         for ig in instance_group:
+             if isinstance(ig, dict):
+                 kind = ig.get("kind")
+                 # Handle case where kind is wrapped in a list by config parser
+                 # e.g., ['KIND_CPU'] instead of 'KIND_CPU'
+                 if isinstance(kind, list) and len(kind) > 0:
+                     kind = kind[0]
+                 if kind in ("KIND_CPU", "KIND_GPU"):
+                     return kind
+
+         return None

      def get_default_config(self) -> dict:
          """Returns the default configuration for this model"""
@@ -72,3 +118,14 @@ class ModelProfileSpec(ConfigModelProfileSpec):
      def is_ensemble(self) -> bool:
          """Returns true if the model is an ensemble"""
          return "ensemble_scheduling" in self._default_model_config
+
+     def is_load_specified(self) -> bool:
+         """
+         Returns true if the model's PA config has specified any of the
+         inference load args (such as concurrency). Else returns false
+         """
+         load_args = PerfAnalyzerConfig.get_inference_load_args()
+         pa_flags = self.perf_analyzer_flags()
+         if pa_flags is None:
+             return False
+         return any(e in pa_flags for e in load_args)
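
`is_load_specified` relies on the new `PerfAnalyzerConfig.get_inference_load_args()` added in the perf_config.py change (file 11). The exact list it returns is not shown in this diff; the sketch below assumes it covers the three loads named elsewhere in this diff (concurrency-range, request-rate-range, request-intervals):

```python
from typing import Dict, List, Optional

# Assumed return value of PerfAnalyzerConfig.get_inference_load_args();
# the real list lives in perf_config.py and is not visible in this diff.
INFERENCE_LOAD_ARGS: List[str] = [
    "concurrency-range",
    "request-rate-range",
    "request-intervals",
]


def is_load_specified(pa_flags: Optional[Dict]) -> bool:
    # Same shape as ModelProfileSpec.is_load_specified above
    if pa_flags is None:
        return False
    return any(arg in pa_flags for arg in INFERENCE_LOAD_ARGS)


assert is_load_specified({"concurrency-range": "16:64:2"}) is True
assert is_load_specified({"percentile": "99"}) is False
assert is_load_specified(None) is False
```
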

model_analyzer/config/generate/perf_analyzer_config_generator.py

@@ -1,18 +1,6 @@
  #!/usr/bin/env python3
-
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0

  import logging
  from typing import Generator, List, Optional
@@ -169,10 +157,12 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
          self._parameter_results.extend(measurement)

      def _create_parameter_list(self) -> List[int]:
-         # The two possible parameters are request rate or concurrency
-         # Concurrency is the default and will be used unless the user specifies
-         # request rate, either as a model parameter or a config option
-         if self._cli_config.is_request_rate_specified(self._model_parameters):
+         # Determines the inference load (concurrency or request-rate or request-intervals)
+         # and creates the list of values to use. If nothing is specified by the user, then
+         # concurrency will be used.
+         if "request-intervals" in self._perf_analyzer_flags:
+             return [self._perf_analyzer_flags["request-intervals"]]
+         elif self._cli_config.is_request_rate_specified(self._model_parameters):
              return self._create_request_rate_list()
          else:
              return self._create_concurrency_list()
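
The new precedence order is: an explicit request-intervals flag wins, then request-rate (model parameter or config option), then the concurrency default. A condensed sketch of the branch, with hypothetical stand-in lists for the two sweep builders:

```python
from typing import Dict, List


def create_parameter_list(
    perf_analyzer_flags: Dict, request_rate_specified: bool
) -> List:
    # Mirrors _create_parameter_list's precedence order above
    if "request-intervals" in perf_analyzer_flags:
        # A single "parameter": the intervals file drives the load directly
        return [perf_analyzer_flags["request-intervals"]]
    elif request_rate_specified:
        return [16, 32, 64]  # stand-in for _create_request_rate_list()
    else:
        return [1, 2, 4, 8]  # stand-in for _create_concurrency_list()


assert create_parameter_list({"request-intervals": "i.json"}, True) == ["i.json"]
assert create_parameter_list({}, True) == [16, 32, 64]
assert create_parameter_list({}, False) == [1, 2, 4, 8]
```
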
@@ -207,7 +197,7 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
          for params in utils.generate_parameter_combinations(
              perf_config_non_parameter_values
          ):
-             configs_with_concurrency = []
+             configs_with_inference_load = []
              for parameter in self._parameters:
                  new_perf_config = PerfAnalyzerConfig()

@@ -217,7 +207,9 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):

                  new_perf_config.update_config(params)

-                 if self._cli_config.is_request_rate_specified(self._model_parameters):
+                 if "request-intervals" in self._perf_analyzer_flags:
+                     pass
+                 elif self._cli_config.is_request_rate_specified(self._model_parameters):
                      new_perf_config.update_config({"request-rate-range": parameter})
                  else:
                      new_perf_config.update_config({"concurrency-range": parameter})
@@ -225,8 +217,8 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
                  # User provided flags can override the search parameters
                  new_perf_config.update_config(self._perf_analyzer_flags)

-                 configs_with_concurrency.append(new_perf_config)
-             self._configs.append(configs_with_concurrency)
+                 configs_with_inference_load.append(new_perf_config)
+             self._configs.append(configs_with_inference_load)

      def _create_non_parameter_perf_config_values(self) -> dict:
          perf_config_values = {

model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py

@@ -1,21 +1,8 @@
  #!/usr/bin/env python3
-
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0

  import logging
- from copy import deepcopy
  from typing import Generator, List, Optional

  from model_analyzer.config.generate.concurrency_sweeper import ConcurrencySweeper
@@ -30,7 +17,6 @@ from model_analyzer.config.generate.search_config import SearchConfig
  from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
  from model_analyzer.config.run.run_config import RunConfig
  from model_analyzer.constants import LOGGER_NAME
- from model_analyzer.result.parameter_search import ParameterSearch
  from model_analyzer.result.result_manager import ResultManager
  from model_analyzer.result.run_config_measurement import RunConfigMeasurement


model_analyzer/config/generate/quick_run_config_generator.py

@@ -1,18 +1,6 @@
  #!/usr/bin/env python3
-
- # Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
+ # SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0

  import logging
  from sys import maxsize
@@ -37,6 +25,7 @@ from model_analyzer.config.input.config_defaults import DEFAULT_BATCH_SIZES
  from model_analyzer.config.run.model_run_config import ModelRunConfig
  from model_analyzer.config.run.run_config import RunConfig
  from model_analyzer.constants import LOGGER_NAME
+ from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
  from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
  from model_analyzer.result.run_config_measurement import RunConfigMeasurement
  from model_analyzer.triton.model.model_config import ModelConfig
@@ -437,33 +426,53 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):
          )

          model_config_params = deepcopy(model.model_config_parameters())
+
+         # Extract user-specified instance_group kind before removing it
+         instance_kind = self._extract_instance_group_kind(model_config_params)
+         if not instance_kind:
+             # Fallback to cpu_only flag
+             instance_kind = "KIND_CPU" if model.cpu_only() else "KIND_GPU"
+
          if model_config_params:
+             # Remove parameters that are controlled by search dimensions
              model_config_params.pop("max_batch_size", None)
+             model_config_params.pop("instance_group", None)

-             # This is guaranteed to only generate one combination (check is in config_command)
+             # Generate combinations from remaining parameters
+             # For composing models, this may include dynamic_batching settings, etc.
              param_combos = GeneratorUtils.generate_combinations(model_config_params)
-             assert len(param_combos) == 1

-             param_combo = param_combos[0]
+             # Top-level models must have exactly 1 combination (validated earlier)
+             # Composing models can have 1 combination (non-searchable params are fixed)
+             if len(param_combos) > 1:
+                 raise TritonModelAnalyzerException(
+                     f"Model {model.model_name()} has multiple parameter combinations "
+                     f"after removing searchable parameters. This should have been caught "
+                     f"during config validation."
+                 )
+
+             param_combo = param_combos[0] if param_combos else {}
          else:
              param_combo = {}

-         kind = "KIND_CPU" if model.cpu_only() else "KIND_GPU"
+         # Add instance_group with count from dimension and kind from config
          instance_count = self._calculate_instance_count(dimension_values)
-
          param_combo["instance_group"] = [
              {
                  "count": instance_count,
-                 "kind": kind,
+                 "kind": instance_kind,
              }
          ]

+         # Add max_batch_size from dimension if applicable
          if "max_batch_size" in dimension_values:
              param_combo["max_batch_size"] = self._calculate_model_batch_size(
                  dimension_values
              )

-         if model.supports_dynamic_batching():
+         # Add default dynamic_batching if model supports it and not already specified
+         # Preserves user-specified dynamic_batching settings (single combinations only)
+         if model.supports_dynamic_batching() and "dynamic_batching" not in param_combo:
              param_combo["dynamic_batching"] = {}

          model_config_variant = BaseModelConfigGenerator.make_model_config_variant(
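
Putting the pieces together: for a model configured with an instance_group kind of KIND_CPU plus a user dynamic_batching setting, and a search dimension choosing instance count 2 and max_batch_size 8, the resulting param_combo would look roughly like the sketch below. All concrete values are hypothetical, and the unwrapping of the single-element sweep list by `GeneratorUtils.generate_combinations` is an assumption about behavior not shown in this diff:

```python
# Hypothetical inputs: what the user put in model_config_parameters ...
model_config_params = {
    "instance_group": [[{"kind": ["KIND_CPU"]}]],
    "dynamic_batching": {"max_queue_delay_microseconds": [100]},
}
# ... and what the current search dimensions selected
instance_count = 2
max_batch_size = 8

# Approximate result after _extract_instance_group_kind(), the pops, and the
# dimension-driven additions in the hunk above:
param_combo = {
    "dynamic_batching": {"max_queue_delay_microseconds": 100},  # user value kept
    "instance_group": [
        {
            "count": instance_count,  # from the search dimension
            "kind": "KIND_CPU",  # extracted from the user's instance_group
        }
    ],
    "max_batch_size": max_batch_size,  # from the search dimension
}
```
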
@@ -475,6 +484,48 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):
          )

          return model_config_variant

+     def _extract_instance_group_kind(self, model_config_params: dict) -> str:
+         """
+         Extract the 'kind' field from instance_group in model_config_parameters.
+
+         The config parser may wrap values in lists for sweep support, so we need
+         to handle structures like:
+         - [[{'count': [1, 2, 4], 'kind': ['KIND_CPU']}]] (double-wrapped, kind is list)
+         - [{'count': [1, 2, 4], 'kind': 'KIND_CPU'}] (single-wrapped, kind is string)
+
+         Returns empty string if not found or if instance_group is not specified.
+         """
+         if not model_config_params or "instance_group" not in model_config_params:
+             return ""
+
+         instance_group = model_config_params["instance_group"]
+
+         # Handle various nested list structures from config parsing
+         if isinstance(instance_group, list) and len(instance_group) > 0:
+             # Handle nested structure: [[ {...} ]]
+             while (
+                 isinstance(instance_group, list)
+                 and len(instance_group) > 0
+                 and isinstance(instance_group[0], list)
+             ):
+                 instance_group = instance_group[0]
+
+             # Now should have [{...}] structure
+             if (
+                 isinstance(instance_group, list)
+                 and len(instance_group) > 0
+                 and isinstance(instance_group[0], dict)
+             ):
+                 kind = instance_group[0].get("kind", "")
+                 # Handle case where kind is wrapped in a list by config parser
+                 # e.g., ['KIND_CPU'] instead of 'KIND_CPU'
+                 if isinstance(kind, list) and len(kind) > 0:
+                     kind = kind[0]
+                 if isinstance(kind, str) and kind in ("KIND_CPU", "KIND_GPU"):
+                     return kind
+
+         return ""
+
      def _create_next_model_run_config(
          self,
          model: ModelProfileSpec,
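
Unlike `_get_explicit_instance_kind` in model_profile_spec.py, which unwraps exactly one level, this version loops, so even deeper accidental nesting resolves to the same shape. A quick illustration of the loop in isolation (the triple-wrapped input is a hypothetical edge case, not one documented in the diff):

```python
def unwrap_nested_lists(value):
    # The while-loop from _extract_instance_group_kind, isolated: keep
    # peeling off list wrappers until the first element is not a list
    while isinstance(value, list) and len(value) > 0 and isinstance(value[0], list):
        value = value[0]
    return value


# Double- and triple-wrapped structures collapse to the same [{...}] shape
assert unwrap_nested_lists([[{"kind": "KIND_GPU"}]]) == [{"kind": "KIND_GPU"}]
assert unwrap_nested_lists([[[{"kind": "KIND_GPU"}]]]) == [{"kind": "KIND_GPU"}]
```
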
@@ -507,13 +558,13 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):

          perf_analyzer_config.update_config_from_profile_config(model_name, self._config)

-         concurrency = self._calculate_concurrency(dimension_values)
-
-         perf_config_params = {
-             "batch-size": DEFAULT_BATCH_SIZES,
-             "concurrency-range": concurrency,
-         }
-         perf_analyzer_config.update_config(perf_config_params)
+         if not model.is_load_specified():
+             concurrency = self._calculate_concurrency(dimension_values)
+             perf_config_params = {
+                 "batch-size": DEFAULT_BATCH_SIZES,
+                 "concurrency-range": concurrency,
+             }
+             perf_analyzer_config.update_config(perf_config_params)

          perf_analyzer_config.update_config(model.perf_analyzer_flags())
          return perf_analyzer_config
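
The effect of the `is_load_specified()` guard is ordering-sensitive: the search-driven batch-size/concurrency defaults are only written when the user has not pinned an inference load, and the user's perf_analyzer_flags are applied last either way. A sketch of the resulting flag resolution, using a plain dict merge as a stand-in for `update_config` and assuming DEFAULT_BATCH_SIZES is 1 (its value in config_defaults.py is not shown in this diff):

```python
from typing import Dict

DEFAULT_BATCH_SIZES = 1  # assumed value of the config_defaults constant


def build_perf_flags(
    load_specified: bool, sweep_concurrency: int, user_flags: Dict
) -> Dict:
    # Approximates the update_config call order in the method above
    flags: Dict = {}
    if not load_specified:
        flags.update(
            {"batch-size": DEFAULT_BATCH_SIZES, "concurrency-range": sweep_concurrency}
        )
    flags.update(user_flags)  # user flags are applied last, so they win
    return flags


# User pinned the load: the quick search no longer injects concurrency-range
assert build_perf_flags(True, 4, {"request-rate-range": "100"}) == {
    "request-rate-range": "100"
}
# No load specified: search-driven defaults are used
assert build_perf_flags(False, 4, {}) == {"batch-size": 1, "concurrency-range": 4}
```

The same guard is applied to the default-config path in the next hunk.
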
@@ -703,13 +754,13 @@ class QuickRunConfigGenerator(ConfigGeneratorInterface):
              model_config.get_field("name"), self._config
          )

-         default_concurrency = self._calculate_default_concurrency(model_config)
-
-         perf_config_params = {
-             "batch-size": DEFAULT_BATCH_SIZES,
-             "concurrency-range": default_concurrency,
-         }
-         default_perf_analyzer_config.update_config(perf_config_params)
+         if not model.is_load_specified():
+             default_concurrency = self._calculate_default_concurrency(model_config)
+             perf_config_params = {
+                 "batch-size": DEFAULT_BATCH_SIZES,
+                 "concurrency-range": default_concurrency,
+             }
+             default_perf_analyzer_config.update_config(perf_config_params)

          default_perf_analyzer_config.update_config(model.perf_analyzer_flags())